;;; llm-openai.scm - OpenAI provider implementation for llm egg
;;;
;;; BSD-3-Clause License
;;; Copyright (c) 2025, Rolando Abarca

(module llm-openai

(openai-http-client
 openai-call-api
 openai-provider
 openai-prepare-message
 openai-build-payload
 openai-parse-response
 openai-format-tool-result
 openai-get-model-pricing
 openai-extract-tool-calls
 openai-generate-image
 openai-transcribe-audio
 *openai-default-model*
 *openai-default-temperature*
 *openai-default-max-tokens*)

(import
 scheme
 chicken.base
 chicken.condition
 chicken.process-context
 chicken.port
 chicken.io
 chicken.string
 chicken.pathname
 chicken.tcp
 openssl
 http-client
 medea
 intarweb
 uri-common
 srfi-1
 srfi-13
 logger
 llm-provider
 llm-common)

(logger/install LLM-OPENAI)

;;; ================================================================
;;; OpenAI Model Configuration
;;; ================================================================

;; Model configurations with pricing (cost per 1M tokens in USD, Standard tier)
;; See https://platform.openai.com/docs/pricing for current pricing
(define *openai-model-configs*
  '(;; GPT-5.x models
    (gpt-5.2 . ((input-price-per-1m . 1.75)
                (output-price-per-1m . 14.00)))
    (gpt-5.1 . ((input-price-per-1m . 1.25)
                (output-price-per-1m . 10.00)))
    (gpt-5 . ((input-price-per-1m . 1.25)
              (output-price-per-1m . 10.00)))
    (gpt-5-mini . ((input-price-per-1m . 0.25)
                   (output-price-per-1m . 2.00)))
    (gpt-5-nano . ((input-price-per-1m . 0.05)
                   (output-price-per-1m . 0.40)))
    (gpt-5.2-pro . ((input-price-per-1m . 21.00)
                    (output-price-per-1m . 168.00)))
    (gpt-5-pro . ((input-price-per-1m . 15.00)
                  (output-price-per-1m . 120.00)))
    ;; GPT-4.x models
    (gpt-4.1 . ((input-price-per-1m . 2.00)
                (output-price-per-1m . 8.00)))
    (gpt-4.1-mini . ((input-price-per-1m . 0.40)
                     (output-price-per-1m . 1.60)))
    (gpt-4.1-nano . ((input-price-per-1m . 0.10)
                     (output-price-per-1m . 0.40)))
    (gpt-4o . ((input-price-per-1m . 2.50)
               (output-price-per-1m . 10.00)))
    (gpt-4o-mini . ((input-price-per-1m . 0.15)
                    (output-price-per-1m . 0.60)))
    ;; o-series reasoning models
    (o1 . ((input-price-per-1m . 15.00)
           (output-price-per-1m . 60.00)))
    (o1-pro . ((input-price-per-1m . 150.00)
               (output-price-per-1m . 600.00)))
    (o3 . ((input-price-per-1m . 2.00)
           (output-price-per-1m . 8.00)))
    (o3-pro . ((input-price-per-1m . 20.00)
               (output-price-per-1m . 80.00)))
    (o4-mini . ((input-price-per-1m . 1.10)
                (output-price-per-1m . 4.40)))
    (o3-mini . ((input-price-per-1m . 1.10)
                (output-price-per-1m . 4.40)))
    (o1-mini . ((input-price-per-1m . 1.10)
                (output-price-per-1m . 4.40)))))

;; Default settings for OpenAI
(define *openai-default-model* "gpt-5-nano")
(define *openai-default-temperature* 1)
(define *openai-default-max-tokens* 4000)

;;; ================================================================
;;; HTTP Client Infrastructure
;;; ================================================================

;; Get OpenAI API key from environment
(define (get-api-key)
  (or (get-environment-variable "OPENAI_API_KEY")
      (error "OPENAI_API_KEY environment variable must be set")))

;; Helper to detect if payload contains multipart form data
(define (multipart-payload? payload)
  (and (list? payload)
       (any (lambda (entry)
              (and (pair? entry)
                   (list? entry)
                   (memq 'file: entry)))
            payload)))

;; Helper to check if response should be text (not JSON)
(define (text-response? payload)
  (and (multipart-payload? payload)
       (let ((response-format (alist-ref 'response_format payload equal?)))
         (equal? response-format "text"))))

;; Default HTTP client implementation using http-client
;; Handles both JSON payloads and multipart form data
(define (default-openai-http-client endpoint payload)
  (let* ((api-key (get-api-key))
         ;; Split endpoint on "/" to create proper path segments
         (endpoint-segments (string-split endpoint "/"))
         (uri (make-uri scheme: 'https
                        host: "api.openai.com"
                        path: `(/ "v1" ,@endpoint-segments)))
         (is-multipart (multipart-payload? payload))
         (is-text-response (text-response? payload)))
    (condition-case
        (if is-multipart
            ;; For multipart, we need to manually set auth header in the request-vars
            ;; because http-client auto-detects multipart only when using raw URIs
            ;; Use longer timeout for file uploads (10 minutes)
            (let* ((req-headers (headers `((authorization #(,(string-append "Bearer " api-key) raw)))))
                   (req (make-request uri: uri
                                      method: 'POST
                                      headers: req-headers)))
              (let-values (((data req resp) (with-input-from-request req
                                                                     payload
                                                                     (if is-text-response
                                                                         read-string
                                                                         read-json))))
                data))
            ;; For JSON payloads, use the standard approach
            (let* ((req-headers (headers `((content-type application/json)
                                           (authorization #(,(string-append "Bearer " api-key) raw)))))
                   (req (make-request uri: uri
                                      method: 'POST
                                      headers: req-headers)))
              (let-values (((data req resp) (with-input-from-request req
                                                                     (json->string payload)
                                                                     read-json)))
                data)))
      [var (http client-error)
           (let ((resp (get-condition-property var 'client-error 'response))
                 (body (get-condition-property var 'client-error 'body)))
             (e "API Error: " var)
             (e body)
             #f)])))

;; Parameter for dependency injection
;; The http-client should be a procedure that takes (endpoint payload) and returns response alist
(define openai-http-client (make-parameter default-openai-http-client))

;; Helper to make OpenAI API requests using the injected client
(define (openai-call-api endpoint payload)
  ((openai-http-client) endpoint payload))

;;; ================================================================
;;; Provider Interface Implementation
;;; ================================================================

;; Convert a chat message to OpenAI format
;; For cost savings, file attachments are NOT sent to the model by default.
;; Instead, files are described as text annotations.
;;
;; If include-file is #t, the file WILL be embedded
(define (openai-prepare-message msg include-file)
  (let ((role (alist-ref 'role msg))
        (content (alist-ref 'content msg))
        (file-data (alist-ref 'file_data msg))
        (file-type (alist-ref 'file_type msg))
        (file-name (alist-ref 'file_name msg)))
    (cond
     ;; If include-file is false and we have file data, convert to text description
     ((and file-data (not include-file))
      (let ((type-desc (cond
                        ((image-mime-type? file-type) "image")
                        ((pdf-mime-type? file-type) "PDF document")
                        (else "file"))))
        `((role . ,role)
          (content . ,(conc (or content "") "\n[Attached " type-desc ": " (or file-name "file") "]")))))

     ;; PDF with base64 data: use type "file" format
     ((and file-data (pdf-mime-type? file-type))
      (d "Preparing PDF message with existing base64 data")
      `((role . ,role)
        (content . #(((type . "text") (text . ,content))
                     ((type . "file")
                      (file . ((filename . ,file-name)
                               (file_data . ,(string-append "data:application/pdf;base64," file-data)))))))))

     ;; Image with base64 data: use image_url format
     ((and file-data (image-mime-type? file-type))
      `((role . ,role)
        (content . #(((type . "text") (text . ,content))
                     ((type . "image_url")
                      (image_url . ((url . ,(conc "data:" file-type ";base64," file-data)))))))))

     ;; Regular message: pass through unchanged
     (else msg))))

;; Build OpenAI API payload
;; Prepares messages for OpenAI by converting file messages to image_url format
(define (openai-build-payload messages tools model temperature max-tokens)
  (let* (;; Convert file messages to proper OpenAI format (with image_url)
         (prepared-messages (map (lambda (m) (openai-prepare-message m #f)) messages))
         (base `((model . ,(or model *openai-default-model*))
                 (temperature . ,(or temperature *openai-default-temperature*))
                 (max_completion_tokens . ,(or max-tokens *openai-default-max-tokens*))
                 (messages . ,(list->vector prepared-messages)))))
    (if (and tools (> (vector-length tools) 0))
        (append base `((tools . ,tools)
                       (tool_choice . "auto")))
        base)))

;; Parse OpenAI API response
;; Extracts message content, tool calls, and finish reason from response
;; Returns: alist with 'message, 'content, 'tool-calls, 'finish-reason, 'usage
(define (openai-parse-response response-data)
  (if (not response-data)
      `((success . #f)
        (error . "No response from API"))
      (let* ((usage (alist-ref 'usage response-data))
             (prompt-tokens (if usage (alist-ref 'prompt_tokens usage) 0))
             (completion-tokens (if usage (alist-ref 'completion_tokens usage) 0))
             (choices (alist-ref 'choices response-data))
             (first-choice (if (and choices (> (vector-length choices) 0))
                               (vector-ref choices 0)
                               #f)))
        (if (not first-choice)
            `((success . #f)
              (error . "No response in choices")
              (input-tokens . ,(or prompt-tokens 0))
              (output-tokens . ,(or completion-tokens 0)))
            (let* ((message (alist-ref 'message first-choice))
                   (content (alist-ref 'content message))
                   (tool-calls (alist-ref 'tool_calls message))
                   (finish-reason (alist-ref 'finish_reason first-choice)))
              `((success . #t)
                (message . ,message)
                (content . ,content)
                (tool-calls . ,tool-calls)
                (finish-reason . ,finish-reason)
                (input-tokens . ,(or prompt-tokens 0))
                (output-tokens . ,(or completion-tokens 0))))))))

;; Extract tool calls from OpenAI response message
;; Returns: list of alists with 'id, 'name, 'arguments
(define (openai-extract-tool-calls response-message)
  (let ((tool-calls (alist-ref 'tool_calls response-message)))
    (if (and tool-calls (> (vector-length tool-calls) 0))
        (map (lambda (tc)
               (let* ((tool-id (alist-ref 'id tc))
                      (function (alist-ref 'function tc))
                      (tool-name (alist-ref 'name function))
                      (arguments-str (alist-ref 'arguments function)))
                 `((id . ,tool-id)
                   (name . ,tool-name)
                   (arguments . ,arguments-str))))
             (vector->list tool-calls))
        '())))

;; Format tool result message for OpenAI
;; Returns: message alist in OpenAI's tool result format
(define (openai-format-tool-result tool-call-id result)
  `((role . "tool")
    (tool_call_id . ,tool-call-id)
    (content . ,(if (string? result) result (json->string result)))))

;; Get pricing for a model
;; Returns: alist with 'input-price-per-1m and 'output-price-per-1m
(define (openai-get-model-pricing model-name)
  (let* ((model-sym (if (string? model-name)
                        (string->symbol model-name)
                        model-name))
         (config (alist-ref model-sym *openai-model-configs*)))
    (or config
        ;; Fallback pricing if model not found
        '((input-price-per-1m . 1.00)
          (output-price-per-1m . 3.00)))))

;;; ================================================================
;;; Image Generation
;;; ================================================================

;; Generate image using OpenAI's image generation API
;; PROMPT: string - the image description prompt
;; PARAMS: alist of additional parameters
;;   - model: "gpt-image-1" (default) or "dall-e-3" or "dall-e-2"
;;   - size: "1024x1024" (default), "1024x1536", or "1536x1024" for gpt-image-1
;;   - quality: "high" (default), "medium", or "low" for gpt-image-1; "standard" or "hd" for dall-e-3
;;   - output_format: "png" (default) or "jpeg" for gpt-image-1
;;   - n: number of images to generate (default 1)
;; Returns: list of base64-encoded image strings, or #f on failure
(define (openai-generate-image prompt params)
  (let* ((model (or (alist-ref 'model params) "gpt-image-1"))
         (size (or (alist-ref 'size params) "1024x1024"))
         (quality (or (alist-ref 'quality params) "high"))
         (n (or (alist-ref 'n params) 1))
         (output-format (or (alist-ref 'output_format params) "png"))
         ;; Build base payload
         (base-payload `((model . ,model)
                         (prompt . ,prompt)
                         (size . ,size)
                         (quality . ,quality)
                         (n . ,n)))
         ;; gpt-image-1 uses output_format and returns b64 by default
         ;; dall-e models use response_format for b64_json
         (payload (if (string-prefix? "gpt-image" model)
                      (cons `(output_format . ,output-format) base-payload)
                      (cons `(response_format . "b64_json") base-payload))))
    (and-let* ((response (openai-call-api "images/generations" payload))
               (data (alist-ref 'data response))
               ((> (vector-length data) 0)))
      ;; Extract base64 data from response
      ;; gpt-image-1 uses 'b64, dall-e uses 'b64_json
      (map (lambda (img)
             (or (alist-ref 'b64 img)
                 (alist-ref 'b64_json img)))
           (vector->list data)))))

;;; ================================================================
;;; Audio Transcription
;;; ================================================================

;; Transcribe audio using OpenAI's transcription API
;; FILE-PATH: string - path to the audio file to transcribe
;; PARAMS: alist of additional parameters
;;   - model: "gpt-4o-transcribe" (default) or "whisper-1"
;;   - response_format: "text" (default), "json", or "verbose_json"
;;   - language: optional language code (e.g., "en", "es")
;; Returns: string (for "text" format) or alist (for "json" formats), #f on failure
(define (openai-transcribe-audio file-path params)
  (let* ((model (or (alist-ref 'model params) "gpt-4o-transcribe"))
         (response-format (or (alist-ref 'response_format params) "text"))
         (language (alist-ref 'language params))
         ;; Extract just the filename from the path
         (filename (let ((parts (string-split file-path "/")))
                     (last parts)))
         ;; Build multipart form data
         (form-data `((file file: ,file-path filename: ,filename)
                      (model . ,model)
                      (response_format . ,response-format)
                      ,@(if language `((language . ,language)) '()))))
    ;; Use longer timeout for file uploads
    (parameterize ((tcp-read-timeout #f)
                   (tcp-write-timeout 600000))
      (openai-call-api "audio/transcriptions" form-data))))

;;; ================================================================
;;; OpenAI Provider Instance
;;; ================================================================

;; Create the OpenAI provider instance
(define openai-provider
  (make-llm-provider
   'openai                      ;; name
   *openai-default-model*       ;; default-model
   openai-prepare-message       ;; prepare-message
   openai-build-payload         ;; build-payload
   openai-call-api              ;; call-api
   openai-parse-response        ;; parse-response
   openai-format-tool-result    ;; format-tool-result
   openai-get-model-pricing     ;; get-model-pricing
   openai-extract-tool-calls    ;; extract-tool-calls
   openai-generate-image        ;; generate-image (optional)
   openai-transcribe-audio))    ;; transcribe-audio (optional)

) ;; end module