From: ELPA Syncer
Subject: [elpa] externals/llm 95b907c32a 06/10: Add Open AI streaming and allow for raw processing on success
Date: Sun, 1 Oct 2023 18:58:35 -0400 (EDT)
branch: externals/llm
commit 95b907c32aaa5d6bdfafeeec05c39eeadce94857
Author: Andrew Hyatt <ahyatt@gmail.com>
Commit: Andrew Hyatt <ahyatt@gmail.com>
Add Open AI streaming and allow for raw processing on success
---
llm-openai.el | 50 +++++++++++++++++++++++++++++++++++++++++++++++---
llm-request.el | 20 ++++++++++++++------
llm-tester.el | 2 +-
llm.el | 6 +++++-
4 files changed, 67 insertions(+), 11 deletions(-)
diff --git a/llm-openai.el b/llm-openai.el
index bd4d8896ac..7f44f8d62b 100644
--- a/llm-openai.el
+++ b/llm-openai.el
@@ -99,10 +99,11 @@ PROVIDER is the llm-openai provider."
:data (llm-openai--embedding-request provider string))
#'llm-openai--embedding-extract-response))
-(defun llm-openai--chat-request (provider prompt &optional return-json-spec)
+(defun llm-openai--chat-request (provider prompt &optional return-json-spec streaming)
"From PROMPT, create the chat request data to send.
PROVIDER is the llm-openai provider to use.
-RETURN-JSON-SPEC is the optional specification for the JSON to return."
+RETURN-JSON-SPEC is the optional specification for the JSON to return.
+STREAMING if non-nil, turn on response streaming."
(let (request-alist system-prompt)
(when (llm-chat-prompt-context prompt)
(setq system-prompt (llm-chat-prompt-context prompt)))
@@ -119,6 +120,7 @@ RETURN-JSON-SPEC is the optional specification for the JSON to return."
(when system-prompt
(push (make-llm-chat-prompt-interaction :role 'system :content system-prompt)
(llm-chat-prompt-interactions prompt)))
+ (when streaming (push `("stream" . ,t) request-alist))
(push `("messages" . ,(mapcar (lambda (p)
`(("role" . ,(pcase (llm-chat-prompt-interaction-role p)
('user "user")
@@ -165,9 +167,51 @@ RETURN-JSON-SPEC is the optional specification for the JSON to return."
(llm-openai--handle-response
(llm-request-sync "https://api.openai.com/v1/chat/completions"
:headers `(("Authorization" . ,(format "Bearer %s" (llm-openai-key provider))))
- :data (llm-openai--chat-request provider prompt) )
+ :data (llm-openai--chat-request provider prompt))
#'llm-openai--extract-chat-response))
+(defvar-local llm-openai-current-response ""
+ "The response so far from the server.")
+
+(defvar-local llm-openai-last-position 1
+ "The last position in the streamed response we read until.")
+
+(defun llm-openai--get-partial-chat-response (response)
+ "Return the text in the partial chat response from RESPONSE."
+ ;; To begin with, we should still be in the buffer with the actual response.
+ (let ((current-response llm-openai-current-response)
+ (last-position llm-openai-last-position))
+ (with-temp-buffer
+ (insert response)
+ (goto-char last-position)
+ (when (search-forward "\ndata: {" nil t)
+ (backward-char 2)
+ (ignore-errors
+ (setq current-response
+ (concat current-response (assoc-default 'content (assoc-default 'delta (aref (assoc-default 'choices (json-read)) 0))))))
+ (setq last-position (point))))
+ (setq-local llm-openai-current-response current-response)
+ (setq-local llm-openai-last-position last-position)
+ current-response))
+
+(cl-defmethod llm-chat-streaming ((provider llm-openai) prompt partial-callback response-callback error-callback)
+ (unless (llm-openai-key provider)
+ (error "To call Open AI API, the key must have been set"))
+ (llm-request-async "https://api.openai.com/v1/chat/completions"
+ :headers `(("Authorization" . ,(format "Bearer %s" (llm-openai-key provider))))
+ :data (llm-openai--chat-request provider prompt nil t)
+ :on-error (lambda (_ data)
+ (let ((errdata (cdr (assoc 'error data))))
+ (funcall error-callback 'error
+ (format "Problem calling Open AI: %s message: %s"
+ (cdr (assoc 'type errdata))
+ (cdr (assoc 'message errdata))))))
+ :on-partial (lambda (data)
+ (when-let ((response (llm-openai--get-partial-chat-response data)))
+ (funcall partial-callback response)))
+ :on-success-raw (lambda (data)
+ (funcall response-callback (llm-openai--get-partial-chat-response data)))))
+
(provide 'llm-openai)
;;; llm-openai.el ends here
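
A minimal illustration, not part of the commit: the parser above expects OpenAI's
server-sent-event framing, in which each chunk arrives on a "data: {...}" line,
and it accumulates only the content field of choices[0].delta. The chunk below is
synthetic.

;; Hedged example: one synthetic SSE chunk fed to the new parser. The
;; surrounding temp buffer only hosts the buffer-local accumulator
;; variables that the parser updates with setq-local.
(require 'llm-openai)
(with-temp-buffer
  (llm-openai--get-partial-chat-response
   "\ndata: {\"choices\": [{\"delta\": {\"content\": \"Hello\"}}]}"))
;; => "Hello"
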
diff --git a/llm-request.el b/llm-request.el
index 9a50beaf45..aa0640b91d 100644
--- a/llm-request.el
+++ b/llm-request.el
@@ -21,12 +21,12 @@
;;; Code:
(require 'json)
+(require 'cl-macs)
(require 'url-http)
(require 'rx)
(defun llm-request--content ()
"From the current buffer, return the content of the response."
- (message "llm-request--content for buffer %s" (current-buffer))
(buffer-substring-no-properties
(or (and (boundp 'url-http-end-of-headers) url-http-end-of-headers)
(save-match-data
@@ -68,7 +68,7 @@ TIMEOUT is the number of seconds to wait for a response."
(when llm-request--partial-callback
(funcall llm-request--partial-callback (llm-request--content)))))))
-(cl-defun llm-request-async (url &key headers data on-success on-error on-partial)
+(cl-defun llm-request-async (url &key headers data on-success on-success-raw on-error on-partial)
"Make a request to URL.
Nothing will be returned.
@@ -78,14 +78,20 @@ standard json header. This is optional.
DATA will be jsonified and sent as the request body.
This is required.
-ON-SUCCESS will be called with the response body as a json object.
-This is required.
+ON-SUCCESS will be called with the response body as a json
+object. This is optional in the case that ON-SUCCESS-RAW is set,
+and required otherwise.
ON-ERROR will be called with the error code and a response-body.
This is required.
ON-PARTIAL will be called with the potentially incomplete response
-body as a string. This is an optional argument."
+body as a string. This is an optional argument.
+
+ON-SUCCESS-RAW, if set, will be called in the buffer with the
+response body, and expect the response content. This is an
+optional argument, and mostly useful for streaming. If not set,
+the buffer is turned into JSON and passed to ON-SUCCESS."
(let ((url-request-method "POST")
;; This is necessary for streaming, otherwise we get gzip'd data that is
;; unparseable until the end. The responses should be small enough that
@@ -103,7 +109,9 @@ body as a string. This is an optional argument."
(remove-hook 'after-change-functions #'llm-request--handle-new-content t)
(let ((code (url-http-parse-response)))
(if (eq code 200)
- (funcall on-success (json-read-from-string (llm-request--content)))
+ (if on-success-raw
+ (funcall on-success-raw (llm-request--content))
+ (funcall on-success (json-read-from-string (llm-request--content))))
(funcall on-error code (ignore-errors
(json-read-from-string (llm-request--content)))))))
(list on-success on-error)
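
As a usage sketch (not part of the commit), a caller could exercise the updated
helper as below. The URL, header, and payload are placeholders; only the
:on-partial, :on-success-raw, and :on-error keys come from this change.

;; Hedged sketch: endpoint and credentials are illustrative only.
(require 'llm-request)
(llm-request-async
 "https://api.example.invalid/v1/chat/completions"
 :headers '(("Authorization" . "Bearer example-key"))
 :data '(("stream" . t))
 ;; Called with the raw body received so far, possibly incomplete.
 :on-partial (lambda (body) (message "streamed %d chars so far" (length body)))
 ;; Because :on-success-raw is supplied, the final body is handed over
 ;; verbatim instead of being parsed with json-read-from-string.
 :on-success-raw (lambda (body) (message "finished: %d chars" (length body)))
 ;; Called with the HTTP status and the parsed error body, if any.
 :on-error (lambda (code err) (message "HTTP %s: %S" code err)))
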
diff --git a/llm-tester.el b/llm-tester.el
index bbfe5faea1..dbd69c2223 100644
--- a/llm-tester.el
+++ b/llm-tester.el
@@ -119,7 +119,7 @@
:interactions (list
(make-llm-chat-prompt-interaction
:role 'user
- :content "Write a poem in iambic pentameter about the pleasures of using Emacs. The poem should make snide references to vi."))
+ :content "Write a short poem in iambic pentameter about the pleasures of using Emacs. The poem should make snide references to vi."))
:temperature 0.5
:max-tokens 200)
(lambda (text)
diff --git a/llm.el b/llm.el
index 72cf5ed55d..8073960eca 100644
--- a/llm.el
+++ b/llm.el
@@ -135,9 +135,13 @@ PROMPT is a `llm-chat-prompt'.
PARTIAL-CALLBACK is called with the output of the string response
as it is built up. The callback is called with the entire
-response that has been received, as it is streamed back.
+response that has been received, as it is streamed back. It is
+not guaranteed to be called with the complete response before
+RESPONSE-CALLBACK is called.
RESPONSE-CALLBACK receives the each piece of the string response.
+It is called once after the response has been completed, with the
+final text.
ERROR-CALLBACK receives the error response."
(ignore provider prompt partial-callback response-callback error-callback)
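
A hedged end-to-end sketch of the streaming generic (not part of the commit),
assuming the make-llm-openai constructor and the llm-make-simple-chat-prompt
helper from the rest of the library; the API key is a placeholder.

;; Hedged sketch; replace "sk-example" with a real key before running.
(require 'llm)
(require 'llm-openai)
(let ((provider (make-llm-openai :key "sk-example"))
      (prompt (llm-make-simple-chat-prompt "Write one line praising Emacs.")))
  (llm-chat-streaming
   provider prompt
   ;; PARTIAL-CALLBACK: the accumulated text so far, called repeatedly.
   (lambda (partial) (message "so far: %s" partial))
   ;; RESPONSE-CALLBACK: called once with the complete final text.
   (lambda (final) (message "final: %s" final))
   ;; ERROR-CALLBACK: an error symbol plus a human-readable message.
   (lambda (type msg) (message "llm error (%s): %s" type msg))))
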
- [elpa] externals/llm updated (8dee3d059a -> 7954a92d7c), ELPA Syncer, 2023/10/01
- [elpa] externals/llm 38a627409c 01/10: Beginning of a streaming option, with only llm-vertex implemented, ELPA Syncer, 2023/10/01
- [elpa] externals/llm 0faa9e5cc7 02/10: Merge branch 'main' into streaming, ELPA Syncer, 2023/10/01
- [elpa] externals/llm 46feff756f 03/10: Change request functionality to better handle streaming, ELPA Syncer, 2023/10/01
- [elpa] externals/llm fe064b2bc3 04/10: Create streaming method and implement it for vertex, ELPA Syncer, 2023/10/01
- [elpa] externals/llm 95b907c32a 06/10: Add Open AI streaming and allow for raw processing on success, ELPA Syncer, 2023/10/01
- [elpa] externals/llm c7a1e06e3e 07/10: Test streaming as well in the normal suite of tests, ELPA Syncer, 2023/10/01
- [elpa] externals/llm 7e9b1f8c60 09/10: Add streaming to README, ELPA Syncer, 2023/10/01
- [elpa] externals/llm 7954a92d7c 10/10: Bump version to 0.3.0, and note changes in NEWS.org, ELPA Syncer, 2023/10/01
- [elpa] externals/llm 454ec53fd3 05/10: Merge branch 'main' into streaming, ELPA Syncer, 2023/10/01
- [elpa] externals/llm a16338f31a 08/10: Add streaming to llm-fake, ELPA Syncer, 2023/10/01