From: ELPA Syncer
Subject: [elpa] externals/llm 95b907c32a 06/10: Add Open AI streaming and allow for raw processing on success
Date: Sun, 1 Oct 2023 18:58:35 -0400 (EDT)
branch: externals/llm
commit 95b907c32aaa5d6bdfafeeec05c39eeadce94857
Author: Andrew Hyatt <ahyatt@gmail.com>
Commit: Andrew Hyatt <ahyatt@gmail.com>
Add Open AI streaming and allow for raw processing on success
---
llm-openai.el | 50 +++++++++++++++++++++++++++++++++++++++++++++++---
llm-request.el | 20 ++++++++++++++------
llm-tester.el | 2 +-
llm.el | 6 +++++-
4 files changed, 67 insertions(+), 11 deletions(-)
diff --git a/llm-openai.el b/llm-openai.el
index bd4d8896ac..7f44f8d62b 100644
--- a/llm-openai.el
+++ b/llm-openai.el
@@ -99,10 +99,11 @@ PROVIDER is the llm-openai provider."
:data (llm-openai--embedding-request provider string))
#'llm-openai--embedding-extract-response))
-(defun llm-openai--chat-request (provider prompt &optional return-json-spec)
+(defun llm-openai--chat-request (provider prompt &optional return-json-spec streaming)
"From PROMPT, create the chat request data to send.
PROVIDER is the llm-openai provider to use.
-RETURN-JSON-SPEC is the optional specification for the JSON to return."
+RETURN-JSON-SPEC is the optional specification for the JSON to return.
+STREAMING if non-nil, turn on response streaming."
(let (request-alist system-prompt)
(when (llm-chat-prompt-context prompt)
(setq system-prompt (llm-chat-prompt-context prompt)))
@@ -119,6 +120,7 @@ RETURN-JSON-SPEC is the optional specification for the JSON to return."
(when system-prompt
(push (make-llm-chat-prompt-interaction :role 'system :content system-prompt)
(llm-chat-prompt-interactions prompt)))
+ (when streaming (push `("stream" . ,t) request-alist))
(push `("messages" . ,(mapcar (lambda (p)
`(("role" . ,(pcase (llm-chat-prompt-interaction-role p)
('user "user")
@@ -165,9 +167,51 @@ RETURN-JSON-SPEC is the optional specification for the JSON to return."
(llm-openai--handle-response
(llm-request-sync "https://api.openai.com/v1/chat/completions"
:headers `(("Authorization" . ,(format "Bearer %s" (llm-openai-key provider))))
- :data (llm-openai--chat-request provider prompt) )
+ :data (llm-openai--chat-request provider prompt))
#'llm-openai--extract-chat-response))
+(defvar-local llm-openai-current-response ""
+ "The response so far from the server.")
+
+(defvar-local llm-openai-last-position 1
+ "The last position in the streamed response we read until.")
+
+(defun llm-openai--get-partial-chat-response (response)
+ "Return the text in the partial chat response from RESPONSE."
+ ;; To begin with, we should still be in the buffer with the actual response.
+ (let ((current-response llm-openai-current-response)
+ (last-position llm-openai-last-position))
+ (with-temp-buffer
+ (insert response)
+ (goto-char last-position)
+ (when (search-forward "\ndata: {" nil t)
+ (backward-char 2)
+ (ignore-errors
+ (setq current-response
+ (concat current-response (assoc-default 'content (assoc-default 'delta (aref (assoc-default 'choices (json-read)) 0))))))
+ (setq last-position (point))))
+ (setq-local llm-openai-current-response current-response)
+ (setq-local llm-openai-last-position last-position)
+ current-response))
+
+(cl-defmethod llm-chat-streaming ((provider llm-openai) prompt partial-callback response-callback error-callback)
+ (unless (llm-openai-key provider)
+ (error "To call Open AI API, the key must have been set"))
+ (llm-request-async "https://api.openai.com/v1/chat/completions"
+ :headers `(("Authorization" . ,(format "Bearer %s" (llm-openai-key provider))))
+ :data (llm-openai--chat-request provider prompt nil t)
+ :on-error (lambda (_ data)
+ (let ((errdata (cdr (assoc 'error data))))
+ (funcall error-callback 'error
+ (format "Problem calling Open AI: %s message: %s"
+ (cdr (assoc 'type errdata))
+ (cdr (assoc 'message errdata))))))
+ :on-partial (lambda (data)
+ (when-let ((response (llm-openai--get-partial-chat-response data)))
+ (funcall partial-callback response)))
+ :on-success-raw (lambda (data)
+ (funcall response-callback (llm-openai--get-partial-chat-response data)))))
+
(provide 'llm-openai)
;;; llm-openai.el ends here
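
A minimal illustration, not part of the commit: the parser above expects OpenAI's
server-sent-event framing, in which each chunk arrives on a "data: {...}" line,
and it accumulates only the content field of choices[0].delta. The chunk below is
synthetic.

;; Hedged example: one synthetic SSE chunk fed to the new parser. The
;; surrounding temp buffer only hosts the buffer-local accumulator
;; variables that the parser updates with setq-local.
(require 'llm-openai)
(with-temp-buffer
  (llm-openai--get-partial-chat-response
   "\ndata: {\"choices\": [{\"delta\": {\"content\": \"Hello\"}}]}"))
;; => "Hello"
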
diff --git a/llm-request.el b/llm-request.el
index 9a50beaf45..aa0640b91d 100644
--- a/llm-request.el
+++ b/llm-request.el
@@ -21,12 +21,12 @@
;;; Code:
(require 'json)
+(require 'cl-macs)
(require 'url-http)
(require 'rx)
(defun llm-request--content ()
"From the current buffer, return the content of the response."
- (message "llm-request--content for buffer %s" (current-buffer))
(buffer-substring-no-properties
(or (and (boundp 'url-http-end-of-headers) url-http-end-of-headers)
(save-match-data
@@ -68,7 +68,7 @@ TIMEOUT is the number of seconds to wait for a response."
(when llm-request--partial-callback
(funcall llm-request--partial-callback (llm-request--content)))))))
-(cl-defun llm-request-async (url &key headers data on-success on-error on-partial)
+(cl-defun llm-request-async (url &key headers data on-success on-success-raw on-error on-partial)
"Make a request to URL.
Nothing will be returned.
@@ -78,14 +78,20 @@ standard json header. This is optional.
DATA will be jsonified and sent as the request body.
This is required.
-ON-SUCCESS will be called with the response body as a json object.
-This is required.
+ON-SUCCESS will be called with the response body as a json
+object. This is optional in the case that ON-SUCCESS-RAW is set,
+and required otherwise.
ON-ERROR will be called with the error code and a response-body.
This is required.
ON-PARTIAL will be called with the potentially incomplete response
-body as a string. This is an optional argument."
+body as a string. This is an optional argument.
+
+ON-SUCCESS-RAW, if set, will be called in the buffer with the
+response body, and expect the response content. This is an
+optional argument, and mostly useful for streaming. If not set,
+the buffer is turned into JSON and passed to ON-SUCCESS."
(let ((url-request-method "POST")
;; This is necessary for streaming, otherwise we get gzip'd data that is
;; unparseable until the end. The responses should be small enough that
@@ -103,7 +109,9 @@ body as a string. This is an optional argument."
(remove-hook 'after-change-functions #'llm-request--handle-new-content t)
(let ((code (url-http-parse-response)))
(if (eq code 200)
- (funcall on-success (json-read-from-string (llm-request--content)))
+ (if on-success-raw
+ (funcall on-success-raw (llm-request--content))
+ (funcall on-success (json-read-from-string (llm-request--content))))
(funcall on-error code (ignore-errors
(json-read-from-string (llm-request--content)))))))
(list on-success on-error)
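
As a usage sketch (not part of the commit), a caller could exercise the updated
helper as below. The URL, header, and payload are placeholders; only the
:on-partial, :on-success-raw, and :on-error keys come from this change.

;; Hedged sketch: endpoint and credentials are illustrative only.
(require 'llm-request)
(llm-request-async
 "https://api.example.invalid/v1/chat/completions"
 :headers '(("Authorization" . "Bearer example-key"))
 :data '(("stream" . t))
 ;; Called with the raw body received so far, possibly incomplete.
 :on-partial (lambda (body) (message "streamed %d chars so far" (length body)))
 ;; Because :on-success-raw is supplied, the final body is handed over
 ;; verbatim instead of being parsed with json-read-from-string.
 :on-success-raw (lambda (body) (message "finished: %d chars" (length body)))
 ;; Called with the HTTP status and the parsed error body, if any.
 :on-error (lambda (code err) (message "HTTP %s: %S" code err)))
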
diff --git a/llm-tester.el b/llm-tester.el
index bbfe5faea1..dbd69c2223 100644
--- a/llm-tester.el
+++ b/llm-tester.el
@@ -119,7 +119,7 @@
:interactions (list
(make-llm-chat-prompt-interaction
:role 'user
- :content "Write a poem in iambic pentameter about the pleasures of using Emacs. The poem should make snide references to vi."))
+ :content "Write a short poem in iambic pentameter about the pleasures of using Emacs. The poem should make snide references to vi."))
:temperature 0.5
:max-tokens 200)
(lambda (text)
diff --git a/llm.el b/llm.el
index 72cf5ed55d..8073960eca 100644
--- a/llm.el
+++ b/llm.el
@@ -135,9 +135,13 @@ PROMPT is a `llm-chat-prompt'.
PARTIAL-CALLBACK is called with the output of the string response
as it is built up. The callback is called with the entire
-response that has been received, as it is streamed back.
+response that has been received, as it is streamed back. It is
+not guaranteed to be called with the complete response before
+RESPONSE-CALLBACK is called.
RESPONSE-CALLBACK receives the each piece of the string response.
+It is called once after the response has been completed, with the
+final text.
ERROR-CALLBACK receives the error response."
(ignore provider prompt partial-callback response-callback error-callback)
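
A hedged end-to-end sketch of the streaming generic (not part of the commit),
assuming the make-llm-openai constructor and the llm-make-simple-chat-prompt
helper from the rest of the library; the API key is a placeholder.

;; Hedged sketch; replace "sk-example" with a real key before running.
(require 'llm)
(require 'llm-openai)
(let ((provider (make-llm-openai :key "sk-example"))
      (prompt (llm-make-simple-chat-prompt "Write one line praising Emacs.")))
  (llm-chat-streaming
   provider prompt
   ;; PARTIAL-CALLBACK: the accumulated text so far, called repeatedly.
   (lambda (partial) (message "so far: %s" partial))
   ;; RESPONSE-CALLBACK: called once with the complete final text.
   (lambda (final) (message "final: %s" final))
   ;; ERROR-CALLBACK: an error symbol plus a human-readable message.
   (lambda (type msg) (message "llm error (%s): %s" type msg))))
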
- [elpa] externals/llm updated (8dee3d059a -> 7954a92d7c), ELPA Syncer, 2023/10/01
- [elpa] externals/llm 38a627409c 01/10: Beginning of a streaming option, with only llm-vertex implemented, ELPA Syncer, 2023/10/01
- [elpa] externals/llm 0faa9e5cc7 02/10: Merge branch 'main' into streaming, ELPA Syncer, 2023/10/01
- [elpa] externals/llm 46feff756f 03/10: Change request functionality to better handle streaming, ELPA Syncer, 2023/10/01
- [elpa] externals/llm fe064b2bc3 04/10: Create streaming method and implement it for vertex, ELPA Syncer, 2023/10/01
- [elpa] externals/llm 95b907c32a 06/10: Add Open AI streaming and allow for raw processing on success, ELPA Syncer, 2023/10/01
- [elpa] externals/llm c7a1e06e3e 07/10: Test streaming as well in the normal suite of tests, ELPA Syncer, 2023/10/01
- [elpa] externals/llm 7e9b1f8c60 09/10: Add streaming to README, ELPA Syncer, 2023/10/01
- [elpa] externals/llm 7954a92d7c 10/10: Bump version to 0.3.0, and note changes in NEWS.org, ELPA Syncer, 2023/10/01
- [elpa] externals/llm 454ec53fd3 05/10: Merge branch 'main' into streaming, ELPA Syncer, 2023/10/01
- [elpa] externals/llm a16338f31a 08/10: Add streaming to llm-fake, ELPA Syncer, 2023/10/01