From: ELPA Syncer
Subject: [elpa] externals/llm a5424e6637 2/2: Add support for Vertex model text-bison
Date: Sat, 4 Nov 2023 21:58:23 -0400 (EDT)
branch: externals/llm
commit a5424e66373f5ac232a88d64d3b3fecf3760c697
Author: Andrew Hyatt <ahyatt@gmail.com>
Commit: Andrew Hyatt <ahyatt@gmail.com>
Add support for Vertex model text-bison
---
NEWS.org | 1 +
llm-vertex.el | 223 ++++++++++++++++++++++++++++++++++++++--------------------
2 files changed, 148 insertions(+), 76 deletions(-)
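
For a concrete sense of what this commit enables, here is a minimal sketch of pointing the provider at the new model; the project id "my-project" is hypothetical, and llm-chat / llm-make-simple-chat-prompt come from llm.el:

;; Minimal sketch; "my-project" is a hypothetical GCP project id.
(require 'llm)
(require 'llm-vertex)

(let ((provider (make-llm-vertex :project "my-project"
                                 :chat-model "text-bison")))
  ;; text-bison prompts may contain only a single interaction.
  (llm-chat provider (llm-make-simple-chat-prompt "Say hello.")))
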
diff --git a/NEWS.org b/NEWS.org
index 5b30f7beec..05e47c8300 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,5 +1,6 @@
* Version 0.5.2
- Fix incompatibility with older Emacs introduced in Version 0.5.1.
+- Add support for Google Cloud Vertex model =text-bison= and variants.
* Version 0.5.1
- Implement token counting for Google Cloud Vertex via their API.
- Fix issue with Google Cloud Vertex erroring on multibyte strings.
diff --git a/llm-vertex.el b/llm-vertex.el
index 516d1905d1..9340ab5450 100644
--- a/llm-vertex.el
+++ b/llm-vertex.el
@@ -49,6 +49,20 @@ This is only used for streaming calls."
:type 'string
:group 'llm-vertex)
+(defcustom llm-vertex-default-max-output-tokens 500
+ "The default maximum number of tokens to ask for.
+This is only used when setting the maximum tokens is required,
+and there is no default. The maximum value possible here is 2049."
+ :type 'integer
+ :group 'llm-vertex)
+
+(defcustom llm-vertex-default-chat-model "chat-bison"
+ "The default model to ask for.
+This should almost certainly be a chat model; other models are
+for more specialized uses."
+ :type 'string
+ :group 'llm-vertex)
+
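
These defaults can be overridden in the usual defcustom ways; a sketch (the variant name is hypothetical, chosen only to illustrate the "and variants" wording in NEWS):

;; Illustrative overrides; "text-bison@001" is a hypothetical variant.
(setq llm-vertex-default-chat-model "text-bison@001"
      llm-vertex-default-max-output-tokens 1024)
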
(cl-defstruct llm-vertex
"A struct representing a Vertex AI client.
@@ -63,7 +77,7 @@ KEY-GENTIME keeps track of when the key was generated, because the key must be r
key
project
embedding-model
- chat-model
+ (chat-model llm-vertex-default-chat-model)
key-gentime)
(defun llm-vertex-refresh-key (provider)
@@ -142,19 +156,22 @@ If nothing needs to be set, return nil."
(if param-struct-alist
`(("parameters" . (("struct_val" . ,param-struct-alist)))))))
-(defun llm-vertex--get-chat-response-ui (response)
- "Return the actual response from the RESPONSE struct returned."
+(defun llm-vertex--get-chat-response-streaming (response)
+ "Return the actual response from the RESPONSE struct returned.
+This handles different kinds of models."
(pcase (type-of response)
- ('vector (mapconcat #'llm-vertex--get-chat-response-ui
+ ('vector (mapconcat #'llm-vertex--get-chat-response-streaming
response ""))
- ('cons (let* ((outputs (cdr (assoc 'outputs response)))
- (structVal-list (cdr (assoc 'structVal (aref outputs 0))))
- (candidates (cdr (assoc 'candidates structVal-list)))
- (listVal (cdr (assoc 'listVal candidates)))
- (structVal (cdr (assoc 'structVal (aref listVal 0))))
- (content (cdr (assoc 'content structVal)))
- (stringVal (aref (cdr (assoc 'stringVal content)) 0)))
- stringVal))))
+ ('cons (let* ((outputs (assoc-default 'outputs response))
+ (structVal-list (assoc-default 'structVal (aref outputs 0)))
+ (candidates (assoc-default 'candidates structVal-list)))
+ (if candidates
+ (let* ((listVal (assoc-default 'listVal candidates))
+ (structVal (assoc-default 'structVal (aref listVal 0)))
+ (content (assoc-default 'content structVal))
+ (stringVal (aref (assoc-default 'stringVal content) 0)))
+ stringVal)
+ (aref (assoc-default 'stringVal (assoc-default 'content structVal-list)) 0))))))
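
To make the branching concrete, here is a hand-built chunk in the text-bison shape (values made up; only the fields the accessor path reads are present):

;; Hypothetical parsed chunk; real payloads carry more fields.
(llm-vertex--get-chat-response-streaming
 '((outputs . [((structVal . ((content . ((stringVal . ["hi"]))))))])))
;; => "hi"
;; chat-bison-style chunks additionally nest candidates -> listVal.
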
(defun llm-vertex--get-partial-chat-ui-repsonse (response)
"Return the partial response from as much of RESPONSE as we can parse.
@@ -179,15 +196,15 @@ If the response is not parseable, return nil."
start end-of-valid-chunk)
;; Close off the json
"]")))))
- (llm-vertex--get-chat-response-ui json))
+ (llm-vertex--get-chat-response-streaming json))
(error (message "Unparseable buffer saved to *llm-vertex-unparseable*")
(with-current-buffer (get-buffer-create "*llm-vertex-unparseable*")
(erase-buffer)
(insert response))))))))
-(defun llm-vertex--chat-request-ui (prompt)
- "Return an alist with chat input, appropriate for ui API.
-PROMPT contains the input to the call to the chat API."
+(defun llm-vertex--collapsed-system-prompt (prompt)
+ "Return the text of the non-interaction parts of PROMPT.
+If there are no non-interaction parts, return nil."
(let ((system-prompt))
(when (llm-chat-prompt-context prompt)
(push (llm-chat-prompt-context prompt) system-prompt))
@@ -197,82 +214,133 @@ PROMPT contains the input to the call to the chat API."
(concat "User:\n" (car example)
"\nAssistant:\n" (cdr example)))
(llm-chat-prompt-examples prompt) "\n"))
system-prompt))
+ (when system-prompt
+ (mapconcat #'identity (nreverse system-prompt) "\n"))))
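
A quick sanity check of the collapse (values made up):

;; With only a context and no examples, the result is just the context.
(llm-vertex--collapsed-system-prompt
 (make-llm-chat-prompt :context "Be brief."))
;; => "Be brief."
;; With neither context nor examples it returns nil.
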
+
+(defun llm-vertex--chat-request-streaming (prompt model)
+ "Return an alist with chat input for the streaming API.
+PROMPT contains the input to the call to the chat API. MODEL
+contains the model to use, which can change the request."
+ (let ((system-prompt (llm-vertex--collapsed-system-prompt prompt)))
(append
`(("inputs" . ((("struct_val" .
- (("messages" .
- (("list_val" .
- ,(mapcar (lambda (interaction)
- `(("struct_val" . (("content" .
- (("string_val" .
- (,(format "'\"%s\"'"
- (llm-chat-prompt-interaction-content
- interaction))))))
- ("author" .
- (("string_val" .
- ,(format "'\"%s\"'"
- (pcase (llm-chat-prompt-interaction-role interaction)
- ('user "user")
- ('system "system")
- ('assistant "assistant"))))))))))
- (if system-prompt
- (cons (make-llm-chat-prompt-interaction
- :role 'system
- :content (mapconcat #'identity (nreverse system-prompt) "\n"))
- (llm-chat-prompt-interactions prompt))
- (llm-chat-prompt-interactions prompt))))))))))))
+ ,(if (string-match-p "text-bison" model)
+ (progn
+ (unless (= 1 (length (llm-chat-prompt-interactions prompt)))
+ (error "Vertex model 'text-bison' must contain only one interaction"))
+ `(("prompt" . (("string_val" .
+ [,(format "'\"%s\"'"
+ (concat system-prompt (when system-prompt "\n")
+ (llm-chat-prompt-interaction-content
+ (car (llm-chat-prompt-interactions prompt )))))])))))
+ `(("messages" .
+ (("list_val" .
+ ,(mapcar (lambda (interaction)
+ `(("struct_val" . (("content" .
+ (("string_val" .
+ (,(format "'\"%s\"'"
+ (llm-chat-prompt-interaction-content
+ interaction))))))
+ ("author" .
+ (("string_val" .
+ ,(format "'\"%s\"'"
+ (pcase (llm-chat-prompt-interaction-role interaction)
+ ('user "user")
+ ('system "system")
+ ('assistant "assistant"))))))))))
+ (if system-prompt
+ (cons (make-llm-chat-prompt-interaction
+ :role 'system
+ :content system-prompt)
+ (llm-chat-prompt-interactions prompt))
+ (llm-chat-prompt-interactions prompt)))))))))))))
(llm-vertex--parameters-ui prompt))))
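
For orientation, the streaming endpoint takes the struct_val/string_val tensor encoding built above, so a one-interaction text-bison request JSON-encodes to roughly the following (prompt text made up, output approximate):

(require 'json)
;; Encode a single-interaction prompt for the streaming API.
(json-encode
 (llm-vertex--chat-request-streaming
  (llm-make-simple-chat-prompt "Say hello.") "text-bison"))
;; => {"inputs":[{"struct_val":{"prompt":{"string_val":["'\"Say hello.\"'"]}}}]}
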
-(defun llm-vertex--chat-request-v1 (prompt)
- "From PROMPT, create the data to in the vertex chat request."
- (let ((prompt-alist)
- (params-alist))
- (when (llm-chat-prompt-context prompt)
- (push `("context" . ,(llm-chat-prompt-context prompt)) prompt-alist))
- (when (llm-chat-prompt-examples prompt)
- (push `("examples" . ,(apply #'vector
- (mapcar (lambda (example)
- `(("input" . (("content" . ,(car
example))))
- ("output" . (("content" . ,(cdr
example))))))
- (llm-chat-prompt-examples prompt))))
- prompt-alist))
- (push `("messages" . ,(apply #'vector
- (mapcar (lambda (interaction)
- `(("author" . (pcase
(llm-chat-prompt-interaction-role interaction)
- ('user "user")
- ('system (error
"System role not supported"))
- ('assistant
"assistant")))
- ("content" .
,(llm-chat-prompt-interaction-content interaction))))
- (llm-chat-prompt-interactions
prompt))))
- prompt-alist)
+(defun llm-vertex--chat-parameters (prompt)
+ "From PROMPT, create the parameters section.
+Return value is a cons for adding to an alist, unless there is
+nothing to add, in which case it is nil."
+ (let ((params-alist))
(when (llm-chat-prompt-temperature prompt)
- (push `("temperature" . ,(llm-chat-prompt-temperature prompt))
- params-alist))
+ (push `(temperature . ,(llm-chat-prompt-temperature prompt))
+ params-alist))
(when (llm-chat-prompt-max-tokens prompt)
- (push `("max_tokens" . ,(llm-chat-prompt-max-tokens prompt))
params-alist))
- `(("instances" . [,prompt-alist])
- ("parameters" . ,params-alist))))
+ (push `(maxOutputTokens . ,(llm-chat-prompt-max-tokens prompt))
params-alist))
+ (when params-alist
+ `(parameters . ,params-alist))))
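
An illustration of the cons it returns (values made up):

(llm-vertex--chat-parameters
 (make-llm-chat-prompt :temperature 0.5 :max-tokens 100))
;; => (parameters (maxOutputTokens . 100) (temperature . 0.5))
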
+
+(defun llm-vertex--text-request (prompt)
+ "From PROMPT, create the data for the vertex text reequest.
+The text request can only have one interaction."
+ (unless (= 1 (length (llm-chat-prompt-interactions prompt)))
+ (error "Model text-bison can only have 1 prompt interaction"))
+ (let ((system-prompt (llm-vertex--collapsed-system-prompt prompt)))
+ (append
+ `((instances . [((prompt . ,(concat system-prompt
+ (when system-prompt "\n")
+ (llm-chat-prompt-interaction-content
+ (car (llm-chat-prompt-interactions prompt))))))]))
+ (let ((params (llm-vertex--chat-parameters (let ((p (copy-llm-chat-prompt prompt)))
+ ;; For some reason vertex requires max-tokens
+ (setf (llm-chat-prompt-max-tokens p)
+ llm-vertex-default-max-output-tokens)
+ p))))
+ (when params (list params))))))
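
For a concrete feel, assuming llm-vertex-default-max-output-tokens is at its default of 500:

(llm-vertex--text-request (llm-make-simple-chat-prompt "Say hello."))
;; => ((instances . [((prompt . "Say hello."))])
;;     (parameters (maxOutputTokens . 500)))
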
+
+(defun llm-vertex--chat-request-v1 (prompt model)
+ "From PROMPT, create the data for the vertex chat request."
+ (if (string-match-p "text-bison" model)
+ (llm-vertex--text-request prompt)
+ (let ((prompt-alist))
+ (when (llm-chat-prompt-context prompt)
+ (push `("context" . ,(llm-chat-prompt-context prompt)) prompt-alist))
+ (when (llm-chat-prompt-examples prompt)
+ (push `("examples" . ,(apply #'vector
+ (mapcar (lambda (example)
+ `(("input" . (("content" . ,(car
example))))
+ ("output" . (("content" . ,(cdr
example))))))
+ (llm-chat-prompt-examples
prompt))))
+ prompt-alist))
+ (push `("messages" . ,(apply #'vector
+ (mapcar (lambda (interaction)
+ `(("author" . (pcase
(llm-chat-prompt-interaction-role interaction)
+ ('user "user")
+ ('system (error
"System role not supported"))
+ ('assistant
"assistant")))
+ ("content" .
,(llm-chat-prompt-interaction-content interaction))))
+ (llm-chat-prompt-interactions
prompt))))
+ prompt-alist)
+ (append
+ `(("instances" . [,prompt-alist]))
+ (let ((params (llm-vertex--chat-parameters prompt)))
+ (when params (list params)))))))
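
For chat models the same entry point keeps producing the REST-style payload, roughly (message text made up, shape abbreviated):

(llm-vertex--chat-request-v1
 (llm-make-simple-chat-prompt "Say hello.") "chat-bison")
;; => (("instances" . [(("messages" . [...]))]))
;; "parameters" is appended only when the prompt sets temperature
;; or max tokens.
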
(defun llm-vertex--chat-url (provider streaming)
"Return the correct url to use for PROVIDER.
If STREAMING is non-nil, use the URL for the streaming API."
- (format "https://%s-aiplatform.googleapis.com/%s/projects/%s/locations/%s/publishers/google/models/%s:%s"
+ (format "https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/publishers/google/models/%s:%s"
llm-vertex-gcloud-region
- (if streaming "ui" "v1")
(llm-vertex-project provider)
llm-vertex-gcloud-region
- (or (llm-vertex-chat-model provider) "chat-bison")
+ (llm-vertex-chat-model provider)
(if streaming "serverStreamingPredict" "predict")))
(defun llm-vertex--chat-extract-response (response)
- "Return the chat response contained in the server RESPONSE."
- (cdr (assoc 'content (aref (cdr (assoc 'candidates (aref (cdr (assoc 'predictions response)) 0))) 0))))
+ "Return the chat response contained in the server RESPONSE.
+This should handle the various kinds of responses that the
+different models can return."
+ (let* ((predictions (aref (assoc-default 'predictions response) 0))
+ (candidates (assoc-default 'candidates predictions)))
+ (if candidates
+ (assoc-default 'content (aref candidates 0))
+ (assoc-default 'content predictions))))
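
Two hand-built minimal payloads showing the branch (values made up; only the fields the accessors read are present):

(llm-vertex--chat-extract-response
 '((predictions . [((content . "hi"))])))          ; text-bison shape
;; => "hi"
(llm-vertex--chat-extract-response
 '((predictions . [((candidates . [((content . "hi"))]))])))
;; => "hi" (chat-bison shape)
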
(cl-defmethod llm-chat-async ((provider llm-vertex) prompt response-callback error-callback)
(llm-vertex-refresh-key provider)
(let ((buf (current-buffer)))
(llm-request-async (llm-vertex--chat-url provider nil)
:headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
- :data (llm-vertex--chat-request-v1 prompt)
+ :data (llm-vertex--chat-request-v1 prompt (llm-vertex-chat-model provider))
:on-success (lambda (data)
(let ((response (llm-vertex--chat-extract-response data)))
(setf (llm-chat-prompt-interactions prompt)
@@ -289,24 +357,25 @@ If STREAMING is non-nil, use the URL for the streaming API."
(llm-request-sync
(llm-vertex--chat-url provider nil)
:headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
- :data (llm-vertex--chat-request-v1 prompt))
+ :data (llm-vertex--chat-request-v1 prompt (llm-vertex-chat-model provider)))
#'llm-vertex--chat-extract-response)))
(setf (llm-chat-prompt-interactions prompt)
(append (llm-chat-prompt-interactions prompt)
(list (make-llm-chat-prompt-interaction :role 'assistant
:content response))))
response))
+;; API reference: https://cloud.google.com/vertex-ai/docs/generative-ai/learn/streaming
(cl-defmethod llm-chat-streaming ((provider llm-vertex) prompt partial-callback response-callback error-callback)
(llm-vertex-refresh-key provider)
(let ((buf (current-buffer)))
(llm-request-async (llm-vertex--chat-url provider t)
:headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
- :data (llm-vertex--chat-request-ui prompt)
+ :data (llm-vertex--chat-request-streaming prompt (llm-vertex-chat-model provider))
:on-partial (lambda (partial)
(when-let ((response (llm-vertex--get-partial-chat-ui-repsonse partial)))
(llm-request-callback-in-buffer buf partial-callback response)))
:on-success (lambda (data)
- (let ((response (llm-vertex--get-chat-response-ui data)))
+ (let ((response (llm-vertex--get-chat-response-streaming data)))
(setf (llm-chat-prompt-interactions prompt)
(append (llm-chat-prompt-interactions prompt)
(list (make-llm-chat-prompt-interaction :role 'assistant :content response))))
@@ -326,11 +395,12 @@ MODEL "
llm-vertex-gcloud-region
(llm-vertex-project provider)
llm-vertex-gcloud-region
- (or (llm-vertex-chat-model provider) "chat-bison")))
+ (llm-vertex-chat-model provider)))
(defun llm-vertex--to-count-token-request (request)
"Return a version of REQUEST that is suitable for counting tokens."
- (seq-filter (lambda (c) (not (equal (car c) "parameters"))) request))
+ (seq-filter (lambda (c) (and (not (equal (car c) "parameters"))
+ (not (eq (car c) 'parameters)))) request))
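
The widened filter now strips the parameters section whether it was keyed with a string (streaming request) or a symbol (v1 request), e.g. (request values made up):

(llm-vertex--to-count-token-request
 '((instances . [((prompt . "hi"))]) (parameters (maxOutputTokens . 500))))
;; => ((instances . [((prompt . "hi"))]))
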
(defun llm-vertex--count-tokens-extract-response (response)
"Extract the token count from the response."
@@ -342,7 +412,8 @@ MODEL "
(llm-request-sync (llm-vertex--count-token-url provider)
:headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
:data (llm-vertex--to-count-token-request
- (llm-vertex--chat-request-v1 (llm-make-simple-chat-prompt string))))
+ (llm-vertex--chat-request-v1
+ (llm-make-simple-chat-prompt string) (llm-vertex-chat-model provider))))
#'llm-vertex--count-tokens-extract-response))
(provide 'llm-vertex)