From: ELPA Syncer
Subject: [elpa] externals/llm a5424e6637 2/2: Add support for Vertex model text-bison
Date: Sat, 4 Nov 2023 21:58:23 -0400 (EDT)

branch: externals/llm
commit a5424e66373f5ac232a88d64d3b3fecf3760c697
Author: Andrew Hyatt <ahyatt@gmail.com>
Commit: Andrew Hyatt <ahyatt@gmail.com>

    Add support for Vertex model text-bison
---
 NEWS.org      |   1 +
 llm-vertex.el | 223 ++++++++++++++++++++++++++++++++++++++--------------------
 2 files changed, 148 insertions(+), 76 deletions(-)
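
[Editorial note: the commit adds two user options (llm-vertex-default-max-output-tokens and llm-vertex-default-chat-model) and branches the request builders on whether the model name matches "text-bison". As a minimal sketch of how a provider might be configured after this change: make-llm-vertex is the constructor generated by the cl-defstruct in the diff below, and the project name is a placeholder.

    ;; Default provider; chat-model now falls back to
    ;; llm-vertex-default-chat-model ("chat-bison").
    (setq my-chat-provider (make-llm-vertex :project "my-gcp-project"))

    ;; Opt in to the text model this commit adds.  Prompts sent to
    ;; text-bison may contain only a single interaction.
    (setq my-text-provider (make-llm-vertex :project "my-gcp-project"
                                            :chat-model "text-bison"))]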

diff --git a/NEWS.org b/NEWS.org
index 5b30f7beec..05e47c8300 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,5 +1,6 @@
 * Version 0.5.2
 - Fix incompatibility with older Emacs introduced in Version 0.5.1.
+- Add support for Google Cloud Vertex model =text-bison= and variants.
 * Version 0.5.1
 - Implement token counting for Google Cloud Vertex via their API.
 - Fix issue with Google Cloud Vertex erroring on multibyte strings.
diff --git a/llm-vertex.el b/llm-vertex.el
index 516d1905d1..9340ab5450 100644
--- a/llm-vertex.el
+++ b/llm-vertex.el
@@ -49,6 +49,20 @@ This is only used for streaming calls."
   :type 'string
   :group 'llm-vertex)
 
+(defcustom llm-vertex-default-max-output-tokens 500
+  "The default maximum number of tokens to ask for.
+This is only used when setting the maximum number of tokens is
+required and none is specified.  The maximum value possible here is 2049."
+  :type 'integer
+  :group 'llm-vertex)
+
+(defcustom llm-vertex-default-chat-model "chat-bison"
+  "The default model to ask for.
+This should almost certainly be a chat model; other models are
+for more specialized uses."
+  :type 'string
+  :group 'llm-vertex)
+
 (cl-defstruct llm-vertex
   "A struct representing a Vertex AI client.
 
@@ -63,7 +77,7 @@ KEY-GENTIME keeps track of when the key was generated, because the key must be r
   key
   project
   embedding-model
-  chat-model
+  (chat-model llm-vertex-default-chat-model)
   key-gentime)
 
 (defun llm-vertex-refresh-key (provider)
@@ -142,19 +156,22 @@ If nothing needs to be set, return nil."
     (if param-struct-alist
         `(("parameters" . (("struct_val" . ,param-struct-alist)))))))
 
-(defun llm-vertex--get-chat-response-ui (response)
-  "Return the actual response from the RESPONSE struct returned."
+(defun llm-vertex--get-chat-response-streaming (response)
+  "Return the actual response from the RESPONSE struct returned.
+This handles different kinds of models."
   (pcase (type-of response)
-    ('vector (mapconcat #'llm-vertex--get-chat-response-ui
+    ('vector (mapconcat #'llm-vertex--get-chat-response-streaming
                         response ""))
-    ('cons (let* ((outputs (cdr (assoc 'outputs response)))
-                  (structVal-list (cdr (assoc 'structVal (aref outputs 0))))
-                  (candidates (cdr (assoc 'candidates structVal-list)))
-                  (listVal (cdr (assoc 'listVal candidates)))
-                  (structVal (cdr (assoc 'structVal (aref listVal 0))))
-                  (content (cdr (assoc 'content structVal)))
-                  (stringVal (aref (cdr (assoc 'stringVal content)) 0)))
-             stringVal))))
+    ('cons (let* ((outputs (assoc-default 'outputs response))
+                  (structVal-list (assoc-default 'structVal (aref outputs 0)))
+                  (candidates (assoc-default 'candidates structVal-list)))
+             (if candidates
+                 (let* ((listVal (assoc-default 'listVal candidates))
+                        (structVal (assoc-default 'structVal (aref listVal 0)))
+                        (content (assoc-default 'content structVal))
+                  (stringVal (aref (assoc-default 'stringVal content) 0)))
+                   stringVal)
+               (aref (assoc-default 'stringVal (assoc-default 'content structVal-list)) 0))))))
 
 (defun llm-vertex--get-partial-chat-ui-repsonse (response)
   "Return the partial response from as much of RESPONSE as we can parse.
@@ -179,15 +196,15 @@ If the response is not parseable, return nil."
                            start end-of-valid-chunk)
                           ;; Close off the json
                           "]")))))
-              (llm-vertex--get-chat-response-ui json))
+              (llm-vertex--get-chat-response-streaming json))
          (error (message "Unparseable buffer saved to *llm-vertex-unparseable*")
                 (with-current-buffer (get-buffer-create "*llm-vertex-unparseable*")
                      (erase-buffer)
                      (insert response))))))))
 
-(defun llm-vertex--chat-request-ui (prompt)
-  "Return an alist with chat input, appropriate for ui API.
-PROMPT contains the input to the call to the chat API."
+(defun llm-vertex--collapsed-system-prompt (prompt)
+  "Return the text of the non-interaction parts of PROMPT.
+If there are no non-interaction parts, return nil."
   (let ((system-prompt))
     (when (llm-chat-prompt-context prompt)
       (push (llm-chat-prompt-context prompt) system-prompt))
@@ -197,82 +214,133 @@ PROMPT contains the input to the call to the chat API."
                                  (concat "User:\n" (car example) 
"\nAssistant:\n" (cdr example)))
                                (llm-chat-prompt-examples prompt) "\n"))
             system-prompt))
+    (when system-prompt
+      (mapconcat #'identity (nreverse system-prompt) "\n"))))
+
+(defun llm-vertex--chat-request-streaming (prompt model)
+  "Return an alist with chat input for the streaming API.
+PROMPT contains the input to the call to the chat API. MODEL
+contains the model to use, which can change the request."
+  (let ((system-prompt (llm-vertex--collapsed-system-prompt prompt)))    
     (append
      `(("inputs" . ((("struct_val" .
-                     (("messages" .
-                       (("list_val" .
-                         ,(mapcar (lambda (interaction)
-                                    `(("struct_val" . (("content" .
-                                                        (("string_val" .
-                                                          (,(format "'\"%s\"'"
-                                                                    
(llm-chat-prompt-interaction-content
-                                                                     
interaction))))))
-                                                       ("author" .
-                                                        (("string_val" .
-                                                          ,(format "'\"%s\"'"
-                                                                   (pcase 
(llm-chat-prompt-interaction-role interaction)
-                                                                     ('user 
"user")
-                                                                     ('system 
"system")
-                                                                     
('assistant "assistant"))))))))))
-                                  (if system-prompt
-                                      (cons (make-llm-chat-prompt-interaction
-                                             :role 'system
-                                             :content (mapconcat #'identity 
(nreverse system-prompt) "\n"))
-                                            (llm-chat-prompt-interactions 
prompt))
-                                    (llm-chat-prompt-interactions 
prompt))))))))))))
+                      ,(if (string-match-p "text-bison" model)
+                          (progn
+                            (unless (= 1 (length (llm-chat-prompt-interactions prompt)))
+                              (error "Vertex model 'text-bison' must contain only one interaction"))
+                            `(("prompt" . (("string_val" .
+                                           [,(format "'\"%s\"'"
+                                                     (concat system-prompt (when system-prompt "\n")
+                                                             (llm-chat-prompt-interaction-content
+                                                              (car (llm-chat-prompt-interactions prompt)))))])))))
+                         `(("messages" .
+                            (("list_val" .
+                              ,(mapcar (lambda (interaction)
+                                         `(("struct_val" . (("content" .
+                                                             (("string_val" .
+                                                               (,(format "'\"%s\"'"
+                                                                         (llm-chat-prompt-interaction-content
+                                                                          interaction))))))
+                                                            ("author" .
+                                                             (("string_val" .
+                                                               ,(format "'\"%s\"'"
+                                                                        (pcase (llm-chat-prompt-interaction-role interaction)
+                                                                          ('user "user")
+                                                                          ('system "system")
+                                                                          ('assistant "assistant"))))))))))
+                                       (if system-prompt
+                                           (cons (make-llm-chat-prompt-interaction
+                                                  :role 'system
+                                                  :content system-prompt)
+                                                 (llm-chat-prompt-interactions prompt))
+                                         (llm-chat-prompt-interactions prompt)))))))))))))
      (llm-vertex--parameters-ui prompt))))
 
-(defun llm-vertex--chat-request-v1 (prompt)
-  "From PROMPT, create the data to in the vertex chat request."
-  (let ((prompt-alist)
-        (params-alist))
-    (when (llm-chat-prompt-context prompt)
-      (push `("context" . ,(llm-chat-prompt-context prompt)) prompt-alist))
-    (when (llm-chat-prompt-examples prompt)
-      (push `("examples" . ,(apply #'vector
-                                   (mapcar (lambda (example)
-                                      `(("input" . (("content" . ,(car 
example))))
-                                        ("output" . (("content" . ,(cdr 
example))))))
-                                           (llm-chat-prompt-examples prompt))))
-            prompt-alist))
-    (push `("messages" . ,(apply #'vector
-                                 (mapcar (lambda (interaction)
-                                           `(("author" . (pcase 
(llm-chat-prompt-interaction-role interaction)
-                                                           ('user "user")
-                                                           ('system (error 
"System role not supported"))
-                                                           ('assistant 
"assistant")))
-                                             ("content" . 
,(llm-chat-prompt-interaction-content interaction))))
-                                         (llm-chat-prompt-interactions 
prompt))))
-          prompt-alist)
+(defun llm-vertex--chat-parameters (prompt)
+  "From PROMPT, create the parameters section.
+Return value is a cons for adding to an alist, unless there is
+nothing to add, in which case it is nil."
+  (let ((params-alist))
     (when (llm-chat-prompt-temperature prompt)
-      (push `("temperature" . ,(llm-chat-prompt-temperature prompt))
-            params-alist))
+            (push `(temperature . ,(llm-chat-prompt-temperature prompt))
+                  params-alist))
     (when (llm-chat-prompt-max-tokens prompt)
-      (push `("max_tokens" . ,(llm-chat-prompt-max-tokens prompt)) 
params-alist))
-    `(("instances" . [,prompt-alist])
-      ("parameters" . ,params-alist))))
+      (push `(maxOutputTokens . ,(llm-chat-prompt-max-tokens prompt)) 
params-alist))
+    (when params-alist
+      `(parameters . ,params-alist))))
+
+(defun llm-vertex--text-request (prompt)
+  "From PROMPT, create the data for the vertex text reequest.
+The text request can only have one interaction."
+  (unless (= 1 (length (llm-chat-prompt-interactions prompt)))
+    (error "Model text-bison can only have 1 prompt interaction"))
+  (let ((system-prompt (llm-vertex--collapsed-system-prompt prompt)))
+    (append
+     `((instances . [((prompt . ,(concat system-prompt
+                                        (when system-prompt "\n")
+                                        (llm-chat-prompt-interaction-content
+                                              (car (llm-chat-prompt-interactions prompt))))))]))
+     (let ((params (llm-vertex--chat-parameters (let ((p (copy-llm-chat-prompt prompt)))
+                                                  ;; For some reason vertex requires max-tokens
+                                                  (setf (llm-chat-prompt-max-tokens p)
+                                                        llm-vertex-default-max-output-tokens)
+                                                  p))))
+       (when params (list params))))))
+
+(defun llm-vertex--chat-request-v1 (prompt model)
+  "From PROMPT, create the data for the vertex chat request."
+  (if (string-match-p "text-bison" model)
+      (llm-vertex--text-request prompt)
+    (let ((prompt-alist))
+      (when (llm-chat-prompt-context prompt)
+        (push `("context" . ,(llm-chat-prompt-context prompt)) prompt-alist))
+      (when (llm-chat-prompt-examples prompt)
+        (push `("examples" . ,(apply #'vector
+                                     (mapcar (lambda (example)
+                                        `(("input" . (("content" . ,(car 
example))))
+                                          ("output" . (("content" . ,(cdr 
example))))))
+                                             (llm-chat-prompt-examples 
prompt))))
+              prompt-alist))
+      (push `("messages" . ,(apply #'vector
+                                   (mapcar (lambda (interaction)
+                                             `(("author" . (pcase 
(llm-chat-prompt-interaction-role interaction)
+                                                             ('user "user")
+                                                             ('system (error 
"System role not supported"))
+                                                             ('assistant 
"assistant")))
+                                               ("content" . 
,(llm-chat-prompt-interaction-content interaction))))
+                                           (llm-chat-prompt-interactions 
prompt))))
+            prompt-alist)      
+      (append
+        `(("instances" . [,prompt-alist]))
+        (let ((params (llm-vertex--chat-parameters prompt)))
+          (when params (list params)))))))
 
 (defun llm-vertex--chat-url (provider streaming)
 "Return the correct url to use for PROVIDER.
 If STREAMING is non-nil, use the URL for the streaming API."
-  (format "https://%s-aiplatform.googleapis.com/%s/projects/%s/locations/%s/publishers/google/models/%s:%s"
+  (format "https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/publishers/google/models/%s:%s"
           llm-vertex-gcloud-region
-          (if streaming "ui" "v1")
           (llm-vertex-project provider)
           llm-vertex-gcloud-region
-          (or (llm-vertex-chat-model provider) "chat-bison")
+          (llm-vertex-chat-model provider)
           (if streaming "serverStreamingPredict" "predict")))
 
 (defun llm-vertex--chat-extract-response (response)
-  "Return the chat response contained in the server RESPONSE."
-  (cdr (assoc 'content (aref (cdr (assoc 'candidates (aref (cdr (assoc 'predictions response)) 0))) 0))))
+  "Return the chat response contained in the server RESPONSE.
+This should handle the various kinds of responses that the
+different models can return."
+  (let* ((predictions (aref (assoc-default 'predictions response) 0))
+         (candidates (assoc-default 'candidates predictions)))
+    (if candidates
+        (assoc-default 'content (aref candidates 0))
+      (assoc-default 'content predictions))))
 
(cl-defmethod llm-chat-async ((provider llm-vertex) prompt response-callback error-callback)
  (llm-vertex-refresh-key provider)
  (let ((buf (current-buffer)))
    (llm-request-async (llm-vertex--chat-url provider nil)
                     :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
-                     :data (llm-vertex--chat-request-v1 prompt)
+                     :data (llm-vertex--chat-request-v1 prompt (llm-vertex-chat-model provider))
                     :on-success (lambda (data)
                                   (let ((response (llm-vertex--chat-extract-response data)))
                                     (setf (llm-chat-prompt-interactions prompt)
@@ -289,24 +357,25 @@ If STREAMING is non-nil, use the URL for the streaming API."
                  (llm-request-sync
                   (llm-vertex--chat-url provider nil)
                  :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
-                  :data (llm-vertex--chat-request-v1 prompt))
+                  :data (llm-vertex--chat-request-v1 prompt (llm-vertex-chat-model provider)))
                 #'llm-vertex--chat-extract-response)))
    (setf (llm-chat-prompt-interactions prompt)
          (append (llm-chat-prompt-interactions prompt)
                  (list (make-llm-chat-prompt-interaction :role 'assistant :content response))))
     response))
 
+;; API reference: https://cloud.google.com/vertex-ai/docs/generative-ai/learn/streaming
(cl-defmethod llm-chat-streaming ((provider llm-vertex) prompt partial-callback response-callback error-callback)
  (llm-vertex-refresh-key provider)
  (let ((buf (current-buffer)))
    (llm-request-async (llm-vertex--chat-url provider t)
                     :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
-                     :data (llm-vertex--chat-request-ui prompt)
+                     :data (llm-vertex--chat-request-streaming prompt (llm-vertex-chat-model provider))
                     :on-partial (lambda (partial)
                                   (when-let ((response (llm-vertex--get-partial-chat-ui-repsonse partial)))
                                     (llm-request-callback-in-buffer buf partial-callback response)))
                     :on-success (lambda (data)
-                                   (let ((response (llm-vertex--get-chat-response-ui data)))
+                                   (let ((response (llm-vertex--get-chat-response-streaming data)))
                                     (setf (llm-chat-prompt-interactions prompt)
                                           (append (llm-chat-prompt-interactions prompt)
                                                   (list (make-llm-chat-prompt-interaction :role 'assistant :content response))))
@@ -326,11 +395,12 @@ MODEL "
           llm-vertex-gcloud-region
           (llm-vertex-project provider)
           llm-vertex-gcloud-region
-          (or (llm-vertex-chat-model provider) "chat-bison")))
+          (llm-vertex-chat-model provider)))
 
 (defun llm-vertex--to-count-token-request (request)
   "Return a version of REQUEST that is suitable for counting tokens."
-  (seq-filter (lambda (c) (not (equal (car c) "parameters"))) request))
+  (seq-filter (lambda (c) (and (not (equal (car c) "parameters"))
+                               (not (eq (car c) 'parameters)))) request))
 
 (defun llm-vertex--count-tokens-extract-response (response)
   "Extract the token count from the response."
@@ -342,7 +412,8 @@ MODEL "
    (llm-request-sync (llm-vertex--count-token-url provider)
                     :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
                     :data (llm-vertex--to-count-token-request
-                            (llm-vertex--chat-request-v1 (llm-make-simple-chat-prompt string))))
+                            (llm-vertex--chat-request-v1
+                             (llm-make-simple-chat-prompt string) (llm-vertex-chat-model provider))))
    #'llm-vertex--count-tokens-extract-response))
 
 (provide 'llm-vertex)
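
[Editorial note: as a usage sketch, not part of the commit, a synchronous call through the new text-bison path might look like the following. llm-chat is the generic entry point from llm.el, llm-make-simple-chat-prompt is the helper already used in the token-counting code above, and my-text-provider is the hypothetical provider configured earlier; the request builder fills in maxOutputTokens from llm-vertex-default-max-output-tokens.

    ;; text-bison requests must contain exactly one interaction,
    ;; so a simple one-shot prompt is the natural fit.
    (llm-chat my-text-provider
              (llm-make-simple-chat-prompt "Write a haiku about Emacs."))]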


