emacs-elpa-diffs


From: ELPA Syncer
Subject: [elpa] externals/llm b7787a306d: Add JSON mode for the providers that support it (#112)
Date: Wed, 27 Nov 2024 00:58:13 -0500 (EST)

branch: externals/llm
commit b7787a306da12492f6189f6318261a63d1507486
Author: Andrew Hyatt <ahyatt@gmail.com>
Commit: GitHub <noreply@github.com>

    Add JSON mode for the providers that support it (#112)
    
    This completes feature request
    https://github.com/ahyatt/llm/issues/47#issuecomment-2495675276
---
 NEWS.org      |  4 +++-
 README.org    |  2 +-
 llm-ollama.el |  4 +++-
 llm-openai.el |  4 +++-
 llm-vertex.el |  4 +++-
 llm.el        | 15 +++++++++++++--
 6 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/NEWS.org b/NEWS.org
index 97efd34028..c427b4f548 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,4 +1,6 @@
-* Veresion 0.18.1
+* Version 0.19.0
+- Add JSON mode for most providers, with the exception of Claude.
+* Version 0.18.1
 - Fix extra argument in ~llm-batch-embeddings-async~.
 * Version 0.18.0
 - Add media handling, for images, videos, and audio.
diff --git a/README.org b/README.org
index b42088bb79..c0a18b15c8 100644
--- a/README.org
+++ b/README.org
@@ -138,7 +138,7 @@ For all callbacks, the callback will be executed in the buffer the function was
 - ~llm-chat-token-limit~.  Gets the token limit for the chat model.  This isn't possible for some backends like =llama.cpp=, in which the model isn't selected or known by this library.
 
   And the following helper functions:
-  - ~llm-make-chat-prompt text &keys context examples functions temperature max-tokens~: This is how you make prompts.  ~text~ can be a string (the user input to the llm chatbot), or a list representing a series of back-and-forth exchanges, of odd number, with the last element of the list representing the user's latest input.  This supports inputting context (also commonly called a system prompt, although it isn't guaranteed to replace the actual system prompt), examples, and other impor [...]
+  - ~llm-make-chat-prompt text &keys context examples functions temperature max-tokens response-format non-standard-params~: This is how you make prompts.  ~text~ can be a string (the user input to the llm chatbot), or a list representing a series of back-and-forth exchanges, of odd number, with the last element of the list representing the user's latest input.  This supports inputting context (also commonly called a system prompt, although it isn't guaranteed to replace the actual syste [...]
   - ~llm-chat-prompt-to-text prompt~: From a prompt, return a string representation.  This is not usually suitable for passing to LLMs, but for debugging purposes.
   - ~llm-chat-streaming-to-point provider prompt buffer point finish-callback~: Same basic arguments as ~llm-chat-streaming~, but will stream to ~point~ in ~buffer~.
   - ~llm-chat-prompt-append-response prompt response role~: Append a new response (from the user, usually) to the prompt.  The ~role~ is optional, and defaults to ~'user~.
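
A minimal usage sketch of the new `response-format' key described above (not part of the patch): `my-provider' and the prompt strings are illustrative assumptions standing in for an already-configured provider object.

  ;; Sketch only: `my-provider' stands in for any configured provider
  ;; object; the prompt and context strings are illustrative.
  (require 'llm)

  (llm-chat my-provider
            (llm-make-chat-prompt
             "List three Emacs major modes as a JSON array of strings."
             :context "Reply with JSON only."
             :response-format 'json))
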
diff --git a/llm-ollama.el b/llm-ollama.el
index b8bc9d56a5..c428e25879 100644
--- a/llm-ollama.el
+++ b/llm-ollama.el
@@ -144,6 +144,8 @@ PROVIDER is the llm-ollama provider."
     (when (llm-chat-prompt-functions prompt)
       (push `("tools" . ,(mapcar #'llm-provider-utils-openai-function-spec
                                  (llm-chat-prompt-functions prompt))) request-alist))
+    (when (eq 'json (llm-chat-prompt-response-format prompt))
+      (push `("format" . ,(llm-chat-prompt-response-format prompt)) request-alist))
     (push `("stream" . ,(if streaming t :json-false)) request-alist)
     (when (llm-chat-prompt-temperature prompt)
       (push `("temperature" . ,(llm-chat-prompt-temperature prompt)) options))
@@ -188,7 +190,7 @@ PROVIDER is the llm-ollama provider."
                                         2048))
 
 (cl-defmethod llm-capabilities ((provider llm-ollama))
-  (append '(streaming)
+  (append '(streaming json-response)
           (when (and (llm-ollama-embedding-model provider)
                      (let ((embedding-model (llm-models-match
                                              (llm-ollama-embedding-model provider))))
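
The clause added above maps the prompt's `json' response format onto Ollama's documented "format" request parameter.  A standalone sketch of the guard (not part of the patch; `fmt' stands in for (llm-chat-prompt-response-format prompt) and the starting alist is illustrative):

  (let ((request-alist '(("model" . "llama3.1")))  ; illustrative contents
        (fmt 'json))
    (when (eq 'json fmt)
      (push `("format" . ,fmt) request-alist))
    request-alist)
  ;; => (("format" . json) ("model" . "llama3.1"))
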
diff --git a/llm-openai.el b/llm-openai.el
index 492f49485f..2ddcaec2d4 100644
--- a/llm-openai.el
+++ b/llm-openai.el
@@ -195,6 +195,8 @@ STREAMING if non-nil, turn on response streaming."
                      (llm-chat-prompt-interactions prompt)))
           request-alist)
     (push `("model" . ,(llm-openai-chat-model provider)) request-alist)
+    (when (eq 'json (llm-chat-prompt-response-format prompt))
+      (push '("response_format" . (("type" . "json_object"))) request-alist))
     (when (llm-chat-prompt-temperature prompt)
       (push `("temperature" . ,(* (llm-chat-prompt-temperature prompt) 2.0)) request-alist))
     (when (llm-chat-prompt-max-tokens prompt)
@@ -294,7 +296,7 @@ RESPONSE can be nil if the response is complete."
   (llm-provider-utils-model-token-limit (llm-openai-chat-model provider)))
 
 (cl-defmethod llm-capabilities ((provider llm-openai))
-  (append '(streaming embeddings function-calls)
+  (append '(streaming embeddings function-calls json-response)
           (when-let ((model (llm-models-match (llm-openai-chat-model provider))))
             (seq-intersection (llm-model-capabilities model)
                               '(image-input)))))
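
For OpenAI, JSON mode is requested through the documented "response_format" parameter with type "json_object"; the alist pushed above renders to that fragment.  A rough illustration only, using Emacs' bundled json.el rather than whatever serialization path the library itself uses:

  (require 'json)

  ;; Shape of the fragment added to the request body.
  (json-encode '(("response_format" . (("type" . "json_object")))))
  ;; => "{\"response_format\":{\"type\":\"json_object\"}}"
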
diff --git a/llm-vertex.el b/llm-vertex.el
index 5ab10d4a87..065558c7a1 100644
--- a/llm-vertex.el
+++ b/llm-vertex.el
@@ -245,6 +245,8 @@ nothing to add, in which case it is nil."
             params-alist))
     (when (llm-chat-prompt-max-tokens prompt)
       (push `(maxOutputTokens . ,(llm-chat-prompt-max-tokens prompt)) params-alist))
+    (pcase (llm-chat-prompt-response-format prompt)
+      ('json (push '("response_mime_type" . "application/json") params-alist)))
     (when params-alist
       `((generation_config . ,params-alist)))))
 
@@ -300,7 +302,7 @@ If STREAMING is non-nil, use the URL for the streaming API."
 
 (cl-defmethod llm-capabilities ((provider llm-vertex))
   (append
-   (list 'streaming 'embeddings)
+   (list 'streaming 'embeddings 'json-response)
    (when-let ((model (llm-models-match (llm-vertex-chat-model provider)))
               (capabilities (llm-model-capabilities model)))
      (append
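
For Vertex/Gemini, the `pcase' above plays the same role as the `when (eq 'json ...)' guards in the other backends, adding "response_mime_type" to the generation config.  A standalone sketch with illustrative values (not part of the patch; `fmt' stands in for (llm-chat-prompt-response-format prompt)):

  (let ((params-alist '((maxOutputTokens . 256)))  ; illustrative existing params
        (fmt 'json))
    (pcase fmt
      ('json (push '("response_mime_type" . "application/json") params-alist)))
    `((generation_config . ,params-alist)))
  ;; => ((generation_config ("response_mime_type" . "application/json")
  ;;                        (maxOutputTokens . 256)))
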
diff --git a/llm.el b/llm.el
index 8484beec43..ba4b840842 100644
--- a/llm.el
+++ b/llm.el
@@ -55,6 +55,7 @@ for debugging, because the log buffer will grow without bound."
   :type 'boolean)
 
 (defun llm--warn-on-nonfree (name tos)
+
   "Issue a warning if `llm-warn-on-nonfree' is non-nil.
 NAME is the human readable name of the LLM (e.g \"Open AI\").
 
@@ -70,7 +71,7 @@ See %s for the details on the restrictions on use." name tos)))
   "This stores all the information needed for a structured chat prompt.
 
 Use of this directly is deprecated, instead use `llm-make-chat-prompt'."
-  context examples interactions functions temperature max-tokens non-standard-params)
+  context examples interactions functions temperature max-tokens response-format non-standard-params)
 
 (cl-defstruct llm-chat-prompt-interaction
   "This defines a single interaction given as part of a chat prompt.
@@ -229,7 +230,7 @@ instead."
   (llm-make-chat-prompt text))
 
 (cl-defun llm-make-chat-prompt (content &key context examples functions
-                                        temperature max-tokens
+                                        temperature max-tokens response-format
                                         non-standard-params)
   "Create a `llm-chat-prompt' with CONTENT sent to the LLM provider.
 
@@ -276,6 +277,12 @@ This is not required.
 
 MAX-TOKENS is the maximum number of tokens to generate.  This is optional.
 
+If RESPONSE-FORMAT is `json' (currently the only accepted value), we
+will attempt to force output to fit the format.  This should not be
+used with function calling.  If this is set, the instructions to the
+LLM should tell the model about the format, for example by including
+examples or describing the schema.
+
 CONTEXT, EXAMPLES, FUNCTIONS, TEMPERATURE, and MAX-TOKENS are
 usually turned into part of the interaction, and if so, they will
 be put in the first interaction of the prompt (before anything in
@@ -302,6 +309,7 @@ cdrs can be strings or numbers.  This is optional."
    :functions functions
    :temperature temperature
    :max-tokens max-tokens
+   :response-format response-format
    :non-standard-params non-standard-params))
 
 (defun llm-chat-prompt-append-response (prompt response &optional role)
@@ -536,6 +544,9 @@ won't have any partial responses, so basically just operates like
 
 `image-input': the LLM can accept images as input.
 
+`json-response': the LLM can be requested to return responses only in
+JSON format.
+
 `video-input': the LLM can accept video as input.
 
 `audio-input': the LLM can accept audio as input."
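
Putting the pieces together, callers can gate JSON mode on the new `json-response' capability advertised by `llm-capabilities'.  The helper below is hypothetical (not part of the library); it assumes the reply is a single JSON value and parses it with json.el.

  (require 'llm)
  (require 'json)

  (defun my-llm-chat-json (provider text)
    "Hypothetical helper: ask PROVIDER for JSON-only output to TEXT and parse it."
    (unless (member 'json-response (llm-capabilities provider))
      (error "This provider does not advertise `json-response'"))
    (json-read-from-string
     (llm-chat provider
               (llm-make-chat-prompt
                text
                :context "Answer with a single JSON object and nothing else."
                :response-format 'json))))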


