[elpa] externals/llm 97933359cb: Add llm-chat-token-limit
From: ELPA Syncer
Subject: [elpa] externals/llm 97933359cb: Add llm-chat-token-limit
Date: Fri, 12 Jan 2024 19:02:34 -0500 (EST)
branch: externals/llm
commit 97933359cb4f1bf4b03ded5ae43ea3360b818e77
Author: Andrew Hyatt <ahyatt@gmail.com>
Commit: Andrew Hyatt <ahyatt@gmail.com>
Add llm-chat-token-limit
We return different token limits based on the model in use.
---
NEWS.org | 2 ++
llm-gemini.el | 4 ++++
llm-gpt4all.el | 4 ++++
llm-ollama.el | 3 +++
llm-openai.el | 21 +++++++++++++++++++++
llm-provider-utils.el | 14 ++++++++++++++
llm-test.el | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
llm-vertex.el | 12 ++++++++++++
llm.el | 8 ++++++++
9 files changed, 117 insertions(+), 1 deletion(-)
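
As an illustrative sketch of how a caller might use the new generic (the
constructors and expected limits follow the test cases in llm-test.el below;
`my-prompt-fits-p' is a hypothetical helper invented only for this example):

;; Rough guard on prompt size using the provider's reported token limit.
;; The 4-characters-per-token estimate is a coarse heuristic, not the
;; provider's real tokenizer.
(require 'llm)
(require 'llm-openai)

(defun my-prompt-fits-p (provider text)
  "Return non-nil if TEXT is likely to fit within PROVIDER's context window."
  (< (/ (length text) 4) (llm-chat-token-limit provider)))

;; (llm-chat-token-limit (make-llm-openai :chat-model "gpt-4"))     ;; => 8192
;; (llm-chat-token-limit (make-llm-openai :chat-model "gpt-4-32k")) ;; => 32768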
diff --git a/NEWS.org b/NEWS.org
index a66d21bbfd..12d46bea89 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,3 +1,5 @@
+* Version 0.9
+- Add =llm-chat-token-limit= to find the token limit based on the model.
* Version 0.8
- Allow users to change the Open AI URL, to allow for proxies and other
services that re-use the API.
- Add =llm-name= and =llm-cancel-request= to the API.
diff --git a/llm-gemini.el b/llm-gemini.el
index f228ece065..07b7aaa093 100644
--- a/llm-gemini.el
+++ b/llm-gemini.el
@@ -125,6 +125,10 @@ You can get this at
https://makersuite.google.com/app/apikey."
"Return the name of PROVIDER."
"Gemini")
+;; From https://ai.google.dev/models/gemini.
+(cl-defmethod llm-chat-token-limit ((provider llm-gemini))
+ (llm-vertex--chat-token-limit (llm-gemini-chat-model provider)))
+
(provide 'llm-gemini)
;;; llm-gemini.el ends here
diff --git a/llm-gpt4all.el b/llm-gpt4all.el
index 6019f08f0a..b4f97b3ab3 100644
--- a/llm-gpt4all.el
+++ b/llm-gpt4all.el
@@ -33,6 +33,7 @@
(require 'llm)
(require 'llm-request)
(require 'llm-openai)
+(require 'llm-provider-utils)
(cl-defstruct llm-gpt4all
"A structure for holding information needed by GPT4All.
@@ -85,6 +86,9 @@ default the default GPT4all port."
"Return the name of the provider."
(llm-gpt4all-chat-model provider))
+(cl-defmethod llm-chat-token-limit ((provider llm-gpt4all))
+ (llm-provider-utils-model-token-limit (llm-gpt4all-chat-model provider)))
+
(provide 'llm-gpt4all)
;;; llm-gpt4all.el ends here
diff --git a/llm-ollama.el b/llm-ollama.el
index ab1b73c09a..101d963d6d 100644
--- a/llm-ollama.el
+++ b/llm-ollama.el
@@ -210,6 +210,9 @@ STREAMING if non-nil, turn on response streaming."
(cl-defmethod llm-name ((provider llm-ollama))
(llm-ollama-chat-model provider))
+(cl-defmethod llm-chat-token-limit ((provider llm-ollama))
+ (llm-provider-utils-model-token-limit (llm-ollama-chat-model provider)))
+
(provide 'llm-ollama)
;;; llm-ollama.el ends here
diff --git a/llm-openai.el b/llm-openai.el
index 00a4b2c0e9..341275c9c8 100644
--- a/llm-openai.el
+++ b/llm-openai.el
@@ -268,6 +268,27 @@ them from 1 to however many are sent.")
(cl-defmethod llm-name ((_ llm-openai))
"Open AI")
+;; See https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
+;; and https://platform.openai.com/docs/models/gpt-3-5.
+(cl-defmethod llm-chat-token-limit ((provider llm-openai))
+ (let ((model (llm-openai-chat-model provider)))
+ (cond
+ ((string-match (rx (seq (or ?- ?_) (group-n 1 (+ digit)) ?k)) model)
+ (let ((n (string-to-number (match-string 1 model))))
+ ;; This looks weird but Open AI really has an extra token for 16k
+ ;; models, but not for 32k models.
+ (+ (* n 1024) (if (= n 16) 1 0))))
+ ((equal model "gpt-4") 8192)
+ ((string-match-p (rx (seq "gpt-4-" (+ ascii) "-preview")) model)
+ 128000)
+ ((string-match-p (rx (seq "gpt-4-" (+ digit))) model)
+ 8192)
+ ((string-match-p (rx (seq "gpt-3.5-turbo-1" (+ digit))) model)
+ 16385)
+ ((string-match-p (rx (seq "gpt-3.5-turbo" (opt "-instruct"))) model)
+ 4096)
+ (t 4096))))
+
(provide 'llm-openai)
;;; llm-openai.el ends here
diff --git a/llm-provider-utils.el b/llm-provider-utils.el
index 9059ad5291..7365eebcff 100644
--- a/llm-provider-utils.el
+++ b/llm-provider-utils.el
@@ -100,6 +100,20 @@ things. Providers should probably issue a warning when
using this."
"\n\nThe current conversation follows:\n\n"
(llm-chat-prompt-interaction-content (car (last
(llm-chat-prompt-interactions prompt))))))))))
+(defun llm-provider-utils-model-token-limit (model)
+ "Return the token limit for MODEL."
+ (let ((model (downcase model)))
+ (cond
+ ((string-match-p "mistral-7b" model) 8192)
+ ((string-match-p "mistral" model) 8192)
+ ((string-match-p "mixtral-45b" model) 131072)
+ ((string-match-p "mixtral" model) 131072)
+ ((string-match-p "falcon" model) 2048)
+ ((string-match-p "orca 2" model) 4096)
+ ((string-match-p "orca" model) 2048)
+ ((string-match-p "llama\s*2" model) 4096)
+ ((string-match-p "llama" model) 2048)
+ ((string-match-p "starcoder" model) 8192))))
(provide 'llm-provider-utils)
;;; llm-provider-utils.el ends here
diff --git a/llm-test.el b/llm-test.el
index 4179439fdf..b06f410221 100644
--- a/llm-test.el
+++ b/llm-test.el
@@ -28,6 +28,11 @@
(require 'llm)
(require 'llm-fake)
(require 'ert)
+(require 'llm-openai)
+(require 'llm-gemini)
+(require 'llm-vertex)
+(require 'llm-ollama)
+(require 'llm-gpt4all)
(ert-deftest llm-test-embedding ()
(should-error (llm-embedding nil "Test"))
@@ -51,6 +56,49 @@
(should (equal
"Response"
(llm-chat (make-llm-fake :chat-action-func (lambda () "Response"))
- (make-llm-chat-prompt)))))
+ (make-llm-chat-prompt)))))
+
+(ert-deftest llm-test-chat-token-limit-openai ()
+ (cl-flet* ((token-limit-for (model)
+ (llm-chat-token-limit (make-llm-openai :chat-model model)))
+ (should-have-token-limit (model limit)
+ (should (equal limit (token-limit-for model)))))
+ ;; From https://platform.openai.com/docs/models/gpt-3-5
+ (should-have-token-limit "gpt-3.5-turbo-1106" 16385)
+ (should-have-token-limit "gpt-3.5-turbo" 4096)
+ (should-have-token-limit "gpt-3.5-turbo-16k" 16385)
+ (should-have-token-limit "gpt-3.5-turbo-instruct" 4096)
+ (should-have-token-limit "gpt-3.5-turbo-0613" 4096)
+ (should-have-token-limit "gpt-3.5-turbo-16k-0613" 16385)
+ (should-have-token-limit "gpt-3.5-turbo-0301" 4096)
+ (should-have-token-limit "unknown" 4096)
+ ;; From https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
+ (should-have-token-limit "gpt-4-1106-preview" 128000)
+ (should-have-token-limit "gpt-4-vision-preview" 128000)
+ (should-have-token-limit "gpt-4" 8192)
+ (should-have-token-limit "gpt-4-32k" 32768)
+ (should-have-token-limit "gpt-4-0613" 8192)
+ (should-have-token-limit "gpt-4-32k-0613" 32768)))
+
+(ert-deftest llm-test-chat-token-limit-gemini ()
+ (should (= 30720 (llm-chat-token-limit (make-llm-gemini))))
+ (should (= 12288 (llm-chat-token-limit
+ (make-llm-gemini :chat-model "gemini-pro-vision")))))
+
+(ert-deftest llm-test-chat-token-limit-vertex ()
+ (should (= 30720 (llm-chat-token-limit (make-llm-vertex))))
+ (should (= 12288 (llm-chat-token-limit
+ (make-llm-vertex :chat-model "gemini-pro-vision")))))
+
+(ert-deftest llm-test-chat-token-limit-ollama ()
+ ;; The code is straightforward, so no need to test all the models.
+ (should (= 8192 (llm-chat-token-limit
+ (make-llm-ollama :chat-model "mistral:latest")))))
+
+(ert-deftest llm-test-chat-token-limit-gpt4all ()
+ ;; The code is straightforward, so no need to test all the models.
+ (should (= 8192 (llm-chat-token-limit
+ (make-llm-gpt4all :chat-model "Mistral")))))
+
;;; llm-test.el ends here
diff --git a/llm-vertex.el b/llm-vertex.el
index 42c499dfe7..87e4465cab 100644
--- a/llm-vertex.el
+++ b/llm-vertex.el
@@ -294,6 +294,18 @@ MODEL "
(cl-defmethod llm-name ((_ llm-vertex))
"Gemini")
+(defun llm-vertex--chat-token-limit (model)
+ "Get token limit for MODEL."
+ (cond ((equal "gemini-pro" model) 30720)
+ ((equal "gemini-pro-vision" model) 12288)
+ ;; This shouldn't happen unless there's a new model, which could be a
+ ;; smaller or larger model. We'll play it safe and choose a reasonable
+ ;; number.
+ (t 4096)))
+
+(cl-defmethod llm-chat-token-limit ((provider llm-vertex))
+ (llm-vertex--chat-token-limit (llm-vertex-chat-model provider)))
+
(provide 'llm-vertex)
;;; llm-vertex.el ends here
diff --git a/llm.el b/llm.el
index b0a5998e4a..ae59017bcd 100644
--- a/llm.el
+++ b/llm.el
@@ -233,6 +233,14 @@ be passed to `llm-cancel-request'."
(when-let (info (llm-nonfree-message-info provider))
(llm--warn-on-nonfree (car info) (cdr info))))
+(cl-defgeneric llm-chat-token-limit (provider)
+ "Return max number of tokens that can be sent to the LLM.
+For many models we know this number, but for some we don't have
+enough information to know. In those cases we return a default
+value that should be a reasonable lower bound."
+ (ignore provider)
+ 2048)
+
(cl-defgeneric llm-embedding (provider string)
"Return a vector embedding of STRING from PROVIDER."
(ignore provider string)
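
The default defined above means a provider with no specialized
`llm-chat-token-limit' method reports 2048.  A minimal sketch, assuming a
hypothetical `my-local-llm' struct defined only for this example:

;; Any object without a more specific method falls through to the
;; cl-defgeneric default body, which returns 2048.
(require 'cl-lib)
(require 'llm)

(cl-defstruct my-local-llm)

;; (llm-chat-token-limit (make-my-local-llm)) ;; => 2048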