[elpa] externals/llm 97933359cb: Add llm-chat-token-limit
From: ELPA Syncer
Subject: [elpa] externals/llm 97933359cb: Add llm-chat-token-limit
Date: Fri, 12 Jan 2024 19:02:34 -0500 (EST)
branch: externals/llm
commit 97933359cb4f1bf4b03ded5ae43ea3360b818e77
Author: Andrew Hyatt <ahyatt@gmail.com>
Commit: Andrew Hyatt <ahyatt@gmail.com>
Add llm-chat-token-limit
We return different token limits based on the model in use.
---
NEWS.org | 2 ++
llm-gemini.el | 4 ++++
llm-gpt4all.el | 4 ++++
llm-ollama.el | 3 +++
llm-openai.el | 21 +++++++++++++++++++++
llm-provider-utils.el | 14 ++++++++++++++
llm-test.el | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
llm-vertex.el | 12 ++++++++++++
llm.el | 8 ++++++++
9 files changed, 117 insertions(+), 1 deletion(-)
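
As an illustrative sketch of how a caller might use the new generic (the
constructors and expected limits follow the test cases in llm-test.el below;
`my-prompt-fits-p' is a hypothetical helper invented only for this example):

;; Rough guard on prompt size using the provider's reported token limit.
;; The 4-characters-per-token estimate is a coarse heuristic, not the
;; provider's real tokenizer.
(require 'llm)
(require 'llm-openai)

(defun my-prompt-fits-p (provider text)
  "Return non-nil if TEXT is likely to fit within PROVIDER's context window."
  (< (/ (length text) 4) (llm-chat-token-limit provider)))

;; (llm-chat-token-limit (make-llm-openai :chat-model "gpt-4"))     ;; => 8192
;; (llm-chat-token-limit (make-llm-openai :chat-model "gpt-4-32k")) ;; => 32768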
diff --git a/NEWS.org b/NEWS.org
index a66d21bbfd..12d46bea89 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,3 +1,5 @@
+* Version 0.9
+- Add =llm-chat-token-limit= to find the token limit based on the model.
* Version 0.8
- Allow users to change the Open AI URL, to allow for proxies and other
services that re-use the API.
- Add =llm-name= and =llm-cancel-request= to the API.
diff --git a/llm-gemini.el b/llm-gemini.el
index f228ece065..07b7aaa093 100644
--- a/llm-gemini.el
+++ b/llm-gemini.el
@@ -125,6 +125,10 @@ You can get this at
https://makersuite.google.com/app/apikey."
"Return the name of PROVIDER."
"Gemini")
+;; From https://ai.google.dev/models/gemini.
+(cl-defmethod llm-chat-token-limit ((provider llm-gemini))
+ (llm-vertex--chat-token-limit (llm-gemini-chat-model provider)))
+
(provide 'llm-gemini)
;;; llm-gemini.el ends here
diff --git a/llm-gpt4all.el b/llm-gpt4all.el
index 6019f08f0a..b4f97b3ab3 100644
--- a/llm-gpt4all.el
+++ b/llm-gpt4all.el
@@ -33,6 +33,7 @@
(require 'llm)
(require 'llm-request)
(require 'llm-openai)
+(require 'llm-provider-utils)
(cl-defstruct llm-gpt4all
"A structure for holding information needed by GPT4All.
@@ -85,6 +86,9 @@ default the default GPT4all port."
"Return the name of the provider."
(llm-gpt4all-chat-model provider))
+(cl-defmethod llm-chat-token-limit ((provider llm-gpt4all))
+ (llm-provider-utils-model-token-limit (llm-gpt4all-chat-model provider)))
+
(provide 'llm-gpt4all)
;;; llm-gpt4all.el ends here
diff --git a/llm-ollama.el b/llm-ollama.el
index ab1b73c09a..101d963d6d 100644
--- a/llm-ollama.el
+++ b/llm-ollama.el
@@ -210,6 +210,9 @@ STREAMING if non-nil, turn on response streaming."
(cl-defmethod llm-name ((provider llm-ollama))
(llm-ollama-chat-model provider))
+(cl-defmethod llm-chat-token-limit ((provider llm-ollama))
+ (llm-provider-utils-model-token-limit (llm-ollama-chat-model provider)))
+
(provide 'llm-ollama)
;;; llm-ollama.el ends here
diff --git a/llm-openai.el b/llm-openai.el
index 00a4b2c0e9..341275c9c8 100644
--- a/llm-openai.el
+++ b/llm-openai.el
@@ -268,6 +268,27 @@ them from 1 to however many are sent.")
(cl-defmethod llm-name ((_ llm-openai))
"Open AI")
+;; See https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
+;; and https://platform.openai.com/docs/models/gpt-3-5.
+(cl-defmethod llm-chat-token-limit ((provider llm-openai))
+ (let ((model (llm-openai-chat-model provider)))
+ (cond
+ ((string-match (rx (seq (or ?- ?_) (group-n 1 (+ digit)) ?k)) model)
+ (let ((n (string-to-number (match-string 1 model))))
+ ;; This looks weird but Open AI really has an extra token for 16k
+ ;; models, but not for 32k models.
+ (+ (* n 1024) (if (= n 16) 1 0))))
+ ((equal model "gpt-4") 8192)
+ ((string-match-p (rx (seq "gpt-4-" (+ ascii) "-preview")) model)
+ 128000)
+ ((string-match-p (rx (seq "gpt-4-" (+ digit))) model)
+ 8192)
+ ((string-match-p (rx (seq "gpt-3.5-turbo-1" (+ digit))) model)
+ 16385)
+ ((string-match-p (rx (seq "gpt-3.5-turbo" (opt "-instruct"))) model)
+ 4096)
+ (t 4096))))
+
(provide 'llm-openai)
;;; llm-openai.el ends here
diff --git a/llm-provider-utils.el b/llm-provider-utils.el
index 9059ad5291..7365eebcff 100644
--- a/llm-provider-utils.el
+++ b/llm-provider-utils.el
@@ -100,6 +100,20 @@ things. Providers should probably issue a warning when
using this."
"\n\nThe current conversation follows:\n\n"
(llm-chat-prompt-interaction-content (car (last
(llm-chat-prompt-interactions prompt))))))))))
+(defun llm-provider-utils-model-token-limit (model)
+ "Return the token limit for MODEL."
+ (let ((model (downcase model)))
+ (cond
+ ((string-match-p "mistral-7b" model) 8192)
+ ((string-match-p "mistral" model) 8192)
+ ((string-match-p "mixtral-45b" model) 131072)
+ ((string-match-p "mixtral" model) 131072)
+ ((string-match-p "falcon" model) 2048)
+ ((string-match-p "orca 2" model) 4096)
+ ((string-match-p "orca" model) 2048)
+ ((string-match-p "llama\s*2" model) 4096)
+ ((string-match-p "llama" model) 2048)
+ ((string-match-p "starcoder" model) 8192))))
(provide 'llm-provider-utils)
;;; llm-provider-utils.el ends here
diff --git a/llm-test.el b/llm-test.el
index 4179439fdf..b06f410221 100644
--- a/llm-test.el
+++ b/llm-test.el
@@ -28,6 +28,11 @@
(require 'llm)
(require 'llm-fake)
(require 'ert)
+(require 'llm-openai)
+(require 'llm-gemini)
+(require 'llm-vertex)
+(require 'llm-ollama)
+(require 'llm-gpt4all)
(ert-deftest llm-test-embedding ()
(should-error (llm-embedding nil "Test"))
@@ -51,6 +56,49 @@
(should (equal
"Response"
(llm-chat (make-llm-fake :chat-action-func (lambda () "Response"))
- (make-llm-chat-prompt)))))
+ (make-llm-chat-prompt)))))
+
+(ert-deftest llm-test-chat-token-limit-openai ()
+ (cl-flet* ((token-limit-for (model)
+ (llm-chat-token-limit (make-llm-openai :chat-model model)))
+ (should-have-token-limit (model limit)
+ (should (equal limit (token-limit-for model)))))
+ ;; From https://platform.openai.com/docs/models/gpt-3-5
+ (should-have-token-limit "gpt-3.5-turbo-1106" 16385)
+ (should-have-token-limit "gpt-3.5-turbo" 4096)
+ (should-have-token-limit "gpt-3.5-turbo-16k" 16385)
+ (should-have-token-limit "gpt-3.5-turbo-instruct" 4096)
+ (should-have-token-limit "gpt-3.5-turbo-0613" 4096)
+ (should-have-token-limit "gpt-3.5-turbo-16k-0613" 16385)
+ (should-have-token-limit "gpt-3.5-turbo-0301" 4096)
+ (should-have-token-limit "unknown" 4096)
+ ;; From https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
+ (should-have-token-limit "gpt-4-1106-preview" 128000)
+ (should-have-token-limit "gpt-4-vision-preview" 128000)
+ (should-have-token-limit "gpt-4" 8192)
+ (should-have-token-limit "gpt-4-32k" 32768)
+ (should-have-token-limit "gpt-4-0613" 8192)
+ (should-have-token-limit "gpt-4-32k-0613" 32768)))
+
+(ert-deftest llm-test-chat-token-limit-gemini ()
+ (should (= 30720 (llm-chat-token-limit (make-llm-gemini))))
+ (should (= 12288 (llm-chat-token-limit
+ (make-llm-gemini :chat-model "gemini-pro-vision")))))
+
+(ert-deftest llm-test-chat-token-limit-vertex ()
+ (should (= 30720 (llm-chat-token-limit (make-llm-vertex))))
+ (should (= 12288 (llm-chat-token-limit
+ (make-llm-vertex :chat-model "gemini-pro-vision")))))
+
+(ert-deftest llm-test-chat-token-limit-ollama ()
+ ;; The code is straightforward, so no need to test all the models.
+ (should (= 8192 (llm-chat-token-limit
+ (make-llm-ollama :chat-model "mistral:latest")))))
+
+(ert-deftest llm-test-chat-token-limit-gpt4all ()
+ ;; The code is straightforward, so no need to test all the models.
+ (should (= 8192 (llm-chat-token-limit
+ (make-llm-gpt4all :chat-model "Mistral")))))
+
;;; llm-test.el ends here
diff --git a/llm-vertex.el b/llm-vertex.el
index 42c499dfe7..87e4465cab 100644
--- a/llm-vertex.el
+++ b/llm-vertex.el
@@ -294,6 +294,18 @@ MODEL "
(cl-defmethod llm-name ((_ llm-vertex))
"Gemini")
+(defun llm-vertex--chat-token-limit (model)
+ "Get token limit for MODEL."
+ (cond ((equal "gemini-pro" model) 30720)
+ ((equal "gemini-pro-vision" model) 12288)
+ ;; This shouldn't happen unless there's a new model, which could be a
+ ;; smaller or larger model. We'll play it safe and choose a reasonable
+ ;; number.
+ (t 4096)))
+
+(cl-defmethod llm-chat-token-limit ((provider llm-vertex))
+ (llm-vertex--chat-token-limit (llm-vertex-chat-model provider)))
+
(provide 'llm-vertex)
;;; llm-vertex.el ends here
diff --git a/llm.el b/llm.el
index b0a5998e4a..ae59017bcd 100644
--- a/llm.el
+++ b/llm.el
@@ -233,6 +233,14 @@ be passed to `llm-cancel-request'."
(when-let (info (llm-nonfree-message-info provider))
(llm--warn-on-nonfree (car info) (cdr info))))
+(cl-defgeneric llm-chat-token-limit (provider)
+ "Return max number of tokens that can be sent to the LLM.
+For many models we know this number, but for some we don't have
+enough information to know. In those cases we return a default
+value that should be a reasonable lower bound."
+ (ignore provider)
+ 2048)
+
(cl-defgeneric llm-embedding (provider string)
"Return a vector embedding of STRING from PROVIDER."
(ignore provider string)
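
The default defined above means a provider with no specialized
`llm-chat-token-limit' method reports 2048.  A minimal sketch, assuming a
hypothetical `my-local-llm' struct defined only for this example:

;; Any object without a more specific method falls through to the
;; cl-defgeneric default body, which returns 2048.
(require 'cl-lib)
(require 'llm)

(cl-defstruct my-local-llm)

;; (llm-chat-token-limit (make-my-local-llm)) ;; => 2048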