[elpa] externals/llm f7b8bea9c4 2/2: Make sure every model, even unknown ones, have some context size (#54)
From: ELPA Syncer
Subject: [elpa] externals/llm f7b8bea9c4 2/2: Make sure every model, even unknown ones, have some context size (#54)
Date: Fri, 26 Jul 2024 00:58:30 -0400 (EDT)
branch: externals/llm
commit f7b8bea9c4f84bb5a73d4e67b92321f772f71e02
Author: Andrew Hyatt <ahyatt@gmail.com>
Commit: GitHub <noreply@github.com>
Make sure every model, even unknown ones, have some context size (#54)
* Enable CI workflow to work on all branches
* Add default ollama context size
* Make sure everything else has a default
* Add NEWS entry
---
NEWS.org | 2 ++
llm-openai.el | 5 ++++-
llm-provider-utils.el | 5 ++++-
llm-test.el | 21 +++++++++++++++------
llm-vertex.el | 6 ++----
5 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/NEWS.org b/NEWS.org
index d9250adbae..35e6820887 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,3 +1,5 @@
+* Version 0.17.1
+- Make sure every model, even unknown models, return some value for ~llm-chat-token-limit~.
* Version 0.17.0
- Introduced =llm-prompt= for prompt management and creation from generators.
- Removed Gemini and Vertex token counting, because =llm-prompt= uses token
diff --git a/llm-openai.el b/llm-openai.el
index 0aba6ecd5c..e650f12474 100644
--- a/llm-openai.el
+++ b/llm-openai.el
@@ -259,7 +259,7 @@ RESPONSE can be nil if the response is complete."
;; models, but not for 32k models.
(+ (* n 1024) (if (= n 16) 1 0))))
((equal model "gpt-4") 8192)
- ((equal model "gpt-4o") 30000)
+ ((string-match-p "gpt-4o" model) 30000)
((string-match-p (rx (seq "gpt-4-" (+ ascii) "-preview")) model)
128000)
((string-match-p (rx (seq "gpt-4-" (+ digit))) model)
@@ -270,6 +270,9 @@ RESPONSE can be nil if the response is complete."
4096)
(t 4096))))
+(cl-defmethod llm-chat-token-limit ((provider llm-openai-compatible))
+  (llm-provider-utils-model-token-limit (llm-openai-chat-model provider)))
+
(cl-defmethod llm-capabilities ((_ llm-openai))
(list 'streaming 'embeddings 'function-calls))
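For illustration only (not part of the commit), a minimal sketch of how the broadened gpt-4o match and the OpenAI fallback behave; the values come from the tests below and the model names are hypothetical:
    ;; Any chat model whose name contains "gpt-4o" now gets the 30000 limit,
    ;; and unrecognized models fall back to the default 4096.
    (llm-chat-token-limit (make-llm-openai :chat-model "gpt-4o-mini"))    ; => 30000
    (llm-chat-token-limit (make-llm-openai :chat-model "some-new-model")) ; => 4096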
diff --git a/llm-provider-utils.el b/llm-provider-utils.el
index ad715b9757..f7a07f8b61 100644
--- a/llm-provider-utils.el
+++ b/llm-provider-utils.el
@@ -432,7 +432,10 @@ conversation history will follow."
((string-match-p "orca" model) 2048)
((string-match-p "llama\s*2" model) 4096)
((string-match-p "llama" model) 2048)
- ((string-match-p "starcoder" model) 8192))))
+ ((string-match-p "starcoder" model) 8192)
+ ((string-match-p "gemma" model) 8192)
+ ;; default to the smallest context window, 2048
+ (t 2048))))
(defun llm-provider-utils-openai-arguments (args)
"Convert ARGS to the Open AI function calling spec.
diff --git a/llm-test.el b/llm-test.el
index 45777d3704..63fc12acc8 100644
--- a/llm-test.el
+++ b/llm-test.el
@@ -81,7 +81,8 @@
(cl-flet* ((token-limit-for (model)
(llm-chat-token-limit (make-llm-openai :chat-model model)))
(should-have-token-limit (model limit)
- (should (equal limit (token-limit-for model)))))
+ (ert-info ((format "Testing %s" model))
+ (should (equal limit (token-limit-for model))))))
;; From https://platform.openai.com/docs/models/gpt-3-5
(should-have-token-limit "gpt-3.5-turbo-1106" 16385)
(should-have-token-limit "gpt-3.5-turbo" 4096)
@@ -98,31 +99,39 @@
(should-have-token-limit "gpt-4-32k" 32768)
(should-have-token-limit "gpt-4-0613" 8192)
(should-have-token-limit "gpt-4-32k-0613" 32768)
- (should-have-token-limit "gpt-4o" 30000)))
+ (should-have-token-limit "gpt-4o" 30000)
+ (should-have-token-limit "gpt-4o-mini" 30000)
+ (should-have-token-limit "unknown" 4096)))
(ert-deftest llm-test-chat-token-limit-gemini ()
(should (= 30720 (llm-chat-token-limit (make-llm-gemini))))
(should (= 12288 (llm-chat-token-limit
(make-llm-gemini :chat-model "gemini-pro-vision"))))
(should (= 1048576 (llm-chat-token-limit
- (make-llm-gemini :chat-model "gemini-1.5-flash")))))
+ (make-llm-gemini :chat-model "gemini-1.5-flash"))))
+ (should (= 2048 (llm-chat-token-limit
+ (make-llm-gemini :chat-model "unknown")))))
(ert-deftest llm-test-chat-token-limit-vertex ()
(should (= 30720 (llm-chat-token-limit (make-llm-vertex))))
(should (= 12288 (llm-chat-token-limit
(make-llm-vertex :chat-model "gemini-pro-vision"))))
(should (= 1048576 (llm-chat-token-limit
- (make-llm-gemini :chat-model "gemini-1.5-flash")))))
+ (make-llm-gemini :chat-model "gemini-1.5-flash"))))
+ (should (= 2048 (llm-chat-token-limit
+ (make-llm-vertex :chat-model "unknown")))))
(ert-deftest llm-test-chat-token-limit-ollama ()
;; The code is straightforward, so no need to test all the models.
(should (= 8192 (llm-chat-token-limit
- (make-llm-ollama :chat-model "mistral:latest")))))
+ (make-llm-ollama :chat-model "mistral:latest"))))
+ (should (= 2048 (llm-chat-token-limit
+ (make-llm-ollama :chat-model "unknown")))))
(ert-deftest llm-test-chat-token-limit-gpt4all ()
;; The code is straightforward, so no need to test all the models.
(should (= 8192 (llm-chat-token-limit
- (make-llm-ollama :chat-model "Mistral")))))
+ (make-llm-gpt4all :chat-model "Mistral")))))
(provide 'llm-test)
;;; llm-test.el ends here
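The new cases can be exercised with ERT as usual; for example (a hedged sketch, not part of the commit):
    ;; Run just the token-limit tests, selecting them by name.
    (require 'ert)
    (ert-run-tests-batch "llm-test-chat-token-limit-")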
diff --git a/llm-vertex.el b/llm-vertex.el
index 60b5e3027d..0cecb86a32 100644
--- a/llm-vertex.el
+++ b/llm-vertex.el
@@ -283,10 +283,8 @@ If STREAMING is non-nil, use the URL for the streaming API."
(cond ((equal "gemini-pro" model) 30720)
((equal "gemini-pro-vision" model) 12288)
((string-match-p (rx (seq "gemini-1.5")) model) 1048576)
- ;; This shouldn't happen unless there's a new model, which could be a
- ;; smaller or larger model. We'll play it safe and choose a reasonable
- ;; number.
- (t 4096)))
+ ;; Vertex can run different models, so check the standard model names.
+ (t (llm-provider-utils-model-token-limit model))))
(cl-defmethod llm-chat-token-limit ((provider llm-vertex))
(llm-vertex--chat-token-limit (llm-vertex-chat-model provider)))
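Putting it together, a minimal sketch of the Vertex fallback path after this change (the model name is illustrative; the expected value matches the llm-test-chat-token-limit-vertex case above):
    ;; An unrecognized Vertex model no longer gets a hard-coded 4096; it falls
    ;; through to llm-provider-utils-model-token-limit, whose default is 2048.
    (llm-chat-token-limit (make-llm-vertex :chat-model "unknown")) ; => 2048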