[elpa] externals/llm f7b8bea9c4 2/2: Make sure every model, even unknown ones, have some context size (#54)
From: ELPA Syncer
Subject: [elpa] externals/llm f7b8bea9c4 2/2: Make sure every model, even unknown ones, have some context size (#54)
Date: Fri, 26 Jul 2024 00:58:30 -0400 (EDT)
branch: externals/llm
commit f7b8bea9c4f84bb5a73d4e67b92321f772f71e02
Author: Andrew Hyatt <ahyatt@gmail.com>
Commit: GitHub <noreply@github.com>
Make sure every model, even unknown ones, have some context size (#54)
* Enable CI workflow to work on all branches
* Add default ollama context size
* Make sure everything else has a default
* Add NEWS entry
---
NEWS.org | 2 ++
llm-openai.el | 5 ++++-
llm-provider-utils.el | 5 ++++-
llm-test.el | 21 +++++++++++++++------
llm-vertex.el | 6 ++----
5 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/NEWS.org b/NEWS.org
index d9250adbae..35e6820887 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,3 +1,5 @@
+* Version 0.17.1
+- Make sure every model, even unknown models, return some value for ~llm-chat-token-limit~.
* Version 0.17.0
- Introduced =llm-prompt= for prompt management and creation from generators.
- Removed Gemini and Vertex token counting, because =llm-prompt= uses token
diff --git a/llm-openai.el b/llm-openai.el
index 0aba6ecd5c..e650f12474 100644
--- a/llm-openai.el
+++ b/llm-openai.el
@@ -259,7 +259,7 @@ RESPONSE can be nil if the response is complete."
;; models, but not for 32k models.
(+ (* n 1024) (if (= n 16) 1 0))))
((equal model "gpt-4") 8192)
- ((equal model "gpt-4o") 30000)
+ ((string-match-p "gpt-4o" model) 30000)
((string-match-p (rx (seq "gpt-4-" (+ ascii) "-preview")) model)
128000)
((string-match-p (rx (seq "gpt-4-" (+ digit))) model)
@@ -270,6 +270,9 @@ RESPONSE can be nil if the response is complete."
4096)
(t 4096))))
+(cl-defmethod llm-chat-token-limit ((provider llm-openai-compatible))
+  (llm-provider-utils-model-token-limit (llm-openai-chat-model provider)))
+
(cl-defmethod llm-capabilities ((_ llm-openai))
(list 'streaming 'embeddings 'function-calls))
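For illustration only (not part of the commit), a minimal sketch of how the broadened gpt-4o match and the OpenAI fallback behave; the values come from the tests below and the model names are hypothetical:
    ;; Any chat model whose name contains "gpt-4o" now gets the 30000 limit,
    ;; and unrecognized models fall back to the default 4096.
    (llm-chat-token-limit (make-llm-openai :chat-model "gpt-4o-mini"))    ; => 30000
    (llm-chat-token-limit (make-llm-openai :chat-model "some-new-model")) ; => 4096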
diff --git a/llm-provider-utils.el b/llm-provider-utils.el
index ad715b9757..f7a07f8b61 100644
--- a/llm-provider-utils.el
+++ b/llm-provider-utils.el
@@ -432,7 +432,10 @@ conversation history will follow."
((string-match-p "orca" model) 2048)
((string-match-p "llama\s*2" model) 4096)
((string-match-p "llama" model) 2048)
- ((string-match-p "starcoder" model) 8192))))
+ ((string-match-p "starcoder" model) 8192)
+ ((string-match-p "gemma" model) 8192)
+ ;; default to the smallest context window, 2048
+ (t 2048))))
(defun llm-provider-utils-openai-arguments (args)
"Convert ARGS to the Open AI function calling spec.
diff --git a/llm-test.el b/llm-test.el
index 45777d3704..63fc12acc8 100644
--- a/llm-test.el
+++ b/llm-test.el
@@ -81,7 +81,8 @@
(cl-flet* ((token-limit-for (model)
(llm-chat-token-limit (make-llm-openai :chat-model model)))
(should-have-token-limit (model limit)
- (should (equal limit (token-limit-for model)))))
+ (ert-info ((format "Testing %s" model))
+ (should (equal limit (token-limit-for model))))))
;; From https://platform.openai.com/docs/models/gpt-3-5
(should-have-token-limit "gpt-3.5-turbo-1106" 16385)
(should-have-token-limit "gpt-3.5-turbo" 4096)
@@ -98,31 +99,39 @@
(should-have-token-limit "gpt-4-32k" 32768)
(should-have-token-limit "gpt-4-0613" 8192)
(should-have-token-limit "gpt-4-32k-0613" 32768)
- (should-have-token-limit "gpt-4o" 30000)))
+ (should-have-token-limit "gpt-4o" 30000)
+ (should-have-token-limit "gpt-4o-mini" 30000)
+ (should-have-token-limit "unknown" 4096)))
(ert-deftest llm-test-chat-token-limit-gemini ()
(should (= 30720 (llm-chat-token-limit (make-llm-gemini))))
(should (= 12288 (llm-chat-token-limit
(make-llm-gemini :chat-model "gemini-pro-vision"))))
(should (= 1048576 (llm-chat-token-limit
- (make-llm-gemini :chat-model "gemini-1.5-flash")))))
+ (make-llm-gemini :chat-model "gemini-1.5-flash"))))
+ (should (= 2048 (llm-chat-token-limit
+ (make-llm-gemini :chat-model "unknown")))))
(ert-deftest llm-test-chat-token-limit-vertex ()
(should (= 30720 (llm-chat-token-limit (make-llm-vertex))))
(should (= 12288 (llm-chat-token-limit
(make-llm-vertex :chat-model "gemini-pro-vision"))))
(should (= 1048576 (llm-chat-token-limit
- (make-llm-gemini :chat-model "gemini-1.5-flash")))))
+ (make-llm-gemini :chat-model "gemini-1.5-flash"))))
+ (should (= 2048 (llm-chat-token-limit
+ (make-llm-vertex :chat-model "unknown")))))
(ert-deftest llm-test-chat-token-limit-ollama ()
;; The code is straightforward, so no need to test all the models.
(should (= 8192 (llm-chat-token-limit
- (make-llm-ollama :chat-model "mistral:latest")))))
+ (make-llm-ollama :chat-model "mistral:latest"))))
+ (should (= 2048 (llm-chat-token-limit
+ (make-llm-ollama :chat-model "unknown")))))
(ert-deftest llm-test-chat-token-limit-gpt4all ()
;; The code is straightforward, so no need to test all the models.
(should (= 8192 (llm-chat-token-limit
- (make-llm-ollama :chat-model "Mistral")))))
+ (make-llm-gpt4all :chat-model "Mistral")))))
(provide 'llm-test)
;;; llm-test.el ends here
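The new cases can be exercised with ERT as usual; for example (a hedged sketch, not part of the commit):
    ;; Run just the token-limit tests, selecting them by name.
    (require 'ert)
    (ert-run-tests-batch "llm-test-chat-token-limit-")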
diff --git a/llm-vertex.el b/llm-vertex.el
index 60b5e3027d..0cecb86a32 100644
--- a/llm-vertex.el
+++ b/llm-vertex.el
@@ -283,10 +283,8 @@ If STREAMING is non-nil, use the URL for the streaming API."
(cond ((equal "gemini-pro" model) 30720)
((equal "gemini-pro-vision" model) 12288)
((string-match-p (rx (seq "gemini-1.5")) model) 1048576)
- ;; This shouldn't happen unless there's a new model, which could be a
- ;; smaller or larger model. We'll play it safe and choose a reasonable
- ;; number.
- (t 4096)))
+ ;; Vertex can run different models, so check the standard model names.
+ (t (llm-provider-utils-model-token-limit model))))
(cl-defmethod llm-chat-token-limit ((provider llm-vertex))
(llm-vertex--chat-token-limit (llm-vertex-chat-model provider)))
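Putting it together, a minimal sketch of the Vertex fallback path after this change (the model name is illustrative; the expected value matches the llm-test-chat-token-limit-vertex case above):
    ;; An unrecognized Vertex model no longer gets a hard-coded 4096; it falls
    ;; through to llm-provider-utils-model-token-limit, whose default is 2048.
    (llm-chat-token-limit (make-llm-vertex :chat-model "unknown")) ; => 2048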