[elpa] externals/llm c79cc43384 1/2: Add variable llm-prompt-default-max-tokens (#79)
From: ELPA Syncer
Subject: [elpa] externals/llm c79cc43384 1/2: Add variable llm-prompt-default-max-tokens (#79)
Date: Sat, 7 Sep 2024 00:58:17 -0400 (EDT)
branch: externals/llm
commit c79cc4338408dfe6f164978450cc9ab8bd9cc295
Author: Andrew Hyatt <ahyatt@gmail.com>
Commit: GitHub <noreply@github.com>
Add variable llm-prompt-default-max-tokens (#79)
This can add a cap on the number of tokens to use, regardless of the
model.
---
NEWS.org | 1 +
README.org | 4 +++-
llm-prompt-test.el | 13 +++++++++++++
llm-prompt.el | 31 +++++++++++++++++++++++++------
4 files changed, 42 insertions(+), 7 deletions(-)
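
Before the diff itself, a brief illustration of what the commit message describes: the new variable is a hard token cap that applies no matter how large the model's context window is. This is a hedged sketch, not part of the patch; the value 2048 is an arbitrary example.

  ;; Arbitrary example value: never fill a prompt with more than 2048 tokens,
  ;; even for models with very large context windows.
  (setq llm-prompt-default-max-tokens 2048)

  ;; The default is nil, which keeps the old behavior: only
  ;; llm-prompt-default-max-pct (a percentage of the context window) applies.
  (setq llm-prompt-default-max-tokens nil)
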
diff --git a/NEWS.org b/NEWS.org
index f72ee35b80..ff8ee89372 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,6 +1,7 @@
* Version 0.17.4
- Fix problem with Open AI's =llm-chat-token-limit=.
- Fix Open AI and Gemini's parallel function calling.
+- Add variable =llm-prompt-default-max-tokens= to put a cap on number of tokens regardless of model size.
* Version 0.17.3
- More fixes with Claude and Ollama function calling conversation, thanks to Paul Nelson.
- Make =llm-chat-streaming-to-point= more efficient, just inserting new text, thanks to Paul Nelson.
diff --git a/README.org b/README.org
index ee89558fcb..320fb37833 100644
--- a/README.org
+++ b/README.org
@@ -223,7 +223,9 @@ subsequent terms. The variable ~llm-prompt-default-max-pct~ controls how much o
the context window we want to fill. The way we estimate the number of tokens
used is quick but inaccurate, so limiting to less than the maximum context size
is useful for guarding against a miscount leading to an error calling the LLM
-due to too many tokens.
+due to too many tokens. If you want to have a hard limit as well that doesn't
+depend on the context window size, you can use ~llm-prompt-default-max-tokens~.
+We will use the minimum of either value.
Variables are enclosed in double curly braces, like this: ={{instructions}}=.
They can just be the variable, or they can also denote a number of tickets, like
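
To make the "minimum of either value" rule above concrete, here is some illustrative arithmetic mirroring the llm-prompt--max-tokens helper added later in this patch; the 8192-token context window is a made-up figure, not one of the package's models.

  ;; Hypothetical provider with an 8192-token context window and the
  ;; default llm-prompt-default-max-pct of 50:
  (min (or nil 8192)  (* 0.50 8192))   ; => 4096.0  only the percentage cap applies
  (min (or 1000 8192) (* 0.50 8192))   ; => 1000    the hard cap is smaller, so it wins
  (min (or 6000 8192) (* 0.50 8192))   ; => 4096.0  the percentage cap is still the minimum
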
diff --git a/llm-prompt-test.el b/llm-prompt-test.el
index 535946e880..1a472174f5 100644
--- a/llm-prompt-test.el
+++ b/llm-prompt-test.el
@@ -181,6 +181,19 @@ to converge."
:var1 '("this is a completely oversized item"
"a" "b" "c" "d")))))
+(ert-deftest llm-prompt--max-tokens ()
+ (cl-flet ((should-have-max-tokens (expected max-pct max-tokens)
+ (let ((llm-prompt-default-max-pct max-pct)
+ (llm-prompt-default-max-tokens max-tokens))
+ (should (equal expected (llm-prompt--max-tokens
+ (make-prompt-test-llm)))))))
+ ;; The test LLM has a 20 token limit
+ (should-have-max-tokens 10 50 nil)
+ (should-have-max-tokens 20 100 nil)
+ (should-have-max-tokens 5 50 5)
+ (should-have-max-tokens 10 50 10)
+ (should-have-max-tokens 10 50 20)))
+
(provide 'llm-prompt-test)
;;; llm-prompt-test.el ends here
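
The expected values in the test above follow directly from the minimum rule against the test provider's 20-token limit: 50% of 20 is 10, 100% of 20 is 20, hard caps of 5 and 10 undercut the 10-token percentage cap, and a cap of 20 does not. To run just this test after loading the package and the test file, something like the following should work (an assumption about your local setup, not part of the patch):

  ;; Interactively, with llm-prompt.el and llm-prompt-test.el loaded:
  (require 'ert)
  (ert "llm-prompt--max-tokens")   ; runs only tests whose names match this regexp
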
diff --git a/llm-prompt.el b/llm-prompt.el
index cd66673022..9ada2486b9 100644
--- a/llm-prompt.el
+++ b/llm-prompt.el
@@ -63,11 +63,21 @@
:group 'llm)
(defcustom llm-prompt-default-max-pct 50
- "The default mode for all new notes.
+ "Default max percentage of context window to use for a prompt.
+The minimum of this and `llm-prompt-default-max-tokens' will be
+used. For an example, at the time of this writing, using Claude
+3.5 Sonnet will cost, at 50% tokens, $0.30 USD.
Using 100% or close to it is not recommended, as space is needed
-for conversation."
- :type 'integer)
+for conversation, and token counting is not exact."
+ :type 'integer
+ :group 'llm-prompt)
+
+(defcustom llm-prompt-default-max-tokens nil
+ "The default maximum number of tokens to use for a prompt.
+Set to nil to use `llm-prompt-default-max-pct' instead."
+ :type 'integer
+ :group 'llm-prompt)
(cl-defstruct llm-prompt piece text truncator)
@@ -90,7 +100,7 @@ arguments with other tickets. If not specified, it's assumed
that this will have as many tickets as the rest of all the other
arguments put together. If no one specifies the number of
tickets, we will pull evenly (but randomly) into each of the
-variables until we reach `prompt-default-max-pct'."
+variables until we reach the desired context window size."
(declare (indent defun))
`(puthash (quote ,name) ,text llm-prompt-prompts))
@@ -161,6 +171,16 @@ executed with no arguments to return an iterator."
(t (iter-lambda () (dolist (el var)
(iter-yield el)))))))
+(defun llm-prompt--max-tokens (provider)
+ "Return the maximum number of tokens to use for a prompt.
+PROVIDER is the provider which will be used, and which has a
+maximum number of tokens."
+ (floor
+ (min (or llm-prompt-default-max-tokens
+ (llm-chat-token-limit provider))
+ (* (/ llm-prompt-default-max-pct 100.0)
+ (llm-chat-token-limit provider)))))
+
(defun llm-prompt-fill-text (text provider &rest keys)
"Fill TEXT prompt, with the llm PROVIDER, values from KEYS.
@@ -217,8 +237,7 @@ a function, it should return values via a generator."
vars))))
(condition-case nil
(while (< total-tokens
- (* (/ llm-prompt-default-max-pct 100.0)
- (llm-chat-token-limit provider)))
+ (llm-prompt--max-tokens provider))
(let* ((val-cons (iter-next ticket-gen))
(sval (format "%s" (cdr val-cons))))
;; Only add if there is space, otherwise we ignore this value.
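
Finally, a hedged sketch of how the new helper behaves with the 20-token test provider; this evaluation is illustrative and assumes llm-prompt-test.el (which defines make-prompt-test-llm) is loaded.

  (let ((llm-prompt-default-max-pct 50)
        (llm-prompt-default-max-tokens 8))
    (llm-prompt--max-tokens (make-prompt-test-llm)))
  ;; => 8, because (floor (min 8 (* 0.5 20))) is 8.  With the hard cap left
  ;; at nil, the same call returns 10, i.e. 50% of the 20-token limit, and
  ;; llm-prompt-fill-text now stops pulling ticket values at that bound.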