Mirror of https://github.com/AgentOps-AI/tokencost.git, synced 2024-06-22 04:30:40 +03:00.
feat: ✨ Implement calculate_all_costs_and_tokens + revert to the original calculate_prompt_cost and calculate_completion_cost functions
Changed files include README.md (10 additions, 10 deletions); the diff below also touches the llama_index callback handler and the cost-calculation module.
@@ -43,8 +43,8 @@ model = "gpt-3.5-turbo"
 prompt = [{ "role": "user", "content": "Hello world"}]
 completion = "How may I assist you today?"
 
-prompt_cost, prompt_tokens = calculate_prompt_cost(prompt, model)
-completion_cost, completion_tokens = calculate_completion_cost(completion, model)
+prompt_cost = calculate_prompt_cost(prompt, model)
+completion_cost = calculate_completion_cost(completion, model)
 
 print(f"{prompt_cost} + {completion_cost} = {prompt_cost + completion_cost}")
 # 0.0000135 + 0.000014 = 0.0000275
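A note on migration: with this hunk, both functions return a bare Decimal again, so callers that unpacked a (cost, tokens) tuple will break. A minimal sketch of the two options, using only functions that appear in this commit:

```python
from tokencost import (
    calculate_prompt_cost,
    calculate_completion_cost,
    calculate_all_costs_and_tokens,
)

model = "gpt-3.5-turbo"
prompt = [{"role": "user", "content": "Hello world"}]
completion = "How may I assist you today?"

# Before this commit (now broken):
# prompt_cost, prompt_tokens = calculate_prompt_cost(prompt, model)

# After: each function returns a single Decimal cost.
prompt_cost = calculate_prompt_cost(prompt, model)
completion_cost = calculate_completion_cost(completion, model)

# If token counts are still needed, the new helper returns both at once.
estimates = calculate_all_costs_and_tokens(prompt, completion, model)
print(estimates["prompt_cost"], estimates["prompt_tokens"])
```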
@@ -76,8 +76,8 @@ chat_completion = client.chat.completions.create(
 completion = chat_completion.choices[0].message.content
 # "This is a test."
 
-prompt_cost, prompt_tokens = calculate_prompt_cost(prompt, model)
-completion_cost, completion_tokens = calculate_completion_cost(completion, model)
+prompt_cost = calculate_prompt_cost(prompt, model)
+completion_cost = calculate_completion_cost(completion, model)
 print(f"{prompt_cost} + {completion_cost} = {prompt_cost + completion_cost}")
 # 0.0000180 + 0.000010 = 0.0000280
 ```
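Aside: when the live OpenAI response already reports exact usage, those counts can be priced directly with calculate_cost_by_tokens, whose signature appears in the costs.py hunk further down. A sketch, assuming the function is exported from the package top level like the helpers above:

```python
from tokencost import calculate_cost_by_tokens

# chat_completion and model as in the README snippet above;
# usage reflects the tokens the API actually billed for this call.
usage = chat_completion.usage
prompt_cost = calculate_cost_by_tokens(usage.prompt_tokens, model, "input")
completion_cost = calculate_cost_by_tokens(usage.completion_tokens, model, "output")
print(f"Billed: ${prompt_cost + completion_cost}")
```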
@@ -90,7 +90,7 @@ prompt_string = "Hello world"
 response = "How may I assist you today?"
 model = "gpt-3.5-turbo"
 
-prompt_cost, prompt_tokens = calculate_prompt_cost(prompt_string, model)
+prompt_cost = calculate_prompt_cost(prompt_string, model)
 print(f"Cost: ${prompt_cost}")
 # Cost: $3e-06
 ```
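If only token counts are needed, the counting helpers this commit leans on, count_string_tokens and count_message_tokens, can be used on their own. A brief sketch, assuming they are importable from the package like the cost functions (note the README examples above price the same content at 9 tokens as a message list vs 2 as a raw string, since message counting includes per-message overhead):

```python
from tokencost import count_string_tokens, count_message_tokens

model = "gpt-3.5-turbo"
# 2 tokens, matching the calculate_all_costs_and_tokens docstring below.
print(count_string_tokens("Hello world", model))
# Counted as a chat message, with per-message overhead included.
print(count_message_tokens([{"role": "user", "content": "Hello world"}], model))
```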
@@ -2,7 +2,7 @@ from typing import Any, Dict, List, Optional, cast
 from llama_index.core.callbacks.base_handler import BaseCallbackHandler
 from llama_index.core.callbacks.schema import CBEventType, EventPayload
 from llama_index.core.llms import ChatMessage
-from tokencost import calculate_prompt_cost, calculate_completion_cost
+from tokencost import calculate_all_costs_and_tokens
 
 
 class TokenCostHandler(BaseCallbackHandler):
@@ -30,27 +30,23 @@ class TokenCostHandler(BaseCallbackHandler):
         if EventPayload.PROMPT in payload:
             prompt = str(payload.get(EventPayload.PROMPT))
             completion = str(payload.get(EventPayload.COMPLETION))
-            prompt_cost, prompt_tokens = calculate_prompt_cost(prompt, self.model)
-            completion_cost, completion_tokens = calculate_completion_cost(
-                completion, self.model
-            )
+            estimates = calculate_all_costs_and_tokens(prompt, completion, self.model)
 
         elif EventPayload.MESSAGES in payload:
            messages = cast(List[ChatMessage], payload.get(EventPayload.MESSAGES, []))
            messages_str = "\n".join([str(x) for x in messages])
-            prompt_cost, prompt_tokens = calculate_prompt_cost(messages_str, self.model)
            response = str(payload.get(EventPayload.RESPONSE))
-            completion_cost, completion_tokens = calculate_completion_cost(
-                response, self.model
+            estimates = calculate_all_costs_and_tokens(
+                messages_str, response, self.model
             )
 
-        self.prompt_cost += prompt_cost
-        self.completion_cost += completion_cost
-        self.prompt_tokens += prompt_tokens
-        self.completion_tokens += completion_tokens
+        self.prompt_cost += estimates["prompt_cost"]
+        self.completion_cost += estimates["completion_cost"]
+        self.prompt_tokens += estimates["prompt_tokens"]
+        self.completion_tokens += estimates["completion_tokens"]
 
-        print(f"# Prompt cost: {prompt_cost}")
-        print(f"# Completion: {completion_cost}")
+        print(f"# Prompt cost: {estimates['prompt_cost']}")
+        print(f"# Completion: {estimates['completion_cost']}")
         print("\n")
 
     def reset_counts(self) -> None:
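For context on how the handler above is consumed: llama_index callback handlers are registered on a CallbackManager. A wiring sketch; the TokenCostHandler import path and constructor signature are assumptions here (the hunk only shows that the handler stores self.model):

```python
from llama_index.core import Settings
from llama_index.core.callbacks import CallbackManager

# Hypothetical import path; the diff does not name the module.
from tokencost.callbacks.llama_index import TokenCostHandler

# Assumed constructor: the hunk reads self.model, so the handler
# presumably receives the model name when it is created.
handler = TokenCostHandler(model="gpt-3.5-turbo")
Settings.callback_manager = CallbackManager([handler])

# LLM events routed through llama_index now accumulate
# handler.prompt_cost / handler.completion_cost as shown above.
```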
@@ -131,9 +131,7 @@ def calculate_cost_by_tokens(num_tokens: int, model: str, token_type: str) -> Decimal:
     return Decimal(str(cost_per_token)) * Decimal(num_tokens)
 
 
-def calculate_prompt_cost(
-    prompt: Union[List[dict], str], model: str
-) -> Tuple[Decimal, int]:
+def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> Decimal:
     """
     Calculate the prompt's cost in USD.
 
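Since the restored signature returns a Decimal rather than a tuple, downstream code can keep the arithmetic exact and convert only at display time. A small sketch:

```python
from decimal import Decimal
from tokencost import calculate_prompt_cost

cost = calculate_prompt_cost("Hello world", "gpt-3.5-turbo")
assert isinstance(cost, Decimal)

# Decimal avoids float rounding on tiny per-token prices;
# format at the edge instead of converting early.
print(f"Cost: ${cost:.10f}")
```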
@@ -142,7 +140,7 @@ def calculate_prompt_cost(
         model (str): The model name.
 
     Returns:
-        Tuple[Decimal, int]: The calculated cost in USD and number of tokens.
+        Decimal: The calculated cost in USD.
 
     e.g.:
     >>> prompt = [{ "role": "user", "content": "Hello world"},
@@ -173,10 +171,10 @@ def calculate_prompt_cost(
         else count_message_tokens(prompt, model)
     )
 
-    return calculate_cost_by_tokens(prompt_tokens, model, "input"), prompt_tokens
+    return calculate_cost_by_tokens(prompt_tokens, model, "input")
 
 
-def calculate_completion_cost(completion: str, model: str) -> Tuple[Decimal, int]:
+def calculate_completion_cost(completion: str, model: str) -> Decimal:
     """
     Calculate the completion's cost in USD.
 
@@ -185,7 +183,7 @@ def calculate_completion_cost(completion: str, model: str) -> Tuple[Decimal, int]:
         model (str): The model name.
 
     Returns:
-        Tuple[Decimal, int]: The calculated cost in USD and number of tokens.
+        Decimal: The calculated cost in USD.
 
     e.g.:
     >>> completion = "How may I assist you today?"
@@ -200,6 +198,41 @@ def calculate_completion_cost(completion: str, model: str) -> Tuple[Decimal, int]:
     )
     completion_tokens = count_string_tokens(completion, model)
 
-    return calculate_cost_by_tokens(
-        completion_tokens, model, "output"
-    ), completion_tokens
+    return calculate_cost_by_tokens(completion_tokens, model, "output")
+
+
+def calculate_all_costs_and_tokens(
+    prompt: Union[List[dict], str], completion: str, model: str
+) -> dict:
+    """
+    Calculate the prompt and completion costs (in USD) and token counts.
+
+    Args:
+        prompt (Union[List[dict], str]): List of message objects or single string prompt.
+        completion (str): Completion string.
+        model (str): The model name.
+
+    Returns:
+        dict: The calculated costs in USD and token counts.
+
+    e.g.:
+    >>> prompt = "Hello world"
+    >>> completion = "How may I assist you today?"
+    >>> calculate_all_costs_and_tokens(prompt, completion, "gpt-3.5-turbo")
+    {'prompt_cost': Decimal('0.0000030'), 'prompt_tokens': 2, 'completion_cost': Decimal('0.000014'), 'completion_tokens': 7}
+    """
+    prompt_cost = calculate_prompt_cost(prompt, model)
+    completion_cost = calculate_completion_cost(completion, model)
+    prompt_tokens = (
+        count_string_tokens(prompt, model)
+        if isinstance(prompt, str)
+        else count_message_tokens(prompt, model)
+    )
+    completion_tokens = count_string_tokens(completion, model)
+
+    return {
+        "prompt_cost": prompt_cost,
+        "prompt_tokens": prompt_tokens,
+        "completion_cost": completion_cost,
+        "completion_tokens": completion_tokens,
+    }
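Rounding out the new helper: because it returns a plain dict, per-exchange estimates aggregate naturally. A usage sketch built only from the docstring above:

```python
from decimal import Decimal
from tokencost import calculate_all_costs_and_tokens

exchanges = [
    ("Hello world", "How may I assist you today?"),
    ("What is 2 + 2?", "2 + 2 equals 4."),
]

total_cost = Decimal("0")
total_tokens = 0
for prompt, completion in exchanges:
    est = calculate_all_costs_and_tokens(prompt, completion, "gpt-3.5-turbo")
    total_cost += est["prompt_cost"] + est["completion_cost"]
    total_tokens += est["prompt_tokens"] + est["completion_tokens"]

print(f"{total_tokens} tokens, ${total_cost}")
```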