mirror of
https://github.com/AgentOps-AI/tokencost.git
synced 2024-06-22 04:30:40 +03:00
remove TPUs
This commit is contained in:
15
README.md
15
README.md
@@ -25,8 +25,7 @@ prompt_cost = calculate_prompt_cost(prompt, model)
|
||||
completion_cost = calculate_completion_cost(completion, model)
|
||||
|
||||
print(f"{prompt_cost} + {completion_cost} = {prompt_cost + completion_cost}")
|
||||
# 135 + 140 = 275 ($0.0000275)
|
||||
# Priced in TPUs (token price units), which is 1/100,000,000th of a USD.
|
||||
# TODO:
|
||||
```
|
||||
|
||||
## Installation
|
||||
@@ -58,23 +57,19 @@ completion = chat_completion.choices[0].message.content
|
||||
prompt_cost = calculate_prompt_cost(prompt, model)
|
||||
completion_cost = calculate_completion_cost(completion, model)
|
||||
print(f"{prompt_cost} + {completion_cost} = {prompt_cost + completion_cost}")
|
||||
# 1800 + 1000 = 2800 ($0.0000280)
|
||||
|
||||
from tokencost import USD_PER_TPU
|
||||
print(f"Cost USD: ${(prompt_cost + completion_cost)/USD_PER_TPU}")
|
||||
# $2.8e-05
|
||||
# TODO:
|
||||
```
|
||||
|
||||
**Calculating cost using string prompts instead of messages:**
|
||||
```python
|
||||
from tokencost import calculate_prompt_cost, USD_PER_TPU
|
||||
from tokencost import calculate_prompt_cost
|
||||
|
||||
prompt_string = "Hello world"
|
||||
response = "How may I assist you today?"
|
||||
model= "gpt-3.5-turbo"
|
||||
|
||||
prompt_cost = calculate_prompt_cost(prompt_string, model)
|
||||
print(f"Cost: ${prompt_cost/USD_PER_TPU}")
|
||||
print(f"Cost: ${prompt_cost}")
|
||||
# Cost: $3e-06
|
||||
```
|
||||
|
||||
@@ -95,7 +90,7 @@ print(count_string_tokens(prompt="Hello world", model="gpt-3.5-turbo"))
|
||||
```
|
||||
|
||||
## Cost table
|
||||
Units denominated in TPUs (Token Price Units = 1/10,000,000 USD). All prices can be located in `model_prices.yaml`.
|
||||
Units denominated in USD. All prices can be located in `model_prices.json`.
|
||||
|
||||
| Model Name | Prompt Cost (USD) | Completion Cost (USD) | Max Prompt Tokens |
|
||||
| --- | --- | --- | --- |
|
||||
|
||||
@@ -5,7 +5,6 @@ from llama_index.callbacks.schema import CBEventType, EventPayload
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
# Mock the calculate_prompt_cost and calculate_completion_cost functions
|
||||
# and the USD_PER_TPU constant
|
||||
|
||||
STRING = "Hello, world!"
|
||||
|
||||
@@ -14,7 +13,6 @@ STRING = "Hello, world!"
|
||||
def mock_tokencost(monkeypatch):
|
||||
monkeypatch.setattr('tokencost.calculate_prompt_cost', MagicMock(return_value=100))
|
||||
monkeypatch.setattr('tokencost.calculate_completion_cost', MagicMock(return_value=200))
|
||||
monkeypatch.setattr('tokencost.USD_PER_TPU', 10)
|
||||
|
||||
# Mock the ChatMessage class
|
||||
|
||||
|
||||
@@ -4,4 +4,4 @@ from .costs import (
|
||||
calculate_completion_cost,
|
||||
calculate_prompt_cost,
|
||||
)
|
||||
from .constants import TOKEN_COSTS, USD_PER_TPU
|
||||
from .constants import TOKEN_COSTS
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from typing import Any, Dict, List, Optional, cast
|
||||
from llama_index.callbacks.base_handler import BaseCallbackHandler
|
||||
from llama_index.callbacks.schema import CBEventType, EventPayload
|
||||
from tokencost import calculate_prompt_cost, calculate_completion_cost, USD_PER_TPU
|
||||
from tokencost import calculate_prompt_cost, calculate_completion_cost
|
||||
|
||||
|
||||
class TokenCostHandler(BaseCallbackHandler):
|
||||
@@ -29,15 +29,15 @@ class TokenCostHandler(BaseCallbackHandler):
|
||||
if EventPayload.PROMPT in payload:
|
||||
prompt = str(payload.get(EventPayload.PROMPT))
|
||||
completion = str(payload.get(EventPayload.COMPLETION))
|
||||
prompt_cost = calculate_prompt_cost(prompt, self.model) / USD_PER_TPU
|
||||
completion_cost = calculate_completion_cost(completion, self.model) / USD_PER_TPU
|
||||
prompt_cost = calculate_prompt_cost(prompt, self.model)
|
||||
completion_cost = calculate_completion_cost(completion, self.model)
|
||||
|
||||
elif EventPayload.MESSAGES in payload:
|
||||
messages = cast(List[ChatMessage], payload.get(EventPayload.MESSAGES, []))
|
||||
messages_str = "\n".join([str(x) for x in messages])
|
||||
prompt_cost = calculate_prompt_cost(messages_str, self.model) / USD_PER_TPU
|
||||
prompt_cost = calculate_prompt_cost(messages_str, self.model)
|
||||
response = str(payload.get(EventPayload.RESPONSE))
|
||||
completion_cost = calculate_completion_cost(response, self.model) / USD_PER_TPU
|
||||
completion_cost = calculate_completion_cost(response, self.model)
|
||||
|
||||
print(f"# Prompt cost: {prompt_cost}")
|
||||
print(f"# Completion: {completion_cost}")
|
||||
|
||||
@@ -11,15 +11,11 @@ https://platform.openai.com/tokenizer
|
||||
|
||||
Note: When asking follow-up questions, everything above and including your follow-up question
|
||||
is considered a prompt (for the purpose of context) and will thus cost prompt tokens.
|
||||
|
||||
1 Token Price Unit (TPU) is defined as 1/100,000,000 of $1 (USD). 1,000,000 TPUs would equate to $0.01.
|
||||
"""
|
||||
|
||||
USD_PER_TPU = 100_000_000
|
||||
|
||||
# How to read TOKEN_COSTS:
|
||||
# Each prompt token costs __ TPUs per token.
|
||||
# Each completion token costs __ TPUs per token.
|
||||
# Each prompt token costs __ USD per token.
|
||||
# Each completion token costs __ USD per token.
|
||||
# Max prompt limit of each model is __ tokens.
|
||||
|
||||
with open(os.path.join(os.path.dirname(__file__), "model_prices.yaml"), "r") as f:
|
||||
|
||||
@@ -4,6 +4,7 @@ Costs dictionary and utility tool for counting tokens
|
||||
import tiktoken
|
||||
from typing import Union, List, Dict
|
||||
from .constants import TOKEN_COSTS
|
||||
from decimal import Decimal
|
||||
|
||||
|
||||
# TODO: Add Claude support
|
||||
@@ -90,17 +91,16 @@ def count_string_tokens(prompt: str, model: str) -> int:
|
||||
return len(encoding.encode(prompt))
|
||||
|
||||
|
||||
def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> int:
|
||||
def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> Decimal:
|
||||
"""
|
||||
Calculate the prompt's cost in token price units (TPU). 1 TPU = $1/10,000,000.
|
||||
e.g. 100,000 TPUs = $0.01.
|
||||
Calculate the prompt's cost in USD.
|
||||
|
||||
Args:
|
||||
prompt (Union[List[dict], str]): List of message objects or single string prompt.
|
||||
model (str): The model name.
|
||||
|
||||
Returns:
|
||||
int: The calculated cost in TPUs.
|
||||
Decimal: The calculated cost in USD.
|
||||
|
||||
e.g.:
|
||||
>>> prompt = [{ "role": "user", "content": "Hello world"},
|
||||
@@ -110,7 +110,7 @@ def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> int:
|
||||
# or
|
||||
>>> prompt = "Hello world"
|
||||
>>> calculate_prompt_cost(prompt, "gpt-3.5-turbo")
|
||||
30
|
||||
TODO:
|
||||
"""
|
||||
model = model.lower()
|
||||
if model not in TOKEN_COSTS:
|
||||
@@ -131,25 +131,24 @@ def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> int:
|
||||
)
|
||||
prompt_cost = TOKEN_COSTS[model]["prompt"]
|
||||
|
||||
return prompt_cost * prompt_tokens
|
||||
return Decimal(prompt_cost) * Decimal(prompt_tokens)
|
||||
|
||||
|
||||
def calculate_completion_cost(completion: str, model: str) -> int:
|
||||
def calculate_completion_cost(completion: str, model: str) -> Decimal:
|
||||
"""
|
||||
Calculate the prompt's cost in token price units (TPU). 1 TPU = $1/10,000,000.
|
||||
e.g. 100,000 TPUs = $0.01.
|
||||
Calculate the completion's cost in USD.
|
||||
|
||||
Args:
|
||||
completion (str): Completion string.
|
||||
model (str): The model name.
|
||||
|
||||
Returns:
|
||||
int: The calculated cost in TPUs.
|
||||
Decimal: The calculated cost in USD.
|
||||
|
||||
e.g.:
|
||||
>>> completion = "How may I assist you today?"
|
||||
>>> calculate_completion_cost(completion, "gpt-3.5-turbo")
|
||||
140
|
||||
TODO:
|
||||
"""
|
||||
if model not in TOKEN_COSTS:
|
||||
raise KeyError(
|
||||
@@ -159,4 +158,4 @@ def calculate_completion_cost(completion: str, model: str) -> int:
|
||||
completion_tokens = count_string_tokens(completion, model)
|
||||
completion_cost = TOKEN_COSTS[model]["completion"]
|
||||
|
||||
return completion_cost * completion_tokens
|
||||
return Decimal(completion_cost) * Decimal(completion_tokens)
|
||||
|
||||
Reference in New Issue
Block a user