remove TPUs

This commit is contained in:
reibs
2024-01-30 19:35:32 -08:00
parent 931619272c
commit b62bc1fb6c
6 changed files with 24 additions and 36 deletions

View File

@@ -25,8 +25,7 @@ prompt_cost = calculate_prompt_cost(prompt, model)
completion_cost = calculate_completion_cost(completion, model)
print(f"{prompt_cost} + {completion_cost} = {prompt_cost + completion_cost}")
# 135 + 140 = 275 ($0.0000275)
# Priced in TPUs (token price units), which is 1/100,000,000th of a USD.
# TODO:
```
## Installation
@@ -58,23 +57,19 @@ completion = chat_completion.choices[0].message.content
prompt_cost = calculate_prompt_cost(prompt, model)
completion_cost = calculate_completion_cost(completion, model)
print(f"{prompt_cost} + {completion_cost} = {prompt_cost + completion_cost}")
# 1800 + 1000 = 2800 ($0.0000280)
from tokencost import USD_PER_TPU
print(f"Cost USD: ${(prompt_cost + completion_cost)/USD_PER_TPU}")
# $2.8e-05
# TODO:
```
**Calculating cost using string prompts instead of messages:**
```python
from tokencost import calculate_prompt_cost, USD_PER_TPU
from tokencost import calculate_prompt_cost
prompt_string = "Hello world"
response = "How may I assist you today?"
model= "gpt-3.5-turbo"
prompt_cost = calculate_prompt_cost(prompt_string, model)
print(f"Cost: ${prompt_cost/USD_PER_TPU}")
print(f"Cost: ${prompt_cost}")
# Cost: $3e-06
```
@@ -95,7 +90,7 @@ print(count_string_tokens(prompt="Hello world", model="gpt-3.5-turbo"))
```
## Cost table
Units denominated in TPUs (Token Price Units = 1/10,000,000 USD). All prices can be located in `model_prices.yaml`.
Units denominated in USD. All prices can be located in `model_prices.json`.
| Model Name | Prompt Cost (USD) | Completion Cost (USD) | Max Prompt Tokens |
| --- | --- | --- | --- |

View File

@@ -5,7 +5,6 @@ from llama_index.callbacks.schema import CBEventType, EventPayload
from unittest.mock import MagicMock
# Mock the calculate_prompt_cost and calculate_completion_cost functions
# and the USD_PER_TPU constant
STRING = "Hello, world!"
@@ -14,7 +13,6 @@ STRING = "Hello, world!"
def mock_tokencost(monkeypatch):
monkeypatch.setattr('tokencost.calculate_prompt_cost', MagicMock(return_value=100))
monkeypatch.setattr('tokencost.calculate_completion_cost', MagicMock(return_value=200))
monkeypatch.setattr('tokencost.USD_PER_TPU', 10)
# Mock the ChatMessage class

View File

@@ -4,4 +4,4 @@ from .costs import (
calculate_completion_cost,
calculate_prompt_cost,
)
from .constants import TOKEN_COSTS, USD_PER_TPU
from .constants import TOKEN_COSTS

View File

@@ -1,7 +1,7 @@
from typing import Any, Dict, List, Optional, cast
from llama_index.callbacks.base_handler import BaseCallbackHandler
from llama_index.callbacks.schema import CBEventType, EventPayload
from tokencost import calculate_prompt_cost, calculate_completion_cost, USD_PER_TPU
from tokencost import calculate_prompt_cost, calculate_completion_cost
class TokenCostHandler(BaseCallbackHandler):
@@ -29,15 +29,15 @@ class TokenCostHandler(BaseCallbackHandler):
if EventPayload.PROMPT in payload:
prompt = str(payload.get(EventPayload.PROMPT))
completion = str(payload.get(EventPayload.COMPLETION))
prompt_cost = calculate_prompt_cost(prompt, self.model) / USD_PER_TPU
completion_cost = calculate_completion_cost(completion, self.model) / USD_PER_TPU
prompt_cost = calculate_prompt_cost(prompt, self.model)
completion_cost = calculate_completion_cost(completion, self.model)
elif EventPayload.MESSAGES in payload:
messages = cast(List[ChatMessage], payload.get(EventPayload.MESSAGES, []))
messages_str = "\n".join([str(x) for x in messages])
prompt_cost = calculate_prompt_cost(messages_str, self.model) / USD_PER_TPU
prompt_cost = calculate_prompt_cost(messages_str, self.model)
response = str(payload.get(EventPayload.RESPONSE))
completion_cost = calculate_completion_cost(response, self.model) / USD_PER_TPU
completion_cost = calculate_completion_cost(response, self.model)
print(f"# Prompt cost: {prompt_cost}")
print(f"# Completion: {completion_cost}")

View File

@@ -11,15 +11,11 @@ https://platform.openai.com/tokenizer
Note: When asking follow-up questions, everything above and including your follow-up question
is considered a prompt (for the purpose of context) and will thus cost prompt tokens.
1 Token Price Unit (TPU) is defined as 1/100,000,000 of $1 (USD). 1,000,000 TPUs would equate to $0.01.
"""
USD_PER_TPU = 100_000_000
# How to read TOKEN_COSTS:
# Each prompt token costs __ TPUs per token.
# Each completion token costs __ TPUs per token.
# Each prompt token costs __ USD per token.
# Each completion token costs __ USD per token.
# Max prompt limit of each model is __ tokens.
with open(os.path.join(os.path.dirname(__file__), "model_prices.yaml"), "r") as f:

View File

@@ -4,6 +4,7 @@ Costs dictionary and utility tool for counting tokens
import tiktoken
from typing import Union, List, Dict
from .constants import TOKEN_COSTS
from decimal import Decimal
# TODO: Add Claude support
@@ -90,17 +91,16 @@ def count_string_tokens(prompt: str, model: str) -> int:
return len(encoding.encode(prompt))
def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> int:
def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> Decimal:
"""
Calculate the prompt's cost in token price units (TPU). 1 TPU = $1/10,000,000.
e.g. 100,000 TPUs = $0.01.
Calculate the prompt's cost in USD.
Args:
prompt (Union[List[dict], str]): List of message objects or single string prompt.
model (str): The model name.
Returns:
int: The calculated cost in TPUs.
Decimal: The calculated cost in USD.
e.g.:
>>> prompt = [{ "role": "user", "content": "Hello world"},
@@ -110,7 +110,7 @@ def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> int:
# or
>>> prompt = "Hello world"
>>> calculate_prompt_cost(prompt, "gpt-3.5-turbo")
30
TODO:
"""
model = model.lower()
if model not in TOKEN_COSTS:
@@ -131,25 +131,24 @@ def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> int:
)
prompt_cost = TOKEN_COSTS[model]["prompt"]
return prompt_cost * prompt_tokens
return Decimal(prompt_cost) * Decimal(prompt_tokens)
def calculate_completion_cost(completion: str, model: str) -> int:
def calculate_completion_cost(completion: str, model: str) -> Decimal:
"""
Calculate the prompt's cost in token price units (TPU). 1 TPU = $1/10,000,000.
e.g. 100,000 TPUs = $0.01.
Calculate the prompt's cost in USD.
Args:
completion (str): Completion string.
model (str): The model name.
Returns:
int: The calculated cost in TPUs.
Decimal: The calculated cost in USD.
e.g.:
>>> completion = "How may I assist you today?"
>>> calculate_completion_cost(completion, "gpt-3.5-turbo")
140
TODO:
"""
if model not in TOKEN_COSTS:
raise KeyError(
@@ -159,4 +158,4 @@ def calculate_completion_cost(completion: str, model: str) -> int:
completion_tokens = count_string_tokens(completion, model)
completion_cost = TOKEN_COSTS[model]["completion"]
return completion_cost * completion_tokens
return Decimal(completion_cost) * Decimal(completion_tokens)