remove TPUs

2024-06-22 04:30:40 +03:00 · 2024-01-30 19:35:32 -08:00
parent 931619272c
commit b62bc1fb6c
6 changed files with 24 additions and 36 deletions
--- a/README.md
+++ b/README.md
@@ -25,8 +25,7 @@ prompt_cost = calculate_prompt_cost(prompt, model)
 completion_cost = calculate_completion_cost(completion, model)

 print(f"{prompt_cost} + {completion_cost} = {prompt_cost + completion_cost}")
-# 135 + 140 = 275 ($0.0000275)
-# Priced in TPUs (token price units), which is 1/100,000,000th of a USD.
+# TODO:
 ```

 ## Installation
@@ -58,23 +57,19 @@ completion = chat_completion.choices[0].message.content
 prompt_cost = calculate_prompt_cost(prompt, model)
 completion_cost = calculate_completion_cost(completion, model)
 print(f"{prompt_cost} + {completion_cost} = {prompt_cost + completion_cost}")
-# 1800 + 1000 = 2800 ($0.0000280)
-
-from tokencost import USD_PER_TPU
-print(f"Cost USD: ${(prompt_cost + completion_cost)/USD_PER_TPU}")
-# $2.8e-05
+# TODO:
 ```

 **Calculating cost using string prompts instead of messages:**
 ```python
-from tokencost import calculate_prompt_cost, USD_PER_TPU
+from tokencost import calculate_prompt_cost

 prompt_string = "Hello world" 
 response = "How may I assist you today?"
 model= "gpt-3.5-turbo"

 prompt_cost = calculate_prompt_cost(prompt_string, model)
-print(f"Cost: ${prompt_cost/USD_PER_TPU}")
+print(f"Cost: ${prompt_cost}")
 # Cost: $3e-06
 ```

@@ -95,7 +90,7 @@ print(count_string_tokens(prompt="Hello world", model="gpt-3.5-turbo"))
 ```

 ## Cost table
-Units denominated in TPUs (Token Price Units = 1/10,000,000 USD). All prices can be located in `model_prices.yaml`.
+Units denominated in USD. All prices can be located in `model_prices.json`.

 | Model Name | Prompt Cost (USD) | Completion Cost (USD) | Max Prompt Tokens |
 | --- | --- | --- | --- |
--- a/tests/test_llama_index_callbacks.py
+++ b/tests/test_llama_index_callbacks.py
@@ -5,7 +5,6 @@ from llama_index.callbacks.schema import CBEventType, EventPayload
 from unittest.mock import MagicMock

 # Mock the calculate_prompt_cost and calculate_completion_cost functions
-# and the USD_PER_TPU constant

 STRING = "Hello, world!"

@@ -14,7 +13,6 @@ STRING = "Hello, world!"
 def mock_tokencost(monkeypatch):
    monkeypatch.setattr('tokencost.calculate_prompt_cost', MagicMock(return_value=100))
    monkeypatch.setattr('tokencost.calculate_completion_cost', MagicMock(return_value=200))
-    monkeypatch.setattr('tokencost.USD_PER_TPU', 10)

 # Mock the ChatMessage class

--- a/tokencost/init.py
+++ b/tokencost/init.py
@@ -4,4 +4,4 @@ from .costs import (
    calculate_completion_cost,
    calculate_prompt_cost,
 )
-from .constants import TOKEN_COSTS, USD_PER_TPU
+from .constants import TOKEN_COSTS
--- a/tokencost/callbacks/llama_index.py
+++ b/tokencost/callbacks/llama_index.py
@@ -1,7 +1,7 @@
 from typing import Any, Dict, List, Optional, cast
 from llama_index.callbacks.base_handler import BaseCallbackHandler
 from llama_index.callbacks.schema import CBEventType, EventPayload
-from tokencost import calculate_prompt_cost, calculate_completion_cost, USD_PER_TPU
+from tokencost import calculate_prompt_cost, calculate_completion_cost


 class TokenCostHandler(BaseCallbackHandler):
@@ -29,15 +29,15 @@ class TokenCostHandler(BaseCallbackHandler):
        if EventPayload.PROMPT in payload:
            prompt = str(payload.get(EventPayload.PROMPT))
            completion = str(payload.get(EventPayload.COMPLETION))
-            prompt_cost = calculate_prompt_cost(prompt, self.model) / USD_PER_TPU
-            completion_cost = calculate_completion_cost(completion, self.model) / USD_PER_TPU
+            prompt_cost = calculate_prompt_cost(prompt, self.model)
+            completion_cost = calculate_completion_cost(completion, self.model)

        elif EventPayload.MESSAGES in payload:
            messages = cast(List[ChatMessage], payload.get(EventPayload.MESSAGES, []))
            messages_str = "\n".join([str(x) for x in messages])
-            prompt_cost = calculate_prompt_cost(messages_str, self.model) / USD_PER_TPU
+            prompt_cost = calculate_prompt_cost(messages_str, self.model)
            response = str(payload.get(EventPayload.RESPONSE))
-            completion_cost = calculate_completion_cost(response, self.model) / USD_PER_TPU
+            completion_cost = calculate_completion_cost(response, self.model)

        print(f"# Prompt cost: {prompt_cost}")
        print(f"# Completion: {completion_cost}")
--- a/tokencost/constants.py
+++ b/tokencost/constants.py
@@ -11,15 +11,11 @@ https://platform.openai.com/tokenizer

 Note: When asking follow-up questions, everything above and including your follow-up question
 is considered a prompt (for the purpose of context) and will thus cost prompt tokens.
-
-1 Token Price Unit (TPU) is defined as 1/100,000,000 of $1 (USD). 1,000,000 TPUs would equate to $0.01.
 """

-USD_PER_TPU = 100_000_000
-
 # How to read TOKEN_COSTS:
-# Each prompt token costs __ TPUs per token.
-# Each completion token costs __ TPUs per token.
+# Each prompt token costs __ USD per token.
+# Each completion token costs __ USD per token.
 # Max prompt limit of each model is __ tokens.

 with open(os.path.join(os.path.dirname(__file__), "model_prices.yaml"), "r") as f:
--- a/tokencost/costs.py
+++ b/tokencost/costs.py
@@ -4,6 +4,7 @@ Costs dictionary and utility tool for counting tokens
 import tiktoken
 from typing import Union, List, Dict
 from .constants import TOKEN_COSTS
+from decimal import Decimal


 # TODO: Add Claude support
@@ -90,17 +91,16 @@ def count_string_tokens(prompt: str, model: str) -> int:
    return len(encoding.encode(prompt))


-def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> int:
+def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> Decimal:
    """
-    Calculate the prompt's cost in token price units (TPU). 1 TPU = $1/10,000,000.
-    e.g. 100,000 TPUs = $0.01.
+    Calculate the prompt's cost in USD.

    Args:
        prompt (Union[List[dict], str]): List of message objects or single string prompt.
        model (str): The model name.

    Returns:
-        int: The calculated cost in TPUs.
+        Decimal: The calculated cost in USD.

    e.g.:
    >>> prompt = [{ "role": "user", "content": "Hello world"},
@@ -110,7 +110,7 @@ def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> int:
    # or
    >>> prompt = "Hello world"
    >>> calculate_prompt_cost(prompt, "gpt-3.5-turbo")
-    30
+    TODO:
    """
    model = model.lower()
    if model not in TOKEN_COSTS:
@@ -131,25 +131,24 @@ def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> int:
    )
    prompt_cost = TOKEN_COSTS[model]["prompt"]

-    return prompt_cost * prompt_tokens
+    return Decimal(prompt_cost) * Decimal(prompt_tokens)


-def calculate_completion_cost(completion: str, model: str) -> int:
+def calculate_completion_cost(completion: str, model: str) -> Decimal:
    """
-    Calculate the prompt's cost in token price units (TPU). 1 TPU = $1/10,000,000.
-    e.g. 100,000 TPUs = $0.01.
+    Calculate the completion's cost in USD.

    Args:
        completion (str): Completion string.
        model (str): The model name.

    Returns:
-        int: The calculated cost in TPUs.
+        Decimal: The calculated cost in USD.

    e.g.:
    >>> completion = "How may I assist you today?"
    >>> calculate_completion_cost(completion, "gpt-3.5-turbo")
-    140
+    TODO:
    """
    if model not in TOKEN_COSTS:
        raise KeyError(
@@ -159,4 +158,4 @@ def calculate_completion_cost(completion: str, model: str) -> int:
    completion_tokens = count_string_tokens(completion, model)
    completion_cost = TOKEN_COSTS[model]["completion"]

-    return completion_cost * completion_tokens
+    return Decimal(completion_cost) * Decimal(completion_tokens)