diff --git a/tokenize.ipynb b/tokenize.ipynb
index 64d2500..8db2011 100644
--- a/tokenize.ipynb
+++ b/tokenize.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {
     "tags": [
      "parameters"
@@ -22,14 +22,14 @@
     "# For use with Hugging Face models, specify user/model name from the Hugging Face model hub\n",
     "# For use with OpenAI models, specify the model name from the OpenAI API\n",
     "# See https://huggingface.co/models for Hugging Face models\n",
-    "model_name = 'mixedbread-ai/mxbai-embed-large-v1' # same tokenizer as used in embeddings\n",
+    "model_name = 'microsoft/Phi-3.5-mini-instruct'\n",
     "# See https://github.com/openai/tiktoken/blob/63527649963def8c759b0f91f2eb69a40934e468/tiktoken/model.py#L22-L72 for OpenAI models\n",
     "# model_name = 'gpt-4o'"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -92,7 +92,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -103,7 +103,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -114,7 +114,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
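
Note (not part of the diff itself): the `model_name` parameter changed above is presumably resolved into a tokenizer later in the notebook, following the comments in the parameters cell. A minimal sketch of how such a value is typically consumed, assuming Hugging Face hub names contain a `/` and anything else is treated as an OpenAI model name for tiktoken:

# Hypothetical sketch, not code from tokenize.ipynb: shows how `model_name`
# could select between a Hugging Face tokenizer and an OpenAI tiktoken encoding.
from transformers import AutoTokenizer  # pip install transformers
import tiktoken                         # pip install tiktoken

model_name = 'microsoft/Phi-3.5-mini-instruct'  # new value introduced in this diff

if '/' in model_name:
    # Hub names look like "user/model"; load the matching Hugging Face tokenizer.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    token_ids = tokenizer.encode("Hello, world!")
else:
    # Otherwise treat it as an OpenAI model name (e.g. 'gpt-4o') and use tiktoken.
    encoding = tiktoken.encoding_for_model(model_name)
    token_ids = encoding.encode("Hello, world!")

print(token_ids)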