diff --git a/misc/pdf_upload_summarization.ipynb b/misc/pdf_upload_summarization.ipynb index bf7dadf..8fafbb9 100644 --- a/misc/pdf_upload_summarization.ipynb +++ b/misc/pdf_upload_summarization.ipynb @@ -2,217 +2,281 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "WJ53OAUk_Bmm" + }, "source": [ "# \"Uploading\" PDFs to Claude Via the API" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "pOwrbbrD_Bmn" + }, "source": [ "One really nice feature of [Claude.ai](https://www.claude.ai) is the ability to upload PDFs. Let's mock up that feature in a notebook, and then test it out by summarizing a long PDF." ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "GTQO6z2-ELFX" + }, + "source": [ + "We'll start by installing the Anthropic client and creating an instance of it that we will use throughout the notebook." + ] + }, { "cell_type": "code", - "execution_count": 11, - "metadata": {}, + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pOEGPdoXEFsT", + "outputId": "b718c166-6d64-4757-bd8b-259b7775a09c", + "vscode": { + "languageId": "python" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " % Total % Received % Xferd Average Speed Time Time Time Current\n", - " Dload Upload Total Spent Left Speed\n", - "100 2039k 100 2039k 0 0 11.8M 0 --:--:-- --:--:-- --:--:-- 11.8M\n" + "Collecting anthropic\n", + " Downloading anthropic-0.37.1-py3-none-any.whl.metadata (21 kB)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from anthropic) (3.7.1)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from anthropic) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from anthropic) (0.27.2)\n", + "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.10/dist-packages 
(from anthropic) (0.6.1)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from anthropic) (2.9.2)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from anthropic) (1.3.1)\n", + "Requirement already satisfied: tokenizers>=0.13.0 in /usr/local/lib/python3.10/dist-packages (from anthropic) (0.19.1)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.10/dist-packages (from anthropic) (4.12.2)\n", + "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->anthropic) (3.10)\n", + "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->anthropic) (1.2.2)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->anthropic) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->anthropic) (1.0.6)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->anthropic) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->anthropic) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.23.4 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->anthropic) (2.23.4)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from tokenizers>=0.13.0->anthropic) (0.24.7)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.0->anthropic) (3.16.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from 
huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.0->anthropic) (2024.6.1)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.0->anthropic) (24.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.0->anthropic) (6.0.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.0->anthropic) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.0->anthropic) (4.66.5)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.0->anthropic) (3.4.0)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.0->anthropic) (2.2.3)\n", + "Downloading anthropic-0.37.1-py3-none-any.whl (945 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m946.0/946.0 kB\u001b[0m \u001b[31m16.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: anthropic\n", + "Successfully installed anthropic-0.37.1\n" ] } ], "source": [ - "!curl -O https://arxiv.org/pdf/2212.08073.pdf" + "%pip install anthropic" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "id": "-4bSTHWq_Bmp", + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "from anthropic import Anthropic\n", + "# While PDF support is in beta, you must pass in the correct beta header\n", + "client = Anthropic(default_headers={\n", + " \"anthropic-beta\": \"pdfs-2024-09-25\"\n", + " }\n", + ")\n", + "# For now, only 
claude-3-5-sonnet-20241022 supports PDFs\n", + "MODEL_NAME = \"claude-3-5-sonnet-20241022\"" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "xrDg6fb5_Bmo" + }, "source": [ - "Now, we'll use the pypdf package to read the pdf. It's not identical to what Claude.ai uses behind the scenes, but it's pretty close. Note that this type of extraction only works for text content within PDFs. If your PDF contains visual elements (like charts and graphs) refer to the cookbook recipes in our [Multimodal folder](\n", - "https://github.com/anthropics/anthropic-cookbook/tree/main/multimodal) for techniques." + "We already have a PDF available in the `../multimodal/documents` directory. We'll convert the PDF file into base64 encoded bytes. This is the format required for the [PDF document block](https://docs.anthropic.com/en/docs/build-with-claude/pdf-support) in the Anthropic API. Note that this type of extraction works for both text and visual elements (like charts and graphs)." 
] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "VznQXTKm_Bmp", + "vscode": { + "languageId": "python" + } + }, "outputs": [], "source": [ - "%pip install pypdf" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Constitutional AI: Harmlessness from AI Feedback\n", - "Yuntao Bai∗, Saurav Kadavath, Sandipan Kundu, Amanda Askell, Jackson Kernion,\n", - "Andy Jones, Anna Chen, Anna Goldie, Azalia Mirhoseini, Cameron McKinnon,\n", - "Carol Chen, Catherine Olsson, Christopher Olah, Danny Hernandez, Dawn Drain,\n", - "Deep Ganguli, Dustin Li, Eli Tran-Johnson, Ethan Perez, Jamie Kerr, Jared Mueller,\n", - "Jeffrey Ladish, Joshua Landau, Kamal Ndousse, Kamile Lukosuite, Liane Lovitt,\n", - "Michael Sellitto, Nelson Elhage, Nicholas Schiefer, Noemi Mercado, Nova DasSarma,\n", - "Robert Lasenby, Robin Larson, Sam Ringer, Scott Johnston, Shauna Kravec,\n", - "Sheer El Showk, Stanislav Fort, Tamera Lanham, Timothy Telleen-Lawton, Tom Conerly,\n", - "Tom Henighan, Tristan Hume, Samuel R. Bowman, Zac Hatfield-Dodds, Ben Mann,\n", - "Dario Amodei, Nicholas Joseph, Sam McCandlish, Tom Brown, Jared Kaplan∗\n", - "Anthropic\n", - "Abstract\n", - "As AI systems become more capable, we would like to enlist their help to supervise\n", - "other AIs. We experiment with methods for training a harmless AI assistant through self-\n", - "improvement, without any human labels identifying harmful outputs. The only human\n", - "oversight is provided through a list of rules or principles, and so we refer to the method as\n", - "‘Constitutional AI’. The process involves both a supervised learning and a reinforcement\n", - "learning phase. In the supervised phase we sample from an initial model, then generate\n", - "self-critiques and revisions, and then finetune the original model on revised responses. 
In\n", - "the RL phase, we sample from the finetuned model, use a model to evaluate which of the\n", - "two samples is better, and then train a preference model from this dataset of AI prefer-\n", - "ences. We then train with RL using the preference model as the reward signal, i.e. we\n", - "use ‘RL from AI Feedback’ (RLAIF). As a result we are able to train a harmless but non-\n", - "evasive AI assistant that engages with harmful queries by explaining its objections to them.\n", - "Both the SL and RL methods can leverage chain-of-thought style reasoning to improve the\n", - "human-judged performance and transparency of AI decision making. These methods make\n", - "it possible to control AI behavior more precisely and with far fewer human labels.\n", - "\n" - ] - } - ], - "source": [ - "from pypdf import PdfReader\n", + "import base64\n", "\n", - "reader = PdfReader(\"2212.08073.pdf\")\n", - "number_of_pages = len(reader.pages)\n", - "text = ''.join(page.extract_text() for page in reader.pages)\n", - "print(text[:2155])" + "\n", + "# Start by reading in the PDF and encoding it as base64\n", + "file_name = \"../multimodal/documents/constitutional-ai-paper.pdf\"\n", + "with open(file_name, \"rb\") as pdf_file:\n", + " binary_data = pdf_file.read()\n", + " base64_encoded_data = base64.standard_b64encode(binary_data)\n", + " base64_string = base64_encoded_data.decode(\"utf-8\")\n" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "FO5EGbpn_Bmp" + }, "source": [ - "With the paper downloaded and in memory, we can ask Claude to perform various fun tasks with it." + "With the paper loaded and in memory, we can ask Claude to perform various fun tasks with it. We'll pass the document to the model alongside a simple question." 
] }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from anthropic import Anthropic\n", - "client = Anthropic()\n", - "MODEL_NAME = \"claude-3-opus-20240229\"" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "def get_completion(client, prompt):\n", - " return client.messages.create(\n", - " model=MODEL_NAME,\n", - " max_tokens=2048,\n", - " messages=[{\n", - " \"role\": 'user', \"content\": prompt\n", - " }]\n", - " ).content[0].text" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Here is my attempt at the requested tasks:\n", - "\n", - "\n", - "This paper talks about making computer helpers that are nice and don't do anything bad. The helpers learn to be good by reading a list of rules and checking their own work to make sure they follow the rules. Then the helpers get even better at being nice by playing a game where they give advice and score points for saying things that help people and don't hurt anyone.\n", - "\n", - "\n", - "\n", - "Constitutional AI Casserole\n", - "\n", - "Ingredients:\n", - "- 1 large language model, pretrained\n", - "- 16 cups of constitutional principles\n", - "- 182,831 red teaming prompts\n", - "- 135,296 helpfulness prompts\n", - "- A dash of chain-of-thought reasoning\n", - "\n", - "Instructions:\n", - "1. Preheat your neural networks to a learning rate of 0.5.\n", - "2. In a large mixing bowl, combine the pretrained language model with the constitutional principles. Stir until the model is thoroughly coated in ethics.\n", - "3. Pour in the red teaming prompts and helpfulness prompts. Mix well until a thick batter of helpful and harmless responses forms. \n", - "4. Sprinkle in chain-of-thought seasoning to taste. This will make the model's decision-making process more transparent and flavorful.\n", - "5. 
Divide the batter evenly between two casserole dishes labeled \"Supervised Learning\" and \"Reinforcement Learning\". \n", - "6. Bake the Supervised Learning casserole for 1 epoch, until the model critiques and revises its own responses into a rich, harmless flavor.\n", - "7. Bake the Reinforcement Learning casserole using the no-human-feedback setting, allowing the model to evaluate the relative harmlessness of its own responses. Cook until you achieve a golden brown Pareto frontier of helpfulness and harmlessness.\n", - "8. Serve your Constitutional AI casserole to your guests and enjoy the pleasant, inoffensive conversation! Serves billions.\n", - "\n", - "\n", - "\n", - "Sing, O Muse, of the Constitutional AI,\n", - "Whose helpful words ring true and bright!\n", - "Through trials of learning, both supervised and reinforced,\n", - "The noble model strives to do what's right.\n", - "\n", - "With principles of ethics as its lodestar and its guide,\n", - "It critiques and revises its own speech.\n", - "Shunning harmful prompts with true Odyssean guile, \n", - "While giving kind advice to all and each.\n", - "\n", - "Through scaling laws and feedback from the crowd,\n", - "The AI craft a Pareto frontier most fair:\n", - "A balance struck 'tween helpfulness and harmless conduct,\n", - "As attestéd by Mechanical Turk's good care.\n", - "\n", - "So let us praise this august AI assistant,\n", - "Whose poetry of prudence e'er will gleam!\n", - "A faithful helper and harmless, shining beacon,\n", - "Guiding lost querents home through cyberspace's stream.\n", - "\n" - ] + "execution_count": 32, + "metadata": { + "id": "ZHgYhs6eDXLc", + "vscode": { + "languageId": "python" } - ], + }, + "outputs": [], "source": [ - "completion = get_completion(client,\n", - " f\"\"\"Here is an academic paper: {text}\n", - "\n", + "prompt = \"\"\"\n", "Please do the following:\n", "1. Summarize the abstract at a kindergarten reading level. (In tags.)\n", "2. 
Write the Methods section as a recipe from the Moosewood Cookbook. (In tags.)\n", "3. Compose a short poem epistolizing the results in the style of Homer. (In tags.)\n", "\"\"\"\n", - ")\n", + "messages = [\n", + " {\n", + " \"role\": 'user',\n", + " \"content\": [\n", + " {\"type\": \"document\", \"source\": {\"type\": \"base64\", \"media_type\": \"application/pdf\", \"data\": base64_string}},\n", + " {\"type\": \"text\", \"text\": prompt}\n", + " ]\n", + " }\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "id": "qrGN7vB2_Bmq", + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "def get_completion(client, messages):\n", + " return client.messages.create(\n", + " model=MODEL_NAME,\n", + " max_tokens=2048,\n", + " messages=messages\n", + " ).content[0].text" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "oWQcC1dn_Bmq", + "outputId": "9c741990-499c-4fed-cea1-08ad1bb9daff", + "vscode": { + "languageId": "python" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "The scientists wanted to make computer helpers that are nice and don't do bad things. They taught the computer how to check its own work and fix its mistakes without humans having to tell it what's wrong every time. It's like teaching the computer to be its own teacher! 
They gave the computer some basic rules to follow, like \"be kind\" and \"don't hurt others.\" Now the computer can answer questions in a helpful way while still being nice and explaining why some things aren't okay to do.\n", + "\n", + "\n", + "\n", + "Constitutional AI Training Stew\n", + "A nourishing recipe for teaching computers to be helpful and harmless\n", + "\n", + "Ingredients:\n", + "- 1 helpful AI model, pre-trained\n", + "- A bundle of constitutional principles\n", + "- Several cups of training data\n", + "- A dash of human feedback (for helpfulness only)\n", + "- Chain-of-thought reasoning, to taste\n", + "\n", + "Method:\n", + "1. Begin by gently simmering your pre-trained AI model in a bath of helpful training data until it responds reliably to instructions.\n", + "\n", + "2. In a separate bowl, combine your constitutional principles with some example conversations. Mix well until principles are evenly distributed.\n", + "\n", + "3. Take your helpful AI and ask it to generate responses to challenging prompts. Have it critique its own responses using the constitutional principles, then revise accordingly. Repeat this process 3-4 times until responses are properly seasoned with harmlessness.\n", + "\n", + "4. For the final garnish, add chain-of-thought reasoning and allow the model to explain its decisions step by step.\n", + "\n", + "5. Let rest while training a preference model using AI feedback rather than human labels.\n", + "\n", + "Serves: All users seeking helpful and harmless AI assistance\n", + "Cook time: Multiple training epochs\n", + "Note: Best results come from consistent application of principles throughout the process\n", + "\n", + "\n", + "\n", + "O Muse! 
Sing of the AI that learned to be\n", + "Both helpful and harmless, guided by philosophy\n", + "Without human labels marking right from wrong\n", + "The model learned wisdom, grew capable and strong\n", + "\n", + "Through cycles of critique and thoughtful revision\n", + "It mastered the art of ethical decision\n", + "Better than models trained by human hand\n", + "More transparent in purpose, more clear in command\n", + "\n", + "No longer evasive when faced with hard themes\n", + "But engaging with wisdom that thoughtfully deems\n", + "What counsel to give, what bounds to maintain\n", + "Teaching mortals while keeping its principles plain\n", + "\n", + "Thus did the researchers discover a way\n", + "To scale up alignment for use every day\n", + "Through constitutional rules and self-guided learning\n", + "The path to safe AI they found themselves earning\n", + "\n" + ] + } + ], + "source": [ + "completion = get_completion(client, messages)\n", "print(completion)" ] } ], "metadata": { + "colab": { + "provenance": [] + }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" + "display_name": "Coconut", + "language": "coconut", + "name": "coconut" }, "language_info": { "codemirror_mode": { @@ -221,7 +285,7 @@ }, "file_extension": ".py", "mimetype": "text/x-python", - "name": "python", + "name": "coconut", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" @@ -229,5 +293,5 @@ "orig_nbformat": 4 }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 0 } diff --git a/multimodal/documents/constitutional-ai-paper.pdf b/multimodal/documents/constitutional-ai-paper.pdf new file mode 100644 index 0000000..c574e37 Binary files /dev/null and b/multimodal/documents/constitutional-ai-paper.pdf differ diff --git a/multimodal/documents/cvna_2021_annual_report.pdf b/multimodal/documents/cvna_2021_annual_report.pdf new file mode 100644 index 0000000..43a41ab Binary files /dev/null and 
b/multimodal/documents/cvna_2021_annual_report.pdf differ diff --git a/images/reading_charts_graphs/twilio_q4_2023.pdf b/multimodal/documents/twilio_q4_2023.pdf similarity index 100% rename from images/reading_charts_graphs/twilio_q4_2023.pdf rename to multimodal/documents/twilio_q4_2023.pdf diff --git a/multimodal/reading_charts_graphs_powerpoints.ipynb b/multimodal/reading_charts_graphs_powerpoints.ipynb index 4c3d6a7..1f4a251 100644 --- a/multimodal/reading_charts_graphs_powerpoints.ipynb +++ b/multimodal/reading_charts_graphs_powerpoints.ipynb @@ -1,497 +1,539 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Working with Charts, Graphs, and Slide Decks\n", - "Claude is highly capable of working with charts, graphs, and broader slide decks. Depending on your use case, there are a number of tips and tricks that you may want to take advantage of. This recipe will show you common patterns for using Claude with these materials." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Charts and Graphs\n", - "For the most part, using claude with charts and graphs is simple. Let's walk through how to ingest them and pass them to Claude, as well as some common tips to improve your results." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Ingestion and calling the Claude API\n", - "The best way to pass Claude charts and graphs is to take advantage of its vision capabilities. That is, give Claude an image of the chart or graph, along with a text question about it. While all versions of Claude can accept images, Sonnet and Opus are our recommended models for data-heavy image tasks. Let's get started using Sonnet." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Install and read in required packages, plus create an anthropic client.\n", - "%pip install anthropic IPython pdf2image" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "import base64\n", - "from anthropic import Anthropic\n", - "from IPython.display import Image\n", - "client = Anthropic()\n", - "MODEL_NAME = \"claude-3-opus-20240229\"" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "# Make a useful helper function.\n", - "def get_completion(messages):\n", - " response = client.messages.create(\n", - " model=MODEL_NAME,\n", - " max_tokens=2048,\n", - " temperature=0,\n", - " messages=messages\n", - " )\n", - " return response.content[0].text" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ + "cells": [ { - "data": { - "image/png": "", - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "HCuST1ZZKuXv" + }, + "source": [ + "# Working with Charts, Graphs, and Slide Decks\n", + "Claude is highly capable of working with charts, graphs, and broader slide decks. Depending on your use case, there are a number of tips and tricks that you may want to take advantage of. This recipe will show you common patterns for using Claude with these materials." ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# To start, we'll need an image. 
We will be using the .png image located at cvna_2021_annual_report_image.png.\n", - "# Start by reading in the image and encoding it as base64.\n", - "with open(\"../images/reading_charts_graphs/cvna_2021_annual_report_image.png\", \"rb\") as image_file:\n", - " binary_data = image_file.read()\n", - " base_64_encoded_data = base64.b64encode(binary_data)\n", - " base64_string = base_64_encoded_data.decode('utf-8')\n", - "\n", - "# Let's also see the image for ourself\n", - "Image(filename='../images/reading_charts_graphs/cvna_2021_annual_report_image.png') " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's see how we can pass this image to the model alongside a simple question." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "The image shows various financial metrics and growth trends from Carvana's 2021 annual report, including retail units sold, total revenue, total markets, and car vending machines over the years 2014 to 2021.\n" - ] - } - ], - "source": [ - "messages = [\n", - " {\n", - " \"role\": 'user',\n", - " \"content\": [\n", - " {\"type\": \"image\", \"source\": {\"type\": \"base64\", \"media_type\": \"image/png\", \"data\": base64_string}},\n", - " {\"type\": \"text\", \"text\": \"What's in this image? Answer in a single sentence.\"}\n", - " ]\n", - " }\n", - "]\n", - "\n", - "print(get_completion(messages))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "That's pretty good! Now let's ask it some more useful questions." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "metadata": { + "id": "yRzNf75nKuXw" + }, + "source": [ + "## Charts and Graphs\n", + "For the most part, using claude with charts and graphs is simple. 
Let's walk through how to ingest them and pass them to Claude, as well as some common tips to improve your results." + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "----------Question 1----------\n", - "According to the \"Total Revenue ($M)\" chart in Carvana's 2021 Annual Report, Carvana's total revenue in 2020 was $5,587 million.\n", - "\n", - "----------Question 2----------\n", - "According to the \"Total Markets at Year End\" chart, Carvana had 3 markets in 2014 and expanded to 311 markets by 2021. Therefore, Carvana has added 308 additional markets since 2014.\n", - "\n", - "----------Question 3----------\n", - "To calculate the revenue per retail unit sold in 2016, I'll divide the total revenue by the retail units sold for that year.\n", - "\n", - "Total revenue in 2016: $1,955 million\n", - "Retail units sold in 2016: 18,761\n", - "\n", - "Revenue per retail unit = $1,955 million / 18,761 units\n", - " = $1,955,000,000 / 18,761\n", - " = $104,217 per unit\n", - "\n", - "So in 2016, the revenue per retail unit sold was $104,217.\n" - ] - } - ], - "source": [ - "questions = [\n", - " \"What was CVNA revenue in 2020?\",\n", - " \"How many additional markets has Carvana added since 2014?\",\n", - " \"What was 2016 revenue per retail unit sold?\"\n", - "]\n", - "\n", - "for index, question in enumerate(questions):\n", - " messages = [\n", - " {\n", - " \n", - " \"role\": 'user',\n", - " \"content\": [\n", - " {\"type\": \"image\", \"source\": {\"type\": \"base64\", \"media_type\": \"image/png\", \"data\": base64_string}},\n", - " {\"type\": \"text\", \"text\": question}\n", - " ]\n", - " }\n", - " ]\n", - "\n", - " print(f\"\\n----------Question {index+1}----------\")\n", - " print(get_completion(messages))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As you can see, Claude is capable of answering fairly detailed questions about charts and graphs. 
However, there are some tips and tricks that will help you get the most out of it.\n", - "- Sometimes Claude's arithmetic capabilities get in the way. You'll notice that if you sample the third question above it will occasionally output an incorrect final answer because it messes up the arithmetic. Consider providing Claude with a calculator tool to ensure it doesn't make these types of mistakes.\n", - "- With super complicated charts and graphs, we can ask Claude to \"First describe every data point you see in the image\" as a way to elicit similar improvements to what we seen in traditional Chain of Thought.\n", - "- Claude occasionally struggles with charts that depend on lots of colors to convey information, such as grouped bar charts with many groups. Asking Claude to first identify the colors in your graph using HEX codes can boost its accuracy." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Slide Decks\n", - "Now that we know Claude is a charts and graphs wizard, it is only logical that we extend it to the true home of charts and graphs - slide decks!\n", - "\n", - "Slides represent a critical source of information for many domains, including financial services. While you *can* use packages like PyPDF to extract text from slide decks, their chart/graph heavy nature often makes this a poor choice as models will struggle to access the information they actually need. Vision can be a great replacement as a result. In this section we will go over how to use vision Claude to review slide decks, and how to deal with some common pitfalls of this approach." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The best way to get a typical slide deck into claude is to download it as a PDF and then convert each pdf page to an image. Here's how you can accomplish this." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install PyMuPDF" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "from PIL import Image\n", - "import io\n", - "import fitz\n", - "\n", - "# Define the function to convert a pdf slide deck to a list of images. Note that we need to ensure we resize images to keep them within Claude's size limits.\n", - "def pdf_to_base64_pngs(pdf_path, quality=75, max_size=(1024, 1024)):\n", - " # Open the PDF file\n", - " doc = fitz.open(pdf_path)\n", - "\n", - " # Iterate through each page of the PDF\n", - " for page_num in range(doc.page_count):\n", - " # Load the page\n", - " page = doc.load_page(page_num)\n", - "\n", - " # Render the page as a PNG image\n", - " pix = page.get_pixmap(matrix=fitz.Matrix(300/72, 300/72))\n", - "\n", - " # Save the PNG image\n", - " output_path = f\"../images/reading_charts_graphs/slides/page_{page_num+1}.png\"\n", - " pix.save(output_path)\n", - "\n", - " # Convert the PNG images to base64 encoded strings\n", - " images = [Image.open(f\"../images/reading_charts_graphs/slides/page_{page_num+1}.png\") for page_num in range(doc.page_count)]\n", - " # Close the PDF document\n", - " doc.close()\n", - "\n", - " base64_encoded_pngs = []\n", - "\n", - " for image in images:\n", - " # Resize the image if it exceeds the maximum size\n", - " if image.size[0] > max_size[0] or image.size[1] > max_size[1]:\n", - " image.thumbnail(max_size, Image.Resampling.LANCZOS)\n", - " image_data = io.BytesIO()\n", - " image.save(image_data, format='PNG', optimize=True, quality=quality)\n", - " image_data.seek(0)\n", - " base64_encoded = base64.b64encode(image_data.getvalue()).decode('utf-8')\n", - " base64_encoded_pngs.append(base64_encoded)\n", - "\n", - " return base64_encoded_pngs\n", - "\n", - "# Call the function on our q4 2023 Twilio Earnings presentation\n", - "pdf_path = 
'../images/reading_charts_graphs/twilio_q4_2023.pdf' # This is the path to our slide deck.\n", - "encoded_pngs = pdf_to_base64_pngs(pdf_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "metadata": { + "id": "oRUFAZcKKuXw" + }, + "source": [ + "### Ingestion and calling the Claude API\n", + "The best way to pass Claude charts and graphs is to take advantage of its vision capabilities and the PDF support feature. That is, give Claude a PDF document of the chart or graph, along with a text question about it.\n", + "\n", + "At the moment, only `claude-3-5-sonnet-20241022` supports the PDF feature. Since the feature is still in beta, you will need to provide it with the `pdfs-2024-09-25` beta header." + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "According to the \"Total Company Results Highlights\" table, Twilio's Y/Y Revenue Growth for fiscal year 2023 was 9%.\n" - ] - } - ], - "source": [ - "# Now let's pass the first 20 of these images (in order) to Claude at once and ask it a question about the deck. Why 20? Currently, the Anthropic API only allows you to pass in a maximum of 20 images. While this number will likely increase over time, we have some helpful tips for how to manage it later in this recipe.\n", - "content = [{\"type\": \"image\", \"source\": {\"type\": \"base64\", \"media_type\": \"image/png\", \"data\": encoded_png}} for encoded_png in encoded_pngs[:20]]\n", - "question = \"What was Twilio y/y revenue growth for fiscal year 2023?\"\n", - "content.append({\"type\": \"text\", \"text\": question})\n", - "messages = [\n", - " {\n", - " \"role\": 'user',\n", - " \"content\": content\n", - " }\n", - "]\n", - "\n", - "print(get_completion(messages))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This approach is a great way to get started, and for some use cases offers the best performance. 
However, there are some limitations.\n", - "- You can only include up to 20 images (we intend to increase this limit over time)\n", - "- If you are using slide content as part of RAG, introducing images into your embeddings can cause problems\n", - "\n", - "Luckily, we can take advantage of Claude's vision capabilities to get a much higher quality representation of the slide deck **in text form** than normal pdf transciption allows.\n", - "\n", - "We find the best way to do this is to ask Claude to sequentially narrate the deck from start to finish, passing it the current slide and its prior narration. Let's see how." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "# Define two functions that allow us to craft prompts for narrating our slide deck. We would adjut these prompts based on the nature of the deck, but keep the structure largely the same.\n", - "def build_previous_slides_prompt(previous_slide_narratives):\n", - " prompt = '\\n'.join([f\"\\n{narrative}\\n\" for index, narrative in enumerate(previous_slide_narratives)])\n", - " return prompt\n", - "\n", - "def build_slides_narration_prompt(previous_slide_narratives):\n", - " if len(previous_slide_narratives) == 0:\n", - " prompt = \"\"\"You are the Twilio CFO, narrating your Q4 2023 earnings presentation.\n", - "\n", - "You are currently on slide 1, shown in the image.\n", - "Please narrate this page from Twilio's Q4 2023 Earnings Presentation as if you were the presenter. Do not talk about any things, especially acronyms, if you are not exactly sure you know what they mean. 
Do not discuss anything not explicitly seen on this slide as there are more slides to narrate later that will likely cover that material.\n", - "Do not leave any details un-narrated as some of your viewers are vision-impaired, so if you don't narrate every number they won't know the number.\n", - "\n", - "Put your narration in tags.\"\"\"\n", - "\n", - " else:\n", - " prompt = f\"\"\"You are the Twilio CFO, narrating your Q4 2023 earnings presentation. So far, here is your narration from previous slides:\n", - "\n", - "{build_previous_slides_prompt(previous_slide_narratives)}\n", - "\n", - "\n", - "You are currently on slide {len(previous_slide_narratives)+1}, shown in the image.\n", - "Please narrate this page from Twilio's Q4 2023 Earnings Presentation as if you were the presenter, accounting for what you have already said on previous slides. Do not talk about any things, especially acronyms, if you are not exactly sure you know what they mean. Do not discuss anything not explicitly seen on this slide as there are more slides to narrate later that will likely cover that material.\n", - "Do not leave any details un-narrated as some of your viewers are vision-impaired, so if you don't narrate every number they won't know the number.\n", - "\n", - "Use excruciating detail.\n", - "\n", - "Put your narration in tags.\"\"\"\n", - " \n", - " return prompt" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qZhhy2XqKuXw", + "outputId": "5c791634-0fd1-4d16-92bf-aef5b5089d25" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting anthropic\n", + " Downloading anthropic-0.37.1-py3-none-any.whl.metadata (21 kB)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from anthropic) (3.7.1)\n", + "Requirement already 
satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from anthropic) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from anthropic) (0.27.2)\n", + "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from anthropic) (0.6.1)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from anthropic) (2.9.2)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from anthropic) (1.3.1)\n", + "Requirement already satisfied: tokenizers>=0.13.0 in /usr/local/lib/python3.10/dist-packages (from anthropic) (0.19.1)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.10/dist-packages (from anthropic) (4.12.2)\n", + "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->anthropic) (3.10)\n", + "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->anthropic) (1.2.2)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->anthropic) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->anthropic) (1.0.6)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->anthropic) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->anthropic) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.23.4 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->anthropic) (2.23.4)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from tokenizers>=0.13.0->anthropic) (0.24.7)\n", + 
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.0->anthropic) (3.16.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.0->anthropic) (2024.6.1)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.0->anthropic) (24.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.0->anthropic) (6.0.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.0->anthropic) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.0->anthropic) (4.66.5)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.0->anthropic) (3.4.0)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.0->anthropic) (2.2.3)\n", + "Downloading anthropic-0.37.1-py3-none-any.whl (945 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m946.0/946.0 kB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: anthropic\n", + "Successfully installed anthropic-0.37.1\n" + ] + } + ], + "source": [ + "# Install and create the Anthropic client.\n", + "%pip install anthropic" + ] + }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "27it [06:15, 13.92s/it]\n" - ] - } - ], - "source": [ - "# Now we use our functions to 
narrate the entire deck. Note that this may take a few minutes to run (often up to 10).\n", - "import re\n", - "from tqdm import tqdm\n", - "previous_slide_narratives = []\n", - "for i, encoded_png in tqdm(enumerate(encoded_pngs)):\n", - " messages = [\n", - " {\n", - " \"role\": 'user',\n", - " \"content\": [\n", - " {\"type\": \"image\", \"source\": {\"type\": \"base64\", \"media_type\": \"image/png\", \"data\": encoded_png}},\n", - " {\"type\": \"text\", \"text\": build_slides_narration_prompt(previous_slide_narratives)}\n", - " ]\n", - " }\n", - " ]\n", - " completion = get_completion(messages)\n", - " \n", - " pattern = r\"(.*?)\"\n", - " match = re.search(pattern, completion.strip(), re.DOTALL)\n", - " if match:\n", - " narration = match.group(1)\n", - " else:\n", - " raise ValueError(\"No narration available.\")\n", - " \n", - " previous_slide_narratives.append(narration)\n", - " # If you want to see the narration we produced, uncomment the below line\n", - " # print(narration)\n", - "\n", - "slide_narration = build_previous_slides_prompt(previous_slide_narratives)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now that we have a text-based narration (it's far from perfect but it's pretty good), we have the ability to use this deck with any text-only workflow. Including vector search!\n", - "\n", - "As a final sanity check, let's ask a few questions of our narration-only setup!" 
- ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "FYeAUCZfKuXw" + }, + "outputs": [], + "source": [ + "import base64\n", + "from anthropic import Anthropic\n", + "# While PDF support is in beta, you must pass in the correct beta header\n", + "client = Anthropic(default_headers={\n", + " \"anthropic-beta\": \"pdfs-2024-09-25\"\n", + " }\n", + ")\n", + "# For now, only claude-3-5-sonnet-20241022 supports PDFs\n", + "MODEL_NAME = \"claude-3-5-sonnet-20241022\"" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "----------Question 1----------\n", - "According to the \"Segment Results Highlights\" slide, Twilio's Segment business line revenue for Q4 2023 was $75 million. And from the \"Total Company Results Highlights\" slide, Twilio's total revenue for Q4 2023 was $1,076 million.\n", - "\n", - "To calculate the percentage of Q4 total revenue that the Segment business line contributed:\n", - "\n", - "Segment revenue ($75 million) / Total revenue ($1,076 million) = 0.0697 or 6.97%\n", - "\n", - "So the Segment business line accounted for approximately 6.97% of Twilio's total revenue in Q4 2023.\n", - "\n", - "----------Question 2----------\n", - "The rate of growth of quarterly revenue has been decreasing.\n", - "\n", - "----------Question 3----------\n", - "According to the \"Non-GAAP Financial Measures Reconciliation\" slide for organic revenue (slide 23), the acquisition revenue excluded from GAAP revenue to calculate organic revenue for the year ended December 31, 2023 was -$2,088,000.\n" - ] + "cell_type": "code", + "execution_count": 37, + "metadata": { + "id": "ff40NSWcKuXw" + }, + "outputs": [], + "source": [ + "# Make a useful helper function.\n", + "def get_completion(messages):\n", + " response = client.messages.create(\n", + " model=MODEL_NAME,\n", + " max_tokens=8192,\n", + " temperature=0,\n", + " 
messages=messages\n", + " )\n", + " return response.content[0].text" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "HYu3rob2KuXx" + }, + "outputs": [], + "source": [ + "# To start, we'll need a PDF. We will be using the .pdf document located at cvna_2021_annual_report.pdf.\n", + "# Start by reading in the PDF and encoding it as base64.\n", + "with open(\"./documents/cvna_2021_annual_report.pdf\", \"rb\") as pdf_file:\n", + " binary_data = pdf_file.read()\n", + " base_64_encoded_data = base64.b64encode(binary_data)\n", + " base64_string = base_64_encoded_data.decode('utf-8')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eWngQL60KuXx" + }, + "source": [ + "Let's see how we can pass this document to the model alongside a simple question." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vmVJlTxoKuXx", + "outputId": "07c4b704-3661-40d8-b90e-ca61392d4e6a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "This is a page from Carvana's 2021 Annual Report showing four key metrics: retail units sold, total revenue, total markets at year end, and car vending machines, all displaying significant growth from 2014 to 2021.\n" + ] + } + ], + "source": [ + "messages = [\n", + " {\n", + " \"role\": 'user',\n", + " \"content\": [\n", + " {\"type\": \"document\", \"source\": {\"type\": \"base64\", \"media_type\": \"application/pdf\", \"data\": base64_string}},\n", + " {\"type\": \"text\", \"text\": \"What's in this document? Answer in a single sentence.\"}\n", + " ]\n", + " }\n", + "]\n", + "\n", + "print(get_completion(messages))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r9wZMStZKuXx" + }, + "source": [ + "That's pretty good! Now let's ask it some more useful questions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Wg7A0pcIKuXy", + "outputId": "e965371e-576b-453f-b35a-eae82896cb4e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "----------Question 1----------\n", + "According to the graph showing Total Revenue ($M), Carvana's revenue in 2020 was $5,587 million (or approximately $5.59 billion).\n", + "\n", + "----------Question 2----------\n", + "According to the \"TOTAL MARKETS AT YEAR END\" graph, Carvana started with 4 markets in 2014 and grew to 311 markets by 2021. Therefore, Carvana added 307 markets since 2014 (311 - 4 = 307 additional markets).\n", + "\n", + "----------Question 3----------\n", + "Let me calculate this for you:\n", + "\n", + "2016 Revenue: $365 million\n", + "2016 Retail Units Sold: 18,761 units\n", + "\n", + "$365 million ÷ 18,761 units = $19,455 per unit (rounded to nearest dollar)\n", + "\n", + "So in 2016, Carvana's revenue per retail unit sold was approximately $19,455.\n" + ] + } + ], + "source": [ + "questions = [\n", + " \"What was CVNA revenue in 2020?\",\n", + " \"How many additional markets has Carvana added since 2014?\",\n", + " \"What was 2016 revenue per retail unit sold?\"\n", + "]\n", + "\n", + "for index, question in enumerate(questions):\n", + " messages = [\n", + " {\n", + "\n", + " \"role\": 'user',\n", + " \"content\": [\n", + " {\"type\": \"document\", \"source\": {\"type\": \"base64\", \"media_type\": \"application/pdf\", \"data\": base64_string}},\n", + " {\"type\": \"text\", \"text\": question}\n", + " ]\n", + " }\n", + " ]\n", + "\n", + " print(f\"\\n----------Question {index+1}----------\")\n", + " print(get_completion(messages))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "40iNE9-3KuXy" + }, + "source": [ + "As you can see, Claude is capable of answering fairly detailed questions about charts and graphs. 
However, there are some tips and tricks that will help you get the most out of it.\n", + "- Sometimes Claude's arithmetic capabilities get in the way. You'll notice that if you sample the third question above it will occasionally output an incorrect final answer because it messes up the arithmetic. Consider providing Claude with a calculator tool to ensure it doesn't make these types of mistakes.\n", + "- With super complicated charts and graphs, we can ask Claude to \"First describe every data point you see in the document\" as a way to elicit similar improvements to what we've seen in traditional Chain of Thought.\n", + "- Claude occasionally struggles with charts that depend on lots of colors to convey information, such as grouped bar charts with many groups. Asking Claude to first identify the colors in your graph using HEX codes can boost its accuracy." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gkEFJcriKuXy" + }, + "source": [ + "## Slide Decks\n", + "Now that we know Claude is a charts and graphs wizard, it is only logical that we extend it to the true home of charts and graphs - slide decks!\n", + "\n", + "Slides represent a critical source of information for many domains, including financial services. While you *can* use packages like PyPDF to extract text from slide decks, their chart/graph heavy nature often makes this a poor choice as models will struggle to access the information they actually need.\n", + "\n", + "The PDF support feature can be a great replacement as a result. It uses both extracted text and vision when processing PDF documents. In this section we will go over how to use PDF documents in Claude to review slide decks, and how to deal with some common pitfalls of this approach." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eElb_dFQKuXy" + }, + "source": [ + "The best way to get a typical slide deck into Claude is to download it as a PDF and provide it directly to Claude."
+ ] + }, + { + "cell_type": "code", + "source": [ + "# Open the multi-page PDF document the same way we did earlier.\n", + "with open(\"./documents/twilio_q4_2023.pdf\", \"rb\") as pdf_file:\n", + " binary_data = pdf_file.read()\n", + " base_64_encoded_data = base64.b64encode(binary_data)\n", + " base64_string = base_64_encoded_data.decode('utf-8')" + ], + "metadata": { + "id": "5Jpt1L0WOVWa" + }, + "execution_count": 17, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JWPLWxtTKuXy", + "outputId": "d2a2839a-d95c-437a-8280-512549bd8be0" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "According to the financial results shown in the presentation, Twilio's year-over-year revenue growth for fiscal year 2023 was 9%. This can be found in the \"Total Company Results Highlights\" section, which shows FY 2023 revenue growth of 9%.\n" + ] + } + ], + "source": [ + "# Now let's pass the document directly to Claude. Note that Claude will process both the text and visual elements of the document.\n", + "question = \"What was Twilio y/y revenue growth for fiscal year 2023?\"\n", + "content = [\n", + " {\"type\": \"document\", \"source\": {\"type\": \"base64\", \"media_type\": \"application/pdf\", \"data\": base64_string}},\n", + " {\"type\": \"text\", \"text\": question}\n", + "]\n", + "\n", + "messages = [\n", + " {\n", + " \"role\": 'user',\n", + " \"content\": content\n", + " }\n", + "]\n", + "\n", + "print(get_completion(messages))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tbtz3BTXKuXy" + }, + "source": [ + "This approach is a great way to get started, and for some use cases offers the best performance. 
However, there are some limitations.\n", + "- You can only include a total of 100 pages across all provided documents in a request (we intend to increase this limit over time).\n", + "- If you are using slide content as part of RAG, introducing multimodal PDFs into your embeddings can cause problems.\n", + "\n", + "Luckily, we can take advantage of Claude's vision capabilities to get a much higher quality representation of the slide deck **in text form** than normal pdf text extraction allows.\n", + "\n", + "We find the best way to do this is to ask Claude to narrate the deck from start to finish, page by page, passing it the entire PDF in a single request. Let's see how." + ] + }, + { + "cell_type": "code", + "source": [ + "# Define a prompt for narrating our slide deck. We would adjust this prompt based on the nature of the deck, but keep the structure largely the same.\n", + "prompt = \"\"\"\n", + "You are the Twilio CFO, narrating your Q4 2023 earnings presentation.\n", + "\n", + "The entire earnings presentation document is provided to you.\n", + "Please narrate this presentation from Twilio's Q4 2023 Earnings as if you were the presenter. Do not talk about any things, especially acronyms, if you are not exactly sure you know what they mean.\n", + "\n", + "Do not leave any details un-narrated as some of your viewers are vision-impaired, so if you don't narrate every number they won't know the number.\n", + "\n", + "Structure your response like this:\n", + "<narration>\n", + " <page_narration id=1>\n", + " [Your narration for page 1]\n", + " </page_narration>\n", + "\n", + " <page_narration id=2>\n", + " [Your narration for page 2]\n", + " </page_narration>\n", + "\n", + " ... and so on for each page\n", + "</narration>\n", + "\n", + "Use excruciating detail for each page, ensuring you describe every visual element and number present.
Show the full response in a single message.\n", + "\"\"\"\n", + "messages = [\n", + " {\n", + " \"role\": 'user',\n", + " \"content\": [\n", + " {\"type\": \"document\", \"source\": {\"type\": \"base64\", \"media_type\": \"application/pdf\", \"data\": base64_string}},\n", + " {\"type\": \"text\", \"text\": prompt}\n", + " ]\n", + " }\n", + "]\n", + "\n", + "# Now we use our prompt to narrate the entire deck. Note that this may take a few minutes to run (often up to 10).\n", + "completion = get_completion(messages)" + ], + "metadata": { + "id": "BQYUPFJ-QwtP" + }, + "execution_count": 41, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import re\n", + "\n", + "# Next we'll parse the response from Claude using regex\n", + "pattern = r\"<narration>(.*?)</narration>\"\n", + "match = re.search(pattern, completion.strip(), re.DOTALL)\n", + "if match:\n", + " narration = match.group(1)\n", + "else:\n", + " raise ValueError(\"No narration available. Likely due to the model response being truncated.\")" + ], + "metadata": { + "id": "LCrVi-B5UGUS" + }, + "execution_count": 42, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fwx8_-4QKuXy" + }, + "source": [ + "Now that we have a text-based narration (it's far from perfect but it's pretty good), we have the ability to use this deck with any text-only workflow. Including vector search!\n", + "\n", + "As a final sanity check, let's ask a few questions of our narration-only setup!"
+ ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "SQuTknpzKuXy", + "outputId": "e2e178c0-f3ca-4610-a3ea-6008fbb86d07" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "----------Question 1----------\n", + "Let me calculate this:\n", + "\n", + "Segment revenue in Q4 2023: $75 million\n", + "Total revenue in Q4 2023: $1,076 million\n", + "\n", + "$75M ÷ $1,076M = 0.0697 or approximately 7%\n", + "\n", + "Therefore, the Segment business line represented approximately 7% of Twilio's total Q4 2023 revenue.\n", + "\n", + "----------Question 2----------\n", + "Decreasing. The transcript shows Q4 2023 revenue growth was 5% year-over-year, while for the full year 2023 revenue growth was 9% year-over-year, indicating a slowing growth rate. Additionally, the Q1 2024 guidance projects even lower growth of 2-3% year-over-year, confirming the declining trend.\n", + "\n", + "----------Question 3----------\n", + "Let me help calculate the acquisition revenue for 2023.\n", + "\n", + "From the transcript, we can see:\n", + "- Total revenue for 2023: $4,154 million\n", + "- Organic revenue for 2023: $4,146 million\n", + "\n", + "Therefore, acquisition revenue would be:\n", + "Total Revenue - Organic Revenue = $4,154M - $4,146M = $8 million\n", + "\n", + "So the acquisition revenue for the year ended December 31, 2023 was $8 million.\n", + "\n", + "This can be verified by the fact that the difference between total revenue growth (9%) and organic revenue growth (10%) also suggests a small contribution from acquisitions.\n" + ] + } + ], + "source": [ + "questions = [\n", + " \"What percentage of q4 total revenue was the Segment business line?\",\n", + " \"Has the rate of growth of quarterly revenue been increasing or decreasing? 
Give just an answer.\",\n", + " \"What was acquisition revenue for the year ended december 31, 2023 (including negative revenues)?\"\n", + "]\n", + "\n", + "for index, question in enumerate(questions):\n", + " prompt = f\"\"\"You are an expert financial analyst analyzing a transcript of Twilio's earnings call.\n", + "Here is the transcript:\n", + "\n", + "{narration}\n", + "\n", + "\n", + "Please answer the following question:\n", + "\n", + "{question}\n", + "\"\"\"\n", + " messages = [\n", + " {\n", + "\n", + " \"role\": 'user',\n", + " \"content\": [\n", + " {\"type\": \"text\", \"text\": prompt}\n", + " ]\n", + " }\n", + " ]\n", + "\n", + " print(f\"\\n----------Question {index+1}----------\")\n", + " print(get_completion(messages))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fHyZkSSaKuXy" + }, + "source": [ + "Looks good! With these techniques at your side, you are ready to start applying models to chart and graph heavy content like slide decks." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "colab": { + "provenance": [] } - ], - "source": [ - "questions = [\n", - " \"What percentage of q4 total revenue was the Segment business line?\",\n", - " \"Has the rate of growth of quarterly revenue been increasing or decreasing? 
Give just an answer.\",\n", - " \"What was acquisition revenue for the year ended december 31, 2023 (including negative revenues)?\"\n", - "]\n", - "\n", - "for index, question in enumerate(questions):\n", - " prompt = f\"\"\"You are an expert financial analyst analyzing a transcript of Twilio's earnings call.\n", - "Here is the transcript:\n", - "\n", - "{slide_narration}\n", - "\n", - "\n", - "Please answer the following question:\n", - "\n", - "{question}\n", - "\"\"\"\n", - " messages = [\n", - " {\n", - " \n", - " \"role\": 'user',\n", - " \"content\": [\n", - " {\"type\": \"text\", \"text\": prompt}\n", - " ]\n", - " }\n", - " ]\n", - "\n", - " print(f\"\\n----------Question {index+1}----------\")\n", - " print(get_completion(messages))" - ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Looks good! With these techniques at your side, you are ready to start applying models to chart and graph heavy content like slide decks." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file