WIP Text to SQL

This commit is contained in:
Mahesh Murag
2024-09-23 22:05:45 +02:00
parent 81ab59f042
commit c982d54567

View File

@@ -65,17 +65,9 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"outputs": [],
"source": [
"%pip install -q anthropic pandas voyageai"
]
@@ -91,7 +83,6 @@
"import sqlite3\n",
"import pandas as pd\n",
"from IPython.display import display\n",
"from textwrap import dedent\n",
"\n",
"# Set your Anthropic API key\n",
"os.environ[\"ANTHROPIC_API_KEY\"] = \"YOUR_ANTHROPIC_API_KEY\"\n",
@@ -114,176 +105,9 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Departments table:\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>location</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>HR</td>\n",
" <td>New York</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Engineering</td>\n",
" <td>San Francisco</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Marketing</td>\n",
" <td>Chicago</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id name location\n",
"0 1 HR New York\n",
"1 2 Engineering San Francisco\n",
"2 3 Marketing Chicago"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Employees table:\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>age</th>\n",
" <th>department_id</th>\n",
" <th>salary</th>\n",
" <th>hire_date</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>John Doe</td>\n",
" <td>30</td>\n",
" <td>2</td>\n",
" <td>75000.0</td>\n",
" <td>2020-01-15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Jane Smith</td>\n",
" <td>35</td>\n",
" <td>1</td>\n",
" <td>65000.0</td>\n",
" <td>2019-05-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Bob Johnson</td>\n",
" <td>28</td>\n",
" <td>2</td>\n",
" <td>80000.0</td>\n",
" <td>2021-03-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>Alice Brown</td>\n",
" <td>42</td>\n",
" <td>3</td>\n",
" <td>70000.0</td>\n",
" <td>2018-11-20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>Charlie Davis</td>\n",
" <td>31</td>\n",
" <td>2</td>\n",
" <td>85000.0</td>\n",
" <td>2022-07-01</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id name age department_id salary hire_date\n",
"0 1 John Doe 30 2 75000.0 2020-01-15\n",
"1 2 Jane Smith 35 1 65000.0 2019-05-01\n",
"2 3 Bob Johnson 28 2 80000.0 2021-03-10\n",
"3 4 Alice Brown 42 3 70000.0 2018-11-20\n",
"4 5 Charlie Davis 31 2 85000.0 2022-07-01"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"# Create a new SQLite database and tables\n",
"with sqlite3.connect(DATABASE_PATH) as conn:\n",
@@ -339,28 +163,9 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Table: departments\n",
" - id (INTEGER)\n",
" - name (TEXT)\n",
" - location (TEXT)\n",
"\n",
"Table: employees\n",
" - id (INTEGER)\n",
" - name (TEXT)\n",
" - age (INTEGER)\n",
" - department_id (INTEGER)\n",
" - salary (REAL)\n",
" - hire_date (DATE)\n"
]
}
],
"outputs": [],
"source": [
"def get_schema_info(db_path):\n",
" conn = sqlite3.connect(db_path)\n",
@@ -398,42 +203,9 @@
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" You are an AI assistant that converts natural language queries into SQL.\n",
" Given the following SQL database schema:\n",
"\n",
" <schema>\n",
" Table: departments\n",
" - id (INTEGER)\n",
" - name (TEXT)\n",
" - location (TEXT)\n",
"\n",
"Table: employees\n",
" - id (INTEGER)\n",
" - name (TEXT)\n",
" - age (INTEGER)\n",
" - department_id (INTEGER)\n",
" - salary (REAL)\n",
" - hire_date (DATE)\n",
" </schema>\n",
"\n",
" Convert the following natural language query into SQL:\n",
" <query>\n",
" What are the names of all employees in the Engineering department?\n",
" </query>\n",
"\n",
" Provide only the SQL query in your response, without preamble or any explanation.\n",
" \n"
]
}
],
"outputs": [],
"source": [
"def generate_prompt(schema, query):\n",
" return f\"\"\"\n",
@@ -467,21 +239,9 @@
},
{
"cell_type": "code",
"execution_count": 61,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generated SQL:\n",
"SELECT e.name\n",
"FROM employees e\n",
"JOIN departments d ON e.department_id = d.id\n",
"WHERE d.name = 'Engineering';\n"
]
}
],
"outputs": [],
"source": [
"def generate_sql(prompt):\n",
" response = client.messages.create(\n",
@@ -509,68 +269,9 @@
},
{
"cell_type": "code",
"execution_count": 62,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Query result:\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>John Doe</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Bob Johnson</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Charlie Davis</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name\n",
"0 John Doe\n",
"1 Bob Johnson\n",
"2 Charlie Davis"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"def run_sql(sql):\n",
" conn = sqlite3.connect(DATABASE_PATH)\n",
@@ -596,60 +297,9 @@
},
{
"cell_type": "code",
"execution_count": 63,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" You are an AI assistant that converts natural language queries into SQL.\n",
" Given the following SQL database schema:\n",
"\n",
" <schema>\n",
" Table: departments\n",
" - id (INTEGER)\n",
" - name (TEXT)\n",
" - location (TEXT)\n",
"\n",
"Table: employees\n",
" - id (INTEGER)\n",
" - name (TEXT)\n",
" - age (INTEGER)\n",
" - department_id (INTEGER)\n",
" - salary (REAL)\n",
" - hire_date (DATE)\n",
" </schema>\n",
"\n",
" Here are some examples of natural language queries and their corresponding SQL:\n",
"\n",
" <examples>\n",
" \n",
" Example 1:\n",
" <query>List all employees in the HR department.</<query>\n",
" <output>SELECT e.name FROM employees e JOIN departments d ON e.department_id = d.id WHERE d.name = 'HR';</output>\n",
"\n",
" Example 2:\n",
" User: What is the average salary of employees in the Engineering department?\n",
" SQL: SELECT AVG(e.salary) FROM employees e JOIN departments d ON e.department_id = d.id WHERE d.name = 'Engineering';\n",
"\n",
" Example 3:\n",
" User: Who is the oldest employee?\n",
" SQL: SELECT name, age FROM employees ORDER BY age DESC LIMIT 1;\n",
" \n",
" </examples>\n",
"\n",
" Now, convert the following natural language query into SQL:\n",
" <query>\n",
" What are the names and salaries of employees in the Marketing department?\n",
" </query>\n",
"\n",
" Provide only the SQL query in your response, without preamble or any explanation.\n",
" \n"
]
}
],
"outputs": [],
"source": [
"def generate_prompt_with_examples(schema, query):\n",
" examples = \"\"\"\n",
@@ -703,66 +353,9 @@
},
{
"cell_type": "code",
"execution_count": 64,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generated SQL:\n",
"SELECT e.name, e.salary\n",
"FROM employees e\n",
"JOIN departments d ON e.department_id = d.id\n",
"WHERE d.name = 'Marketing';\n",
"\n",
"Query result:\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>salary</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Alice Brown</td>\n",
" <td>70000.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name salary\n",
"0 Alice Brown 70000.0"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"# Generate SQL using the improved prompt\n",
"sql = generate_sql(prompt)\n",
@@ -797,68 +390,9 @@
},
{
"cell_type": "code",
"execution_count": 65,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"You are an AI assistant that converts natural language queries into SQL.\n",
" Given the following SQL database schema:\n",
"\n",
" <schema>\n",
" Table: departments\n",
" - id (INTEGER)\n",
" - name (TEXT)\n",
" - location (TEXT)\n",
"\n",
"Table: employees\n",
" - id (INTEGER)\n",
" - name (TEXT)\n",
" - age (INTEGER)\n",
" - department_id (INTEGER)\n",
" - salary (REAL)\n",
" - hire_date (DATE)\n",
" </schema>\n",
"\n",
" Here are some examples of natural language queries, thought processes, and their corresponding SQL:\n",
"\n",
" <examples>\n",
" \n",
" <example>\n",
" <query>List all employees in the HR department.</query>\n",
" <thought_process>\n",
" 1. We need to join the employees and departments tables.\n",
" 2. We'll match employees.department_id with departments.id.\n",
" 3. We'll filter for the HR department.\n",
" 4. We only need to return the employee names.\n",
" </thought_process>\n",
" <sql>SELECT e.name FROM employees e JOIN departments d ON e.department_id = d.id WHERE d.name = 'HR';</sql>\n",
" </example>\n",
"\n",
" <example>\n",
" <query>What is the average salary of employees hired in 2022?</query>\n",
" <thought_process>\n",
" 1. We need to work with the employees table.\n",
" 2. We need to filter for employees hired in 2022.\n",
" 3. We'll use the YEAR function to extract the year from the hire_date.\n",
" 4. We'll calculate the average of the salary column for the filtered rows.\n",
" </thought_process>\n",
" <sql>SELECT AVG(salary) FROM employees WHERE YEAR(hire_date) = 2022;</sql>\n",
" </example>\n",
" \n",
" </examples>\n",
"\n",
" Now, convert the following natural language query into SQL:\n",
" What are the names and hire dates of employees in the Engineering department, ordered by their salary?\n",
"\n",
" Within <thought_process> tags, explain your thought process for creating the SQL query.\n",
" Then, within <sql> tags, provide your output SQL query.\n",
" \n"
]
}
],
"outputs": [],
"source": [
"def generate_cot_prompt(schema, query):\n",
" examples = \"\"\"\n",
@@ -920,105 +454,9 @@
},
{
"cell_type": "code",
"execution_count": 66,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Raw response from Claude:\n",
"<thought_process>\n",
"1. We need to join the employees and departments tables to get the department information.\n",
"2. We'll match employees.department_id with departments.id.\n",
"3. We need to filter for the Engineering department.\n",
"4. We need to select the names and hire dates of the employees.\n",
"5. We need to order the results by the employees' salaries.\n",
"6. We don't need to show the salary in the output, but we'll use it for ordering.\n",
"</thought_process>\n",
"\n",
"<sql>\n",
"SELECT e.name, e.hire_date\n",
"FROM employees e\n",
"JOIN departments d ON e.department_id = d.id\n",
"WHERE d.name = 'Engineering'\n",
"ORDER BY e.salary;\n",
"</sql>\n",
"\n",
"Thought Process:\n",
"1. We need to join the employees and departments tables to get the department information.\n",
"2. We'll match employees.department_id with departments.id.\n",
"3. We need to filter for the Engineering department.\n",
"4. We need to select the names and hire dates of the employees.\n",
"5. We need to order the results by the employees' salaries.\n",
"6. We don't need to show the salary in the output, but we'll use it for ordering.\n",
"\n",
"Generated SQL:\n",
"SELECT e.name, e.hire_date\n",
"FROM employees e\n",
"JOIN departments d ON e.department_id = d.id\n",
"WHERE d.name = 'Engineering'\n",
"ORDER BY e.salary;\n",
"\n",
"Query result:\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>hire_date</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>John Doe</td>\n",
" <td>2020-01-15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Bob Johnson</td>\n",
" <td>2021-03-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Charlie Davis</td>\n",
" <td>2022-07-01</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name hire_date\n",
"0 John Doe 2020-01-15\n",
"1 Bob Johnson 2021-03-10\n",
"2 Charlie Davis 2022-07-01"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"def generate_sql_with_explanation(prompt):\n",
" response = client.messages.create(\n",
@@ -1066,22 +504,9 @@
},
{
"cell_type": "code",
"execution_count": 110,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Search results:\n",
"Similarity: 0.7318002364429477, Metadata: {'table': 'employees', 'column': 'salary', 'type': 'REAL'}\n",
"Similarity: 0.728456954795667, Metadata: {'table': 'employees', 'column': 'department_id', 'type': 'INTEGER'}\n",
"Similarity: 0.6810496067975434, Metadata: {'table': 'departments', 'column': 'name', 'type': 'TEXT'}\n",
"Similarity: 0.6697669330753087, Metadata: {'table': 'employees', 'column': 'name', 'type': 'TEXT'}\n",
"Similarity: 0.6666317064533498, Metadata: {'table': 'departments', 'column': 'location', 'type': 'TEXT'}\n"
]
}
],
"outputs": [],
"source": [
"import os\n",
"import numpy as np\n",
@@ -1159,140 +584,9 @@
},
{
"cell_type": "code",
"execution_count": 112,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generated prompt:\n",
"You are an AI assistant that converts natural language queries into SQL.\n",
" Given the following relevant columns from the SQL database schema:\n",
"\n",
" <schema>\n",
" Table: employees, Column: salary, Type: REAL\n",
"Table: employees, Column: department_id, Type: INTEGER\n",
"Table: departments, Column: name, Type: TEXT\n",
"Table: employees, Column: name, Type: TEXT\n",
"Table: departments, Column: location, Type: TEXT\n",
"Table: employees, Column: id, Type: INTEGER\n",
"Table: departments, Column: id, Type: INTEGER\n",
"Table: employees, Column: age, Type: INTEGER\n",
"Table: employees, Column: hire_date, Type: DATE\n",
" </schema>\n",
"\n",
" Convert the following natural language query into SQL:\n",
" <query>\n",
" What is the average salary of employees in each department?\n",
" </query>\n",
"\n",
" Within <thought_process> tags, explain your thought process for creating the SQL query.\n",
" Then, within <sql> tags, provide your output SQL query.\n",
" \n",
"\n",
"Generated result:\n",
"<thought_process>\n",
"To answer this query, we need to:\n",
"1. Join the employees and departments tables to get department information for each employee.\n",
"2. Group the results by department.\n",
"3. Calculate the average salary for each group.\n",
"\n",
"Here's the step-by-step thought process:\n",
"1. We'll use the employees table as our main table since it contains the salary information.\n",
"2. We need to join the departments table to get the department names.\n",
"3. The join will be on employees.department_id = departments.id\n",
"4. We'll group the results by department name (or id, but name is more informative).\n",
"5. We'll use the AVG function to calculate the average salary for each group.\n",
"6. We'll select the department name and the average salary in the SELECT clause.\n",
"</thought_process>\n",
"\n",
"<sql>\n",
"SELECT \n",
" d.name AS department_name,\n",
" AVG(e.salary) AS average_salary\n",
"FROM \n",
" employees e\n",
"JOIN \n",
" departments d ON e.department_id = d.id\n",
"GROUP BY \n",
" d.name\n",
"ORDER BY \n",
" d.name\n",
"</sql>\n",
"\n",
"Extracted SQL:\n",
"SELECT \n",
" d.name AS department_name,\n",
" AVG(e.salary) AS average_salary\n",
"FROM \n",
" employees e\n",
"JOIN \n",
" departments d ON e.department_id = d.id\n",
"GROUP BY \n",
" d.name\n",
"ORDER BY \n",
" d.name\n",
"\n",
"Query result:\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>department_name</th>\n",
" <th>average_salary</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Engineering</td>\n",
" <td>80000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>HR</td>\n",
" <td>65000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Marketing</td>\n",
" <td>70000.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" department_name average_salary\n",
"0 Engineering 80000.0\n",
"1 HR 65000.0\n",
"2 Marketing 70000.0"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"def generate_rag_prompt(query):\n",
" relevant_schema = vectordb.search(query, k=10, similarity_threshold=0.3)\n",
@@ -1382,84 +676,9 @@
},
{
"cell_type": "code",
"execution_count": 125,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Attempt 1:\n",
"SQL failed to execute\n",
"\n",
"Attempt 2:\n",
"SQL executed successfully!\n",
"\n",
"Final SQL query:\n",
"SELECT *\n",
"FROM (\n",
" SELECT \n",
" d.name AS department_name,\n",
" salary_range.max_salary / salary_range.min_salary AS salary_ratio\n",
" FROM \n",
" (SELECT \n",
" department_id,\n",
" MAX(salary) AS max_salary,\n",
" MIN(salary) AS min_salary\n",
" FROM \n",
" employees\n",
" GROUP BY \n",
" department_id) AS salary_range\n",
" JOIN \n",
" departments d ON d.id = salary_range.department_id\n",
") AS subquery\n",
"WHERE salary_ratio > 3\n",
"ORDER BY salary_ratio DESC;\n",
"\n",
"Query result:\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>department_name</th>\n",
" <th>salary_ratio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [department_name, salary_ratio]\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"def execute_sql_with_feedback(sql):\n",
" try:\n",