WIP Text to SQL - checkpoint

This commit is contained in:
Mahesh Murag
2024-09-24 23:11:05 +02:00
parent 2034210bcc
commit 6a7bb28b6b
3 changed files with 54 additions and 51 deletions

Binary file not shown.

View File

@@ -405,61 +405,64 @@
"source": [
"import random\n",
"from datetime import datetime, timedelta\n",
"# Create a new SQLite database and tables\n",
"with sqlite3.connect(DATABASE_PATH) as conn:\n",
" cursor = conn.cursor()\n",
" \n",
" cursor.executescript('''\n",
" CREATE TABLE IF NOT EXISTS departments (\n",
" id INTEGER PRIMARY KEY,\n",
" name TEXT NOT NULL,\n",
" location TEXT\n",
" );\n",
" CREATE TABLE IF NOT EXISTS employees (\n",
" id INTEGER PRIMARY KEY,\n",
" name TEXT NOT NULL,\n",
" age INTEGER,\n",
" department_id INTEGER,\n",
" salary REAL,\n",
" hire_date DATE,\n",
" FOREIGN KEY (department_id) REFERENCES departments (id)\n",
" );\n",
" ''')\n",
"\n",
" # Insert sample data\n",
" cursor.executemany('INSERT OR REPLACE INTO departments VALUES (?,?,?)',\n",
" [\n",
" (1, 'HR', 'New York'), \n",
" (2, 'Engineering', 'San Francisco'), \n",
" (3, 'Marketing', 'Chicago'),\n",
" (4, 'Sales', 'Los Angeles'),\n",
" (5, 'Finance', 'Boston'),\n",
" (6, 'Customer Support', 'Dallas'),\n",
" (7, 'Research', 'Seattle'),\n",
" (8, 'Legal', 'Washington D.C.'),\n",
" (9, 'Product', 'Austin'),\n",
" (10, 'Operations', 'Denver')\n",
" ])\n",
" \n",
" first_names = ['John', 'Jane', 'Bob', 'Alice', 'Charlie', 'Diana', 'Edward', 'Fiona', 'George', 'Hannah', 'Ian', 'Julia', 'Kevin', 'Laura', 'Michael', 'Nora', 'Oliver', 'Patricia', 'Quentin', 'Rachel', 'Steve', 'Tina', 'Ulysses', 'Victoria', 'William', 'Xena', 'Yannick', 'Zoe']\n",
" last_names = ['Smith', 'Johnson', 'Williams', 'Jones', 'Brown', 'Davis', 'Miller', 'Wilson', 'Moore', 'Taylor', 'Anderson', 'Thomas', 'Jackson', 'White', 'Harris', 'Martin', 'Thompson', 'Garcia', 'Martinez', 'Robinson', 'Clark', 'Rodriguez', 'Lewis', 'Lee', 'Walker', 'Hall', 'Allen', 'Young', 'King']\n",
"if not os.path.exists(DATABASE_PATH):\n",
" print(\"Database does not exist. Creating and populating...\")\n",
" # Create a new SQLite database and tables\n",
" with sqlite3.connect(DATABASE_PATH) as conn:\n",
" cursor = conn.cursor()\n",
" \n",
" cursor.executescript('''\n",
" CREATE TABLE IF NOT EXISTS departments (\n",
" id INTEGER PRIMARY KEY,\n",
" name TEXT NOT NULL,\n",
" location TEXT\n",
" );\n",
" CREATE TABLE IF NOT EXISTS employees (\n",
" id INTEGER PRIMARY KEY,\n",
" name TEXT NOT NULL,\n",
" age INTEGER,\n",
" department_id INTEGER,\n",
" salary REAL,\n",
" hire_date DATE,\n",
" FOREIGN KEY (department_id) REFERENCES departments (id)\n",
" );\n",
" ''')\n",
"\n",
" employees_data = []\n",
" for i in range(1, 201): # Generate 200 employees\n",
" name = f\"{random.choice(first_names)} {random.choice(last_names)}\"\n",
" age = random.randint(22, 65)\n",
" department_id = random.randint(1, 10)\n",
" salary = round(random.uniform(40000, 200000), 2)\n",
" hire_date = (datetime.now() - timedelta(days=random.randint(0, 3650))).strftime('%Y-%m-%d')\n",
" employees_data.append((i, name, age, department_id, salary, hire_date))\n",
" # Insert sample data\n",
" cursor.executemany('INSERT OR REPLACE INTO departments VALUES (?,?,?)',\n",
" [\n",
" (1, 'HR', 'New York'), \n",
" (2, 'Engineering', 'San Francisco'), \n",
" (3, 'Marketing', 'Chicago'),\n",
" (4, 'Sales', 'Los Angeles'),\n",
" (5, 'Finance', 'Boston'),\n",
" (6, 'Customer Support', 'Dallas'),\n",
" (7, 'Research', 'Seattle'),\n",
" (8, 'Legal', 'Washington D.C.'),\n",
" (9, 'Product', 'Austin'),\n",
" (10, 'Operations', 'Denver')\n",
" ])\n",
" \n",
" first_names = ['John', 'Jane', 'Bob', 'Alice', 'Charlie', 'Diana', 'Edward', 'Fiona', 'George', 'Hannah', 'Ian', 'Julia', 'Kevin', 'Laura', 'Michael', 'Nora', 'Oliver', 'Patricia', 'Quentin', 'Rachel', 'Steve', 'Tina', 'Ulysses', 'Victoria', 'William', 'Xena', 'Yannick', 'Zoe']\n",
" last_names = ['Smith', 'Johnson', 'Williams', 'Jones', 'Brown', 'Davis', 'Miller', 'Wilson', 'Moore', 'Taylor', 'Anderson', 'Thomas', 'Jackson', 'White', 'Harris', 'Martin', 'Thompson', 'Garcia', 'Martinez', 'Robinson', 'Clark', 'Rodriguez', 'Lewis', 'Lee', 'Walker', 'Hall', 'Allen', 'Young', 'King']\n",
"\n",
" cursor.executemany('INSERT OR REPLACE INTO employees VALUES (?,?,?,?,?,?)', employees_data)\n",
" employees_data = []\n",
" for i in range(1, 201): # Generate 200 employees\n",
" name = f\"{random.choice(first_names)} {random.choice(last_names)}\"\n",
" age = random.randint(22, 65)\n",
" department_id = random.randint(1, 10)\n",
" salary = round(random.uniform(40000, 200000), 2)\n",
" hire_date = (datetime.now() - timedelta(days=random.randint(0, 3650))).strftime('%Y-%m-%d')\n",
" employees_data.append((i, name, age, department_id, salary, hire_date))\n",
"\n",
" # Display table contents\n",
" for table in ['departments', 'employees']:\n",
" df = pd.read_sql_query(f\"SELECT * FROM {table}\", conn)\n",
" print(f\"\\n{table.capitalize()} table:\")\n",
" display(df)"
" cursor.executemany('INSERT OR REPLACE INTO employees VALUES (?,?,?,?,?,?)', employees_data)\n",
"\n",
"# Display table contents.\n",
"# Open a dedicated connection for reading: `conn` is only bound inside the\n",
"# database-creation branch above, so reusing it here would raise NameError\n",
"# whenever the database file already exists and that branch is skipped.\n",
"with sqlite3.connect(DATABASE_PATH) as conn:\n",
"    for table in ['departments', 'employees']:\n",
"        df = pd.read_sql_query(f\"SELECT * FROM {table}\", conn)\n",
"        print(f\"\\n{table.capitalize()} table:\")\n",
"        display(df)"
]
},
{