Files
awesome-reviewers/_reviewers/posthog-optimize-database-query-patterns.json
2025-08-19 12:19:58 +00:00

46 lines
10 KiB
JSON

[
{
"discussion_id": "2284651280",
"pr_number": 36529,
"pr_file": "plugin-server/src/worker/ingestion/persons/repositories/postgres-person-repository.ts",
"created_at": "2025-08-19T09:16:58+00:00",
"commented_code": "try {\n const { rows } = await this.postgres.query<RawPerson>(\n tx ?? PostgresUse.PERSONS_WRITE,\n- `WITH inserted_person AS (\n+ `${\n+ forcedId\n+ ? `WITH inserted_person AS (\n+ INSERT INTO posthog_person (\n+ id, created_at, properties, properties_last_updated_at,\n+ properties_last_operation, team_id, is_user_id, is_identified, uuid, version\n+ )\n+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)\n+ RETURNING *\n+ )`\n+ : `WITH inserted_person AS (\n INSERT INTO posthog_person (\n created_at, properties, properties_last_updated_at,\n properties_last_operation, team_id, is_user_id, is_identified, uuid, version\n )\n VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)\n RETURNING *\n- )` +\n+ )`\n+ } ` +\n distinctIds\n .map(\n // NOTE: Keep this in sync with the posthog_persondistinctid INSERT in\n // `addDistinctId`\n (_, index) => `, distinct_id_${index} AS (\n INSERT INTO posthog_persondistinctid (distinct_id, person_id, team_id, version)\n VALUES (\n- $${11 + index + distinctIds!.length - 1},\n+ $${(forcedId ? 12 : 11) + index + distinctIds!.length - 1},",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2284651280",
"repo_full_name": "PostHog/posthog",
"pr_number": 36529,
"pr_file": "plugin-server/src/worker/ingestion/persons/repositories/postgres-person-repository.ts",
"discussion_id": "2284651280",
"commented_code": "@@ -272,54 +277,80 @@ export class PostgresPersonRepository\n try {\n const { rows } = await this.postgres.query<RawPerson>(\n tx ?? PostgresUse.PERSONS_WRITE,\n- `WITH inserted_person AS (\n+ `${\n+ forcedId\n+ ? `WITH inserted_person AS (\n+ INSERT INTO posthog_person (\n+ id, created_at, properties, properties_last_updated_at,\n+ properties_last_operation, team_id, is_user_id, is_identified, uuid, version\n+ )\n+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)\n+ RETURNING *\n+ )`\n+ : `WITH inserted_person AS (\n INSERT INTO posthog_person (\n created_at, properties, properties_last_updated_at,\n properties_last_operation, team_id, is_user_id, is_identified, uuid, version\n )\n VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)\n RETURNING *\n- )` +\n+ )`\n+ } ` +\n distinctIds\n .map(\n // NOTE: Keep this in sync with the posthog_persondistinctid INSERT in\n // `addDistinctId`\n (_, index) => `, distinct_id_${index} AS (\n INSERT INTO posthog_persondistinctid (distinct_id, person_id, team_id, version)\n VALUES (\n- $${11 + index + distinctIds!.length - 1},\n+ $${(forcedId ? 12 : 11) + index + distinctIds!.length - 1},",
"comment_created_at": "2025-08-19T09:16:58+00:00",
"comment_author": "pl",
"comment_body": "question: Instead of adding forced ID conditionally, can we always add it and set the value in the query to null? I'd think that when passed null, the ID will be assigned by autoincrement.",
"pr_file_module": null
}
]
},
{
"discussion_id": "2272899689",
"pr_number": 36474,
"pr_file": "frontend/src/scenes/data-warehouse/externalDataSourcesLogic.ts",
"created_at": "2025-08-13T10:36:21+00:00",
"commented_code": "return {\n ...values.dataWarehouseSources,\n results:\n- values.dataWarehouseSources?.results.map((s) =>\n+ values.dataWarehouseSources?.results.map((s: ExternalDataSource) =>\n s.id === updatedSource.id ? updatedSource : s\n ) || [],\n }\n },\n },\n ],\n+ totalRowsProcessed: [\n+ 0 as number,\n+ {\n+ loadTotalRowsProcessed: async ({ materializedViews }: { materializedViews: any[] }) => {\n+ const dataSources = values.dataWarehouseSources?.results || []\n+\n+ const monthStartISO = getMonthStartISO()\n+\n+ const [schemaResults, materializationResults] = await Promise.all([\n+ Promise.all(\n+ dataSources.map(async (source: ExternalDataSource) => {\n+ try {\n+ const jobs = await api.externalDataSources.jobs(source.id, monthStartISO, null)\n+ return sumMTDRows(jobs, monthStartISO)\n+ } catch (error) {\n+ posthog.captureException(error)\n+ return 0\n+ }\n+ })\n+ ),\n+\n+ Promise.all(\n+ materializedViews.map(async (view: any) => {\n+ try {\n+ const res = await api.dataWarehouseSavedQueries.dataWarehouseDataModelingJobs.list(\n+ view.id,\n+ DATA_WAREHOUSE_CONFIG.maxJobsForMTD,\n+ 0\n+ )\n+ return sumMTDRows(res.results || [], monthStartISO)\n+ } catch (error) {\n+ posthog.captureException(error)\n+ return 0\n+ }\n+ })\n+ ),\n+ ])",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2272899689",
"repo_full_name": "PostHog/posthog",
"pr_number": 36474,
"pr_file": "frontend/src/scenes/data-warehouse/externalDataSourcesLogic.ts",
"discussion_id": "2272899689",
"commented_code": "@@ -35,13 +76,143 @@ export const externalDataSourcesLogic = kea<externalDataSourcesLogicType>([\n return {\n ...values.dataWarehouseSources,\n results:\n- values.dataWarehouseSources?.results.map((s) =>\n+ values.dataWarehouseSources?.results.map((s: ExternalDataSource) =>\n s.id === updatedSource.id ? updatedSource : s\n ) || [],\n }\n },\n },\n ],\n+ totalRowsProcessed: [\n+ 0 as number,\n+ {\n+ loadTotalRowsProcessed: async ({ materializedViews }: { materializedViews: any[] }) => {\n+ const dataSources = values.dataWarehouseSources?.results || []\n+\n+ const monthStartISO = getMonthStartISO()\n+\n+ const [schemaResults, materializationResults] = await Promise.all([\n+ Promise.all(\n+ dataSources.map(async (source: ExternalDataSource) => {\n+ try {\n+ const jobs = await api.externalDataSources.jobs(source.id, monthStartISO, null)\n+ return sumMTDRows(jobs, monthStartISO)\n+ } catch (error) {\n+ posthog.captureException(error)\n+ return 0\n+ }\n+ })\n+ ),\n+\n+ Promise.all(\n+ materializedViews.map(async (view: any) => {\n+ try {\n+ const res = await api.dataWarehouseSavedQueries.dataWarehouseDataModelingJobs.list(\n+ view.id,\n+ DATA_WAREHOUSE_CONFIG.maxJobsForMTD,\n+ 0\n+ )\n+ return sumMTDRows(res.results || [], monthStartISO)\n+ } catch (error) {\n+ posthog.captureException(error)\n+ return 0\n+ }\n+ })\n+ ),\n+ ])",
"comment_created_at": "2025-08-13T10:36:21+00:00",
"comment_author": "Gilbert09",
"comment_body": "This will execute 85 requests for team 2 (62 mat views + 23 sources), right? This will be too much — I wouldn't shy away from making an endpoint for loading data for this page if we have a bunch of custom requirements here — we can do both of these with a single DB query each.",
"pr_file_module": null
}
]
}
]