[
{
"discussion_id": "2251724478",
"pr_number": 35726,
"pr_file": "ee/hogai/graph/insights/nodes.py",
"created_at": "2025-08-04T14:44:26+00:00",
"commented_code": "\"insight__team\",\n \"insight__short_id\",\n \"insight__query\",\n+ \"insight__filters\",\n )\n .order_by(\"insight_id\", \"-last_viewed_at\")\n .distinct(\"insight_id\")\n )\n \n- self._all_insights = list(\n+ def _get_total_insights_count(self) -> int:\n+ if self._total_insights_count is None:\n+ self._total_insights_count = self._get_insights_queryset().count()\n+ return self._total_insights_count\n+\n+ def _load_insights_page(self, page_number: int) -> list[dict]:\n+ \"\"\"Load a specific page of insights from database.\"\"\"\n+ if page_number in self._loaded_pages:\n+ return self._loaded_pages[page_number]\n+\n+ start_idx = page_number * self._page_size\n+ end_idx = start_idx + self._page_size\n+\n+ insights_qs = self._get_insights_queryset()[start_idx:end_idx]\n+\n+ page_insights = list(\n insights_qs.values(\n \"insight_id\",\n \"insight__name\",\n \"insight__description\",\n \"insight__derived_name\",\n \"insight__query\",\n \"insight__short_id\",\n+ \"insight__filters\",\n )\n )\n \n+ self._loaded_pages[page_number] = page_insights\n+ return page_insights\n+\n def _search_insights_iteratively(self, search_query: str) -> list[int]:",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2251724478",
"repo_full_name": "PostHog/posthog",
"pr_number": 35726,
"pr_file": "ee/hogai/graph/insights/nodes.py",
"discussion_id": "2251724478",
"commented_code": "@@ -135,33 +142,52 @@ def _load_all_insights(self) -> None:\n \"insight__team\",\n \"insight__short_id\",\n \"insight__query\",\n+ \"insight__filters\",\n )\n .order_by(\"insight_id\", \"-last_viewed_at\")\n .distinct(\"insight_id\")\n )\n \n- self._all_insights = list(\n+ def _get_total_insights_count(self) -> int:\n+ if self._total_insights_count is None:\n+ self._total_insights_count = self._get_insights_queryset().count()\n+ return self._total_insights_count\n+\n+ def _load_insights_page(self, page_number: int) -> list[dict]:\n+ \"\"\"Load a specific page of insights from database.\"\"\"\n+ if page_number in self._loaded_pages:\n+ return self._loaded_pages[page_number]\n+\n+ start_idx = page_number * self._page_size\n+ end_idx = start_idx + self._page_size\n+\n+ insights_qs = self._get_insights_queryset()[start_idx:end_idx]\n+\n+ page_insights = list(\n insights_qs.values(\n \"insight_id\",\n \"insight__name\",\n \"insight__description\",\n \"insight__derived_name\",\n \"insight__query\",\n \"insight__short_id\",\n+ \"insight__filters\",\n )\n )\n \n+ self._loaded_pages[page_number] = page_insights\n+ return page_insights\n+\n def _search_insights_iteratively(self, search_query: str) -> list[int]:",
"comment_created_at": "2025-08-04T14:44:26+00:00",
"comment_author": "kappa90",
"comment_body": "Nit because it would need heavy refactoring, but maybe it would have made sense to write all of this as a subgraph, since we do iterative function calling. It would have made the code more readable as well, because you would have split it into smaller functions/nodes (this file is 700 lines long).",
"pr_file_module": null
},
{
"comment_id": "2257269022",
"repo_full_name": "PostHog/posthog",
"pr_number": 35726,
"pr_file": "ee/hogai/graph/insights/nodes.py",
"discussion_id": "2251724478",
"commented_code": "@@ -135,33 +142,52 @@ def _load_all_insights(self) -> None:\n \"insight__team\",\n \"insight__short_id\",\n \"insight__query\",\n+ \"insight__filters\",\n )\n .order_by(\"insight_id\", \"-last_viewed_at\")\n .distinct(\"insight_id\")\n )\n \n- self._all_insights = list(\n+ def _get_total_insights_count(self) -> int:\n+ if self._total_insights_count is None:\n+ self._total_insights_count = self._get_insights_queryset().count()\n+ return self._total_insights_count\n+\n+ def _load_insights_page(self, page_number: int) -> list[dict]:\n+ \"\"\"Load a specific page of insights from database.\"\"\"\n+ if page_number in self._loaded_pages:\n+ return self._loaded_pages[page_number]\n+\n+ start_idx = page_number * self._page_size\n+ end_idx = start_idx + self._page_size\n+\n+ insights_qs = self._get_insights_queryset()[start_idx:end_idx]\n+\n+ page_insights = list(\n insights_qs.values(\n \"insight_id\",\n \"insight__name\",\n \"insight__description\",\n \"insight__derived_name\",\n \"insight__query\",\n \"insight__short_id\",\n+ \"insight__filters\",\n )\n )\n \n+ self._loaded_pages[page_number] = page_insights\n+ return page_insights\n+\n def _search_insights_iteratively(self, search_query: str) -> list[int]:",
"comment_created_at": "2025-08-06T13:57:09+00:00",
"comment_author": "tatoalo",
"comment_body": "Yeah, I have already worked on this a bit. I can split it into `Planner --> ToolsNode --> EvaluationNode` to break up the logic a bit. Not super sure it is actually clearer to follow, though. Will definitely evaluate in a future PR, thanks for flagging!",
"pr_file_module": null
}
]
},
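The thread above is about lazily loading and caching pages of insights instead of materializing the whole list up front. Below is a minimal, self-contained sketch of that memoized-pagination pattern; the class name and the `fetch_page`/`count_total` callables are illustrative stand-ins for the Django queryset used in the actual PR, not PostHog's real code.

```python
class InsightSearchPaginator:
    """Sketch of the memoized page-loading pattern discussed above (names are hypothetical)."""

    def __init__(self, fetch_page, count_total, page_size: int = 50):
        self._fetch_page = fetch_page    # callable (start, end) -> list[dict]; stands in for a queryset slice
        self._count_total = count_total  # callable () -> int; stands in for queryset.count()
        self._page_size = page_size
        self._loaded_pages: dict[int, list[dict]] = {}
        self._total_count: int | None = None

    def get_total_count(self) -> int:
        # Run the expensive COUNT at most once and cache the result.
        if self._total_count is None:
            self._total_count = self._count_total()
        return self._total_count

    def load_page(self, page_number: int) -> list[dict]:
        # Serve repeat requests for a page from the in-memory cache.
        if page_number in self._loaded_pages:
            return self._loaded_pages[page_number]
        start = page_number * self._page_size
        page = self._fetch_page(start, start + self._page_size)
        self._loaded_pages[page_number] = page
        return page


# Usage against an in-memory stand-in for the database:
data = [{"insight_id": i} for i in range(120)]
paginator = InsightSearchPaginator(fetch_page=lambda s, e: data[s:e], count_total=lambda: len(data))
assert paginator.get_total_count() == 120
assert paginator.load_page(1)[0]["insight_id"] == 50  # second page starts at offset 50
```

The point of the cache is that iterative LLM tool calls can revisit the same page without re-querying; the count query likewise runs at most once.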
{
"discussion_id": "2268157563",
"pr_number": 35726,
"pr_file": "ee/hogai/graph/insights/nodes.py",
"created_at": "2025-08-11T22:35:07+00:00",
"commented_code": "return \"\n\".join(formatted_insights)\n \n- def _parse_insight_ids(self, response_content: str) -> list[int]:\n- \"\"\"Parse insight IDs from LLM response.\"\"\"\n- import re\n+ def _get_all_loaded_insight_ids(self) -> set[int]:\n+ \"\"\"Get all insight IDs from loaded pages.\"\"\"\n+ all_ids = set()\n+ for page_insights in self._loaded_pages.values():\n+ for insight in page_insights:\n+ all_ids.add(insight.id)\n+ return all_ids\n+\n+ def _find_insight_by_id(self, insight_id: int) -> Insight | None:\n+ \"\"\"Find an insight by ID across all loaded pages.\"\"\"\n+ for page_insights in self._loaded_pages.values():\n+ for insight in page_insights:\n+ if insight.id == insight_id:\n+ return insight\n+ return None\n \n- # Look for numbers in the response\n+ def _parse_insight_ids(self, response_content: str) -> list[int]:\n+ \"\"\"Parse insight IDs from LLM response, removing duplicates and preserving order.\"\"\"\n numbers = re.findall(r\"\\b\\d+\\b\", response_content)\n \n # Convert to integers and validate against available insights\n- available_ids = {insight[\"insight_id\"] for insight in self._all_insights}\n+ available_ids = self._get_all_loaded_insight_ids()\n valid_ids = []\n+ seen_ids = set()\n \n for num_str in numbers:\n- insight_id = int(num_str)\n- if insight_id in available_ids:\n- valid_ids.append(insight_id)\n+ try:\n+ insight_id = int(num_str)\n+ if insight_id in available_ids and insight_id not in seen_ids:\n+ valid_ids.append(insight_id)\n+ seen_ids.add(insight_id)\n+ # Stop if we've found enough unique insights\n+ if len(valid_ids) >= self._max_insights:\n+ break\n+ except ValueError:\n+ continue\n+\n+ return valid_ids\n+\n+ def _create_enhanced_insight_summary(self, insight: Insight) -> str:\n+ \"\"\"Create enhanced summary with metadata and basic execution info.\"\"\"\n+ insight_id = insight.id\n+ name = insight.name or insight.derived_name or \"Unnamed\"\n+ description = insight.description or \"\"\n+\n+ insight_type = \"Unknown\"\n+ if insight.query:\n+ try:\n+ query_dict = json.loads(insight.query) if isinstance(insight.query, str) else insight.query\n+ query_kind = query_dict.get(\"kind\", \"Unknown\")\n+\n+ if query_kind == \"DataVisualizationNode\":\n+ source = query_dict.get(\"source\", {})\n+ if source.get(\"kind\") == \"HogQLQuery\":\n+ insight_type = \"HogQL\"\n+ else:\n+ insight_type = \"DataVisualization\"\n+ else:\n+ insight_type = query_kind.replace(\"Query\", \"\")\n+ except Exception:\n+ insight_type = \"Unknown\"\n \n- # Limit to 3 insights\n- return valid_ids[:3]\n+ # Check if insight can be visualized\n+ can_viz = bool(insight.query)\n+ viz_status = \"\u2713 Executable\" if can_viz else \"\u2717 Not executable\"\n \n- def _format_search_results(self, selected_insights: list[int], search_query: str) -> str:\n- \"\"\"Format final search results for display.\"\"\"\n- if not selected_insights:\n- return f\"No insights found matching '{search_query or 'your search'}'.\n\nSuggest that the user try:\n- Using different keywords\n- Searching for broader terms\n- Creating a new insight instead\"\n+ # Get basic query info without executing\n+ query_info = self._get_basic_query_info_from_insight(insight)\n \n- insight_details = []\n- for insight_id in selected_insights:\n- insight = next((i for i in self._all_insights if i[\"insight_id\"] == insight_id), None)\n- if insight:\n- insight_details.append(insight)\n+ insight_url = f\"/project/{self._team.id}/insights/{insight.short_id}\"\n+ hyperlink_format = f\"[{name}]({insight_url})\"\n \n- header 
= f\"Found {len(insight_details)} insight{'s' if len(insight_details) != 1 else ''}\"\n- if search_query:\n- header += f\" matching '{search_query}'\"\n- header += \":\n\n\"\n+ summary_parts = [f\"ID: {insight_id} | {name} | {hyperlink_format}\", f\"Type: {insight_type} | {viz_status}\"]\n \n- formatted_results = []\n- for i, insight in enumerate(insight_details, 1):\n- name = insight.get(\"insight__name\") or insight.get(\"insight__derived_name\", \"Unnamed Insight\")\n- description = insight.get(\"insight__description\")\n- insight_short_id = insight.get(\"insight__short_id\")\n- insight_url = f\"/project/{self._team.project_id}/insights/{insight_short_id}\"\n+ if description:\n+ summary_parts.append(f\"Description: {description}\")\n+\n+ if query_info:\n+ summary_parts.append(f\"Query: {query_info}\")\n+\n+ return \" | \".join(summary_parts)\n+\n+ def _get_basic_query_info_from_insight(self, insight: Insight) -> str | None:\n+ \"\"\"Extract basic query information from Insight object without execution.\"\"\"\n+ try:\n+ query_dict = None\n+\n+ # Parse query\n+ if insight.query:\n+ if isinstance(insight.query, str):\n+ query_dict = json.loads(insight.query)\n+ elif isinstance(insight.query, dict):\n+ query_dict = insight.query\n+\n+ if not query_dict:\n+ return None\n+\n+ # Extract basic info from query\n+ info_parts = []\n+\n+ # Get events/series info\n+ series = query_dict.get(\"series\", [])\n+ if series:\n+ events = []\n+ for s in series:\n+ if isinstance(s, dict):\n+ event_name = s.get(\"event\", s.get(\"name\", \"Unknown\"))\n+ events.append(event_name)\n+ if events:\n+ # Limit to first 3 for LLM context window\n+ info_parts.append(f\"Events: {', '.join(events[:3])}\")\n+\n+ # Get date range info\n+ date_range = query_dict.get(\"dateRange\", {})\n+ if date_range:\n+ date_from = date_range.get(\"date_from\", \"\")\n+ if date_from:\n+ info_parts.append(f\"Period: {date_from}\")\n+\n+ return \" | \".join(info_parts) if info_parts else None\n+\n+ except Exception:\n+ return \"Query error\"\n+\n+ def _process_insight_for_evaluation(self, insight: Insight, query_executor: AssistantQueryExecutor) -> dict:",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2268157563",
"repo_full_name": "PostHog/posthog",
"pr_number": 35726,
"pr_file": "ee/hogai/graph/insights/nodes.py",
"discussion_id": "2268157563",
"commented_code": "@@ -242,62 +307,254 @@ def _format_insights_page(self, page_number: int) -> str:\n \n return \"\\n\".join(formatted_insights)\n \n- def _parse_insight_ids(self, response_content: str) -> list[int]:\n- \"\"\"Parse insight IDs from LLM response.\"\"\"\n- import re\n+ def _get_all_loaded_insight_ids(self) -> set[int]:\n+ \"\"\"Get all insight IDs from loaded pages.\"\"\"\n+ all_ids = set()\n+ for page_insights in self._loaded_pages.values():\n+ for insight in page_insights:\n+ all_ids.add(insight.id)\n+ return all_ids\n+\n+ def _find_insight_by_id(self, insight_id: int) -> Insight | None:\n+ \"\"\"Find an insight by ID across all loaded pages.\"\"\"\n+ for page_insights in self._loaded_pages.values():\n+ for insight in page_insights:\n+ if insight.id == insight_id:\n+ return insight\n+ return None\n \n- # Look for numbers in the response\n+ def _parse_insight_ids(self, response_content: str) -> list[int]:\n+ \"\"\"Parse insight IDs from LLM response, removing duplicates and preserving order.\"\"\"\n numbers = re.findall(r\"\\b\\d+\\b\", response_content)\n \n # Convert to integers and validate against available insights\n- available_ids = {insight[\"insight_id\"] for insight in self._all_insights}\n+ available_ids = self._get_all_loaded_insight_ids()\n valid_ids = []\n+ seen_ids = set()\n \n for num_str in numbers:\n- insight_id = int(num_str)\n- if insight_id in available_ids:\n- valid_ids.append(insight_id)\n+ try:\n+ insight_id = int(num_str)\n+ if insight_id in available_ids and insight_id not in seen_ids:\n+ valid_ids.append(insight_id)\n+ seen_ids.add(insight_id)\n+ # Stop if we've found enough unique insights\n+ if len(valid_ids) >= self._max_insights:\n+ break\n+ except ValueError:\n+ continue\n+\n+ return valid_ids\n+\n+ def _create_enhanced_insight_summary(self, insight: Insight) -> str:\n+ \"\"\"Create enhanced summary with metadata and basic execution info.\"\"\"\n+ insight_id = insight.id\n+ name = insight.name or insight.derived_name or \"Unnamed\"\n+ description = insight.description or \"\"\n+\n+ insight_type = \"Unknown\"\n+ if insight.query:\n+ try:\n+ query_dict = json.loads(insight.query) if isinstance(insight.query, str) else insight.query\n+ query_kind = query_dict.get(\"kind\", \"Unknown\")\n+\n+ if query_kind == \"DataVisualizationNode\":\n+ source = query_dict.get(\"source\", {})\n+ if source.get(\"kind\") == \"HogQLQuery\":\n+ insight_type = \"HogQL\"\n+ else:\n+ insight_type = \"DataVisualization\"\n+ else:\n+ insight_type = query_kind.replace(\"Query\", \"\")\n+ except Exception:\n+ insight_type = \"Unknown\"\n \n- # Limit to 3 insights\n- return valid_ids[:3]\n+ # Check if insight can be visualized\n+ can_viz = bool(insight.query)\n+ viz_status = \"\u2713 Executable\" if can_viz else \"\u2717 Not executable\"\n \n- def _format_search_results(self, selected_insights: list[int], search_query: str) -> str:\n- \"\"\"Format final search results for display.\"\"\"\n- if not selected_insights:\n- return f\"No insights found matching '{search_query or 'your search'}'.\\n\\nSuggest that the user try:\\n- Using different keywords\\n- Searching for broader terms\\n- Creating a new insight instead\"\n+ # Get basic query info without executing\n+ query_info = self._get_basic_query_info_from_insight(insight)\n \n- insight_details = []\n- for insight_id in selected_insights:\n- insight = next((i for i in self._all_insights if i[\"insight_id\"] == insight_id), None)\n- if insight:\n- insight_details.append(insight)\n+ insight_url = 
f\"/project/{self._team.id}/insights/{insight.short_id}\"\n+ hyperlink_format = f\"[{name}]({insight_url})\"\n \n- header = f\"Found {len(insight_details)} insight{'s' if len(insight_details) != 1 else ''}\"\n- if search_query:\n- header += f\" matching '{search_query}'\"\n- header += \":\\n\\n\"\n+ summary_parts = [f\"ID: {insight_id} | {name} | {hyperlink_format}\", f\"Type: {insight_type} | {viz_status}\"]\n \n- formatted_results = []\n- for i, insight in enumerate(insight_details, 1):\n- name = insight.get(\"insight__name\") or insight.get(\"insight__derived_name\", \"Unnamed Insight\")\n- description = insight.get(\"insight__description\")\n- insight_short_id = insight.get(\"insight__short_id\")\n- insight_url = f\"/project/{self._team.project_id}/insights/{insight_short_id}\"\n+ if description:\n+ summary_parts.append(f\"Description: {description}\")\n+\n+ if query_info:\n+ summary_parts.append(f\"Query: {query_info}\")\n+\n+ return \" | \".join(summary_parts)\n+\n+ def _get_basic_query_info_from_insight(self, insight: Insight) -> str | None:\n+ \"\"\"Extract basic query information from Insight object without execution.\"\"\"\n+ try:\n+ query_dict = None\n+\n+ # Parse query\n+ if insight.query:\n+ if isinstance(insight.query, str):\n+ query_dict = json.loads(insight.query)\n+ elif isinstance(insight.query, dict):\n+ query_dict = insight.query\n+\n+ if not query_dict:\n+ return None\n+\n+ # Extract basic info from query\n+ info_parts = []\n+\n+ # Get events/series info\n+ series = query_dict.get(\"series\", [])\n+ if series:\n+ events = []\n+ for s in series:\n+ if isinstance(s, dict):\n+ event_name = s.get(\"event\", s.get(\"name\", \"Unknown\"))\n+ events.append(event_name)\n+ if events:\n+ # Limit to first 3 for LLM context window\n+ info_parts.append(f\"Events: {', '.join(events[:3])}\")\n+\n+ # Get date range info\n+ date_range = query_dict.get(\"dateRange\", {})\n+ if date_range:\n+ date_from = date_range.get(\"date_from\", \"\")\n+ if date_from:\n+ info_parts.append(f\"Period: {date_from}\")\n+\n+ return \" | \".join(info_parts) if info_parts else None\n+\n+ except Exception:\n+ return \"Query error\"\n+\n+ def _process_insight_for_evaluation(self, insight: Insight, query_executor: AssistantQueryExecutor) -> dict:",
"comment_created_at": "2025-08-11T22:35:07+00:00",
"comment_author": "kappa90",
"comment_body": "General nit comment: if these functions could be split up into smaller functions, it would improve readability and facilitate the review process. E.g. this function right now does the query parsing, query execution, and visualization message creation, these could have been different functions, e.g. (suggestion from Claude):\r\n\r\n```python\r\ndef _process_insight_for_evaluation(self, insight: Insight, query_executor: AssistantQueryExecutor) -> dict:\r\n insight_info = self._create_base_insight_info(insight)\r\n \r\n try:\r\n query_dict = self._parse_insight_query(insight)\r\n if query_dict:\r\n self._execute_and_update_info(insight_info, query_dict, query_executor)\r\n self._add_visualization_message(insight_info, insight)\r\n else:\r\n self._handle_no_query(insight_info, insight)\r\n except Exception as e:\r\n self._handle_evaluation_error(insight_info, insight, e)\r\n \r\n return insight_info\r\n\r\ndef _parse_insight_query(self, insight: Insight) -> dict | None:\r\n # Separate method for query parsing\r\n pass\r\n\r\ndef _execute_and_update_info(self, insight_info: dict, query_dict: dict, executor):\r\n # Separate method for execution\r\n pass\r\n```\r\n\r\n",
"pr_file_module": null
},
{
"comment_id": "2273401943",
"repo_full_name": "PostHog/posthog",
"pr_number": 35726,
"pr_file": "ee/hogai/graph/insights/nodes.py",
"discussion_id": "2268157563",
"commented_code": "@@ -242,62 +307,254 @@ def _format_insights_page(self, page_number: int) -> str:\n \n return \"\\n\".join(formatted_insights)\n \n- def _parse_insight_ids(self, response_content: str) -> list[int]:\n- \"\"\"Parse insight IDs from LLM response.\"\"\"\n- import re\n+ def _get_all_loaded_insight_ids(self) -> set[int]:\n+ \"\"\"Get all insight IDs from loaded pages.\"\"\"\n+ all_ids = set()\n+ for page_insights in self._loaded_pages.values():\n+ for insight in page_insights:\n+ all_ids.add(insight.id)\n+ return all_ids\n+\n+ def _find_insight_by_id(self, insight_id: int) -> Insight | None:\n+ \"\"\"Find an insight by ID across all loaded pages.\"\"\"\n+ for page_insights in self._loaded_pages.values():\n+ for insight in page_insights:\n+ if insight.id == insight_id:\n+ return insight\n+ return None\n \n- # Look for numbers in the response\n+ def _parse_insight_ids(self, response_content: str) -> list[int]:\n+ \"\"\"Parse insight IDs from LLM response, removing duplicates and preserving order.\"\"\"\n numbers = re.findall(r\"\\b\\d+\\b\", response_content)\n \n # Convert to integers and validate against available insights\n- available_ids = {insight[\"insight_id\"] for insight in self._all_insights}\n+ available_ids = self._get_all_loaded_insight_ids()\n valid_ids = []\n+ seen_ids = set()\n \n for num_str in numbers:\n- insight_id = int(num_str)\n- if insight_id in available_ids:\n- valid_ids.append(insight_id)\n+ try:\n+ insight_id = int(num_str)\n+ if insight_id in available_ids and insight_id not in seen_ids:\n+ valid_ids.append(insight_id)\n+ seen_ids.add(insight_id)\n+ # Stop if we've found enough unique insights\n+ if len(valid_ids) >= self._max_insights:\n+ break\n+ except ValueError:\n+ continue\n+\n+ return valid_ids\n+\n+ def _create_enhanced_insight_summary(self, insight: Insight) -> str:\n+ \"\"\"Create enhanced summary with metadata and basic execution info.\"\"\"\n+ insight_id = insight.id\n+ name = insight.name or insight.derived_name or \"Unnamed\"\n+ description = insight.description or \"\"\n+\n+ insight_type = \"Unknown\"\n+ if insight.query:\n+ try:\n+ query_dict = json.loads(insight.query) if isinstance(insight.query, str) else insight.query\n+ query_kind = query_dict.get(\"kind\", \"Unknown\")\n+\n+ if query_kind == \"DataVisualizationNode\":\n+ source = query_dict.get(\"source\", {})\n+ if source.get(\"kind\") == \"HogQLQuery\":\n+ insight_type = \"HogQL\"\n+ else:\n+ insight_type = \"DataVisualization\"\n+ else:\n+ insight_type = query_kind.replace(\"Query\", \"\")\n+ except Exception:\n+ insight_type = \"Unknown\"\n \n- # Limit to 3 insights\n- return valid_ids[:3]\n+ # Check if insight can be visualized\n+ can_viz = bool(insight.query)\n+ viz_status = \"\u2713 Executable\" if can_viz else \"\u2717 Not executable\"\n \n- def _format_search_results(self, selected_insights: list[int], search_query: str) -> str:\n- \"\"\"Format final search results for display.\"\"\"\n- if not selected_insights:\n- return f\"No insights found matching '{search_query or 'your search'}'.\\n\\nSuggest that the user try:\\n- Using different keywords\\n- Searching for broader terms\\n- Creating a new insight instead\"\n+ # Get basic query info without executing\n+ query_info = self._get_basic_query_info_from_insight(insight)\n \n- insight_details = []\n- for insight_id in selected_insights:\n- insight = next((i for i in self._all_insights if i[\"insight_id\"] == insight_id), None)\n- if insight:\n- insight_details.append(insight)\n+ insight_url = 
f\"/project/{self._team.id}/insights/{insight.short_id}\"\n+ hyperlink_format = f\"[{name}]({insight_url})\"\n \n- header = f\"Found {len(insight_details)} insight{'s' if len(insight_details) != 1 else ''}\"\n- if search_query:\n- header += f\" matching '{search_query}'\"\n- header += \":\\n\\n\"\n+ summary_parts = [f\"ID: {insight_id} | {name} | {hyperlink_format}\", f\"Type: {insight_type} | {viz_status}\"]\n \n- formatted_results = []\n- for i, insight in enumerate(insight_details, 1):\n- name = insight.get(\"insight__name\") or insight.get(\"insight__derived_name\", \"Unnamed Insight\")\n- description = insight.get(\"insight__description\")\n- insight_short_id = insight.get(\"insight__short_id\")\n- insight_url = f\"/project/{self._team.project_id}/insights/{insight_short_id}\"\n+ if description:\n+ summary_parts.append(f\"Description: {description}\")\n+\n+ if query_info:\n+ summary_parts.append(f\"Query: {query_info}\")\n+\n+ return \" | \".join(summary_parts)\n+\n+ def _get_basic_query_info_from_insight(self, insight: Insight) -> str | None:\n+ \"\"\"Extract basic query information from Insight object without execution.\"\"\"\n+ try:\n+ query_dict = None\n+\n+ # Parse query\n+ if insight.query:\n+ if isinstance(insight.query, str):\n+ query_dict = json.loads(insight.query)\n+ elif isinstance(insight.query, dict):\n+ query_dict = insight.query\n+\n+ if not query_dict:\n+ return None\n+\n+ # Extract basic info from query\n+ info_parts = []\n+\n+ # Get events/series info\n+ series = query_dict.get(\"series\", [])\n+ if series:\n+ events = []\n+ for s in series:\n+ if isinstance(s, dict):\n+ event_name = s.get(\"event\", s.get(\"name\", \"Unknown\"))\n+ events.append(event_name)\n+ if events:\n+ # Limit to first 3 for LLM context window\n+ info_parts.append(f\"Events: {', '.join(events[:3])}\")\n+\n+ # Get date range info\n+ date_range = query_dict.get(\"dateRange\", {})\n+ if date_range:\n+ date_from = date_range.get(\"date_from\", \"\")\n+ if date_from:\n+ info_parts.append(f\"Period: {date_from}\")\n+\n+ return \" | \".join(info_parts) if info_parts else None\n+\n+ except Exception:\n+ return \"Query error\"\n+\n+ def _process_insight_for_evaluation(self, insight: Insight, query_executor: AssistantQueryExecutor) -> dict:",
"comment_created_at": "2025-08-13T13:03:12+00:00",
"comment_author": "tatoalo",
"comment_body": "Yeah, great point. Refactored to split the meatier chunks into smaller components, should be better now!",
"pr_file_module": null
}
]
},
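The exchange above asks for the parsing, execution, and formatting concerns to live in separate functions. Here is a rough, runnable sketch of that decomposition, using hypothetical function names and only the query fields that appear in the quoted diff (`series`, `dateRange`); it assumes a query arrives as either a JSON string or a dict.

```python
import json


def parse_query(raw_query) -> dict | None:
    """Parsing concern: normalize a str-or-dict query into a dict, or None if unusable."""
    if isinstance(raw_query, str):
        try:
            return json.loads(raw_query)
        except json.JSONDecodeError:
            return None
    return raw_query if isinstance(raw_query, dict) else None


def summarize_query(query: dict) -> str:
    """Summary concern: extract only the fields a prompt actually needs."""
    parts = []
    events = [s.get("event", "Unknown") for s in query.get("series", []) if isinstance(s, dict)]
    if events:
        parts.append(f"Events: {', '.join(events[:3])}")  # cap at 3 for the context window
    date_from = query.get("dateRange", {}).get("date_from")
    if date_from:
        parts.append(f"Period: {date_from}")
    return " | ".join(parts)


def process_for_evaluation(raw_query) -> dict:
    """Orchestrator: each concern above is now small and separately testable."""
    query = parse_query(raw_query)
    if query is None:
        return {"ok": False, "summary": "Not executable"}
    return {"ok": True, "summary": summarize_query(query)}


print(process_for_evaluation('{"series": [{"event": "$pageview"}], "dateRange": {"date_from": "-7d"}}'))
# -> {'ok': True, 'summary': 'Events: $pageview | Period: -7d'}
```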
{
"discussion_id": "2257584861",
"pr_number": 35960,
"pr_file": "ee/api/session_summaries.py",
"created_at": "2025-08-06T15:37:29+00:00",
"commented_code": "session_ids = serializer.validated_data[\"session_ids\"]\n focus_area = serializer.validated_data.get(\"focus_area\")\n # Check that sessions exist and get min/max timestamps for follow-up queries\n- min_timestamp, max_timestamp = self._find_sessions_timestamps(session_ids)\n+ min_timestamp, max_timestamp = find_sessions_timestamps(session_ids=session_ids, team=self.team)\n # Prepare extra context, if provided\n extra_summary_context = None\n if focus_area:\n extra_summary_context = ExtraSummaryContext(focus_area=focus_area)\n \n- # Summarize provided sessions\n- try:\n- summary = execute_summarize_session_group(\n+ # Helper function to consume the async generator and return a list\n+ async def _execute_and_collect_results() -> list[EnrichedSessionGroupSummaryPatternsList | str]:\n+ results = []\n+ async for update in execute_summarize_session_group(\n session_ids=session_ids,\n user_id=user.pk,\n team=self.team,\n min_timestamp=min_timestamp,\n max_timestamp=max_timestamp,\n extra_summary_context=extra_summary_context,\n local_reads_prod=False,\n- )\n+ ):\n+ results.append(update)\n+ return results",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2257584861",
"repo_full_name": "PostHog/posthog",
"pr_number": 35960,
"pr_file": "ee/api/session_summaries.py",
"discussion_id": "2257584861",
"commented_code": "@@ -70,23 +72,41 @@ def create_session_summaries(self, request: Request, **kwargs) -> Response:\n session_ids = serializer.validated_data[\"session_ids\"]\n focus_area = serializer.validated_data.get(\"focus_area\")\n # Check that sessions exist and get min/max timestamps for follow-up queries\n- min_timestamp, max_timestamp = self._find_sessions_timestamps(session_ids)\n+ min_timestamp, max_timestamp = find_sessions_timestamps(session_ids=session_ids, team=self.team)\n # Prepare extra context, if provided\n extra_summary_context = None\n if focus_area:\n extra_summary_context = ExtraSummaryContext(focus_area=focus_area)\n \n- # Summarize provided sessions\n- try:\n- summary = execute_summarize_session_group(\n+ # Helper function to consume the async generator and return a list\n+ async def _execute_and_collect_results() -> list[EnrichedSessionGroupSummaryPatternsList | str]:\n+ results = []\n+ async for update in execute_summarize_session_group(\n session_ids=session_ids,\n user_id=user.pk,\n team=self.team,\n min_timestamp=min_timestamp,\n max_timestamp=max_timestamp,\n extra_summary_context=extra_summary_context,\n local_reads_prod=False,\n- )\n+ ):\n+ results.append(update)\n+ return results",
"comment_created_at": "2025-08-06T15:37:29+00:00",
"comment_author": "Twixes",
"comment_body": "If possible, it'd be clearer to have this helper as a viewset helper method, as functions nested in functions are not a clean pattern in Python",
"pr_file_module": null
},
{
"comment_id": "2262628608",
"repo_full_name": "PostHog/posthog",
"pr_number": 35960,
"pr_file": "ee/api/session_summaries.py",
"discussion_id": "2257584861",
"commented_code": "@@ -70,23 +72,41 @@ def create_session_summaries(self, request: Request, **kwargs) -> Response:\n session_ids = serializer.validated_data[\"session_ids\"]\n focus_area = serializer.validated_data.get(\"focus_area\")\n # Check that sessions exist and get min/max timestamps for follow-up queries\n- min_timestamp, max_timestamp = self._find_sessions_timestamps(session_ids)\n+ min_timestamp, max_timestamp = find_sessions_timestamps(session_ids=session_ids, team=self.team)\n # Prepare extra context, if provided\n extra_summary_context = None\n if focus_area:\n extra_summary_context = ExtraSummaryContext(focus_area=focus_area)\n \n- # Summarize provided sessions\n- try:\n- summary = execute_summarize_session_group(\n+ # Helper function to consume the async generator and return a list\n+ async def _execute_and_collect_results() -> list[EnrichedSessionGroupSummaryPatternsList | str]:\n+ results = []\n+ async for update in execute_summarize_session_group(\n session_ids=session_ids,\n user_id=user.pk,\n team=self.team,\n min_timestamp=min_timestamp,\n max_timestamp=max_timestamp,\n extra_summary_context=extra_summary_context,\n local_reads_prod=False,\n- )\n+ ):\n+ results.append(update)\n+ return results",
"comment_created_at": "2025-08-08T10:54:35+00:00",
"comment_author": "sortafreel",
"comment_body": "Sure, makes sense. https://github.com/PostHog/posthog/pull/35960/commits/affb95e38a30848f26762bc99f3f9237a60bbb8a",
"pr_file_module": null
}
]
},
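The resolution here moved the generator-draining helper onto the viewset. A runnable sketch of that shape follows, with a fake async generator standing in for `execute_summarize_session_group` (only its role is taken from the diff above; every name and body below is invented for illustration):

```python
import asyncio
from collections.abc import AsyncIterator


async def fake_summarize_session_group() -> AsyncIterator[str]:
    # Stand-in for execute_summarize_session_group(...): yields incremental updates.
    for update in ("step 1", "step 2", "final summary"):
        yield update


class SessionSummariesViewSet:
    """Illustrative skeleton only, not the real DRF viewset."""

    async def _collect_summary_updates(self, updates: AsyncIterator[str]) -> list[str]:
        # The helper lives on the viewset instead of being nested inside the view function.
        return [update async for update in updates]

    def create_session_summaries(self) -> list[str]:
        # A synchronous view can drive the async helper to completion.
        return asyncio.run(self._collect_summary_updates(fake_summarize_session_group()))


print(SessionSummariesViewSet().create_session_summaries())
# -> ['step 1', 'step 2', 'final summary']
```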
{
"discussion_id": "2280596400",
"pr_number": 36751,
"pr_file": "posthog/utils.py",
"created_at": "2025-08-16T20:50:53+00:00",
"commented_code": "return settings.JS_URL\n \n \n-def render_template(\n- template_name: str,\n+def get_context_for_template(",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2280596400",
"repo_full_name": "PostHog/posthog",
"pr_number": 36751,
"pr_file": "posthog/utils.py",
"discussion_id": "2280596400",
"commented_code": "@@ -320,29 +320,20 @@ def get_js_url(request: HttpRequest) -> str:\n return settings.JS_URL\n \n \n-def render_template(\n- template_name: str,\n+def get_context_for_template(",
"comment_created_at": "2025-08-16T20:50:53+00:00",
"comment_author": "pauldambra",
"comment_body": "Pulled the context generation into its own method so I could test the generated value directly.",
"pr_file_module": null
}
]
},
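The diff above is a testability refactor: pull context generation out of `render_template` so a test can assert on the returned dict directly instead of inspecting rendered HTML. A toy sketch of that seam; only the two function names mirror the PR, while the parameters and bodies are hypothetical:

```python
def get_context_for_template(team_name: str, js_url: str) -> dict:
    # Pure function: callable from a unit test with no request or template machinery.
    return {"team_name": team_name, "js_url": js_url}


def render_template(template_name: str, team_name: str, js_url: str) -> str:
    # The wrapper stays thin once context-building is extracted.
    context = get_context_for_template(team_name, js_url)
    return f"<html><!-- {template_name}: {context} --></html>"


# The test now hits the seam directly:
assert get_context_for_template("Hog Co", "https://example.com/app.js")["js_url"].startswith("https://")
```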
{
"discussion_id": "2279118766",
"pr_number": 36685,
"pr_file": "posthog/api/sharing.py",
"created_at": "2025-08-15T14:42:22+00:00",
"commented_code": "asset_description = resource.dashboard.description or \"\"\n resource.dashboard.last_accessed_at = now()\n resource.dashboard.save(update_fields=[\"last_accessed_at\"])\n+\n+ insights = (\n+ Insight.objects.filter(dashboard_tiles__dashboard=resource.dashboard).distinct().only(\"query_metadata\")\n+ )\n+ for insight in insights.iterator(chunk_size=100):",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2279118766",
"repo_full_name": "PostHog/posthog",
"pr_number": 36685,
"pr_file": "posthog/api/sharing.py",
"discussion_id": "2279118766",
"commented_code": "@@ -395,6 +411,26 @@ def retrieve(self, request: Request, *args: Any, **kwargs: Any) -> Any:\n asset_description = resource.dashboard.description or \"\"\n resource.dashboard.last_accessed_at = now()\n resource.dashboard.save(update_fields=[\"last_accessed_at\"])\n+\n+ insights = (\n+ Insight.objects.filter(dashboard_tiles__dashboard=resource.dashboard).distinct().only(\"query_metadata\")\n+ )\n+ for insight in insights.iterator(chunk_size=100):",
"comment_created_at": "2025-08-15T14:42:22+00:00",
"comment_author": "zlwaterfield",
"comment_body": "There is a lot of duplication for how the event names are found and log_event_usage is called.\r\n\r\nI think you should extract that all into a method like `log_event_usage_from_insight` and `log_event_usage_from_insights`.\r\n\r\n```\r\ndef log_event_usage_from_insight(insight: Insight, team_id: int, user_id: int):\r\n if insight.query_metadata and insight.query_metadata.get(\"events\", []):\r\n for event_name in insight.query_metadata[\"events\"]:\r\n if not event_name:\r\n continue\r\n try:\r\n log_event_usage(\r\n event_name=event_name,\r\n team_id=resource.team.pk,\r\n user_id=self.request.user.pk if self.request.user.is_authenticated else None,\r\n )\r\n except Exception as e:\r\n # fail silently\r\n capture_exception(e)\r\n\r\ndef log_event_usage_from_insights(insights: QuerySet[Insight], team_id: int, user_id: int):\r\n for insight in insights.iterator(chunk_size=100):\r\n log_event_usage_from_insight(insight, team_id, user_id)\r\n```\r\n\r\n\r\n```",
"pr_file_module": null
},
{
"comment_id": "2279609997",
"repo_full_name": "PostHog/posthog",
"pr_number": 36685,
"pr_file": "posthog/api/sharing.py",
"discussion_id": "2279118766",
"commented_code": "@@ -395,6 +411,26 @@ def retrieve(self, request: Request, *args: Any, **kwargs: Any) -> Any:\n asset_description = resource.dashboard.description or \"\"\n resource.dashboard.last_accessed_at = now()\n resource.dashboard.save(update_fields=[\"last_accessed_at\"])\n+\n+ insights = (\n+ Insight.objects.filter(dashboard_tiles__dashboard=resource.dashboard).distinct().only(\"query_metadata\")\n+ )\n+ for insight in insights.iterator(chunk_size=100):",
"comment_created_at": "2025-08-15T18:07:22+00:00",
"comment_author": "nima-karimi",
"comment_body": "The duplication part was on purpose because there isn't a lot of logic involved. It's just more explicit and IMO makes things easier to debug later. That said, I think having a separate function for insights makes sense since that's the primary use case right now. I think for dashboards, we can leave the for loops where they are used.",
"pr_file_module": null
}
]
}
]
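For reference, a self-contained version of the extraction sketched in the review above, with its scope bug fixed: the original draft reached for `resource` and `self.request`, which would not exist inside a standalone helper, so here the `team_id`/`user_id` parameters are actually used. Plain dicts and a print-based logger stand in for the Django models; this is an illustrative sketch, not PostHog's merged code.

```python
from collections.abc import Iterable


def log_event_usage(event_name: str, team_id: int, user_id: int | None) -> None:
    # Stand-in for PostHog's real usage logger.
    print(f"usage: {event_name} team={team_id} user={user_id}")


def log_event_usage_from_insight(insight: dict, team_id: int, user_id: int | None) -> None:
    # One insight: log each event named in its query metadata, swallowing
    # per-event failures so a bad event cannot break the shared page.
    for event_name in (insight.get("query_metadata") or {}).get("events", []):
        if not event_name:
            continue
        try:
            log_event_usage(event_name=event_name, team_id=team_id, user_id=user_id)
        except Exception:
            pass  # fail silently; the real code calls capture_exception(e) here


def log_event_usage_from_insights(insights: Iterable[dict], team_id: int, user_id: int | None) -> None:
    # Many insights: the real code iterates a queryset with .iterator(chunk_size=100).
    for insight in insights:
        log_event_usage_from_insight(insight, team_id, user_id)


log_event_usage_from_insights(
    [{"query_metadata": {"events": ["$pageview", "signup"]}}], team_id=1, user_id=None
)
# -> usage: $pageview team=1 user=None
# -> usage: signup team=1 user=None
```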