awesome-reviewers/_reviewers/posthog-configuration-constants-management.json

[
{
"discussion_id": "2268153978",
"pr_number": 35726,
"pr_file": "ee/hogai/graph/insights/nodes.py",
"created_at": "2025-08-11T22:32:11+00:00",
"commented_code": "+import json\n+import re\n from typing import Literal\n from uuid import uuid4\n-import time\n \n from langchain_core.runnables import RunnableConfig\n-from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage\n+from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage, ToolMessage\n from langchain_core.tools import tool\n-from langchain_openai import ChatOpenAI\n \n-import structlog\n+from langchain_openai import ChatOpenAI\n \n \n+from ee.hogai.graph.query_executor.query_executor import AssistantQueryExecutor\n+from ee.hogai.graph.root.nodes import MAX_SUPPORTED_QUERY_KIND_TO_MODEL\n from ee.hogai.utils.types import AssistantState, PartialAssistantState\n-from posthog.schema import AssistantToolCallMessage\n+from posthog.schema import (\n+ AssistantToolCallMessage,\n+ VisualizationMessage,\n+ AssistantTrendsQuery,\n+ AssistantFunnelsQuery,\n+ AssistantRetentionQuery,\n+ AssistantHogQLQuery,\n+)\n from ee.hogai.graph.base import AssistantNode\n-from .prompts import ITERATIVE_SEARCH_SYSTEM_PROMPT, ITERATIVE_SEARCH_USER_PROMPT\n-\n-from posthog.models import InsightViewed\n+from .prompts import (\n+ ITERATIVE_SEARCH_SYSTEM_PROMPT,\n+ ITERATIVE_SEARCH_USER_PROMPT,\n+ PAGINATION_INSTRUCTIONS_TEMPLATE,\n+ HYPERLINK_USAGE_INSTRUCTIONS,\n+ TOOL_BASED_EVALUATION_SYSTEM_PROMPT,\n+ NO_INSIGHTS_FOUND_MESSAGE,\n+ SEARCH_ERROR_INSTRUCTIONS,\n+ EMPTY_DATABASE_ERROR_MESSAGE,\n+)\n+\n+from posthog.models import Insight\n+from django.db.models import Max\n+from django.utils import timezone\n+from datetime import timedelta\n \n \n class InsightSearchNode(AssistantNode):\n- logger = structlog.get_logger(__name__)\n-\n def __init__(self, *args, **kwargs):\n super().__init__(*args, **kwargs)\n self._current_page = 0\n- self._page_size = 300\n+ self._page_size = 50",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2268153978",
"repo_full_name": "PostHog/posthog",
"pr_number": 35726,
"pr_file": "ee/hogai/graph/insights/nodes.py",
"discussion_id": "2268153978",
"commented_code": "@@ -1,33 +1,58 @@\n+import json\n+import re\n from typing import Literal\n from uuid import uuid4\n-import time\n \n from langchain_core.runnables import RunnableConfig\n-from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage\n+from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage, ToolMessage\n from langchain_core.tools import tool\n-from langchain_openai import ChatOpenAI\n \n-import structlog\n+from langchain_openai import ChatOpenAI\n \n \n+from ee.hogai.graph.query_executor.query_executor import AssistantQueryExecutor\n+from ee.hogai.graph.root.nodes import MAX_SUPPORTED_QUERY_KIND_TO_MODEL\n from ee.hogai.utils.types import AssistantState, PartialAssistantState\n-from posthog.schema import AssistantToolCallMessage\n+from posthog.schema import (\n+ AssistantToolCallMessage,\n+ VisualizationMessage,\n+ AssistantTrendsQuery,\n+ AssistantFunnelsQuery,\n+ AssistantRetentionQuery,\n+ AssistantHogQLQuery,\n+)\n from ee.hogai.graph.base import AssistantNode\n-from .prompts import ITERATIVE_SEARCH_SYSTEM_PROMPT, ITERATIVE_SEARCH_USER_PROMPT\n-\n-from posthog.models import InsightViewed\n+from .prompts import (\n+ ITERATIVE_SEARCH_SYSTEM_PROMPT,\n+ ITERATIVE_SEARCH_USER_PROMPT,\n+ PAGINATION_INSTRUCTIONS_TEMPLATE,\n+ HYPERLINK_USAGE_INSTRUCTIONS,\n+ TOOL_BASED_EVALUATION_SYSTEM_PROMPT,\n+ NO_INSIGHTS_FOUND_MESSAGE,\n+ SEARCH_ERROR_INSTRUCTIONS,\n+ EMPTY_DATABASE_ERROR_MESSAGE,\n+)\n+\n+from posthog.models import Insight\n+from django.db.models import Max\n+from django.utils import timezone\n+from datetime import timedelta\n \n \n class InsightSearchNode(AssistantNode):\n- logger = structlog.get_logger(__name__)\n-\n def __init__(self, *args, **kwargs):\n super().__init__(*args, **kwargs)\n self._current_page = 0\n- self._page_size = 300\n+ self._page_size = 50",
"comment_created_at": "2025-08-11T22:32:11+00:00",
"comment_author": "kappa90",
"comment_body": "nit, these numbers (50, 6, 3, 180), could be class based configs, e.g. `DEFAULT_PAGE_SIZE`",
"pr_file_module": null
},
{
"comment_id": "2273401763",
"repo_full_name": "PostHog/posthog",
"pr_number": 35726,
"pr_file": "ee/hogai/graph/insights/nodes.py",
"discussion_id": "2268153978",
"commented_code": "@@ -1,33 +1,58 @@\n+import json\n+import re\n from typing import Literal\n from uuid import uuid4\n-import time\n \n from langchain_core.runnables import RunnableConfig\n-from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage\n+from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage, ToolMessage\n from langchain_core.tools import tool\n-from langchain_openai import ChatOpenAI\n \n-import structlog\n+from langchain_openai import ChatOpenAI\n \n \n+from ee.hogai.graph.query_executor.query_executor import AssistantQueryExecutor\n+from ee.hogai.graph.root.nodes import MAX_SUPPORTED_QUERY_KIND_TO_MODEL\n from ee.hogai.utils.types import AssistantState, PartialAssistantState\n-from posthog.schema import AssistantToolCallMessage\n+from posthog.schema import (\n+ AssistantToolCallMessage,\n+ VisualizationMessage,\n+ AssistantTrendsQuery,\n+ AssistantFunnelsQuery,\n+ AssistantRetentionQuery,\n+ AssistantHogQLQuery,\n+)\n from ee.hogai.graph.base import AssistantNode\n-from .prompts import ITERATIVE_SEARCH_SYSTEM_PROMPT, ITERATIVE_SEARCH_USER_PROMPT\n-\n-from posthog.models import InsightViewed\n+from .prompts import (\n+ ITERATIVE_SEARCH_SYSTEM_PROMPT,\n+ ITERATIVE_SEARCH_USER_PROMPT,\n+ PAGINATION_INSTRUCTIONS_TEMPLATE,\n+ HYPERLINK_USAGE_INSTRUCTIONS,\n+ TOOL_BASED_EVALUATION_SYSTEM_PROMPT,\n+ NO_INSIGHTS_FOUND_MESSAGE,\n+ SEARCH_ERROR_INSTRUCTIONS,\n+ EMPTY_DATABASE_ERROR_MESSAGE,\n+)\n+\n+from posthog.models import Insight\n+from django.db.models import Max\n+from django.utils import timezone\n+from datetime import timedelta\n \n \n class InsightSearchNode(AssistantNode):\n- logger = structlog.get_logger(__name__)\n-\n def __init__(self, *args, **kwargs):\n super().__init__(*args, **kwargs)\n self._current_page = 0\n- self._page_size = 300\n+ self._page_size = 50",
"comment_created_at": "2025-08-13T13:03:08+00:00",
"comment_author": "tatoalo",
"comment_body": "Makes sense, added!",
"pr_file_module": null
}
]
},
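The discussion above suggests promoting the inline numbers (50, 6, 3, 180) to named class-level configuration. A minimal sketch of that pattern follows; `DEFAULT_PAGE_SIZE` comes from the review comment, while the other constant names and the simplified class are illustrative assumptions rather than the merged PostHog code.

```python
# Illustrative sketch only -- constant names other than DEFAULT_PAGE_SIZE are assumed.
class InsightSearchNode:
    DEFAULT_PAGE_SIZE = 50        # insights loaded per page (previously an inline literal)
    MAX_SEARCH_ITERATIONS = 3     # hypothetical name for the inline "3"
    INSIGHT_FRESHNESS_DAYS = 180  # hypothetical name for the inline "180"

    def __init__(self) -> None:
        self._current_page = 0
        self._page_size = self.DEFAULT_PAGE_SIZE
```

Named constants keep the tunable values discoverable in one place and let tests or subclasses override them without editing method bodies.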
{
"discussion_id": "2267603416",
"pr_number": 36468,
"pr_file": "ee/hogai/tool.py",
"created_at": "2025-08-11T18:09:36+00:00",
"commented_code": "It will be formatted like an f-string, with the tool context as the variables.\n For example, \"The current filters the user is seeing are: {current_filters}.\"\n \"\"\"\n+ show_tool_call_messages: bool = Field(description=\"Whether to show tool call messages.\", default=False)",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2267603416",
"repo_full_name": "PostHog/posthog",
"pr_number": 36468,
"pr_file": "ee/hogai/tool.py",
"discussion_id": "2267603416",
"commented_code": "@@ -124,6 +124,7 @@ class MaxTool(AssistantContextMixin, BaseTool):\n It will be formatted like an f-string, with the tool context as the variables.\n For example, \"The current filters the user is seeing are: {current_filters}.\"\n \"\"\"\n+ show_tool_call_messages: bool = Field(description=\"Whether to show tool call messages.\", default=False)",
"comment_created_at": "2025-08-11T18:09:36+00:00",
"comment_author": "denakorita",
"comment_body": "@skoob13 I am not certain about whether we should set the default to `False` here or not. \r\nIf we set it to `False` then I have to then go and change every tool we have atm, not sure if we want to do that, there might be tools that are better off with more generic messages. What do you think? \r\nI would say to avoid issues I proceed with default as `True` and we only set it to `False` for the session_recordings, since it is the one using the taxonomy agent which is more informative.",
"pr_file_module": null
},
{
"comment_id": "2269244812",
"repo_full_name": "PostHog/posthog",
"pr_number": 36468,
"pr_file": "ee/hogai/tool.py",
"discussion_id": "2267603416",
"commented_code": "@@ -124,6 +124,7 @@ class MaxTool(AssistantContextMixin, BaseTool):\n It will be formatted like an f-string, with the tool context as the variables.\n For example, \"The current filters the user is seeing are: {current_filters}.\"\n \"\"\"\n+ show_tool_call_messages: bool = Field(description=\"Whether to show tool call messages.\", default=False)",
"comment_created_at": "2025-08-12T09:22:28+00:00",
"comment_author": "skoob13",
"comment_body": "Yep, I agree. Let's change the default to True.",
"pr_file_module": null
}
]
},
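The outcome of the thread above is to flip the default so existing tools keep their current behaviour and only specific tools opt out. A hedged sketch of that approach, using a plain Pydantic model as a stand-in for the real `MaxTool` base classes:

```python
from pydantic import BaseModel, Field


class MaxTool(BaseModel):  # simplified stand-in for the real MaxTool
    show_tool_call_messages: bool = Field(
        default=True,  # backward-compatible default agreed on in the review
        description="Whether to show tool call messages.",
    )


class SessionRecordingsTool(MaxTool):
    # Only the session-recordings tool opts out, since its taxonomy agent
    # already produces more informative messages.
    show_tool_call_messages: bool = False
```

Choosing the backward-compatible value as the default means no existing tool definition has to change when the flag is introduced.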
{
"discussion_id": "2251719937",
"pr_number": 35960,
"pr_file": "ee/hogai/graph/session_summaries/nodes.py",
"created_at": "2025-08-04T14:42:57+00:00",
"commented_code": "+import asyncio\n+import time\n+from typing import cast, Literal, Any\n+from uuid import uuid4\n+\n+import structlog\n+from asgiref.sync import async_to_sync\n+from langchain_core.runnables import RunnableConfig\n+\n+from ee.hogai.graph.base import AssistantNode\n+from ee.hogai.session_summaries.constants import SESSION_SUMMARIES_STREAMING_MODEL\n+from ee.hogai.session_summaries.session_group.summarize_session_group import find_sessions_timestamps\n+from ee.hogai.session_summaries.session_group.summary_notebooks import create_summary_notebook\n+from ee.hogai.utils.types import AssistantState, PartialAssistantState\n+from posthog.schema import MaxRecordingUniversalFilters, RecordingsQuery, AssistantToolCallMessage\n+from posthog.temporal.ai.session_summary.summarize_session import execute_summarize_session\n+from posthog.temporal.ai.session_summary.summarize_session_group import execute_summarize_session_group\n+\n+\n+class SessionSummarizationNode(AssistantNode):\n+ logger = structlog.get_logger(__name__)\n+\n+ def __init__(self, *args, **kwargs):\n+ super().__init__(*args, **kwargs)\n+\n+ async def _generate_replay_filters(self, plain_text_query: str) -> MaxRecordingUniversalFilters | None:\n+ \"\"\"Generates replay filters to get session ids by querying a compiled Universal filters graph.\"\"\"\n+ from ee.hogai.graph.filter_options.prompts import PRODUCT_DESCRIPTION_PROMPT\n+ from products.replay.backend.prompts import SESSION_REPLAY_RESPONSE_FORMATS_PROMPT\n+ from products.replay.backend.prompts import SESSION_REPLAY_EXAMPLES_PROMPT\n+ from products.replay.backend.prompts import MULTIPLE_FILTERS_PROMPT\n+ from ee.hogai.graph.filter_options.graph import FilterOptionsGraph\n+\n+ # Create the graph with injected prompts\n+ injected_prompts = {\n+ \"product_description_prompt\": PRODUCT_DESCRIPTION_PROMPT,\n+ \"response_formats_prompt\": SESSION_REPLAY_RESPONSE_FORMATS_PROMPT,\n+ \"examples_prompt\": SESSION_REPLAY_EXAMPLES_PROMPT,\n+ \"multiple_filters_prompt\": MULTIPLE_FILTERS_PROMPT,\n+ }\n+ graph = FilterOptionsGraph(self._team, self._user, injected_prompts=injected_prompts).compile_full_graph()\n+ # Call with your query\n+ result = await graph.ainvoke(\n+ {\n+ \"change\": plain_text_query,\n+ \"current_filters\": {}, # Empty state, as we need results from the query-to-filter\n+ }\n+ )\n+ if (\n+ not result\n+ or not isinstance(result, dict)\n+ or not result.get(\"generated_filter_options\")\n+ or not result[\"generated_filter_options\"].get(\"data\")\n+ ):\n+ self.logger.error(\n+ f\"Invalid result from filter options graph: {result}\",\n+ extra={\n+ \"team_id\": getattr(self._team, \"id\", \"unknown\"),\n+ \"user_id\": getattr(self._user, \"id\", \"unknown\"),\n+ \"result\": result,\n+ },\n+ )\n+ return None\n+ # Extract the generated filters\n+ filters_data = result[\"generated_filter_options\"][\"data\"]\n+ if not filters_data:\n+ return None\n+ max_filters = cast(MaxRecordingUniversalFilters, filters_data)\n+ return max_filters\n+\n+ def _get_session_ids_with_filters(self, replay_filters: MaxRecordingUniversalFilters) -> list[str] | None:\n+ from posthog.session_recordings.queries.session_recording_list_from_query import SessionRecordingListFromQuery\n+\n+ # Convert Max filters into recordings query format\n+ properties = []\n+ if replay_filters.filter_group and replay_filters.filter_group.values:\n+ for inner_group in replay_filters.filter_group.values:\n+ if hasattr(inner_group, \"values\"):\n+ properties.extend(inner_group.values)\n+ recordings_query = 
RecordingsQuery(\n+ date_from=replay_filters.date_from,\n+ date_to=replay_filters.date_to,\n+ properties=properties,\n+ filter_test_accounts=replay_filters.filter_test_accounts,\n+ order=replay_filters.order,\n+ # Handle duration filters\n+ having_predicates=(\n+ [\n+ {\"key\": \"duration\", \"type\": \"recording\", \"operator\": dur.operator, \"value\": dur.value}\n+ for dur in (replay_filters.duration or [])\n+ ]\n+ if replay_filters.duration\n+ else None\n+ ),\n+ )\n+ # Execute the query to get session IDs\n+ query_runner = SessionRecordingListFromQuery(\n+ team=self._team, query=recordings_query, hogql_query_modifiers=None\n+ )\n+ results = query_runner.run()\n+ # Extract session IDs\n+ session_ids = [recording[\"session_id\"] for recording in results.results]\n+ return session_ids if session_ids else None\n+\n+ async def _summarize_sessions_into_content(self, session_ids: list[str]) -> str:\n+ \"\"\"Summarizes the sessions using the provided session IDs.\"\"\"\n+ # If a small amount of sessions - we won't be able to extract lots of patters,\n+ # so it's ok to summarize them one by one and answer fast (without notebook creation)\n+ if len(session_ids) <= 5:\n+ summaries_tasks = [\n+ # As it's used as a direct output, use faster streaming model instead\n+ execute_summarize_session(\n+ session_id=sid,\n+ user_id=self._user.id,\n+ team=self._team,\n+ model_to_use=SESSION_SUMMARIES_STREAMING_MODEL,\n+ )\n+ for sid in session_ids\n+ ]\n+ summaries: list[str] = await asyncio.gather(*summaries_tasks)\n+ # TODO: Add layer to convert JSON into more readable text for Max to returns to user\n+ content = \"\n\".join(summaries)\n+ return content\n+ # If a large amount of sessions - we will summarize them in a group and create a notebook\n+ # to provide a more detailed overview of the patterns and insights.\n+ min_timestamp, max_timestamp = find_sessions_timestamps(session_ids=session_ids, team=self._team)\n+ summary = execute_summarize_session_group(\n+ session_ids=session_ids,\n+ user_id=self._user.pk,\n+ team=self._team,\n+ min_timestamp=min_timestamp,\n+ max_timestamp=max_timestamp,\n+ extra_summary_context=None,\n+ local_reads_prod=False,\n+ )\n+ create_summary_notebook(session_ids=session_ids, user=self._user, team=self._team, summary=summary)\n+ content = summary.model_dump_json(exclude_none=True)\n+ return content\n+\n+ def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState | None:\n+ start_time = time.time()\n+ conversation_id = config.get(\"configurable\", {}).get(\"thread_id\", \"unknown\")\n+ # If query was not provided for some reason\n+ if not state.session_summarization_query:\n+ self._log_failure(\n+ f\"Session summarization query is not provided: {state.session_summarization_query}\",\n+ conversation_id,\n+ start_time,\n+ )\n+ return self._create_error_response(self._base_error_instructions, state.root_tool_call_id)\n+ try:\n+ # Generate filters to get session ids from DB\n+ replay_filters = async_to_sync(self._generate_replay_filters)(state.session_summarization_query)\n+ if not replay_filters:\n+ self._log_failure(\n+ f\"No Replay filters were generated for session summarization: {state.session_summarization_query}\",\n+ conversation_id,\n+ start_time,\n+ )\n+ return self._create_error_response(self._base_error_instructions, state.root_tool_call_id)\n+ # Query the filters to get session ids\n+ session_ids = self._get_session_ids_with_filters(replay_filters)\n+ # TODO: Remove after testing\n+ # Limit to 5 to test fast summarization\n+ session_ids = 
session_ids[:5] if session_ids else []\n+ if not session_ids:\n+ self._log_failure(\n+ f\"No session ids found for the provided filters: {replay_filters}\", conversation_id, start_time\n+ )\n+ return self._create_error_response(self._base_error_instructions, state.root_tool_call_id)\n+ summaries_content = async_to_sync(self._summarize_sessions_into_content)(session_ids)\n+ return PartialAssistantState(\n+ messages=[\n+ AssistantToolCallMessage(\n+ content=summaries_content,\n+ tool_call_id=state.root_tool_call_id or \"unknown\",\n+ id=str(uuid4()),\n+ ),\n+ ],\n+ session_summarization_query=None,\n+ root_tool_call_id=None,\n+ )\n+ except Exception as err:\n+ self._log_failure(\"Session summarization failed\", conversation_id, start_time, err)\n+ return self._create_error_response(self._base_error_instructions, state.root_tool_call_id)\n+\n+ def _create_error_response(self, message: str, root_tool_call_id: str | None) -> PartialAssistantState:\n+ return PartialAssistantState(\n+ messages=[\n+ AssistantToolCallMessage(\n+ content=message,\n+ tool_call_id=root_tool_call_id or \"unknown\",\n+ id=str(uuid4()),\n+ ),\n+ ],\n+ session_summarization_query=None,\n+ root_tool_call_id=None,\n+ )\n+\n+ def _log_failure(self, message: str, conversation_id: str, start_time: float, error: Any = None):\n+ self.logger.exception(\n+ message,\n+ extra={\n+ \"team_id\": getattr(self._team, \"id\", \"unknown\"),\n+ \"conversation_id\": conversation_id,\n+ \"execution_time_ms\": round(time.time() - start_time * 1000, 2),\n+ \"error\": str(error) if error else None,\n+ },\n+ )\n+\n+ @property\n+ def _base_error_instructions(self) -> str:\n+ return \"INSTRUCTIONS: Tell the user that you encountered an issue while summarizing the session and suggest they try again with a different question.\"",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2251719937",
"repo_full_name": "PostHog/posthog",
"pr_number": 35960,
"pr_file": "ee/hogai/graph/session_summaries/nodes.py",
"discussion_id": "2251719937",
"commented_code": "@@ -0,0 +1,216 @@\n+import asyncio\n+import time\n+from typing import cast, Literal, Any\n+from uuid import uuid4\n+\n+import structlog\n+from asgiref.sync import async_to_sync\n+from langchain_core.runnables import RunnableConfig\n+\n+from ee.hogai.graph.base import AssistantNode\n+from ee.hogai.session_summaries.constants import SESSION_SUMMARIES_STREAMING_MODEL\n+from ee.hogai.session_summaries.session_group.summarize_session_group import find_sessions_timestamps\n+from ee.hogai.session_summaries.session_group.summary_notebooks import create_summary_notebook\n+from ee.hogai.utils.types import AssistantState, PartialAssistantState\n+from posthog.schema import MaxRecordingUniversalFilters, RecordingsQuery, AssistantToolCallMessage\n+from posthog.temporal.ai.session_summary.summarize_session import execute_summarize_session\n+from posthog.temporal.ai.session_summary.summarize_session_group import execute_summarize_session_group\n+\n+\n+class SessionSummarizationNode(AssistantNode):\n+ logger = structlog.get_logger(__name__)\n+\n+ def __init__(self, *args, **kwargs):\n+ super().__init__(*args, **kwargs)\n+\n+ async def _generate_replay_filters(self, plain_text_query: str) -> MaxRecordingUniversalFilters | None:\n+ \"\"\"Generates replay filters to get session ids by querying a compiled Universal filters graph.\"\"\"\n+ from ee.hogai.graph.filter_options.prompts import PRODUCT_DESCRIPTION_PROMPT\n+ from products.replay.backend.prompts import SESSION_REPLAY_RESPONSE_FORMATS_PROMPT\n+ from products.replay.backend.prompts import SESSION_REPLAY_EXAMPLES_PROMPT\n+ from products.replay.backend.prompts import MULTIPLE_FILTERS_PROMPT\n+ from ee.hogai.graph.filter_options.graph import FilterOptionsGraph\n+\n+ # Create the graph with injected prompts\n+ injected_prompts = {\n+ \"product_description_prompt\": PRODUCT_DESCRIPTION_PROMPT,\n+ \"response_formats_prompt\": SESSION_REPLAY_RESPONSE_FORMATS_PROMPT,\n+ \"examples_prompt\": SESSION_REPLAY_EXAMPLES_PROMPT,\n+ \"multiple_filters_prompt\": MULTIPLE_FILTERS_PROMPT,\n+ }\n+ graph = FilterOptionsGraph(self._team, self._user, injected_prompts=injected_prompts).compile_full_graph()\n+ # Call with your query\n+ result = await graph.ainvoke(\n+ {\n+ \"change\": plain_text_query,\n+ \"current_filters\": {}, # Empty state, as we need results from the query-to-filter\n+ }\n+ )\n+ if (\n+ not result\n+ or not isinstance(result, dict)\n+ or not result.get(\"generated_filter_options\")\n+ or not result[\"generated_filter_options\"].get(\"data\")\n+ ):\n+ self.logger.error(\n+ f\"Invalid result from filter options graph: {result}\",\n+ extra={\n+ \"team_id\": getattr(self._team, \"id\", \"unknown\"),\n+ \"user_id\": getattr(self._user, \"id\", \"unknown\"),\n+ \"result\": result,\n+ },\n+ )\n+ return None\n+ # Extract the generated filters\n+ filters_data = result[\"generated_filter_options\"][\"data\"]\n+ if not filters_data:\n+ return None\n+ max_filters = cast(MaxRecordingUniversalFilters, filters_data)\n+ return max_filters\n+\n+ def _get_session_ids_with_filters(self, replay_filters: MaxRecordingUniversalFilters) -> list[str] | None:\n+ from posthog.session_recordings.queries.session_recording_list_from_query import SessionRecordingListFromQuery\n+\n+ # Convert Max filters into recordings query format\n+ properties = []\n+ if replay_filters.filter_group and replay_filters.filter_group.values:\n+ for inner_group in replay_filters.filter_group.values:\n+ if hasattr(inner_group, \"values\"):\n+ properties.extend(inner_group.values)\n+ 
recordings_query = RecordingsQuery(\n+ date_from=replay_filters.date_from,\n+ date_to=replay_filters.date_to,\n+ properties=properties,\n+ filter_test_accounts=replay_filters.filter_test_accounts,\n+ order=replay_filters.order,\n+ # Handle duration filters\n+ having_predicates=(\n+ [\n+ {\"key\": \"duration\", \"type\": \"recording\", \"operator\": dur.operator, \"value\": dur.value}\n+ for dur in (replay_filters.duration or [])\n+ ]\n+ if replay_filters.duration\n+ else None\n+ ),\n+ )\n+ # Execute the query to get session IDs\n+ query_runner = SessionRecordingListFromQuery(\n+ team=self._team, query=recordings_query, hogql_query_modifiers=None\n+ )\n+ results = query_runner.run()\n+ # Extract session IDs\n+ session_ids = [recording[\"session_id\"] for recording in results.results]\n+ return session_ids if session_ids else None\n+\n+ async def _summarize_sessions_into_content(self, session_ids: list[str]) -> str:\n+ \"\"\"Summarizes the sessions using the provided session IDs.\"\"\"\n+ # If a small amount of sessions - we won't be able to extract lots of patters,\n+ # so it's ok to summarize them one by one and answer fast (without notebook creation)\n+ if len(session_ids) <= 5:\n+ summaries_tasks = [\n+ # As it's used as a direct output, use faster streaming model instead\n+ execute_summarize_session(\n+ session_id=sid,\n+ user_id=self._user.id,\n+ team=self._team,\n+ model_to_use=SESSION_SUMMARIES_STREAMING_MODEL,\n+ )\n+ for sid in session_ids\n+ ]\n+ summaries: list[str] = await asyncio.gather(*summaries_tasks)\n+ # TODO: Add layer to convert JSON into more readable text for Max to returns to user\n+ content = \"\\n\".join(summaries)\n+ return content\n+ # If a large amount of sessions - we will summarize them in a group and create a notebook\n+ # to provide a more detailed overview of the patterns and insights.\n+ min_timestamp, max_timestamp = find_sessions_timestamps(session_ids=session_ids, team=self._team)\n+ summary = execute_summarize_session_group(\n+ session_ids=session_ids,\n+ user_id=self._user.pk,\n+ team=self._team,\n+ min_timestamp=min_timestamp,\n+ max_timestamp=max_timestamp,\n+ extra_summary_context=None,\n+ local_reads_prod=False,\n+ )\n+ create_summary_notebook(session_ids=session_ids, user=self._user, team=self._team, summary=summary)\n+ content = summary.model_dump_json(exclude_none=True)\n+ return content\n+\n+ def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState | None:\n+ start_time = time.time()\n+ conversation_id = config.get(\"configurable\", {}).get(\"thread_id\", \"unknown\")\n+ # If query was not provided for some reason\n+ if not state.session_summarization_query:\n+ self._log_failure(\n+ f\"Session summarization query is not provided: {state.session_summarization_query}\",\n+ conversation_id,\n+ start_time,\n+ )\n+ return self._create_error_response(self._base_error_instructions, state.root_tool_call_id)\n+ try:\n+ # Generate filters to get session ids from DB\n+ replay_filters = async_to_sync(self._generate_replay_filters)(state.session_summarization_query)\n+ if not replay_filters:\n+ self._log_failure(\n+ f\"No Replay filters were generated for session summarization: {state.session_summarization_query}\",\n+ conversation_id,\n+ start_time,\n+ )\n+ return self._create_error_response(self._base_error_instructions, state.root_tool_call_id)\n+ # Query the filters to get session ids\n+ session_ids = self._get_session_ids_with_filters(replay_filters)\n+ # TODO: Remove after testing\n+ # Limit to 5 to test fast 
summarization\n+ session_ids = session_ids[:5] if session_ids else []\n+ if not session_ids:\n+ self._log_failure(\n+ f\"No session ids found for the provided filters: {replay_filters}\", conversation_id, start_time\n+ )\n+ return self._create_error_response(self._base_error_instructions, state.root_tool_call_id)\n+ summaries_content = async_to_sync(self._summarize_sessions_into_content)(session_ids)\n+ return PartialAssistantState(\n+ messages=[\n+ AssistantToolCallMessage(\n+ content=summaries_content,\n+ tool_call_id=state.root_tool_call_id or \"unknown\",\n+ id=str(uuid4()),\n+ ),\n+ ],\n+ session_summarization_query=None,\n+ root_tool_call_id=None,\n+ )\n+ except Exception as err:\n+ self._log_failure(\"Session summarization failed\", conversation_id, start_time, err)\n+ return self._create_error_response(self._base_error_instructions, state.root_tool_call_id)\n+\n+ def _create_error_response(self, message: str, root_tool_call_id: str | None) -> PartialAssistantState:\n+ return PartialAssistantState(\n+ messages=[\n+ AssistantToolCallMessage(\n+ content=message,\n+ tool_call_id=root_tool_call_id or \"unknown\",\n+ id=str(uuid4()),\n+ ),\n+ ],\n+ session_summarization_query=None,\n+ root_tool_call_id=None,\n+ )\n+\n+ def _log_failure(self, message: str, conversation_id: str, start_time: float, error: Any = None):\n+ self.logger.exception(\n+ message,\n+ extra={\n+ \"team_id\": getattr(self._team, \"id\", \"unknown\"),\n+ \"conversation_id\": conversation_id,\n+ \"execution_time_ms\": round(time.time() - start_time * 1000, 2),\n+ \"error\": str(error) if error else None,\n+ },\n+ )\n+\n+ @property\n+ def _base_error_instructions(self) -> str:\n+ return \"INSTRUCTIONS: Tell the user that you encountered an issue while summarizing the session and suggest they try again with a different question.\"",
"comment_created_at": "2025-08-04T14:42:57+00:00",
"comment_author": "Twixes",
"comment_body": " As this is not parametrized, it should probably be a `BASE_ERROR_INSTRUCTIONS` global const",
"pr_file_module": null
},
{
"comment_id": "2256801952",
"repo_full_name": "PostHog/posthog",
"pr_number": 35960,
"pr_file": "ee/hogai/graph/session_summaries/nodes.py",
"discussion_id": "2251719937",
"commented_code": "@@ -0,0 +1,216 @@\n+import asyncio\n+import time\n+from typing import cast, Literal, Any\n+from uuid import uuid4\n+\n+import structlog\n+from asgiref.sync import async_to_sync\n+from langchain_core.runnables import RunnableConfig\n+\n+from ee.hogai.graph.base import AssistantNode\n+from ee.hogai.session_summaries.constants import SESSION_SUMMARIES_STREAMING_MODEL\n+from ee.hogai.session_summaries.session_group.summarize_session_group import find_sessions_timestamps\n+from ee.hogai.session_summaries.session_group.summary_notebooks import create_summary_notebook\n+from ee.hogai.utils.types import AssistantState, PartialAssistantState\n+from posthog.schema import MaxRecordingUniversalFilters, RecordingsQuery, AssistantToolCallMessage\n+from posthog.temporal.ai.session_summary.summarize_session import execute_summarize_session\n+from posthog.temporal.ai.session_summary.summarize_session_group import execute_summarize_session_group\n+\n+\n+class SessionSummarizationNode(AssistantNode):\n+ logger = structlog.get_logger(__name__)\n+\n+ def __init__(self, *args, **kwargs):\n+ super().__init__(*args, **kwargs)\n+\n+ async def _generate_replay_filters(self, plain_text_query: str) -> MaxRecordingUniversalFilters | None:\n+ \"\"\"Generates replay filters to get session ids by querying a compiled Universal filters graph.\"\"\"\n+ from ee.hogai.graph.filter_options.prompts import PRODUCT_DESCRIPTION_PROMPT\n+ from products.replay.backend.prompts import SESSION_REPLAY_RESPONSE_FORMATS_PROMPT\n+ from products.replay.backend.prompts import SESSION_REPLAY_EXAMPLES_PROMPT\n+ from products.replay.backend.prompts import MULTIPLE_FILTERS_PROMPT\n+ from ee.hogai.graph.filter_options.graph import FilterOptionsGraph\n+\n+ # Create the graph with injected prompts\n+ injected_prompts = {\n+ \"product_description_prompt\": PRODUCT_DESCRIPTION_PROMPT,\n+ \"response_formats_prompt\": SESSION_REPLAY_RESPONSE_FORMATS_PROMPT,\n+ \"examples_prompt\": SESSION_REPLAY_EXAMPLES_PROMPT,\n+ \"multiple_filters_prompt\": MULTIPLE_FILTERS_PROMPT,\n+ }\n+ graph = FilterOptionsGraph(self._team, self._user, injected_prompts=injected_prompts).compile_full_graph()\n+ # Call with your query\n+ result = await graph.ainvoke(\n+ {\n+ \"change\": plain_text_query,\n+ \"current_filters\": {}, # Empty state, as we need results from the query-to-filter\n+ }\n+ )\n+ if (\n+ not result\n+ or not isinstance(result, dict)\n+ or not result.get(\"generated_filter_options\")\n+ or not result[\"generated_filter_options\"].get(\"data\")\n+ ):\n+ self.logger.error(\n+ f\"Invalid result from filter options graph: {result}\",\n+ extra={\n+ \"team_id\": getattr(self._team, \"id\", \"unknown\"),\n+ \"user_id\": getattr(self._user, \"id\", \"unknown\"),\n+ \"result\": result,\n+ },\n+ )\n+ return None\n+ # Extract the generated filters\n+ filters_data = result[\"generated_filter_options\"][\"data\"]\n+ if not filters_data:\n+ return None\n+ max_filters = cast(MaxRecordingUniversalFilters, filters_data)\n+ return max_filters\n+\n+ def _get_session_ids_with_filters(self, replay_filters: MaxRecordingUniversalFilters) -> list[str] | None:\n+ from posthog.session_recordings.queries.session_recording_list_from_query import SessionRecordingListFromQuery\n+\n+ # Convert Max filters into recordings query format\n+ properties = []\n+ if replay_filters.filter_group and replay_filters.filter_group.values:\n+ for inner_group in replay_filters.filter_group.values:\n+ if hasattr(inner_group, \"values\"):\n+ properties.extend(inner_group.values)\n+ 
recordings_query = RecordingsQuery(\n+ date_from=replay_filters.date_from,\n+ date_to=replay_filters.date_to,\n+ properties=properties,\n+ filter_test_accounts=replay_filters.filter_test_accounts,\n+ order=replay_filters.order,\n+ # Handle duration filters\n+ having_predicates=(\n+ [\n+ {\"key\": \"duration\", \"type\": \"recording\", \"operator\": dur.operator, \"value\": dur.value}\n+ for dur in (replay_filters.duration or [])\n+ ]\n+ if replay_filters.duration\n+ else None\n+ ),\n+ )\n+ # Execute the query to get session IDs\n+ query_runner = SessionRecordingListFromQuery(\n+ team=self._team, query=recordings_query, hogql_query_modifiers=None\n+ )\n+ results = query_runner.run()\n+ # Extract session IDs\n+ session_ids = [recording[\"session_id\"] for recording in results.results]\n+ return session_ids if session_ids else None\n+\n+ async def _summarize_sessions_into_content(self, session_ids: list[str]) -> str:\n+ \"\"\"Summarizes the sessions using the provided session IDs.\"\"\"\n+ # If a small amount of sessions - we won't be able to extract lots of patters,\n+ # so it's ok to summarize them one by one and answer fast (without notebook creation)\n+ if len(session_ids) <= 5:\n+ summaries_tasks = [\n+ # As it's used as a direct output, use faster streaming model instead\n+ execute_summarize_session(\n+ session_id=sid,\n+ user_id=self._user.id,\n+ team=self._team,\n+ model_to_use=SESSION_SUMMARIES_STREAMING_MODEL,\n+ )\n+ for sid in session_ids\n+ ]\n+ summaries: list[str] = await asyncio.gather(*summaries_tasks)\n+ # TODO: Add layer to convert JSON into more readable text for Max to returns to user\n+ content = \"\\n\".join(summaries)\n+ return content\n+ # If a large amount of sessions - we will summarize them in a group and create a notebook\n+ # to provide a more detailed overview of the patterns and insights.\n+ min_timestamp, max_timestamp = find_sessions_timestamps(session_ids=session_ids, team=self._team)\n+ summary = execute_summarize_session_group(\n+ session_ids=session_ids,\n+ user_id=self._user.pk,\n+ team=self._team,\n+ min_timestamp=min_timestamp,\n+ max_timestamp=max_timestamp,\n+ extra_summary_context=None,\n+ local_reads_prod=False,\n+ )\n+ create_summary_notebook(session_ids=session_ids, user=self._user, team=self._team, summary=summary)\n+ content = summary.model_dump_json(exclude_none=True)\n+ return content\n+\n+ def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState | None:\n+ start_time = time.time()\n+ conversation_id = config.get(\"configurable\", {}).get(\"thread_id\", \"unknown\")\n+ # If query was not provided for some reason\n+ if not state.session_summarization_query:\n+ self._log_failure(\n+ f\"Session summarization query is not provided: {state.session_summarization_query}\",\n+ conversation_id,\n+ start_time,\n+ )\n+ return self._create_error_response(self._base_error_instructions, state.root_tool_call_id)\n+ try:\n+ # Generate filters to get session ids from DB\n+ replay_filters = async_to_sync(self._generate_replay_filters)(state.session_summarization_query)\n+ if not replay_filters:\n+ self._log_failure(\n+ f\"No Replay filters were generated for session summarization: {state.session_summarization_query}\",\n+ conversation_id,\n+ start_time,\n+ )\n+ return self._create_error_response(self._base_error_instructions, state.root_tool_call_id)\n+ # Query the filters to get session ids\n+ session_ids = self._get_session_ids_with_filters(replay_filters)\n+ # TODO: Remove after testing\n+ # Limit to 5 to test fast 
summarization\n+ session_ids = session_ids[:5] if session_ids else []\n+ if not session_ids:\n+ self._log_failure(\n+ f\"No session ids found for the provided filters: {replay_filters}\", conversation_id, start_time\n+ )\n+ return self._create_error_response(self._base_error_instructions, state.root_tool_call_id)\n+ summaries_content = async_to_sync(self._summarize_sessions_into_content)(session_ids)\n+ return PartialAssistantState(\n+ messages=[\n+ AssistantToolCallMessage(\n+ content=summaries_content,\n+ tool_call_id=state.root_tool_call_id or \"unknown\",\n+ id=str(uuid4()),\n+ ),\n+ ],\n+ session_summarization_query=None,\n+ root_tool_call_id=None,\n+ )\n+ except Exception as err:\n+ self._log_failure(\"Session summarization failed\", conversation_id, start_time, err)\n+ return self._create_error_response(self._base_error_instructions, state.root_tool_call_id)\n+\n+ def _create_error_response(self, message: str, root_tool_call_id: str | None) -> PartialAssistantState:\n+ return PartialAssistantState(\n+ messages=[\n+ AssistantToolCallMessage(\n+ content=message,\n+ tool_call_id=root_tool_call_id or \"unknown\",\n+ id=str(uuid4()),\n+ ),\n+ ],\n+ session_summarization_query=None,\n+ root_tool_call_id=None,\n+ )\n+\n+ def _log_failure(self, message: str, conversation_id: str, start_time: float, error: Any = None):\n+ self.logger.exception(\n+ message,\n+ extra={\n+ \"team_id\": getattr(self._team, \"id\", \"unknown\"),\n+ \"conversation_id\": conversation_id,\n+ \"execution_time_ms\": round(time.time() - start_time * 1000, 2),\n+ \"error\": str(error) if error else None,\n+ },\n+ )\n+\n+ @property\n+ def _base_error_instructions(self) -> str:\n+ return \"INSTRUCTIONS: Tell the user that you encountered an issue while summarizing the session and suggest they try again with a different question.\"",
"comment_created_at": "2025-08-06T11:03:14+00:00",
"comment_author": "sortafreel",
"comment_body": "I think it's not parameterized **yet**. With more context/more specialized queries coming - I see it's having way more variety.",
"pr_file_module": null
},
{
"comment_id": "2257630987",
"repo_full_name": "PostHog/posthog",
"pr_number": 35960,
"pr_file": "ee/hogai/graph/session_summaries/nodes.py",
"discussion_id": "2251719937",
"commented_code": "@@ -0,0 +1,216 @@\n+import asyncio\n+import time\n+from typing import cast, Literal, Any\n+from uuid import uuid4\n+\n+import structlog\n+from asgiref.sync import async_to_sync\n+from langchain_core.runnables import RunnableConfig\n+\n+from ee.hogai.graph.base import AssistantNode\n+from ee.hogai.session_summaries.constants import SESSION_SUMMARIES_STREAMING_MODEL\n+from ee.hogai.session_summaries.session_group.summarize_session_group import find_sessions_timestamps\n+from ee.hogai.session_summaries.session_group.summary_notebooks import create_summary_notebook\n+from ee.hogai.utils.types import AssistantState, PartialAssistantState\n+from posthog.schema import MaxRecordingUniversalFilters, RecordingsQuery, AssistantToolCallMessage\n+from posthog.temporal.ai.session_summary.summarize_session import execute_summarize_session\n+from posthog.temporal.ai.session_summary.summarize_session_group import execute_summarize_session_group\n+\n+\n+class SessionSummarizationNode(AssistantNode):\n+ logger = structlog.get_logger(__name__)\n+\n+ def __init__(self, *args, **kwargs):\n+ super().__init__(*args, **kwargs)\n+\n+ async def _generate_replay_filters(self, plain_text_query: str) -> MaxRecordingUniversalFilters | None:\n+ \"\"\"Generates replay filters to get session ids by querying a compiled Universal filters graph.\"\"\"\n+ from ee.hogai.graph.filter_options.prompts import PRODUCT_DESCRIPTION_PROMPT\n+ from products.replay.backend.prompts import SESSION_REPLAY_RESPONSE_FORMATS_PROMPT\n+ from products.replay.backend.prompts import SESSION_REPLAY_EXAMPLES_PROMPT\n+ from products.replay.backend.prompts import MULTIPLE_FILTERS_PROMPT\n+ from ee.hogai.graph.filter_options.graph import FilterOptionsGraph\n+\n+ # Create the graph with injected prompts\n+ injected_prompts = {\n+ \"product_description_prompt\": PRODUCT_DESCRIPTION_PROMPT,\n+ \"response_formats_prompt\": SESSION_REPLAY_RESPONSE_FORMATS_PROMPT,\n+ \"examples_prompt\": SESSION_REPLAY_EXAMPLES_PROMPT,\n+ \"multiple_filters_prompt\": MULTIPLE_FILTERS_PROMPT,\n+ }\n+ graph = FilterOptionsGraph(self._team, self._user, injected_prompts=injected_prompts).compile_full_graph()\n+ # Call with your query\n+ result = await graph.ainvoke(\n+ {\n+ \"change\": plain_text_query,\n+ \"current_filters\": {}, # Empty state, as we need results from the query-to-filter\n+ }\n+ )\n+ if (\n+ not result\n+ or not isinstance(result, dict)\n+ or not result.get(\"generated_filter_options\")\n+ or not result[\"generated_filter_options\"].get(\"data\")\n+ ):\n+ self.logger.error(\n+ f\"Invalid result from filter options graph: {result}\",\n+ extra={\n+ \"team_id\": getattr(self._team, \"id\", \"unknown\"),\n+ \"user_id\": getattr(self._user, \"id\", \"unknown\"),\n+ \"result\": result,\n+ },\n+ )\n+ return None\n+ # Extract the generated filters\n+ filters_data = result[\"generated_filter_options\"][\"data\"]\n+ if not filters_data:\n+ return None\n+ max_filters = cast(MaxRecordingUniversalFilters, filters_data)\n+ return max_filters\n+\n+ def _get_session_ids_with_filters(self, replay_filters: MaxRecordingUniversalFilters) -> list[str] | None:\n+ from posthog.session_recordings.queries.session_recording_list_from_query import SessionRecordingListFromQuery\n+\n+ # Convert Max filters into recordings query format\n+ properties = []\n+ if replay_filters.filter_group and replay_filters.filter_group.values:\n+ for inner_group in replay_filters.filter_group.values:\n+ if hasattr(inner_group, \"values\"):\n+ properties.extend(inner_group.values)\n+ 
recordings_query = RecordingsQuery(\n+ date_from=replay_filters.date_from,\n+ date_to=replay_filters.date_to,\n+ properties=properties,\n+ filter_test_accounts=replay_filters.filter_test_accounts,\n+ order=replay_filters.order,\n+ # Handle duration filters\n+ having_predicates=(\n+ [\n+ {\"key\": \"duration\", \"type\": \"recording\", \"operator\": dur.operator, \"value\": dur.value}\n+ for dur in (replay_filters.duration or [])\n+ ]\n+ if replay_filters.duration\n+ else None\n+ ),\n+ )\n+ # Execute the query to get session IDs\n+ query_runner = SessionRecordingListFromQuery(\n+ team=self._team, query=recordings_query, hogql_query_modifiers=None\n+ )\n+ results = query_runner.run()\n+ # Extract session IDs\n+ session_ids = [recording[\"session_id\"] for recording in results.results]\n+ return session_ids if session_ids else None\n+\n+ async def _summarize_sessions_into_content(self, session_ids: list[str]) -> str:\n+ \"\"\"Summarizes the sessions using the provided session IDs.\"\"\"\n+ # If a small amount of sessions - we won't be able to extract lots of patters,\n+ # so it's ok to summarize them one by one and answer fast (without notebook creation)\n+ if len(session_ids) <= 5:\n+ summaries_tasks = [\n+ # As it's used as a direct output, use faster streaming model instead\n+ execute_summarize_session(\n+ session_id=sid,\n+ user_id=self._user.id,\n+ team=self._team,\n+ model_to_use=SESSION_SUMMARIES_STREAMING_MODEL,\n+ )\n+ for sid in session_ids\n+ ]\n+ summaries: list[str] = await asyncio.gather(*summaries_tasks)\n+ # TODO: Add layer to convert JSON into more readable text for Max to returns to user\n+ content = \"\\n\".join(summaries)\n+ return content\n+ # If a large amount of sessions - we will summarize them in a group and create a notebook\n+ # to provide a more detailed overview of the patterns and insights.\n+ min_timestamp, max_timestamp = find_sessions_timestamps(session_ids=session_ids, team=self._team)\n+ summary = execute_summarize_session_group(\n+ session_ids=session_ids,\n+ user_id=self._user.pk,\n+ team=self._team,\n+ min_timestamp=min_timestamp,\n+ max_timestamp=max_timestamp,\n+ extra_summary_context=None,\n+ local_reads_prod=False,\n+ )\n+ create_summary_notebook(session_ids=session_ids, user=self._user, team=self._team, summary=summary)\n+ content = summary.model_dump_json(exclude_none=True)\n+ return content\n+\n+ def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState | None:\n+ start_time = time.time()\n+ conversation_id = config.get(\"configurable\", {}).get(\"thread_id\", \"unknown\")\n+ # If query was not provided for some reason\n+ if not state.session_summarization_query:\n+ self._log_failure(\n+ f\"Session summarization query is not provided: {state.session_summarization_query}\",\n+ conversation_id,\n+ start_time,\n+ )\n+ return self._create_error_response(self._base_error_instructions, state.root_tool_call_id)\n+ try:\n+ # Generate filters to get session ids from DB\n+ replay_filters = async_to_sync(self._generate_replay_filters)(state.session_summarization_query)\n+ if not replay_filters:\n+ self._log_failure(\n+ f\"No Replay filters were generated for session summarization: {state.session_summarization_query}\",\n+ conversation_id,\n+ start_time,\n+ )\n+ return self._create_error_response(self._base_error_instructions, state.root_tool_call_id)\n+ # Query the filters to get session ids\n+ session_ids = self._get_session_ids_with_filters(replay_filters)\n+ # TODO: Remove after testing\n+ # Limit to 5 to test fast 
summarization\n+ session_ids = session_ids[:5] if session_ids else []\n+ if not session_ids:\n+ self._log_failure(\n+ f\"No session ids found for the provided filters: {replay_filters}\", conversation_id, start_time\n+ )\n+ return self._create_error_response(self._base_error_instructions, state.root_tool_call_id)\n+ summaries_content = async_to_sync(self._summarize_sessions_into_content)(session_ids)\n+ return PartialAssistantState(\n+ messages=[\n+ AssistantToolCallMessage(\n+ content=summaries_content,\n+ tool_call_id=state.root_tool_call_id or \"unknown\",\n+ id=str(uuid4()),\n+ ),\n+ ],\n+ session_summarization_query=None,\n+ root_tool_call_id=None,\n+ )\n+ except Exception as err:\n+ self._log_failure(\"Session summarization failed\", conversation_id, start_time, err)\n+ return self._create_error_response(self._base_error_instructions, state.root_tool_call_id)\n+\n+ def _create_error_response(self, message: str, root_tool_call_id: str | None) -> PartialAssistantState:\n+ return PartialAssistantState(\n+ messages=[\n+ AssistantToolCallMessage(\n+ content=message,\n+ tool_call_id=root_tool_call_id or \"unknown\",\n+ id=str(uuid4()),\n+ ),\n+ ],\n+ session_summarization_query=None,\n+ root_tool_call_id=None,\n+ )\n+\n+ def _log_failure(self, message: str, conversation_id: str, start_time: float, error: Any = None):\n+ self.logger.exception(\n+ message,\n+ extra={\n+ \"team_id\": getattr(self._team, \"id\", \"unknown\"),\n+ \"conversation_id\": conversation_id,\n+ \"execution_time_ms\": round(time.time() - start_time * 1000, 2),\n+ \"error\": str(error) if error else None,\n+ },\n+ )\n+\n+ @property\n+ def _base_error_instructions(self) -> str:\n+ return \"INSTRUCTIONS: Tell the user that you encountered an issue while summarizing the session and suggest they try again with a different question.\"",
"comment_created_at": "2025-08-06T15:56:26+00:00",
"comment_author": "Twixes",
"comment_body": "Normally I'd lean towards this being a small refactor later in a case like that, but I trust the plan",
"pr_file_module": null
}
]
},
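The reviewers above weigh a module-level constant against keeping the property for future parametrization. A sketch of both options under the assumption that the constant would live next to the node; the simplified class and placement are illustrative:

```python
# Option A: a module-level constant is enough while the text takes no parameters.
BASE_ERROR_INSTRUCTIONS = (
    "INSTRUCTIONS: Tell the user that you encountered an issue while summarizing "
    "the session and suggest they try again with a different question."
)


class SessionSummarizationNode:  # simplified stand-in
    # Option B: keep the property if per-query context will be interpolated later,
    # as the PR author expects; for now it can simply delegate to the constant.
    @property
    def _base_error_instructions(self) -> str:
        return BASE_ERROR_INSTRUCTIONS
```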
{
"discussion_id": "2224773118",
"pr_number": 35327,
"pr_file": "posthog/models/web_preaggregated/sql.py",
"created_at": "2025-07-23T08:15:40+00:00",
"commented_code": "from posthog.hogql.database.schema.web_analytics_s3 import get_s3_function_args\n \n \n-def TABLE_TEMPLATE(table_name, columns, order_by):\n- engine = MergeTreeEngine(table_name, replication_scheme=ReplicationScheme.REPLICATED)\n+def TABLE_TEMPLATE(table_name, columns, order_by, storage_policy=None):\n+ policy = \"s3_policy\" if storage_policy == \"s3\" else None",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2224773118",
"repo_full_name": "PostHog/posthog",
"pr_number": 35327,
"pr_file": "posthog/models/web_preaggregated/sql.py",
"discussion_id": "2224773118",
"commented_code": "@@ -3,8 +3,15 @@\n from posthog.hogql.database.schema.web_analytics_s3 import get_s3_function_args\n \n \n-def TABLE_TEMPLATE(table_name, columns, order_by):\n- engine = MergeTreeEngine(table_name, replication_scheme=ReplicationScheme.REPLICATED)\n+def TABLE_TEMPLATE(table_name, columns, order_by, storage_policy=None):\n+ policy = \"s3_policy\" if storage_policy == \"s3\" else None",
"comment_created_at": "2025-07-23T08:15:40+00:00",
"comment_author": "Daesgar",
"comment_body": "If we finally go for this S3-backed tables, I'd suggest using the same policy name for all environments.\r\n\r\nIt could make sense to store it in a constant for S3-only tables, so everyone who wants to use S3-backed tables reuse the same policy.\r\n\r\n```suggestion\r\n policy = \"s3_backed\" if storage_policy == \"s3\" else None\r\n```",
"pr_file_module": null
}
]
}
]
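The last suggestion is to standardise on a single policy name ("s3_backed") and expose it as a constant so every S3-backed table reuses it across environments. A minimal sketch under that assumption; the constant name and helper function are illustrative, not the actual PostHog module:

```python
S3_TABLE_STORAGE_POLICY = "s3_backed"  # one policy name shared across environments


def storage_policy_clause(storage_policy: str | None = None) -> str:
    """Illustrative helper: resolve the ClickHouse SETTINGS clause for a table."""
    policy = S3_TABLE_STORAGE_POLICY if storage_policy == "s3" else None
    return f"SETTINGS storage_policy = '{policy}'" if policy else ""
```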