mirror of
https://github.com/baz-scm/awesome-reviewers.git
synced 2025-08-20 18:58:52 +03:00
114 lines
42 KiB
JSON
[
{
"discussion_id": "2268500847",
"pr_number": 36416,
"pr_file": "posthog/api/github.py",
"created_at": "2025-08-12T03:40:09+00:00",
"commented_code": "+import base64\n+import requests\n+from typing import Any\n+\n+from django.apps import apps\n+from django.core.cache import cache\n+from django.db.models import Q\n+from hashlib import sha256\n+from rest_framework import serializers\n+from rest_framework.exceptions import ValidationError\n+from rest_framework.parsers import JSONParser\n+from rest_framework.permissions import AllowAny\n+from rest_framework.response import Response\n+from rest_framework.views import APIView\n+from cryptography.hazmat.primitives import hashes, serialization\n+from cryptography.hazmat.primitives.asymmetric import ec\n+from cryptography.exceptions import InvalidSignature\n+\n+from posthog.models.personal_api_key import find_personal_api_key\n+from posthog.api.personal_api_key import PersonalAPIKeySerializer\n+from posthog.tasks.email import send_personal_api_key_exposed\n+\n+GITHUB_KEYS_URI = \"https://api.github.com/meta/public_keys/secret_scanning\"\n+TWENTY_FOUR_HOURS = 60 * 60 * 24\n+\n+\n+class SignatureVerificationError(Exception):\n+ pass\n+\n+\n+def verify_github_signature(payload: str, kid: str, sig: str) -> None:\n+ cache_key = f\"github:public_key:{kid}\"\n+\n+ pem = cache.get(cache_key)\n+\n+ if pem is None:\n+ try:\n+ resp = requests.get(GITHUB_KEYS_URI, timeout=10)\n+ resp.raise_for_status()\n+ data = resp.json()\n+ except Exception:\n+ raise SignatureVerificationError(\"Failed to fetch GitHub public keys\")\n+\n+ public_keys = data.get(\"public_keys\")\n+ if not isinstance(public_keys, list) or not public_keys:\n+ raise SignatureVerificationError(\"No public keys found\")\n+\n+ entry = next((k for k in public_keys if k.get(\"key_identifier\") == kid), None)\n+ if entry is None:\n+ raise SignatureVerificationError(\"No public key found matching key identifier\")\n+\n+ pem = entry.get(\"key\")\n+ if not isinstance(pem, str) or not pem.strip():\n+ raise SignatureVerificationError(\"Malformed public key entry\")\n+\n+ cache.set(cache_key, pem, 
TWENTY_FOUR_HOURS)\n+\n+ try:\n+ pub = serialization.load_pem_public_key(pem.encode(\"utf-8\"))\n+ except Exception as e:\n+ raise SignatureVerificationError(\"Unable to parse public key\") from e\n+\n+ if not isinstance(pub, ec.EllipticCurvePublicKey) or pub.curve.name.lower() not in (\"secp256r1\", \"prime256v1\"):\n+ raise SignatureVerificationError(\"Unsupported public key type/curve (expected ECDSA P-256)\")\n+\n+ try:\n+ sig_bytes = base64.b64decode(sig, validate=True)\n+ except Exception as e:\n+ raise SignatureVerificationError(\"Signature is not valid base64\") from e\n+\n+ message = payload.encode(\"utf-8\")\n+\n+ try:\n+ pub.verify(sig_bytes, message, ec.ECDSA(hashes.SHA256()))\n+ except InvalidSignature as e:\n+ raise SignatureVerificationError(\"Signature does not match payload\") from e\n+\n+\n+class SecretAlertSerializer(serializers.Serializer):\n+ token = serializers.CharField()\n+ type = serializers.ChoiceField(choices=[\"posthog_personal_api_key\", \"posthog_feature_flags_secure_api_key\"])\n+ url = serializers.CharField(allow_blank=True)\n+ source: Any = serializers.CharField()\n+\n+\n+class SecretAlert(APIView):\n+ authentication_classes = []\n+ permission_classes = [AllowAny]\n+ parser_classes = [JSONParser]\n+\n+ def post(self, request):\n+ kid = (request.headers.get(\"Github-Public-Key-Identifier\") or \"\").strip()\n+ sig = (request.headers.get(\"Github-Public-Key-Signature\") or \"\").strip()\n+\n+ if not kid:\n+ raise ValidationError(\n+ {\n+ \"headers\": {\n+ \"Github-Public-Key-Identifier\": \"required non-blank string\",\n+ }\n+ }\n+ )\n+ if not sig:\n+ raise ValidationError(\n+ {\n+ \"headers\": {\n+ \"Github-Public-Key-Signature\": \"required non-blank string\",\n+ }\n+ }\n+ )\n+\n+ try:\n+ verify_github_signature(request.body.decode(\"utf-8\"), kid, sig)",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2268500847",
"repo_full_name": "PostHog/posthog",
"pr_number": 36416,
"pr_file": "posthog/api/github.py",
"discussion_id": "2268500847",
"commented_code": "@@ -0,0 +1,162 @@\n+import base64\n+import requests\n+from typing import Any\n+\n+from django.apps import apps\n+from django.core.cache import cache\n+from django.db.models import Q\n+from hashlib import sha256\n+from rest_framework import serializers\n+from rest_framework.exceptions import ValidationError\n+from rest_framework.parsers import JSONParser\n+from rest_framework.permissions import AllowAny\n+from rest_framework.response import Response\n+from rest_framework.views import APIView\n+from cryptography.hazmat.primitives import hashes, serialization\n+from cryptography.hazmat.primitives.asymmetric import ec\n+from cryptography.exceptions import InvalidSignature\n+\n+from posthog.models.personal_api_key import find_personal_api_key\n+from posthog.api.personal_api_key import PersonalAPIKeySerializer\n+from posthog.tasks.email import send_personal_api_key_exposed\n+\n+GITHUB_KEYS_URI = \"https://api.github.com/meta/public_keys/secret_scanning\"\n+TWENTY_FOUR_HOURS = 60 * 60 * 24\n+\n+\n+class SignatureVerificationError(Exception):\n+ pass\n+\n+\n+def verify_github_signature(payload: str, kid: str, sig: str) -> None:\n+ cache_key = f\"github:public_key:{kid}\"\n+\n+ pem = cache.get(cache_key)\n+\n+ if pem is None:\n+ try:\n+ resp = requests.get(GITHUB_KEYS_URI, timeout=10)\n+ resp.raise_for_status()\n+ data = resp.json()\n+ except Exception:\n+ raise SignatureVerificationError(\"Failed to fetch GitHub public keys\")\n+\n+ public_keys = data.get(\"public_keys\")\n+ if not isinstance(public_keys, list) or not public_keys:\n+ raise SignatureVerificationError(\"No public keys found\")\n+\n+ entry = next((k for k in public_keys if k.get(\"key_identifier\") == kid), None)\n+ if entry is None:\n+ raise SignatureVerificationError(\"No public key found matching key identifier\")\n+\n+ pem = entry.get(\"key\")\n+ if not isinstance(pem, str) or not pem.strip():\n+ raise SignatureVerificationError(\"Malformed public key entry\")\n+\n+ cache.set(cache_key, 
pem, TWENTY_FOUR_HOURS)\n+\n+ try:\n+ pub = serialization.load_pem_public_key(pem.encode(\"utf-8\"))\n+ except Exception as e:\n+ raise SignatureVerificationError(\"Unable to parse public key\") from e\n+\n+ if not isinstance(pub, ec.EllipticCurvePublicKey) or pub.curve.name.lower() not in (\"secp256r1\", \"prime256v1\"):\n+ raise SignatureVerificationError(\"Unsupported public key type/curve (expected ECDSA P-256)\")\n+\n+ try:\n+ sig_bytes = base64.b64decode(sig, validate=True)\n+ except Exception as e:\n+ raise SignatureVerificationError(\"Signature is not valid base64\") from e\n+\n+ message = payload.encode(\"utf-8\")\n+\n+ try:\n+ pub.verify(sig_bytes, message, ec.ECDSA(hashes.SHA256()))\n+ except InvalidSignature as e:\n+ raise SignatureVerificationError(\"Signature does not match payload\") from e\n+\n+\n+class SecretAlertSerializer(serializers.Serializer):\n+ token = serializers.CharField()\n+ type = serializers.ChoiceField(choices=[\"posthog_personal_api_key\", \"posthog_feature_flags_secure_api_key\"])\n+ url = serializers.CharField(allow_blank=True)\n+ source: Any = serializers.CharField()\n+\n+\n+class SecretAlert(APIView):\n+ authentication_classes = []\n+ permission_classes = [AllowAny]\n+ parser_classes = [JSONParser]\n+\n+ def post(self, request):\n+ kid = (request.headers.get(\"Github-Public-Key-Identifier\") or \"\").strip()\n+ sig = (request.headers.get(\"Github-Public-Key-Signature\") or \"\").strip()\n+\n+ if not kid:\n+ raise ValidationError(\n+ {\n+ \"headers\": {\n+ \"Github-Public-Key-Identifier\": \"required non-blank string\",\n+ }\n+ }\n+ )\n+ if not sig:\n+ raise ValidationError(\n+ {\n+ \"headers\": {\n+ \"Github-Public-Key-Signature\": \"required non-blank string\",\n+ }\n+ }\n+ )\n+\n+ try:\n+ verify_github_signature(request.body.decode(\"utf-8\"), kid, sig)",
"comment_created_at": "2025-08-12T03:40:09+00:00",
"comment_author": "rafaeelaudibert",
"comment_body": "What if this raises any other error? You're being very strict on the `except`s inside this function, I'm sure there's something that we might let slip through - for example in case some Python API changes and we don't notice it",
"pr_file_module": null
},
{
"comment_id": "2268665669",
"repo_full_name": "PostHog/posthog",
"pr_number": 36416,
"pr_file": "posthog/api/github.py",
"discussion_id": "2268500847",
"commented_code": "@@ -0,0 +1,162 @@\n+import base64\n+import requests\n+from typing import Any\n+\n+from django.apps import apps\n+from django.core.cache import cache\n+from django.db.models import Q\n+from hashlib import sha256\n+from rest_framework import serializers\n+from rest_framework.exceptions import ValidationError\n+from rest_framework.parsers import JSONParser\n+from rest_framework.permissions import AllowAny\n+from rest_framework.response import Response\n+from rest_framework.views import APIView\n+from cryptography.hazmat.primitives import hashes, serialization\n+from cryptography.hazmat.primitives.asymmetric import ec\n+from cryptography.exceptions import InvalidSignature\n+\n+from posthog.models.personal_api_key import find_personal_api_key\n+from posthog.api.personal_api_key import PersonalAPIKeySerializer\n+from posthog.tasks.email import send_personal_api_key_exposed\n+\n+GITHUB_KEYS_URI = \"https://api.github.com/meta/public_keys/secret_scanning\"\n+TWENTY_FOUR_HOURS = 60 * 60 * 24\n+\n+\n+class SignatureVerificationError(Exception):\n+ pass\n+\n+\n+def verify_github_signature(payload: str, kid: str, sig: str) -> None:\n+ cache_key = f\"github:public_key:{kid}\"\n+\n+ pem = cache.get(cache_key)\n+\n+ if pem is None:\n+ try:\n+ resp = requests.get(GITHUB_KEYS_URI, timeout=10)\n+ resp.raise_for_status()\n+ data = resp.json()\n+ except Exception:\n+ raise SignatureVerificationError(\"Failed to fetch GitHub public keys\")\n+\n+ public_keys = data.get(\"public_keys\")\n+ if not isinstance(public_keys, list) or not public_keys:\n+ raise SignatureVerificationError(\"No public keys found\")\n+\n+ entry = next((k for k in public_keys if k.get(\"key_identifier\") == kid), None)\n+ if entry is None:\n+ raise SignatureVerificationError(\"No public key found matching key identifier\")\n+\n+ pem = entry.get(\"key\")\n+ if not isinstance(pem, str) or not pem.strip():\n+ raise SignatureVerificationError(\"Malformed public key entry\")\n+\n+ cache.set(cache_key, 
pem, TWENTY_FOUR_HOURS)\n+\n+ try:\n+ pub = serialization.load_pem_public_key(pem.encode(\"utf-8\"))\n+ except Exception as e:\n+ raise SignatureVerificationError(\"Unable to parse public key\") from e\n+\n+ if not isinstance(pub, ec.EllipticCurvePublicKey) or pub.curve.name.lower() not in (\"secp256r1\", \"prime256v1\"):\n+ raise SignatureVerificationError(\"Unsupported public key type/curve (expected ECDSA P-256)\")\n+\n+ try:\n+ sig_bytes = base64.b64decode(sig, validate=True)\n+ except Exception as e:\n+ raise SignatureVerificationError(\"Signature is not valid base64\") from e\n+\n+ message = payload.encode(\"utf-8\")\n+\n+ try:\n+ pub.verify(sig_bytes, message, ec.ECDSA(hashes.SHA256()))\n+ except InvalidSignature as e:\n+ raise SignatureVerificationError(\"Signature does not match payload\") from e\n+\n+\n+class SecretAlertSerializer(serializers.Serializer):\n+ token = serializers.CharField()\n+ type = serializers.ChoiceField(choices=[\"posthog_personal_api_key\", \"posthog_feature_flags_secure_api_key\"])\n+ url = serializers.CharField(allow_blank=True)\n+ source: Any = serializers.CharField()\n+\n+\n+class SecretAlert(APIView):\n+ authentication_classes = []\n+ permission_classes = [AllowAny]\n+ parser_classes = [JSONParser]\n+\n+ def post(self, request):\n+ kid = (request.headers.get(\"Github-Public-Key-Identifier\") or \"\").strip()\n+ sig = (request.headers.get(\"Github-Public-Key-Signature\") or \"\").strip()\n+\n+ if not kid:\n+ raise ValidationError(\n+ {\n+ \"headers\": {\n+ \"Github-Public-Key-Identifier\": \"required non-blank string\",\n+ }\n+ }\n+ )\n+ if not sig:\n+ raise ValidationError(\n+ {\n+ \"headers\": {\n+ \"Github-Public-Key-Signature\": \"required non-blank string\",\n+ }\n+ }\n+ )\n+\n+ try:\n+ verify_github_signature(request.body.decode(\"utf-8\"), kid, sig)",
"comment_created_at": "2025-08-12T05:52:24+00:00",
"comment_author": "Piccirello",
"comment_body": "The strict excepts are so that we can respond with an appropriate 401. Anything else should result in a 5xx.",
"pr_file_module": null
}
]
},
{
"discussion_id": "2251752442",
"pr_number": 35726,
"pr_file": "ee/hogai/graph/insights/nodes.py",
"created_at": "2025-08-04T14:54:20+00:00",
"commented_code": "selected_insights = self._parse_insight_ids(content)\n break\n \n- except Exception as e:\n- self.logger.warning(f\"Search iteration failed: {e}\")\n+ except Exception:\n break\n \n- # Fallback to first 3 insights if no results\n+ # Fallback to max_insights if no results\n if not selected_insights:",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2251752442",
"repo_full_name": "PostHog/posthog",
"pr_number": 35726,
"pr_file": "ee/hogai/graph/insights/nodes.py",
"discussion_id": "2251752442",
"commented_code": "@@ -208,23 +234,19 @@ def _search_insights_iteratively(self, search_query: str) -> list[int]:\n selected_insights = self._parse_insight_ids(content)\n break\n \n- except Exception as e:\n- self.logger.warning(f\"Search iteration failed: {e}\")\n+ except Exception:\n break\n \n- # Fallback to first 3 insights if no results\n+ # Fallback to max_insights if no results\n if not selected_insights:",
"comment_created_at": "2025-08-04T14:54:20+00:00",
"comment_author": "kappa90",
"comment_body": "Wouldn't it be better to just return an empty list? If the LLM hasn't selected anything it means nothing fits the query.",
"pr_file_module": null
}
]
},
{
"discussion_id": "2268061370",
"pr_number": 35726,
"pr_file": "ee/hogai/graph/insights/nodes.py",
"created_at": "2025-08-11T21:27:07+00:00",
"commented_code": "return read_insights_page\n \n+ def _create_insight_evaluation_tools(self):\n+ \"\"\"Create tools for insight evaluation.\"\"\"\n+\n+ @tool\n+ def select_insight(insight_id: int, explanation: str) -> str:\n+ \"\"\"Select an insight as useful for the user's query.\"\"\"\n+ insight = self._find_insight_by_id(insight_id)\n+ if not insight:\n+ return f\"Insight {insight_id} not found\"\n+\n+ self._evaluation_selections[insight_id] = {\"insight\": insight, \"explanation\": explanation}\n+\n+ name = insight.name or insight.derived_name or \"Unnamed\"\n+ return f\"Selected insight {insight_id}: {name}\"\n+\n+ @tool\n+ def get_insight_details(insight_id: int) -> str:\n+ \"\"\"Get detailed information (with query execution)about an insight including its current results.\"\"\"\n+ insight = self._find_insight_by_id(insight_id)\n+ if not insight:\n+ return f\"Insight {insight_id} not found\"\n+\n+ insight_info = self._process_insight_for_evaluation(\n+ insight, AssistantQueryExecutor(self._team, self._utc_now_datetime)\n+ )\n+\n+ insight_url = f\"/project/{self._team.id}/insights/{insight.short_id}\"\n+ hyperlink_format = f\"[{insight_info['name']}]({insight_url})\"\n+\n+ return f\"\"\"Insight: {insight_info['name']} (ID: {insight_info['insight_id']})\n+HYPERLINK FORMAT: {hyperlink_format}\n+Description: {insight_info['description'] or 'No description'}\n+Query: {insight_info['query']}\n+Current Results: {insight_info['results']}\"\"\"\n+\n+ @tool\n+ def reject_all_insights(reason: str) -> str:\n+ \"\"\"Indicate that none of the insights are suitable.\"\"\"\n+ self._evaluation_selections = {}\n+ self._rejection_reason = reason\n+ return \"All insights rejected. 
Will create new insight.\"\n+\n+ return [select_insight, get_insight_details, reject_all_insights]\n+\n def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState | None:\n- start_time = time.time()\n search_query = state.search_insights_query\n- conversation_id = config.get(\"configurable\", {}).get(\"thread_id\", \"unknown\")\n \n try:\n self._current_iteration = 0\n- self._load_all_insights()\n \n- if not self._all_insights:\n- return self._create_error_response(\"No insights found in the database.\", state.root_tool_call_id)\n+ # Check if we have any insights at all\n+ if self._get_total_insights_count() == 0:\n+ return self._create_error_response(EMPTY_DATABASE_ERROR_MESSAGE, state.root_tool_call_id)\n \n selected_insights = self._search_insights_iteratively(search_query or \"\")\n \n- formatted_content = self._format_search_results(selected_insights, search_query or \"\")\n-\n- execution_time = time.time() - start_time\n- self.logger.info(\n- f\"Iterative insight search completed\",\n- extra={\n- \"team_id\": getattr(self._team, \"id\", \"unknown\"),\n- \"conversation_id\": conversation_id,\n- \"query_length\": len(search_query) if search_query else 0,\n- \"results_count\": len(selected_insights),\n- \"execution_time_ms\": round(execution_time * 1000, 2),\n- \"iterations\": self._current_iteration,\n- },\n+ evaluation_result = self._evaluate_insights_with_tools(\n+ selected_insights, search_query or \"\", max_selections=1\n )\n \n- return PartialAssistantState(\n- messages=[\n+ if evaluation_result[\"should_use_existing\"]:\n+ # Create visualization messages for the insights to show actual charts\n+ messages_to_return = []\n+\n+ formatted_content = f\"**Evaluation Result**: {evaluation_result['explanation']}\"\n+\n+ formatted_content += HYPERLINK_USAGE_INSTRUCTIONS\n+\n+ messages_to_return.append(\n AssistantToolCallMessage(\n content=formatted_content,\n tool_call_id=state.root_tool_call_id or \"unknown\",\n id=str(uuid4()),\n- 
),\n- ],\n- search_insights_query=None,\n- root_tool_call_id=None,\n- )\n-\n- except Exception as e:\n- execution_time = time.time() - start_time\n- self.logger.exception(\n- f\"Iterative insight search failed\",\n- extra={\n- \"team_id\": getattr(self._team, \"id\", \"unknown\"),\n- \"conversation_id\": conversation_id,\n- \"query_length\": len(search_query) if search_query else 0,\n- \"execution_time_ms\": round(execution_time * 1000, 2),\n- \"error\": str(e),\n- },\n- )\n-\n+ )\n+ )\n+\n+ # Add visualization messages returned from evaluation\n+ messages_to_return.extend(evaluation_result[\"visualization_messages\"])\n+\n+ return PartialAssistantState(\n+ messages=messages_to_return,\n+ search_insights_query=None,\n+ root_tool_call_id=None,\n+ root_tool_insight_plan=None,\n+ )\n+ else:\n+ # No suitable insights found, triggering creation of a new insight\n+ no_insights_message = AssistantToolCallMessage(\n+ content=NO_INSIGHTS_FOUND_MESSAGE,\n+ tool_call_id=state.root_tool_call_id or \"unknown\",\n+ id=str(uuid4()),\n+ )\n+ return PartialAssistantState(\n+ messages=[no_insights_message],\n+ root_tool_insight_plan=search_query,\n+ search_insights_query=None,\n+ )\n+\n+ except Exception:",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2268061370",
"repo_full_name": "PostHog/posthog",
"pr_number": 35726,
"pr_file": "ee/hogai/graph/insights/nodes.py",
"discussion_id": "2268061370",
"commented_code": "@@ -65,103 +87,152 @@ def read_insights_page(page_number: int) -> str:\n \n return read_insights_page\n \n+ def _create_insight_evaluation_tools(self):\n+ \"\"\"Create tools for insight evaluation.\"\"\"\n+\n+ @tool\n+ def select_insight(insight_id: int, explanation: str) -> str:\n+ \"\"\"Select an insight as useful for the user's query.\"\"\"\n+ insight = self._find_insight_by_id(insight_id)\n+ if not insight:\n+ return f\"Insight {insight_id} not found\"\n+\n+ self._evaluation_selections[insight_id] = {\"insight\": insight, \"explanation\": explanation}\n+\n+ name = insight.name or insight.derived_name or \"Unnamed\"\n+ return f\"Selected insight {insight_id}: {name}\"\n+\n+ @tool\n+ def get_insight_details(insight_id: int) -> str:\n+ \"\"\"Get detailed information (with query execution)about an insight including its current results.\"\"\"\n+ insight = self._find_insight_by_id(insight_id)\n+ if not insight:\n+ return f\"Insight {insight_id} not found\"\n+\n+ insight_info = self._process_insight_for_evaluation(\n+ insight, AssistantQueryExecutor(self._team, self._utc_now_datetime)\n+ )\n+\n+ insight_url = f\"/project/{self._team.id}/insights/{insight.short_id}\"\n+ hyperlink_format = f\"[{insight_info['name']}]({insight_url})\"\n+\n+ return f\"\"\"Insight: {insight_info['name']} (ID: {insight_info['insight_id']})\n+HYPERLINK FORMAT: {hyperlink_format}\n+Description: {insight_info['description'] or 'No description'}\n+Query: {insight_info['query']}\n+Current Results: {insight_info['results']}\"\"\"\n+\n+ @tool\n+ def reject_all_insights(reason: str) -> str:\n+ \"\"\"Indicate that none of the insights are suitable.\"\"\"\n+ self._evaluation_selections = {}\n+ self._rejection_reason = reason\n+ return \"All insights rejected. 
Will create new insight.\"\n+\n+ return [select_insight, get_insight_details, reject_all_insights]\n+\n def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState | None:\n- start_time = time.time()\n search_query = state.search_insights_query\n- conversation_id = config.get(\"configurable\", {}).get(\"thread_id\", \"unknown\")\n \n try:\n self._current_iteration = 0\n- self._load_all_insights()\n \n- if not self._all_insights:\n- return self._create_error_response(\"No insights found in the database.\", state.root_tool_call_id)\n+ # Check if we have any insights at all\n+ if self._get_total_insights_count() == 0:\n+ return self._create_error_response(EMPTY_DATABASE_ERROR_MESSAGE, state.root_tool_call_id)\n \n selected_insights = self._search_insights_iteratively(search_query or \"\")\n \n- formatted_content = self._format_search_results(selected_insights, search_query or \"\")\n-\n- execution_time = time.time() - start_time\n- self.logger.info(\n- f\"Iterative insight search completed\",\n- extra={\n- \"team_id\": getattr(self._team, \"id\", \"unknown\"),\n- \"conversation_id\": conversation_id,\n- \"query_length\": len(search_query) if search_query else 0,\n- \"results_count\": len(selected_insights),\n- \"execution_time_ms\": round(execution_time * 1000, 2),\n- \"iterations\": self._current_iteration,\n- },\n+ evaluation_result = self._evaluate_insights_with_tools(\n+ selected_insights, search_query or \"\", max_selections=1\n )\n \n- return PartialAssistantState(\n- messages=[\n+ if evaluation_result[\"should_use_existing\"]:\n+ # Create visualization messages for the insights to show actual charts\n+ messages_to_return = []\n+\n+ formatted_content = f\"**Evaluation Result**: {evaluation_result['explanation']}\"\n+\n+ formatted_content += HYPERLINK_USAGE_INSTRUCTIONS\n+\n+ messages_to_return.append(\n AssistantToolCallMessage(\n content=formatted_content,\n tool_call_id=state.root_tool_call_id or \"unknown\",\n id=str(uuid4()),\n- 
),\n- ],\n- search_insights_query=None,\n- root_tool_call_id=None,\n- )\n-\n- except Exception as e:\n- execution_time = time.time() - start_time\n- self.logger.exception(\n- f\"Iterative insight search failed\",\n- extra={\n- \"team_id\": getattr(self._team, \"id\", \"unknown\"),\n- \"conversation_id\": conversation_id,\n- \"query_length\": len(search_query) if search_query else 0,\n- \"execution_time_ms\": round(execution_time * 1000, 2),\n- \"error\": str(e),\n- },\n- )\n-\n+ )\n+ )\n+\n+ # Add visualization messages returned from evaluation\n+ messages_to_return.extend(evaluation_result[\"visualization_messages\"])\n+\n+ return PartialAssistantState(\n+ messages=messages_to_return,\n+ search_insights_query=None,\n+ root_tool_call_id=None,\n+ root_tool_insight_plan=None,\n+ )\n+ else:\n+ # No suitable insights found, triggering creation of a new insight\n+ no_insights_message = AssistantToolCallMessage(\n+ content=NO_INSIGHTS_FOUND_MESSAGE,\n+ tool_call_id=state.root_tool_call_id or \"unknown\",\n+ id=str(uuid4()),\n+ )\n+ return PartialAssistantState(\n+ messages=[no_insights_message],\n+ root_tool_insight_plan=search_query,\n+ search_insights_query=None,\n+ )\n+\n+ except Exception:",
"comment_created_at": "2025-08-11T21:27:07+00:00",
"comment_author": "kappa90",
"comment_body": "This is very broad, the function has a lot of nested function so I'm not sure what exactly could throw errors here, but we need at least to capture the exception:\r\n\r\n```python\r\nexcept Exception as e:\r\n capture_exception(e)\r\n```",
"pr_file_module": null
},
{
"comment_id": "2273401664",
"repo_full_name": "PostHog/posthog",
"pr_number": 35726,
"pr_file": "ee/hogai/graph/insights/nodes.py",
"discussion_id": "2268061370",
"commented_code": "@@ -65,103 +87,152 @@ def read_insights_page(page_number: int) -> str:\n \n return read_insights_page\n \n+ def _create_insight_evaluation_tools(self):\n+ \"\"\"Create tools for insight evaluation.\"\"\"\n+\n+ @tool\n+ def select_insight(insight_id: int, explanation: str) -> str:\n+ \"\"\"Select an insight as useful for the user's query.\"\"\"\n+ insight = self._find_insight_by_id(insight_id)\n+ if not insight:\n+ return f\"Insight {insight_id} not found\"\n+\n+ self._evaluation_selections[insight_id] = {\"insight\": insight, \"explanation\": explanation}\n+\n+ name = insight.name or insight.derived_name or \"Unnamed\"\n+ return f\"Selected insight {insight_id}: {name}\"\n+\n+ @tool\n+ def get_insight_details(insight_id: int) -> str:\n+ \"\"\"Get detailed information (with query execution)about an insight including its current results.\"\"\"\n+ insight = self._find_insight_by_id(insight_id)\n+ if not insight:\n+ return f\"Insight {insight_id} not found\"\n+\n+ insight_info = self._process_insight_for_evaluation(\n+ insight, AssistantQueryExecutor(self._team, self._utc_now_datetime)\n+ )\n+\n+ insight_url = f\"/project/{self._team.id}/insights/{insight.short_id}\"\n+ hyperlink_format = f\"[{insight_info['name']}]({insight_url})\"\n+\n+ return f\"\"\"Insight: {insight_info['name']} (ID: {insight_info['insight_id']})\n+HYPERLINK FORMAT: {hyperlink_format}\n+Description: {insight_info['description'] or 'No description'}\n+Query: {insight_info['query']}\n+Current Results: {insight_info['results']}\"\"\"\n+\n+ @tool\n+ def reject_all_insights(reason: str) -> str:\n+ \"\"\"Indicate that none of the insights are suitable.\"\"\"\n+ self._evaluation_selections = {}\n+ self._rejection_reason = reason\n+ return \"All insights rejected. 
Will create new insight.\"\n+\n+ return [select_insight, get_insight_details, reject_all_insights]\n+\n def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState | None:\n- start_time = time.time()\n search_query = state.search_insights_query\n- conversation_id = config.get(\"configurable\", {}).get(\"thread_id\", \"unknown\")\n \n try:\n self._current_iteration = 0\n- self._load_all_insights()\n \n- if not self._all_insights:\n- return self._create_error_response(\"No insights found in the database.\", state.root_tool_call_id)\n+ # Check if we have any insights at all\n+ if self._get_total_insights_count() == 0:\n+ return self._create_error_response(EMPTY_DATABASE_ERROR_MESSAGE, state.root_tool_call_id)\n \n selected_insights = self._search_insights_iteratively(search_query or \"\")\n \n- formatted_content = self._format_search_results(selected_insights, search_query or \"\")\n-\n- execution_time = time.time() - start_time\n- self.logger.info(\n- f\"Iterative insight search completed\",\n- extra={\n- \"team_id\": getattr(self._team, \"id\", \"unknown\"),\n- \"conversation_id\": conversation_id,\n- \"query_length\": len(search_query) if search_query else 0,\n- \"results_count\": len(selected_insights),\n- \"execution_time_ms\": round(execution_time * 1000, 2),\n- \"iterations\": self._current_iteration,\n- },\n+ evaluation_result = self._evaluate_insights_with_tools(\n+ selected_insights, search_query or \"\", max_selections=1\n )\n \n- return PartialAssistantState(\n- messages=[\n+ if evaluation_result[\"should_use_existing\"]:\n+ # Create visualization messages for the insights to show actual charts\n+ messages_to_return = []\n+\n+ formatted_content = f\"**Evaluation Result**: {evaluation_result['explanation']}\"\n+\n+ formatted_content += HYPERLINK_USAGE_INSTRUCTIONS\n+\n+ messages_to_return.append(\n AssistantToolCallMessage(\n content=formatted_content,\n tool_call_id=state.root_tool_call_id or \"unknown\",\n id=str(uuid4()),\n- 
),\n- ],\n- search_insights_query=None,\n- root_tool_call_id=None,\n- )\n-\n- except Exception as e:\n- execution_time = time.time() - start_time\n- self.logger.exception(\n- f\"Iterative insight search failed\",\n- extra={\n- \"team_id\": getattr(self._team, \"id\", \"unknown\"),\n- \"conversation_id\": conversation_id,\n- \"query_length\": len(search_query) if search_query else 0,\n- \"execution_time_ms\": round(execution_time * 1000, 2),\n- \"error\": str(e),\n- },\n- )\n-\n+ )\n+ )\n+\n+ # Add visualization messages returned from evaluation\n+ messages_to_return.extend(evaluation_result[\"visualization_messages\"])\n+\n+ return PartialAssistantState(\n+ messages=messages_to_return,\n+ search_insights_query=None,\n+ root_tool_call_id=None,\n+ root_tool_insight_plan=None,\n+ )\n+ else:\n+ # No suitable insights found, triggering creation of a new insight\n+ no_insights_message = AssistantToolCallMessage(\n+ content=NO_INSIGHTS_FOUND_MESSAGE,\n+ tool_call_id=state.root_tool_call_id or \"unknown\",\n+ id=str(uuid4()),\n+ )\n+ return PartialAssistantState(\n+ messages=[no_insights_message],\n+ root_tool_insight_plan=search_query,\n+ search_insights_query=None,\n+ )\n+\n+ except Exception:",
"comment_created_at": "2025-08-13T13:03:06+00:00",
"comment_author": "tatoalo",
"comment_body": "Yep, good point. Added!",
"pr_file_module": null
}
]
},
{
"discussion_id": "2262697935",
"pr_number": 35940,
"pr_file": "posthog/api/sharing.py",
"created_at": "2025-08-08T11:30:21+00:00",
"commented_code": "# We don't want the dashboard to be accidentally loaded via the shared endpoint\n exported_data.update({\"dashboard\": dashboard_data})\n exported_data.update({\"themes\": get_themes_for_team(resource.team)})\n+ elif (\n+ isinstance(resource, ExportedAsset) and resource.export_context and resource.export_context.get(\"replay_id\")\n+ ):\n+ # Handle replay export via export_context\n+ replay_id = resource.export_context.get(\"replay_id\")\n+ timestamp = resource.export_context.get(\"timestamp\")\n+\n+ if not replay_id:\n+ raise NotFound(\"Invalid replay export - missing replay_id\")\n+\n+ # Create a SessionRecording object for the replay\n+ try:\n+ # First, try to get existing recording from database\n+ try:\n+ recording = SessionRecording.objects.get(session_id=replay_id, team=resource.team)\n+ except SessionRecording.DoesNotExist:\n+ # If not found, create it properly\n+ recording = SessionRecording(session_id=replay_id, team=resource.team)\n+ recording.save() # This ensures it exists in PostgreSQL\n+\n+ # Create a JWT for the recording\n+ export_access_token = \"\"\n+ if resource.created_by and resource.created_by.id:\n+ export_access_token = encode_jwt(\n+ {\"id\": resource.created_by.id},\n+ timedelta(minutes=5), # 5 mins should be enough for the export to complete\n+ PosthogJwtAudience.IMPERSONATED_USER,\n+ )\n+\n+ asset_title = \"Session Recording\"\n+ asset_description = f\"Recording {replay_id}\"\n+\n+ recording_data = SessionRecordingSerializer(recording, context=context).data\n+\n+ exported_data.update(\n+ {\n+ \"type\": \"replay_export\",\n+ \"recording\": recording_data,\n+ \"timestamp\": timestamp,\n+ \"replay_id\": replay_id,\n+ \"exportToken\": export_access_token,\n+ \"noBorder\": True,\n+ \"autoplay\": True,\n+ \"mode\": \"screenshot\",\n+ }\n+ )\n+\n+ except Exception:\n+ raise NotFound()",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2262697935",
"repo_full_name": "PostHog/posthog",
"pr_number": 35940,
"pr_file": "posthog/api/sharing.py",
"discussion_id": "2262697935",
"commented_code": "@@ -384,6 +385,55 @@ def retrieve(self, request: Request, *args: Any, **kwargs: Any) -> Any:\n # We don't want the dashboard to be accidentally loaded via the shared endpoint\n exported_data.update({\"dashboard\": dashboard_data})\n exported_data.update({\"themes\": get_themes_for_team(resource.team)})\n+ elif (\n+ isinstance(resource, ExportedAsset) and resource.export_context and resource.export_context.get(\"replay_id\")\n+ ):\n+ # Handle replay export via export_context\n+ replay_id = resource.export_context.get(\"replay_id\")\n+ timestamp = resource.export_context.get(\"timestamp\")\n+\n+ if not replay_id:\n+ raise NotFound(\"Invalid replay export - missing replay_id\")\n+\n+ # Create a SessionRecording object for the replay\n+ try:\n+ # First, try to get existing recording from database\n+ try:\n+ recording = SessionRecording.objects.get(session_id=replay_id, team=resource.team)\n+ except SessionRecording.DoesNotExist:\n+ # If not found, create it properly\n+ recording = SessionRecording(session_id=replay_id, team=resource.team)\n+ recording.save() # This ensures it exists in PostgreSQL\n+\n+ # Create a JWT for the recording\n+ export_access_token = \"\"\n+ if resource.created_by and resource.created_by.id:\n+ export_access_token = encode_jwt(\n+ {\"id\": resource.created_by.id},\n+ timedelta(minutes=5), # 5 mins should be enough for the export to complete\n+ PosthogJwtAudience.IMPERSONATED_USER,\n+ )\n+\n+ asset_title = \"Session Recording\"\n+ asset_description = f\"Recording {replay_id}\"\n+\n+ recording_data = SessionRecordingSerializer(recording, context=context).data\n+\n+ exported_data.update(\n+ {\n+ \"type\": \"replay_export\",\n+ \"recording\": recording_data,\n+ \"timestamp\": timestamp,\n+ \"replay_id\": replay_id,\n+ \"exportToken\": export_access_token,\n+ \"noBorder\": True,\n+ \"autoplay\": True,\n+ \"mode\": \"screenshot\",\n+ }\n+ )\n+\n+ except Exception:\n+ raise NotFound()",
"comment_created_at": "2025-08-08T11:30:21+00:00",
"comment_author": "pauldambra",
"comment_body": "we could capture exception here so we get feedback in prod?",
"pr_file_module": null
}
]
}
]