awesome-reviewers/_reviewers/posthog-validate-inputs-recursively.json

[
  {
    "discussion_id": "2267627113",
    "pr_number": 35962,
    "pr_file": "ee/hogai/notebook/notebook_serializer.py",
    "created_at": "2025-08-11T18:21:57+00:00",
    "commented_code": "+import re\n+import logging\n+from urllib.parse import urlparse, unquote\n+from typing import Optional\n+\n+from posthog.schema import ProsemirrorJSONContent, Mark\n+\n+logger = logging.getLogger(__name__)\n+\n+\n+class MarkdownTokenizer:\n+    \"\"\"Simple markdown tokenizer that handles the most common markdown elements.\"\"\"\n+\n+    def __init__(self):\n+        self.tokens = []\n+        self.pos = 0\n+        self.text = \"\"\n+\n+    def tokenize(self, text: str) -> list[dict]:\n+        \"\"\"Tokenize markdown text into a list of tokens.\"\"\"\n+        self.text = text\n+        self.pos = 0\n+        self.tokens = []\n+\n+        while self.pos < len(self.text):\n+            if not self._try_parse_block_element():\n+                # If no block element found, parse as paragraph\n+                self._parse_paragraph()\n+\n+        return self.tokens\n+\n+    def _try_parse_block_element(self) -> bool:\n+        \"\"\"Try to parse a block-level element. Returns True if successful.\"\"\"\n+        # Skip empty lines\n+        if self._at_line_start() and self._current_line().strip() == \"\":\n+            self._skip_line()\n+            return True\n+\n+        # Try different block elements\n+        if self._try_parse_heading():\n+            return True\n+        if self._try_parse_code_block():\n+            return True\n+        if self._try_parse_blockquote():\n+            return True\n+        if self._try_parse_horizontal_rule():\n+            return True\n+        if self._try_parse_list():\n+            return True\n+\n+        return False\n+\n+    def _try_parse_heading(self) -> bool:\n+        \"\"\"Parse heading (# ## ### etc).\"\"\"\n+        if not self._at_line_start():\n+            return False\n+\n+        line = self._current_line()\n+        match = re.match(r\"^(#{1,6})\\s+(.+)$\", line)\n+        if match:\n+            level = len(match.group(1))\n+            content = match.group(2).strip()\n+            self.tokens.append({\"type\": \"heading\", \"level\": level, \"content\": content})\n+            self._skip_line()\n+            return True\n+        return False\n+\n+    def _try_parse_code_block(self) -> bool:\n+        \"\"\"Parse fenced code block (``` language).\"\"\"\n+        if not self._at_line_start():\n+            return False\n+\n+        line = self._current_line()\n+        match = re.match(r\"^```(\\w*)\\s*$\", line)\n+        if match:\n+            language = match.group(1) or None\n+            self._skip_line()\n+\n+            # Collect code lines until closing ```\n+            code_lines = []\n+            while self.pos < len(self.text):\n+                line = self._current_line()\n+                if line.strip() == \"```\":\n+                    self._skip_line()\n+                    break\n+                code_lines.append(line)\n+                self._skip_line()\n+\n+            self.tokens.append({\"type\": \"code_block\", \"language\": language, \"content\": \"\n\".join(code_lines)})\n+            return True\n+        return False\n+\n+    def _try_parse_blockquote(self) -> bool:\n+        \"\"\"Parse blockquote (> text).\"\"\"\n+        if not self._at_line_start():\n+            return False\n+\n+        line = self._current_line()\n+        if line.startswith(\"> \"):\n+            # Collect all consecutive blockquote lines\n+            quote_lines = []\n+            while self.pos < len(self.text) and self._current_line().startswith(\"> \"):\n+                quote_lines.append(self._current_line()[2:])  # Remove \"> \"\n+                self._skip_line()\n+\n+            self.tokens.append({\"type\": \"blockquote\", \"content\": \"\n\".join(quote_lines)})\n+            return True\n+        return False\n+\n+    def _try_parse_horizontal_rule(self) -> bool:\n+        \"\"\"Parse horizontal rule (--- or ***).\"\"\"\n+        if not self._at_line_start():\n+            return False\n+\n+        line = self._current_line().strip()\n+        if re.match(r\"^(-{3,}|\\*{3,}|_{3,})$\", line):\n+            self.tokens.append({\"type\": \"horizontal_rule\"})\n+            self._skip_line()\n+            return True\n+        return False\n+\n+    def _try_parse_list(self) -> bool:\n+        \"\"\"Parse ordered or unordered list.\"\"\"\n+        if not self._at_line_start():\n+            return False\n+\n+        line = self._current_line()\n+\n+        # Check for unordered list (- * +)\n+        unordered_match = re.match(r\"^(\\s*)([-*+])\\s+(.+)$\", line)\n+        if unordered_match:\n+            return self._parse_list_items(\"unordered\", unordered_match.group(1))\n+\n+        # Check for ordered list (1. 2. etc)\n+        ordered_match = re.match(r\"^(\\s*)(\\d+)\\.\\s+(.+)$\", line)\n+        if ordered_match:\n+            start_num = int(ordered_match.group(2))\n+            return self._parse_list_items(\"ordered\", ordered_match.group(1), start_num)\n+\n+        return False\n+\n+    def _parse_list_items(self, list_type: str, base_indent: str, start: int = 1) -> bool:\n+        \"\"\"Parse consecutive list items.\"\"\"\n+        items: list[str] = []\n+\n+        while self.pos < len(self.text):\n+            line = self._current_line()\n+\n+            if list_type == \"unordered\":\n+                match = re.match(rf\"^{re.escape(base_indent)}[-*+]\\s+(.+)$\", line)\n+            else:\n+                match = re.match(rf\"^{re.escape(base_indent)}\\d+\\.\\s+(.+)$\", line)\n+\n+            if match:\n+                items.append(match.group(1))\n+                self._skip_line()\n+            else:\n+                break\n+\n+        if items:\n+            token: dict[str, str | int | list[str]] = {\"type\": list_type + \"_list\", \"items\": items}\n+            if list_type == \"ordered\":\n+                token[\"start\"] = start\n+            self.tokens.append(token)\n+            return True\n+\n+        return False\n+\n+    def _parse_paragraph(self) -> None:\n+        \"\"\"Parse a paragraph (everything else).\"\"\"\n+        if self.pos >= len(self.text):\n+            return\n+\n+        # Collect lines until we hit a blank line or end\n+        para_lines = []\n+\n+        while self.pos < len(self.text):\n+            line = self._current_line()\n+\n+            # Stop at blank line\n+            if line.strip() == \"\":\n+                break\n+\n+            # Stop if we hit a block element at line start\n+            if self._at_line_start() and self._looks_like_block_element(line):\n+                break\n+\n+            para_lines.append(line)\n+            self._skip_line()\n+\n+        if para_lines:\n+            content = \" \".join(line.strip() for line in para_lines).strip()\n+            if content:\n+                self.tokens.append({\"type\": \"paragraph\", \"content\": content})\n+\n+    def _looks_like_block_element(self, line: str) -> bool:\n+        \"\"\"Check if a line looks like the start of a block element.\"\"\"\n+        line = line.strip()\n+        return (\n+            bool(re.match(r\"^#{1,6}\\s+\", line))  # heading\n+            or line.startswith(\"```\")  # code block\n+            or line.startswith(\"> \")  # blockquote\n+            or bool(re.match(r\"^(-{3,}|\\*{3,}|_{3,})$\", line))  # horizontal rule\n+            or bool(re.match(r\"^(\\s*)([-*+]|\\d+\\.)\\s+\", line))  # list\n+        )\n+\n+    def _current_line(self) -> str:\n+        \"\"\"Get the current line from position.\"\"\"\n+        if self.pos >= len(self.text):\n+            return \"\"\n+\n+        end = self.text.find(\"\n\", self.pos)\n+        if end == -1:\n+            return self.text[self.pos :]\n+        return self.text[self.pos : end]\n+\n+    def _skip_line(self) -> None:\n+        \"\"\"Move to the next line.\"\"\"\n+        end = self.text.find(\"\n\", self.pos)\n+        if end == -1:\n+            self.pos = len(self.text)\n+        else:\n+            self.pos = end + 1\n+\n+    def _at_line_start(self) -> bool:\n+        \"\"\"Check if we're at the start of a line.\"\"\"\n+        return self.pos == 0 or (self.pos > 0 and self.text[self.pos - 1] == \"\n\")\n+\n+\n+class NotebookSerializer:\n+    # Allowed URL schemes for security\n+    ALLOWED_SCHEMES = {\"http\", \"https\", \"mailto\", \"tel\"}\n+\n+    # Tags that map to marks - only officially supported marks in @tiptap/starter-kit\n+    MARK_TAGS = {\n+        \"strong\": \"bold\",\n+        \"b\": \"bold\",\n+        \"em\": \"italic\",\n+        \"i\": \"italic\",\n+        \"u\": \"underline\",\n+        \"s\": \"strike\",\n+        \"del\": \"strike\",\n+        \"strike\": \"strike\",\n+        \"code\": \"code\",\n+    }\n+\n+    def to_json_paragraph(self, input: str | list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(\n+            type=\"paragraph\",\n+            content=input if isinstance(input, list) else [ProsemirrorJSONContent(type=\"text\", text=input)],\n+        )\n+\n+    def to_json_heading(self, input: str | list[ProsemirrorJSONContent], level: int) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(\n+            type=\"heading\",\n+            attrs={\"level\": level},\n+            content=input if isinstance(input, list) else [ProsemirrorJSONContent(type=\"text\", text=input)],\n+        )\n+\n+    def to_json_bullet_list(self, items: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(type=\"bulletList\", content=items)\n+\n+    def to_json_ordered_list(self, items: list[ProsemirrorJSONContent], start: int = 1) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(type=\"orderedList\", attrs={\"start\": start}, content=items)\n+\n+    def to_json_list_item(self, content: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(type=\"listItem\", content=content)\n+\n+    def to_json_code_block(self, code: str, language: str | None = None) -> ProsemirrorJSONContent:\n+        attrs = {\"language\": language} if language else {}\n+        return ProsemirrorJSONContent(\n+            type=\"codeBlock\", attrs=attrs, content=[ProsemirrorJSONContent(type=\"text\", text=code)]\n+        )\n+\n+    def to_json_blockquote(self, content: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(type=\"blockquote\", content=content)\n+\n+    def to_json_horizontal_rule(self) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(type=\"horizontalRule\")\n+\n+    def from_markdown_to_json(self, input: str) -> ProsemirrorJSONContent:\n+        \"\"\"\n+        Parse markdown and convert to TipTap notebook schema.\n+        \"\"\"\n+        # Tokenize the markdown\n+        tokenizer = MarkdownTokenizer()\n+        tokens = tokenizer.tokenize(input)\n+\n+        # Convert tokens to ProsemirrorJSONContent\n+        json_result: list[ProsemirrorJSONContent] = []\n+        for token in tokens:\n+            nodes = self._convert_markdown_token(token)\n+            json_result.extend(nodes)\n+\n+        return ProsemirrorJSONContent(type=\"doc\", content=json_result)\n+\n+    def _convert_markdown_token(self, token: dict) -> list[ProsemirrorJSONContent]:\n+        \"\"\"Convert a markdown token to ProsemirrorJSONContent nodes.\"\"\"\n+        token_type = token[\"type\"]\n+\n+        if token_type == \"paragraph\":\n+            content = self._parse_markdown_inline_content(token[\"content\"])\n+            return [self.to_json_paragraph(content)]\n+\n+        elif token_type == \"heading\":\n+            content = self._parse_markdown_inline_content(token[\"content\"])\n+            return [self.to_json_heading(content, token[\"level\"])]\n+\n+        elif token_type == \"code_block\":\n+            return [self.to_json_code_block(token[\"content\"], token.get(\"language\"))]\n+\n+        elif token_type == \"blockquote\":\n+            # Parse blockquote content as markdown and convert to block content\n+            quote_content = self._parse_blockquote_content(token[\"content\"])\n+            return [self.to_json_blockquote(quote_content)]\n+\n+        elif token_type == \"horizontal_rule\":\n+            return [self.to_json_horizontal_rule()]\n+\n+        elif token_type == \"unordered_list\":\n+            items = []\n+            for item_text in token[\"items\"]:\n+                item_content = self._parse_markdown_inline_content(item_text)\n+                items.append(self.to_json_list_item([self.to_json_paragraph(item_content)]))\n+            return [self.to_json_bullet_list(items)]\n+\n+        elif token_type == \"ordered_list\":\n+            items = []\n+            for item_text in token[\"items\"]:\n+                item_content = self._parse_markdown_inline_content(item_text)\n+                items.append(self.to_json_list_item([self.to_json_paragraph(item_content)]))\n+            start = token.get(\"start\", 1)\n+            return [self.to_json_ordered_list(items, start)]\n+\n+        return []\n+\n+    def _parse_markdown_inline_content(self, text: str) -> list[ProsemirrorJSONContent]:\n+        \"\"\"Parse inline markdown content (bold, italic, links, etc.).\"\"\"\n+        if not text:\n+            return []\n+\n+        # This is a simplified inline parser - handles basic formatting\n+        content = []\n+        pos = 0\n+\n+        while pos < len(text):\n+            # Look for markdown patterns\n+            next_match = self._find_next_markdown_pattern(text, pos)\n+\n+            if next_match is None:\n+                # No more patterns, add remaining text\n+                remaining = text[pos:].rstrip()\n+                if remaining:\n+                    content.append(ProsemirrorJSONContent(type=\"text\", text=remaining))\n+                break\n+\n+            match_start, match_end, pattern_type, pattern_data = next_match\n+\n+            # Add text before the pattern\n+            if match_start > pos:\n+                before_text = text[pos:match_start]\n+                if before_text:\n+                    content.append(ProsemirrorJSONContent(type=\"text\", text=before_text))\n+\n+            # Add the formatted content\n+            if pattern_type == \"bold\":\n+                inner_text = pattern_data[\"text\"]\n+                content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"bold\")]))\n+            elif pattern_type == \"italic\":\n+                inner_text = pattern_data[\"text\"]\n+                content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"italic\")]))\n+            elif pattern_type == \"code\":\n+                inner_text = pattern_data[\"text\"]\n+                content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"code\")]))\n+            elif pattern_type == \"strikethrough\":\n+                inner_text = pattern_data[\"text\"]\n+                content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"strike\")]))\n+            elif pattern_type == \"link\":\n+                link_text = pattern_data[\"text\"]\n+                href = pattern_data[\"href\"]\n+                if self._is_safe_url(href):\n+                    content.append(\n+                        ProsemirrorJSONContent(\n+                            type=\"text\",\n+                            text=link_text,\n+                            marks=[Mark(type=\"link\", attrs={\"href\": href, \"target\": \"_blank\"})],\n+                        )\n+                    )\n+                else:\n+                    # Unsafe URL, just add as text\n+                    content.append(ProsemirrorJSONContent(type=\"text\", text=link_text))\n+\n+            pos = match_end\n+\n+        return content if content else [ProsemirrorJSONContent(type=\"text\", text=text)]\n+\n+    def _find_next_markdown_pattern(self, text: str, start_pos: int) -> Optional[tuple[int, int, str, dict]]:\n+        \"\"\"Find the next markdown formatting pattern in text.\"\"\"\n+        patterns = [\n+            # Bold: **text** or __text__ - check these first to prioritize over italic\n+            (r\"\\*\\*(.+?)\\*\\*\", \"bold\"),\n+            (r\"__(.*?)__\", \"bold\"),\n+            # Italic: *text* or _text_\n+            (r\"\\*(.*?)\\*\", \"italic\"),\n+            (r\"_(.*?)_\", \"italic\"),\n+            # Code: `text`\n+            (r\"`(.*?)`\", \"code\"),\n+            # Strikethrough: ~~text~~\n+            (r\"~~(.*?)~~\", \"strikethrough\"),\n+            # Link: [text](url)\n+            (r\"\\[([^\\]]*)\\]\\(([^)]*)\\)\", \"link\"),\n+        ]\n+\n+        earliest_match = None\n+        earliest_pos = len(text)\n+\n+        for pattern, pattern_type in patterns:\n+            match = re.search(pattern, text[start_pos:])\n+            if match:\n+                match_start = start_pos + match.start()\n+                match_end = start_pos + match.end()\n+\n+                if match_start < earliest_pos:\n+                    earliest_pos = match_start\n+                    if pattern_type == \"link\":\n+                        earliest_match = (\n+                            match_start,\n+                            match_end,\n+                            pattern_type,\n+                            {\"text\": match.group(1), \"href\": match.group(2)},\n+                        )\n+                    else:\n+                        earliest_match = (match_start, match_end, pattern_type, {\"text\": match.group(1)})\n+\n+        return earliest_match\n+\n+    def _parse_blockquote_content(self, content: str) -> list[ProsemirrorJSONContent]:\n+        \"\"\"Parse blockquote content as nested markdown.\"\"\"\n+        # Recursively parse the blockquote content as markdown\n+        tokenizer = MarkdownTokenizer()\n+        tokens = tokenizer.tokenize(content)\n+\n+        result = []\n+        for token in tokens:\n+            nodes = self._convert_markdown_token(token)\n+            result.extend(nodes)\n+\n+        return result if result else [self.to_json_paragraph(\"\")]\n+\n+    def _is_safe_url(self, url: str) -> bool:",
    "repo_full_name": "PostHog/posthog",
    "discussion_comments": [
      {
        "comment_id": "2267627113",
        "repo_full_name": "PostHog/posthog",
        "pr_number": 35962,
        "pr_file": "ee/hogai/notebook/notebook_serializer.py",
        "discussion_id": "2267627113",
        "commented_code": "@@ -0,0 +1,610 @@\n+import re\n+import logging\n+from urllib.parse import urlparse, unquote\n+from typing import Optional\n+\n+from posthog.schema import ProsemirrorJSONContent, Mark\n+\n+logger = logging.getLogger(__name__)\n+\n+\n+class MarkdownTokenizer:\n+    \"\"\"Simple markdown tokenizer that handles the most common markdown elements.\"\"\"\n+\n+    def __init__(self):\n+        self.tokens = []\n+        self.pos = 0\n+        self.text = \"\"\n+\n+    def tokenize(self, text: str) -> list[dict]:\n+        \"\"\"Tokenize markdown text into a list of tokens.\"\"\"\n+        self.text = text\n+        self.pos = 0\n+        self.tokens = []\n+\n+        while self.pos < len(self.text):\n+            if not self._try_parse_block_element():\n+                # If no block element found, parse as paragraph\n+                self._parse_paragraph()\n+\n+        return self.tokens\n+\n+    def _try_parse_block_element(self) -> bool:\n+        \"\"\"Try to parse a block-level element. Returns True if successful.\"\"\"\n+        # Skip empty lines\n+        if self._at_line_start() and self._current_line().strip() == \"\":\n+            self._skip_line()\n+            return True\n+\n+        # Try different block elements\n+        if self._try_parse_heading():\n+            return True\n+        if self._try_parse_code_block():\n+            return True\n+        if self._try_parse_blockquote():\n+            return True\n+        if self._try_parse_horizontal_rule():\n+            return True\n+        if self._try_parse_list():\n+            return True\n+\n+        return False\n+\n+    def _try_parse_heading(self) -> bool:\n+        \"\"\"Parse heading (# ## ### etc).\"\"\"\n+        if not self._at_line_start():\n+            return False\n+\n+        line = self._current_line()\n+        match = re.match(r\"^(#{1,6})\\s+(.+)$\", line)\n+        if match:\n+            level = len(match.group(1))\n+            content = match.group(2).strip()\n+            self.tokens.append({\"type\": \"heading\", \"level\": level, \"content\": content})\n+            self._skip_line()\n+            return True\n+        return False\n+\n+    def _try_parse_code_block(self) -> bool:\n+        \"\"\"Parse fenced code block (``` language).\"\"\"\n+        if not self._at_line_start():\n+            return False\n+\n+        line = self._current_line()\n+        match = re.match(r\"^```(\\w*)\\s*$\", line)\n+        if match:\n+            language = match.group(1) or None\n+            self._skip_line()\n+\n+            # Collect code lines until closing ```\n+            code_lines = []\n+            while self.pos < len(self.text):\n+                line = self._current_line()\n+                if line.strip() == \"```\":\n+                    self._skip_line()\n+                    break\n+                code_lines.append(line)\n+                self._skip_line()\n+\n+            self.tokens.append({\"type\": \"code_block\", \"language\": language, \"content\": \"\\n\".join(code_lines)})\n+            return True\n+        return False\n+\n+    def _try_parse_blockquote(self) -> bool:\n+        \"\"\"Parse blockquote (> text).\"\"\"\n+        if not self._at_line_start():\n+            return False\n+\n+        line = self._current_line()\n+        if line.startswith(\"> \"):\n+            # Collect all consecutive blockquote lines\n+            quote_lines = []\n+            while self.pos < len(self.text) and self._current_line().startswith(\"> \"):\n+                quote_lines.append(self._current_line()[2:])  # Remove \"> \"\n+                self._skip_line()\n+\n+            self.tokens.append({\"type\": \"blockquote\", \"content\": \"\\n\".join(quote_lines)})\n+            return True\n+        return False\n+\n+    def _try_parse_horizontal_rule(self) -> bool:\n+        \"\"\"Parse horizontal rule (--- or ***).\"\"\"\n+        if not self._at_line_start():\n+            return False\n+\n+        line = self._current_line().strip()\n+        if re.match(r\"^(-{3,}|\\*{3,}|_{3,})$\", line):\n+            self.tokens.append({\"type\": \"horizontal_rule\"})\n+            self._skip_line()\n+            return True\n+        return False\n+\n+    def _try_parse_list(self) -> bool:\n+        \"\"\"Parse ordered or unordered list.\"\"\"\n+        if not self._at_line_start():\n+            return False\n+\n+        line = self._current_line()\n+\n+        # Check for unordered list (- * +)\n+        unordered_match = re.match(r\"^(\\s*)([-*+])\\s+(.+)$\", line)\n+        if unordered_match:\n+            return self._parse_list_items(\"unordered\", unordered_match.group(1))\n+\n+        # Check for ordered list (1. 2. etc)\n+        ordered_match = re.match(r\"^(\\s*)(\\d+)\\.\\s+(.+)$\", line)\n+        if ordered_match:\n+            start_num = int(ordered_match.group(2))\n+            return self._parse_list_items(\"ordered\", ordered_match.group(1), start_num)\n+\n+        return False\n+\n+    def _parse_list_items(self, list_type: str, base_indent: str, start: int = 1) -> bool:\n+        \"\"\"Parse consecutive list items.\"\"\"\n+        items: list[str] = []\n+\n+        while self.pos < len(self.text):\n+            line = self._current_line()\n+\n+            if list_type == \"unordered\":\n+                match = re.match(rf\"^{re.escape(base_indent)}[-*+]\\s+(.+)$\", line)\n+            else:\n+                match = re.match(rf\"^{re.escape(base_indent)}\\d+\\.\\s+(.+)$\", line)\n+\n+            if match:\n+                items.append(match.group(1))\n+                self._skip_line()\n+            else:\n+                break\n+\n+        if items:\n+            token: dict[str, str | int | list[str]] = {\"type\": list_type + \"_list\", \"items\": items}\n+            if list_type == \"ordered\":\n+                token[\"start\"] = start\n+            self.tokens.append(token)\n+            return True\n+\n+        return False\n+\n+    def _parse_paragraph(self) -> None:\n+        \"\"\"Parse a paragraph (everything else).\"\"\"\n+        if self.pos >= len(self.text):\n+            return\n+\n+        # Collect lines until we hit a blank line or end\n+        para_lines = []\n+\n+        while self.pos < len(self.text):\n+            line = self._current_line()\n+\n+            # Stop at blank line\n+            if line.strip() == \"\":\n+                break\n+\n+            # Stop if we hit a block element at line start\n+            if self._at_line_start() and self._looks_like_block_element(line):\n+                break\n+\n+            para_lines.append(line)\n+            self._skip_line()\n+\n+        if para_lines:\n+            content = \" \".join(line.strip() for line in para_lines).strip()\n+            if content:\n+                self.tokens.append({\"type\": \"paragraph\", \"content\": content})\n+\n+    def _looks_like_block_element(self, line: str) -> bool:\n+        \"\"\"Check if a line looks like the start of a block element.\"\"\"\n+        line = line.strip()\n+        return (\n+            bool(re.match(r\"^#{1,6}\\s+\", line))  # heading\n+            or line.startswith(\"```\")  # code block\n+            or line.startswith(\"> \")  # blockquote\n+            or bool(re.match(r\"^(-{3,}|\\*{3,}|_{3,})$\", line))  # horizontal rule\n+            or bool(re.match(r\"^(\\s*)([-*+]|\\d+\\.)\\s+\", line))  # list\n+        )\n+\n+    def _current_line(self) -> str:\n+        \"\"\"Get the current line from position.\"\"\"\n+        if self.pos >= len(self.text):\n+            return \"\"\n+\n+        end = self.text.find(\"\\n\", self.pos)\n+        if end == -1:\n+            return self.text[self.pos :]\n+        return self.text[self.pos : end]\n+\n+    def _skip_line(self) -> None:\n+        \"\"\"Move to the next line.\"\"\"\n+        end = self.text.find(\"\\n\", self.pos)\n+        if end == -1:\n+            self.pos = len(self.text)\n+        else:\n+            self.pos = end + 1\n+\n+    def _at_line_start(self) -> bool:\n+        \"\"\"Check if we're at the start of a line.\"\"\"\n+        return self.pos == 0 or (self.pos > 0 and self.text[self.pos - 1] == \"\\n\")\n+\n+\n+class NotebookSerializer:\n+    # Allowed URL schemes for security\n+    ALLOWED_SCHEMES = {\"http\", \"https\", \"mailto\", \"tel\"}\n+\n+    # Tags that map to marks - only officially supported marks in @tiptap/starter-kit\n+    MARK_TAGS = {\n+        \"strong\": \"bold\",\n+        \"b\": \"bold\",\n+        \"em\": \"italic\",\n+        \"i\": \"italic\",\n+        \"u\": \"underline\",\n+        \"s\": \"strike\",\n+        \"del\": \"strike\",\n+        \"strike\": \"strike\",\n+        \"code\": \"code\",\n+    }\n+\n+    def to_json_paragraph(self, input: str | list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(\n+            type=\"paragraph\",\n+            content=input if isinstance(input, list) else [ProsemirrorJSONContent(type=\"text\", text=input)],\n+        )\n+\n+    def to_json_heading(self, input: str | list[ProsemirrorJSONContent], level: int) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(\n+            type=\"heading\",\n+            attrs={\"level\": level},\n+            content=input if isinstance(input, list) else [ProsemirrorJSONContent(type=\"text\", text=input)],\n+        )\n+\n+    def to_json_bullet_list(self, items: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(type=\"bulletList\", content=items)\n+\n+    def to_json_ordered_list(self, items: list[ProsemirrorJSONContent], start: int = 1) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(type=\"orderedList\", attrs={\"start\": start}, content=items)\n+\n+    def to_json_list_item(self, content: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(type=\"listItem\", content=content)\n+\n+    def to_json_code_block(self, code: str, language: str | None = None) -> ProsemirrorJSONContent:\n+        attrs = {\"language\": language} if language else {}\n+        return ProsemirrorJSONContent(\n+            type=\"codeBlock\", attrs=attrs, content=[ProsemirrorJSONContent(type=\"text\", text=code)]\n+        )\n+\n+    def to_json_blockquote(self, content: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(type=\"blockquote\", content=content)\n+\n+    def to_json_horizontal_rule(self) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(type=\"horizontalRule\")\n+\n+    def from_markdown_to_json(self, input: str) -> ProsemirrorJSONContent:\n+        \"\"\"\n+        Parse markdown and convert to TipTap notebook schema.\n+        \"\"\"\n+        # Tokenize the markdown\n+        tokenizer = MarkdownTokenizer()\n+        tokens = tokenizer.tokenize(input)\n+\n+        # Convert tokens to ProsemirrorJSONContent\n+        json_result: list[ProsemirrorJSONContent] = []\n+        for token in tokens:\n+            nodes = self._convert_markdown_token(token)\n+            json_result.extend(nodes)\n+\n+        return ProsemirrorJSONContent(type=\"doc\", content=json_result)\n+\n+    def _convert_markdown_token(self, token: dict) -> list[ProsemirrorJSONContent]:\n+        \"\"\"Convert a markdown token to ProsemirrorJSONContent nodes.\"\"\"\n+        token_type = token[\"type\"]\n+\n+        if token_type == \"paragraph\":\n+            content = self._parse_markdown_inline_content(token[\"content\"])\n+            return [self.to_json_paragraph(content)]\n+\n+        elif token_type == \"heading\":\n+            content = self._parse_markdown_inline_content(token[\"content\"])\n+            return [self.to_json_heading(content, token[\"level\"])]\n+\n+        elif token_type == \"code_block\":\n+            return [self.to_json_code_block(token[\"content\"], token.get(\"language\"))]\n+\n+        elif token_type == \"blockquote\":\n+            # Parse blockquote content as markdown and convert to block content\n+            quote_content = self._parse_blockquote_content(token[\"content\"])\n+            return [self.to_json_blockquote(quote_content)]\n+\n+        elif token_type == \"horizontal_rule\":\n+            return [self.to_json_horizontal_rule()]\n+\n+        elif token_type == \"unordered_list\":\n+            items = []\n+            for item_text in token[\"items\"]:\n+                item_content = self._parse_markdown_inline_content(item_text)\n+                items.append(self.to_json_list_item([self.to_json_paragraph(item_content)]))\n+            return [self.to_json_bullet_list(items)]\n+\n+        elif token_type == \"ordered_list\":\n+            items = []\n+            for item_text in token[\"items\"]:\n+                item_content = self._parse_markdown_inline_content(item_text)\n+                items.append(self.to_json_list_item([self.to_json_paragraph(item_content)]))\n+            start = token.get(\"start\", 1)\n+            return [self.to_json_ordered_list(items, start)]\n+\n+        return []\n+\n+    def _parse_markdown_inline_content(self, text: str) -> list[ProsemirrorJSONContent]:\n+        \"\"\"Parse inline markdown content (bold, italic, links, etc.).\"\"\"\n+        if not text:\n+            return []\n+\n+        # This is a simplified inline parser - handles basic formatting\n+        content = []\n+        pos = 0\n+\n+        while pos < len(text):\n+            # Look for markdown patterns\n+            next_match = self._find_next_markdown_pattern(text, pos)\n+\n+            if next_match is None:\n+                # No more patterns, add remaining text\n+                remaining = text[pos:].rstrip()\n+                if remaining:\n+                    content.append(ProsemirrorJSONContent(type=\"text\", text=remaining))\n+                break\n+\n+            match_start, match_end, pattern_type, pattern_data = next_match\n+\n+            # Add text before the pattern\n+            if match_start > pos:\n+                before_text = text[pos:match_start]\n+                if before_text:\n+                    content.append(ProsemirrorJSONContent(type=\"text\", text=before_text))\n+\n+            # Add the formatted content\n+            if pattern_type == \"bold\":\n+                inner_text = pattern_data[\"text\"]\n+                content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"bold\")]))\n+            elif pattern_type == \"italic\":\n+                inner_text = pattern_data[\"text\"]\n+                content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"italic\")]))\n+            elif pattern_type == \"code\":\n+                inner_text = pattern_data[\"text\"]\n+                content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"code\")]))\n+            elif pattern_type == \"strikethrough\":\n+                inner_text = pattern_data[\"text\"]\n+                content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"strike\")]))\n+            elif pattern_type == \"link\":\n+                link_text = pattern_data[\"text\"]\n+                href = pattern_data[\"href\"]\n+                if self._is_safe_url(href):\n+                    content.append(\n+                        ProsemirrorJSONContent(\n+                            type=\"text\",\n+                            text=link_text,\n+                            marks=[Mark(type=\"link\", attrs={\"href\": href, \"target\": \"_blank\"})],\n+                        )\n+                    )\n+                else:\n+                    # Unsafe URL, just add as text\n+                    content.append(ProsemirrorJSONContent(type=\"text\", text=link_text))\n+\n+            pos = match_end\n+\n+        return content if content else [ProsemirrorJSONContent(type=\"text\", text=text)]\n+\n+    def _find_next_markdown_pattern(self, text: str, start_pos: int) -> Optional[tuple[int, int, str, dict]]:\n+        \"\"\"Find the next markdown formatting pattern in text.\"\"\"\n+        patterns = [\n+            # Bold: **text** or __text__ - check these first to prioritize over italic\n+            (r\"\\*\\*(.+?)\\*\\*\", \"bold\"),\n+            (r\"__(.*?)__\", \"bold\"),\n+            # Italic: *text* or _text_\n+            (r\"\\*(.*?)\\*\", \"italic\"),\n+            (r\"_(.*?)_\", \"italic\"),\n+            # Code: `text`\n+            (r\"`(.*?)`\", \"code\"),\n+            # Strikethrough: ~~text~~\n+            (r\"~~(.*?)~~\", \"strikethrough\"),\n+            # Link: [text](url)\n+            (r\"\\[([^\\]]*)\\]\\(([^)]*)\\)\", \"link\"),\n+        ]\n+\n+        earliest_match = None\n+        earliest_pos = len(text)\n+\n+        for pattern, pattern_type in patterns:\n+            match = re.search(pattern, text[start_pos:])\n+            if match:\n+                match_start = start_pos + match.start()\n+                match_end = start_pos + match.end()\n+\n+                if match_start < earliest_pos:\n+                    earliest_pos = match_start\n+                    if pattern_type == \"link\":\n+                        earliest_match = (\n+                            match_start,\n+                            match_end,\n+                            pattern_type,\n+                            {\"text\": match.group(1), \"href\": match.group(2)},\n+                        )\n+                    else:\n+                        earliest_match = (match_start, match_end, pattern_type, {\"text\": match.group(1)})\n+\n+        return earliest_match\n+\n+    def _parse_blockquote_content(self, content: str) -> list[ProsemirrorJSONContent]:\n+        \"\"\"Parse blockquote content as nested markdown.\"\"\"\n+        # Recursively parse the blockquote content as markdown\n+        tokenizer = MarkdownTokenizer()\n+        tokens = tokenizer.tokenize(content)\n+\n+        result = []\n+        for token in tokens:\n+            nodes = self._convert_markdown_token(token)\n+            result.extend(nodes)\n+\n+        return result if result else [self.to_json_paragraph(\"\")]\n+\n+    def _is_safe_url(self, url: str) -> bool:",
        "comment_created_at": "2025-08-11T18:21:57+00:00",
        "comment_author": "sortafreel",
        "comment_body": "If to be paranoid - it seems it could still be unsafe \ud83d\ude05  Like, if we add something like `%256Aavascript:` it would decode to `%6Aavascript:` if we do just a single unquote, so there's a space for XSS attack, right now something like `'javascript%253Aalert(1)` could pass through.\r\n\r\nCould be skipped, but if we want to be double sure it would probably make sense to have a  recursive checj. Not sure how much do we care though, or where the URL can come from.",
        "pr_file_module": null
      },
      {
        "comment_id": "2267948914",
        "repo_full_name": "PostHog/posthog",
        "pr_number": 35962,
        "pr_file": "ee/hogai/notebook/notebook_serializer.py",
        "discussion_id": "2267627113",
        "commented_code": "@@ -0,0 +1,610 @@\n+import re\n+import logging\n+from urllib.parse import urlparse, unquote\n+from typing import Optional\n+\n+from posthog.schema import ProsemirrorJSONContent, Mark\n+\n+logger = logging.getLogger(__name__)\n+\n+\n+class MarkdownTokenizer:\n+    \"\"\"Simple markdown tokenizer that handles the most common markdown elements.\"\"\"\n+\n+    def __init__(self):\n+        self.tokens = []\n+        self.pos = 0\n+        self.text = \"\"\n+\n+    def tokenize(self, text: str) -> list[dict]:\n+        \"\"\"Tokenize markdown text into a list of tokens.\"\"\"\n+        self.text = text\n+        self.pos = 0\n+        self.tokens = []\n+\n+        while self.pos < len(self.text):\n+            if not self._try_parse_block_element():\n+                # If no block element found, parse as paragraph\n+                self._parse_paragraph()\n+\n+        return self.tokens\n+\n+    def _try_parse_block_element(self) -> bool:\n+        \"\"\"Try to parse a block-level element. Returns True if successful.\"\"\"\n+        # Skip empty lines\n+        if self._at_line_start() and self._current_line().strip() == \"\":\n+            self._skip_line()\n+            return True\n+\n+        # Try different block elements\n+        if self._try_parse_heading():\n+            return True\n+        if self._try_parse_code_block():\n+            return True\n+        if self._try_parse_blockquote():\n+            return True\n+        if self._try_parse_horizontal_rule():\n+            return True\n+        if self._try_parse_list():\n+            return True\n+\n+        return False\n+\n+    def _try_parse_heading(self) -> bool:\n+        \"\"\"Parse heading (# ## ### etc).\"\"\"\n+        if not self._at_line_start():\n+            return False\n+\n+        line = self._current_line()\n+        match = re.match(r\"^(#{1,6})\\s+(.+)$\", line)\n+        if match:\n+            level = len(match.group(1))\n+            content = match.group(2).strip()\n+            self.tokens.append({\"type\": \"heading\", \"level\": level, \"content\": content})\n+            self._skip_line()\n+            return True\n+        return False\n+\n+    def _try_parse_code_block(self) -> bool:\n+        \"\"\"Parse fenced code block (``` language).\"\"\"\n+        if not self._at_line_start():\n+            return False\n+\n+        line = self._current_line()\n+        match = re.match(r\"^```(\\w*)\\s*$\", line)\n+        if match:\n+            language = match.group(1) or None\n+            self._skip_line()\n+\n+            # Collect code lines until closing ```\n+            code_lines = []\n+            while self.pos < len(self.text):\n+                line = self._current_line()\n+                if line.strip() == \"```\":\n+                    self._skip_line()\n+                    break\n+                code_lines.append(line)\n+                self._skip_line()\n+\n+            self.tokens.append({\"type\": \"code_block\", \"language\": language, \"content\": \"\\n\".join(code_lines)})\n+            return True\n+        return False\n+\n+    def _try_parse_blockquote(self) -> bool:\n+        \"\"\"Parse blockquote (> text).\"\"\"\n+        if not self._at_line_start():\n+            return False\n+\n+        line = self._current_line()\n+        if line.startswith(\"> \"):\n+            # Collect all consecutive blockquote lines\n+            quote_lines = []\n+            while self.pos < len(self.text) and self._current_line().startswith(\"> \"):\n+                quote_lines.append(self._current_line()[2:])  # Remove \"> \"\n+                self._skip_line()\n+\n+            self.tokens.append({\"type\": \"blockquote\", \"content\": \"\\n\".join(quote_lines)})\n+            return True\n+        return False\n+\n+    def _try_parse_horizontal_rule(self) -> bool:\n+        \"\"\"Parse horizontal rule (--- or ***).\"\"\"\n+        if not self._at_line_start():\n+            return False\n+\n+        line = self._current_line().strip()\n+        if re.match(r\"^(-{3,}|\\*{3,}|_{3,})$\", line):\n+            self.tokens.append({\"type\": \"horizontal_rule\"})\n+            self._skip_line()\n+            return True\n+        return False\n+\n+    def _try_parse_list(self) -> bool:\n+        \"\"\"Parse ordered or unordered list.\"\"\"\n+        if not self._at_line_start():\n+            return False\n+\n+        line = self._current_line()\n+\n+        # Check for unordered list (- * +)\n+        unordered_match = re.match(r\"^(\\s*)([-*+])\\s+(.+)$\", line)\n+        if unordered_match:\n+            return self._parse_list_items(\"unordered\", unordered_match.group(1))\n+\n+        # Check for ordered list (1. 2. etc)\n+        ordered_match = re.match(r\"^(\\s*)(\\d+)\\.\\s+(.+)$\", line)\n+        if ordered_match:\n+            start_num = int(ordered_match.group(2))\n+            return self._parse_list_items(\"ordered\", ordered_match.group(1), start_num)\n+\n+        return False\n+\n+    def _parse_list_items(self, list_type: str, base_indent: str, start: int = 1) -> bool:\n+        \"\"\"Parse consecutive list items.\"\"\"\n+        items: list[str] = []\n+\n+        while self.pos < len(self.text):\n+            line = self._current_line()\n+\n+            if list_type == \"unordered\":\n+                match = re.match(rf\"^{re.escape(base_indent)}[-*+]\\s+(.+)$\", line)\n+            else:\n+                match = re.match(rf\"^{re.escape(base_indent)}\\d+\\.\\s+(.+)$\", line)\n+\n+            if match:\n+                items.append(match.group(1))\n+                self._skip_line()\n+            else:\n+                break\n+\n+        if items:\n+            token: dict[str, str | int | list[str]] = {\"type\": list_type + \"_list\", \"items\": items}\n+            if list_type == \"ordered\":\n+                token[\"start\"] = start\n+            self.tokens.append(token)\n+            return True\n+\n+        return False\n+\n+    def _parse_paragraph(self) -> None:\n+        \"\"\"Parse a paragraph (everything else).\"\"\"\n+        if self.pos >= len(self.text):\n+            return\n+\n+        # Collect lines until we hit a blank line or end\n+        para_lines = []\n+\n+        while self.pos < len(self.text):\n+            line = self._current_line()\n+\n+            # Stop at blank line\n+            if line.strip() == \"\":\n+                break\n+\n+            # Stop if we hit a block element at line start\n+            if self._at_line_start() and self._looks_like_block_element(line):\n+                break\n+\n+            para_lines.append(line)\n+            self._skip_line()\n+\n+        if para_lines:\n+            content = \" \".join(line.strip() for line in para_lines).strip()\n+            if content:\n+                self.tokens.append({\"type\": \"paragraph\", \"content\": content})\n+\n+    def _looks_like_block_element(self, line: str) -> bool:\n+        \"\"\"Check if a line looks like the start of a block element.\"\"\"\n+        line = line.strip()\n+        return (\n+            bool(re.match(r\"^#{1,6}\\s+\", line))  # heading\n+            or line.startswith(\"```\")  # code block\n+            or line.startswith(\"> \")  # blockquote\n+            or bool(re.match(r\"^(-{3,}|\\*{3,}|_{3,})$\", line))  # horizontal rule\n+            or bool(re.match(r\"^(\\s*)([-*+]|\\d+\\.)\\s+\", line))  # list\n+        )\n+\n+    def _current_line(self) -> str:\n+        \"\"\"Get the current line from position.\"\"\"\n+        if self.pos >= len(self.text):\n+            return \"\"\n+\n+        end = self.text.find(\"\\n\", self.pos)\n+        if end == -1:\n+            return self.text[self.pos :]\n+        return self.text[self.pos : end]\n+\n+    def _skip_line(self) -> None:\n+        \"\"\"Move to the next line.\"\"\"\n+        end = self.text.find(\"\\n\", self.pos)\n+        if end == -1:\n+            self.pos = len(self.text)\n+        else:\n+            self.pos = end + 1\n+\n+    def _at_line_start(self) -> bool:\n+        \"\"\"Check if we're at the start of a line.\"\"\"\n+        return self.pos == 0 or (self.pos > 0 and self.text[self.pos - 1] == \"\\n\")\n+\n+\n+class NotebookSerializer:\n+    # Allowed URL schemes for security\n+    ALLOWED_SCHEMES = {\"http\", \"https\", \"mailto\", \"tel\"}\n+\n+    # Tags that map to marks - only officially supported marks in @tiptap/starter-kit\n+    MARK_TAGS = {\n+        \"strong\": \"bold\",\n+        \"b\": \"bold\",\n+        \"em\": \"italic\",\n+        \"i\": \"italic\",\n+        \"u\": \"underline\",\n+        \"s\": \"strike\",\n+        \"del\": \"strike\",\n+        \"strike\": \"strike\",\n+        \"code\": \"code\",\n+    }\n+\n+    def to_json_paragraph(self, input: str | list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(\n+            type=\"paragraph\",\n+            content=input if isinstance(input, list) else [ProsemirrorJSONContent(type=\"text\", text=input)],\n+        )\n+\n+    def to_json_heading(self, input: str | list[ProsemirrorJSONContent], level: int) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(\n+            type=\"heading\",\n+            attrs={\"level\": level},\n+            content=input if isinstance(input, list) else [ProsemirrorJSONContent(type=\"text\", text=input)],\n+        )\n+\n+    def to_json_bullet_list(self, items: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(type=\"bulletList\", content=items)\n+\n+    def to_json_ordered_list(self, items: list[ProsemirrorJSONContent], start: int = 1) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(type=\"orderedList\", attrs={\"start\": start}, content=items)\n+\n+    def to_json_list_item(self, content: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(type=\"listItem\", content=content)\n+\n+    def to_json_code_block(self, code: str, language: str | None = None) -> ProsemirrorJSONContent:\n+        attrs = {\"language\": language} if language else {}\n+        return ProsemirrorJSONContent(\n+            type=\"codeBlock\", attrs=attrs, content=[ProsemirrorJSONContent(type=\"text\", text=code)]\n+        )\n+\n+    def to_json_blockquote(self, content: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(type=\"blockquote\", content=content)\n+\n+    def to_json_horizontal_rule(self) -> ProsemirrorJSONContent:\n+        return ProsemirrorJSONContent(type=\"horizontalRule\")\n+\n+    def from_markdown_to_json(self, input: str) -> ProsemirrorJSONContent:\n+        \"\"\"\n+        Parse markdown and convert to TipTap notebook schema.\n+        \"\"\"\n+        # Tokenize the markdown\n+        tokenizer = MarkdownTokenizer()\n+        tokens = tokenizer.tokenize(input)\n+\n+        # Convert tokens to ProsemirrorJSONContent\n+        json_result: list[ProsemirrorJSONContent] = []\n+        for token in tokens:\n+            nodes = self._convert_markdown_token(token)\n+            json_result.extend(nodes)\n+\n+        return ProsemirrorJSONContent(type=\"doc\", content=json_result)\n+\n+    def _convert_markdown_token(self, token: dict) -> list[ProsemirrorJSONContent]:\n+        \"\"\"Convert a markdown token to ProsemirrorJSONContent nodes.\"\"\"\n+        token_type = token[\"type\"]\n+\n+        if token_type == \"paragraph\":\n+            content = self._parse_markdown_inline_content(token[\"content\"])\n+            return [self.to_json_paragraph(content)]\n+\n+        elif token_type == \"heading\":\n+            content = self._parse_markdown_inline_content(token[\"content\"])\n+            return [self.to_json_heading(content, token[\"level\"])]\n+\n+        elif token_type == \"code_block\":\n+            return [self.to_json_code_block(token[\"content\"], token.get(\"language\"))]\n+\n+        elif token_type == \"blockquote\":\n+            # Parse blockquote content as markdown and convert to block content\n+            quote_content = self._parse_blockquote_content(token[\"content\"])\n+            return [self.to_json_blockquote(quote_content)]\n+\n+        elif token_type == \"horizontal_rule\":\n+            return [self.to_json_horizontal_rule()]\n+\n+        elif token_type == \"unordered_list\":\n+            items = []\n+            for item_text in token[\"items\"]:\n+                item_content = self._parse_markdown_inline_content(item_text)\n+                items.append(self.to_json_list_item([self.to_json_paragraph(item_content)]))\n+            return [self.to_json_bullet_list(items)]\n+\n+        elif token_type == \"ordered_list\":\n+            items = []\n+            for item_text in token[\"items\"]:\n+                item_content = self._parse_markdown_inline_content(item_text)\n+                items.append(self.to_json_list_item([self.to_json_paragraph(item_content)]))\n+            start = token.get(\"start\", 1)\n+            return [self.to_json_ordered_list(items, start)]\n+\n+        return []\n+\n+    def _parse_markdown_inline_content(self, text: str) -> list[ProsemirrorJSONContent]:\n+        \"\"\"Parse inline markdown content (bold, italic, links, etc.).\"\"\"\n+        if not text:\n+            return []\n+\n+        # This is a simplified inline parser - handles basic formatting\n+        content = []\n+        pos = 0\n+\n+        while pos < len(text):\n+            # Look for markdown patterns\n+            next_match = self._find_next_markdown_pattern(text, pos)\n+\n+            if next_match is None:\n+                # No more patterns, add remaining text\n+                remaining = text[pos:].rstrip()\n+                if remaining:\n+                    content.append(ProsemirrorJSONContent(type=\"text\", text=remaining))\n+                break\n+\n+            match_start, match_end, pattern_type, pattern_data = next_match\n+\n+            # Add text before the pattern\n+            if match_start > pos:\n+                before_text = text[pos:match_start]\n+                if before_text:\n+                    content.append(ProsemirrorJSONContent(type=\"text\", text=before_text))\n+\n+            # Add the formatted content\n+            if pattern_type == \"bold\":\n+                inner_text = pattern_data[\"text\"]\n+                content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"bold\")]))\n+            elif pattern_type == \"italic\":\n+                inner_text = pattern_data[\"text\"]\n+                content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"italic\")]))\n+            elif pattern_type == \"code\":\n+                inner_text = pattern_data[\"text\"]\n+                content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"code\")]))\n+            elif pattern_type == \"strikethrough\":\n+                inner_text = pattern_data[\"text\"]\n+                content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"strike\")]))\n+            elif pattern_type == \"link\":\n+                link_text = pattern_data[\"text\"]\n+                href = pattern_data[\"href\"]\n+                if self._is_safe_url(href):\n+                    content.append(\n+                        ProsemirrorJSONContent(\n+                            type=\"text\",\n+                            text=link_text,\n+                            marks=[Mark(type=\"link\", attrs={\"href\": href, \"target\": \"_blank\"})],\n+                        )\n+                    )\n+                else:\n+                    # Unsafe URL, just add as text\n+                    content.append(ProsemirrorJSONContent(type=\"text\", text=link_text))\n+\n+            pos = match_end\n+\n+        return content if content else [ProsemirrorJSONContent(type=\"text\", text=text)]\n+\n+    def _find_next_markdown_pattern(self, text: str, start_pos: int) -> Optional[tuple[int, int, str, dict]]:\n+        \"\"\"Find the next markdown formatting pattern in text.\"\"\"\n+        patterns = [\n+            # Bold: **text** or __text__ - check these first to prioritize over italic\n+            (r\"\\*\\*(.+?)\\*\\*\", \"bold\"),\n+            (r\"__(.*?)__\", \"bold\"),\n+            # Italic: *text* or _text_\n+            (r\"\\*(.*?)\\*\", \"italic\"),\n+            (r\"_(.*?)_\", \"italic\"),\n+            # Code: `text`\n+            (r\"`(.*?)`\", \"code\"),\n+            # Strikethrough: ~~text~~\n+            (r\"~~(.*?)~~\", \"strikethrough\"),\n+            # Link: [text](url)\n+            (r\"\\[([^\\]]*)\\]\\(([^)]*)\\)\", \"link\"),\n+        ]\n+\n+        earliest_match = None\n+        earliest_pos = len(text)\n+\n+        for pattern, pattern_type in patterns:\n+            match = re.search(pattern, text[start_pos:])\n+            if match:\n+                match_start = start_pos + match.start()\n+                match_end = start_pos + match.end()\n+\n+                if match_start < earliest_pos:\n+                    earliest_pos = match_start\n+                    if pattern_type == \"link\":\n+                        earliest_match = (\n+                            match_start,\n+                            match_end,\n+                            pattern_type,\n+                            {\"text\": match.group(1), \"href\": match.group(2)},\n+                        )\n+                    else:\n+                        earliest_match = (match_start, match_end, pattern_type, {\"text\": match.group(1)})\n+\n+        return earliest_match\n+\n+    def _parse_blockquote_content(self, content: str) -> list[ProsemirrorJSONContent]:\n+        \"\"\"Parse blockquote content as nested markdown.\"\"\"\n+        # Recursively parse the blockquote content as markdown\n+        tokenizer = MarkdownTokenizer()\n+        tokens = tokenizer.tokenize(content)\n+\n+        result = []\n+        for token in tokens:\n+            nodes = self._convert_markdown_token(token)\n+            result.extend(nodes)\n+\n+        return result if result else [self.to_json_paragraph(\"\")]\n+\n+    def _is_safe_url(self, url: str) -> bool:",
        "comment_created_at": "2025-08-11T20:26:20+00:00",
        "comment_author": "kappa90",
        "comment_body": "I added recursive decoding so this doesn't happen, better safe than sorry. The URL comes from the LLM but we could reuse this class somewhere else.",
        "pr_file_module": null
      }
    ]
  },
  {
    "discussion_id": "2269240589",
    "pr_number": 36394,
    "pr_file": "ee/api/vercel/vercel_installation.py",
    "created_at": "2025-08-12T09:20:40+00:00",
    "commented_code": "+\"\"\"\n+Implements the Vercel Marketplace API server for managing marketplace installations.\n+\n+Biggest problem here is that we don't yet conform to Vercel's response schema.\n+\n+See:\n+https://vercel.com/docs/integrations/create-integration/marketplace-api\n+\"\"\"\n+\n+from typing import Any\n+from django.conf import settings\n+from django.db import IntegrityError\n+from rest_framework import serializers, viewsets, exceptions\n+from rest_framework.request import Request\n+from rest_framework.response import Response\n+from rest_framework import mixins\n+from rest_framework.permissions import BasePermission\n+from ee.api.authentication import VercelAuthentication\n+from posthog.event_usage import report_user_signed_up\n+from posthog.models.user import User\n+from ee.models.vercel.vercel_installation import VercelInstallation\n+from rest_framework import decorators\n+\n+\n+def get_vercel_plans() -> list[dict[str, Any]]:\n+    \"\"\"Get PostHog plans formatted for Vercel Marketplace\"\"\"\n+    return [\n+        {\n+            \"id\": \"free\",\n+            \"type\": \"subscription\",\n+            \"name\": \"Free\",\n+            \"description\": \"No credit card required\",\n+            \"scope\": \"installation\",\n+            \"paymentMethodRequired\": False,\n+            \"details\": [\n+                {\"label\": \"Data retention\", \"value\": \"1 year\"},\n+                {\"label\": \"Projects\", \"value\": \"1\"},\n+                {\"label\": \"Team members\", \"value\": \"Unlimited\"},\n+                {\"label\": \"API Access\", \"value\": \"\u2713\"},\n+                {\"label\": \"No limits on tracked users\", \"value\": \"\u2713\"},\n+                {\"label\": \"Community support\", \"value\": \"Support via community forum\"},\n+            ],\n+            \"highlightedDetails\": [\n+                {\"label\": \"Feature Flags\", \"value\": \"1 million free requests\"},\n+                {\"label\": \"Experiments\", \"value\": \"1 million free requests\"},\n+            ],\n+        },\n+        {\n+            \"id\": \"pay_as_you_go\",\n+            \"type\": \"subscription\",\n+            \"name\": \"Pay-as-you-go\",\n+            \"description\": \"Usage-based pricing after free tier\",\n+            \"scope\": \"installation\",\n+            \"paymentMethodRequired\": True,\n+            \"details\": [\n+                {\"label\": \"Data retention\", \"value\": \"7 years\"},\n+                {\"label\": \"Projects\", \"value\": \"6\"},\n+                {\"label\": \"Team members\", \"value\": \"Unlimited\"},\n+                {\"label\": \"API Access\", \"value\": \"\u2713\"},\n+                {\"label\": \"No limits on tracked users\", \"value\": \"\u2713\"},\n+                {\"label\": \"Standard support\", \"value\": \"Support via email, Slack-based over $2k/mo\"},\n+            ],\n+            \"highlightedDetails\": [\n+                {\"label\": \"Feature flags\", \"value\": \"1 million requests for free, then from $0.0001/request\"},\n+                {\"label\": \"Experiments\", \"value\": \"Billed with feature flags\"},\n+            ],\n+        },\n+    ]\n+\n+\n+class VercelInstallationPermission(BasePermission):\n+    \"\"\"\n+    Custom permission that validates Vercel auth type and installation ID match.\n+    Vercel auth type is determined by the X-Vercel-Auth header, and can differ per endpoint.\n+    See Marketplace API spec.\n+    \"\"\"\n+\n+    def has_permission(self, request: Request, view) -> bool:\n+        self._validate_auth_type_allowed(request, view)\n+        return True\n+\n+    def has_object_permission(self, request: Request, view, obj) -> bool:\n+        self._validate_installation_id_match(request, view)\n+        return True\n+\n+    def _get_supported_auth_types(self, view) -> list[str]:\n+        \"\"\"\n+        Get supported auth types for the current action from the viewset.\n+        Supported auth type is specified by the marketplace API spec.\n+        \"\"\"\n+        return getattr(view, \"supported_auth_types\", {}).get(view.action, [\"User\", \"System\"])\n+\n+    def _validate_auth_type_allowed(self, request: Request, view) -> None:\n+        \"\"\"Validate that the auth type from X-Vercel-Auth header is allowed for this endpoint\"\"\"\n+        auth_type = request.headers.get(\"X-Vercel-Auth\", \"\").lower()\n+        if not auth_type:\n+            raise exceptions.AuthenticationFailed(\"Missing X-Vercel-Auth header\")\n+\n+        auth_type_title = auth_type.title()\n+        supported_types = self._get_supported_auth_types(view)\n+\n+        if auth_type_title not in supported_types:\n+            raise exceptions.PermissionDenied(\n+                f\"Auth type '{auth_type_title}' not allowed for this endpoint. \"\n+                f\"Supported types: {', '.join(supported_types)}\"\n+            )\n+\n+    def _validate_installation_id_match(self, request: Request, view) -> None:\n+        \"\"\"Validate that JWT installation_id matches URL parameter\"\"\"\n+        jwt_payload = self._get_jwt_payload(request)\n+\n+        # installation_id when going through vercel_installation ViewSet,\n+        # or parent_lookup_installation_id when going through vercel_resource\n+        installation_id = view.kwargs.get(\"installation_id\") or view.kwargs.get(\"parent_lookup_installation_id\")\n+\n+        if jwt_payload.get(\"installation_id\") != installation_id:\n+            raise exceptions.PermissionDenied(\"Installation ID mismatch\")\n+\n+    def _get_jwt_payload(self, request: Request) -> dict[str, Any]:\n+        \"\"\"Extract JWT payload from authenticated request\"\"\"\n+        if hasattr(request, \"auth\") and isinstance(request.auth, dict) and request.auth:\n+            return request.auth\n+        raise exceptions.AuthenticationFailed(\"No valid JWT authentication found\")\n+\n+\n+class VercelCredentialsSerializer(serializers.Serializer):\n+    access_token = serializers.CharField(help_text=\"Access token authorizes marketplace and integration APIs.\")\n+    token_type = serializers.CharField(help_text=\"The type of token (default: Bearer).\")\n+\n+\n+class VercelContactSerializer(serializers.Serializer):\n+    email = serializers.EmailField(help_text=\"Contact email address for the account.\")\n+    name = serializers.CharField(required=False, allow_blank=True, help_text=\"Contact name for the account (optional).\")\n+\n+\n+class VercelAccountSerializer(serializers.Serializer):\n+    name = serializers.CharField(required=False, allow_blank=True, help_text=\"Account name (optional).\")\n+    url = serializers.URLField(help_text=\"URL of the account.\")\n+    contact = VercelContactSerializer(help_text=\"Contact information for the account.\")\n+\n+\n+class UpsertInstallationPayloadSerializer(serializers.Serializer):\n+    scopes = serializers.ListField(\n+        child=serializers.CharField(), min_length=1, help_text=\"Array of scopes, must have at least one. Min Length: 1\"\n+    )\n+    acceptedPolicies = serializers.DictField(\n+        child=serializers.JSONField(),\n+        help_text='Policies accepted by the customer. Example: { \"toc\": \"2024-02-28T10:00:00Z\" }',\n+    )\n+    credentials = VercelCredentialsSerializer(\n+        help_text=\"The service-account access token to access marketplace and integration APIs on behalf of a customer's installation.\"\n+    )\n+    account = VercelAccountSerializer(\n+        help_text=\"The account information for this installation. Use Get Account Info API to re-fetch this data post installation.\"\n+    )\n+\n+\n+class VercelInstallationSerializer(serializers.ModelSerializer):\n+    class Meta:\n+        model = VercelInstallation\n+        fields = \"__all__\"\n+\n+\n+class VercelInstallationViewSet(\n+    mixins.RetrieveModelMixin, mixins.UpdateModelMixin, mixins.DestroyModelMixin, viewsets.GenericViewSet\n+):\n+    queryset = VercelInstallation.objects.all()\n+    serializer_class = VercelInstallationSerializer\n+    lookup_field = \"installation_id\"\n+    authentication_classes = [VercelAuthentication]\n+    permission_classes = [VercelInstallationPermission]\n+\n+    supported_auth_types = {\n+        \"update\": [\"User\"],\n+        \"partial_update\": [\"User\"],\n+        \"destroy\": [\"User\", \"System\"],\n+        \"retrieve\": [\"System\"],\n+        \"plans\": [\"System\"],\n+    }\n+\n+    def update(self, request: Request, *args: Any, **kwargs: Any) -> Response:\n+        \"\"\"\n+        Implements: https://vercel.com/docs/integrations/create-integration/marketplace-api#upsert-installation\n+        \"\"\"\n+        serializer: UpsertInstallationPayloadSerializer = UpsertInstallationPayloadSerializer(data=request.data)\n+        if not serializer.is_valid():\n+            raise exceptions.ValidationError(detail=serializer.errors)\n+\n+        installation_id = self.kwargs[\"installation_id\"]\n+\n+        try:\n+            # TODO: Not sure if this is the best move because users might be confused\n+            # by the default project created here and their \"Resource\" project.\n+            organization, _, user = User.objects.bootstrap(\n+                is_staff=False,\n+                is_email_verified=True,",
    "repo_full_name": "PostHog/posthog",
    "discussion_comments": [
      {
        "comment_id": "2269240589",
        "repo_full_name": "PostHog/posthog",
        "pr_number": 36394,
        "pr_file": "ee/api/vercel/vercel_installation.py",
        "discussion_id": "2269240589",
        "commented_code": "@@ -0,0 +1,321 @@\n+\"\"\"\n+Implements the Vercel Marketplace API server for managing marketplace installations.\n+\n+Biggest problem here is that we don't yet conform to Vercel's response schema.\n+\n+See:\n+https://vercel.com/docs/integrations/create-integration/marketplace-api\n+\"\"\"\n+\n+from typing import Any\n+from django.conf import settings\n+from django.db import IntegrityError\n+from rest_framework import serializers, viewsets, exceptions\n+from rest_framework.request import Request\n+from rest_framework.response import Response\n+from rest_framework import mixins\n+from rest_framework.permissions import BasePermission\n+from ee.api.authentication import VercelAuthentication\n+from posthog.event_usage import report_user_signed_up\n+from posthog.models.user import User\n+from ee.models.vercel.vercel_installation import VercelInstallation\n+from rest_framework import decorators\n+\n+\n+def get_vercel_plans() -> list[dict[str, Any]]:\n+    \"\"\"Get PostHog plans formatted for Vercel Marketplace\"\"\"\n+    return [\n+        {\n+            \"id\": \"free\",\n+            \"type\": \"subscription\",\n+            \"name\": \"Free\",\n+            \"description\": \"No credit card required\",\n+            \"scope\": \"installation\",\n+            \"paymentMethodRequired\": False,\n+            \"details\": [\n+                {\"label\": \"Data retention\", \"value\": \"1 year\"},\n+                {\"label\": \"Projects\", \"value\": \"1\"},\n+                {\"label\": \"Team members\", \"value\": \"Unlimited\"},\n+                {\"label\": \"API Access\", \"value\": \"\u2713\"},\n+                {\"label\": \"No limits on tracked users\", \"value\": \"\u2713\"},\n+                {\"label\": \"Community support\", \"value\": \"Support via community forum\"},\n+            ],\n+            \"highlightedDetails\": [\n+                {\"label\": \"Feature Flags\", \"value\": \"1 million free requests\"},\n+                {\"label\": \"Experiments\", \"value\": \"1 million free requests\"},\n+            ],\n+        },\n+        {\n+            \"id\": \"pay_as_you_go\",\n+            \"type\": \"subscription\",\n+            \"name\": \"Pay-as-you-go\",\n+            \"description\": \"Usage-based pricing after free tier\",\n+            \"scope\": \"installation\",\n+            \"paymentMethodRequired\": True,\n+            \"details\": [\n+                {\"label\": \"Data retention\", \"value\": \"7 years\"},\n+                {\"label\": \"Projects\", \"value\": \"6\"},\n+                {\"label\": \"Team members\", \"value\": \"Unlimited\"},\n+                {\"label\": \"API Access\", \"value\": \"\u2713\"},\n+                {\"label\": \"No limits on tracked users\", \"value\": \"\u2713\"},\n+                {\"label\": \"Standard support\", \"value\": \"Support via email, Slack-based over $2k/mo\"},\n+            ],\n+            \"highlightedDetails\": [\n+                {\"label\": \"Feature flags\", \"value\": \"1 million requests for free, then from $0.0001/request\"},\n+                {\"label\": \"Experiments\", \"value\": \"Billed with feature flags\"},\n+            ],\n+        },\n+    ]\n+\n+\n+class VercelInstallationPermission(BasePermission):\n+    \"\"\"\n+    Custom permission that validates Vercel auth type and installation ID match.\n+    Vercel auth type is determined by the X-Vercel-Auth header, and can differ per endpoint.\n+    See Marketplace API spec.\n+    \"\"\"\n+\n+    def has_permission(self, request: Request, view) -> bool:\n+        self._validate_auth_type_allowed(request, view)\n+        return True\n+\n+    def has_object_permission(self, request: Request, view, obj) -> bool:\n+        self._validate_installation_id_match(request, view)\n+        return True\n+\n+    def _get_supported_auth_types(self, view) -> list[str]:\n+        \"\"\"\n+        Get supported auth types for the current action from the viewset.\n+        Supported auth type is specified by the marketplace API spec.\n+        \"\"\"\n+        return getattr(view, \"supported_auth_types\", {}).get(view.action, [\"User\", \"System\"])\n+\n+    def _validate_auth_type_allowed(self, request: Request, view) -> None:\n+        \"\"\"Validate that the auth type from X-Vercel-Auth header is allowed for this endpoint\"\"\"\n+        auth_type = request.headers.get(\"X-Vercel-Auth\", \"\").lower()\n+        if not auth_type:\n+            raise exceptions.AuthenticationFailed(\"Missing X-Vercel-Auth header\")\n+\n+        auth_type_title = auth_type.title()\n+        supported_types = self._get_supported_auth_types(view)\n+\n+        if auth_type_title not in supported_types:\n+            raise exceptions.PermissionDenied(\n+                f\"Auth type '{auth_type_title}' not allowed for this endpoint. \"\n+                f\"Supported types: {', '.join(supported_types)}\"\n+            )\n+\n+    def _validate_installation_id_match(self, request: Request, view) -> None:\n+        \"\"\"Validate that JWT installation_id matches URL parameter\"\"\"\n+        jwt_payload = self._get_jwt_payload(request)\n+\n+        # installation_id when going through vercel_installation ViewSet,\n+        # or parent_lookup_installation_id when going through vercel_resource\n+        installation_id = view.kwargs.get(\"installation_id\") or view.kwargs.get(\"parent_lookup_installation_id\")\n+\n+        if jwt_payload.get(\"installation_id\") != installation_id:\n+            raise exceptions.PermissionDenied(\"Installation ID mismatch\")\n+\n+    def _get_jwt_payload(self, request: Request) -> dict[str, Any]:\n+        \"\"\"Extract JWT payload from authenticated request\"\"\"\n+        if hasattr(request, \"auth\") and isinstance(request.auth, dict) and request.auth:\n+            return request.auth\n+        raise exceptions.AuthenticationFailed(\"No valid JWT authentication found\")\n+\n+\n+class VercelCredentialsSerializer(serializers.Serializer):\n+    access_token = serializers.CharField(help_text=\"Access token authorizes marketplace and integration APIs.\")\n+    token_type = serializers.CharField(help_text=\"The type of token (default: Bearer).\")\n+\n+\n+class VercelContactSerializer(serializers.Serializer):\n+    email = serializers.EmailField(help_text=\"Contact email address for the account.\")\n+    name = serializers.CharField(required=False, allow_blank=True, help_text=\"Contact name for the account (optional).\")\n+\n+\n+class VercelAccountSerializer(serializers.Serializer):\n+    name = serializers.CharField(required=False, allow_blank=True, help_text=\"Account name (optional).\")\n+    url = serializers.URLField(help_text=\"URL of the account.\")\n+    contact = VercelContactSerializer(help_text=\"Contact information for the account.\")\n+\n+\n+class UpsertInstallationPayloadSerializer(serializers.Serializer):\n+    scopes = serializers.ListField(\n+        child=serializers.CharField(), min_length=1, help_text=\"Array of scopes, must have at least one. Min Length: 1\"\n+    )\n+    acceptedPolicies = serializers.DictField(\n+        child=serializers.JSONField(),\n+        help_text='Policies accepted by the customer. Example: { \"toc\": \"2024-02-28T10:00:00Z\" }',\n+    )\n+    credentials = VercelCredentialsSerializer(\n+        help_text=\"The service-account access token to access marketplace and integration APIs on behalf of a customer's installation.\"\n+    )\n+    account = VercelAccountSerializer(\n+        help_text=\"The account information for this installation. Use Get Account Info API to re-fetch this data post installation.\"\n+    )\n+\n+\n+class VercelInstallationSerializer(serializers.ModelSerializer):\n+    class Meta:\n+        model = VercelInstallation\n+        fields = \"__all__\"\n+\n+\n+class VercelInstallationViewSet(\n+    mixins.RetrieveModelMixin, mixins.UpdateModelMixin, mixins.DestroyModelMixin, viewsets.GenericViewSet\n+):\n+    queryset = VercelInstallation.objects.all()\n+    serializer_class = VercelInstallationSerializer\n+    lookup_field = \"installation_id\"\n+    authentication_classes = [VercelAuthentication]\n+    permission_classes = [VercelInstallationPermission]\n+\n+    supported_auth_types = {\n+        \"update\": [\"User\"],\n+        \"partial_update\": [\"User\"],\n+        \"destroy\": [\"User\", \"System\"],\n+        \"retrieve\": [\"System\"],\n+        \"plans\": [\"System\"],\n+    }\n+\n+    def update(self, request: Request, *args: Any, **kwargs: Any) -> Response:\n+        \"\"\"\n+        Implements: https://vercel.com/docs/integrations/create-integration/marketplace-api#upsert-installation\n+        \"\"\"\n+        serializer: UpsertInstallationPayloadSerializer = UpsertInstallationPayloadSerializer(data=request.data)\n+        if not serializer.is_valid():\n+            raise exceptions.ValidationError(detail=serializer.errors)\n+\n+        installation_id = self.kwargs[\"installation_id\"]\n+\n+        try:\n+            # TODO: Not sure if this is the best move because users might be confused\n+            # by the default project created here and their \"Resource\" project.\n+            organization, _, user = User.objects.bootstrap(\n+                is_staff=False,\n+                is_email_verified=True,",
        "comment_created_at": "2025-08-12T09:20:40+00:00",
        "comment_author": "joshsny",
        "comment_body": "This is dodgy, as it makes our email verification dependent on Vercel's - we probably need to keep their email unverified",
        "pr_file_module": null
      },
      {
        "comment_id": "2269591556",
        "repo_full_name": "PostHog/posthog",
        "pr_number": 36394,
        "pr_file": "ee/api/vercel/vercel_installation.py",
        "discussion_id": "2269240589",
        "commented_code": "@@ -0,0 +1,321 @@\n+\"\"\"\n+Implements the Vercel Marketplace API server for managing marketplace installations.\n+\n+Biggest problem here is that we don't yet conform to Vercel's response schema.\n+\n+See:\n+https://vercel.com/docs/integrations/create-integration/marketplace-api\n+\"\"\"\n+\n+from typing import Any\n+from django.conf import settings\n+from django.db import IntegrityError\n+from rest_framework import serializers, viewsets, exceptions\n+from rest_framework.request import Request\n+from rest_framework.response import Response\n+from rest_framework import mixins\n+from rest_framework.permissions import BasePermission\n+from ee.api.authentication import VercelAuthentication\n+from posthog.event_usage import report_user_signed_up\n+from posthog.models.user import User\n+from ee.models.vercel.vercel_installation import VercelInstallation\n+from rest_framework import decorators\n+\n+\n+def get_vercel_plans() -> list[dict[str, Any]]:\n+    \"\"\"Get PostHog plans formatted for Vercel Marketplace\"\"\"\n+    return [\n+        {\n+            \"id\": \"free\",\n+            \"type\": \"subscription\",\n+            \"name\": \"Free\",\n+            \"description\": \"No credit card required\",\n+            \"scope\": \"installation\",\n+            \"paymentMethodRequired\": False,\n+            \"details\": [\n+                {\"label\": \"Data retention\", \"value\": \"1 year\"},\n+                {\"label\": \"Projects\", \"value\": \"1\"},\n+                {\"label\": \"Team members\", \"value\": \"Unlimited\"},\n+                {\"label\": \"API Access\", \"value\": \"\u2713\"},\n+                {\"label\": \"No limits on tracked users\", \"value\": \"\u2713\"},\n+                {\"label\": \"Community support\", \"value\": \"Support via community forum\"},\n+            ],\n+            \"highlightedDetails\": [\n+                {\"label\": \"Feature Flags\", \"value\": \"1 million free requests\"},\n+                {\"label\": \"Experiments\", \"value\": \"1 million free requests\"},\n+            ],\n+        },\n+        {\n+            \"id\": \"pay_as_you_go\",\n+            \"type\": \"subscription\",\n+            \"name\": \"Pay-as-you-go\",\n+            \"description\": \"Usage-based pricing after free tier\",\n+            \"scope\": \"installation\",\n+            \"paymentMethodRequired\": True,\n+            \"details\": [\n+                {\"label\": \"Data retention\", \"value\": \"7 years\"},\n+                {\"label\": \"Projects\", \"value\": \"6\"},\n+                {\"label\": \"Team members\", \"value\": \"Unlimited\"},\n+                {\"label\": \"API Access\", \"value\": \"\u2713\"},\n+                {\"label\": \"No limits on tracked users\", \"value\": \"\u2713\"},\n+                {\"label\": \"Standard support\", \"value\": \"Support via email, Slack-based over $2k/mo\"},\n+            ],\n+            \"highlightedDetails\": [\n+                {\"label\": \"Feature flags\", \"value\": \"1 million requests for free, then from $0.0001/request\"},\n+                {\"label\": \"Experiments\", \"value\": \"Billed with feature flags\"},\n+            ],\n+        },\n+    ]\n+\n+\n+class VercelInstallationPermission(BasePermission):\n+    \"\"\"\n+    Custom permission that validates Vercel auth type and installation ID match.\n+    Vercel auth type is determined by the X-Vercel-Auth header, and can differ per endpoint.\n+    See Marketplace API spec.\n+    \"\"\"\n+\n+    def has_permission(self, request: Request, view) -> bool:\n+        self._validate_auth_type_allowed(request, view)\n+        return True\n+\n+    def has_object_permission(self, request: Request, view, obj) -> bool:\n+        self._validate_installation_id_match(request, view)\n+        return True\n+\n+    def _get_supported_auth_types(self, view) -> list[str]:\n+        \"\"\"\n+        Get supported auth types for the current action from the viewset.\n+        Supported auth type is specified by the marketplace API spec.\n+        \"\"\"\n+        return getattr(view, \"supported_auth_types\", {}).get(view.action, [\"User\", \"System\"])\n+\n+    def _validate_auth_type_allowed(self, request: Request, view) -> None:\n+        \"\"\"Validate that the auth type from X-Vercel-Auth header is allowed for this endpoint\"\"\"\n+        auth_type = request.headers.get(\"X-Vercel-Auth\", \"\").lower()\n+        if not auth_type:\n+            raise exceptions.AuthenticationFailed(\"Missing X-Vercel-Auth header\")\n+\n+        auth_type_title = auth_type.title()\n+        supported_types = self._get_supported_auth_types(view)\n+\n+        if auth_type_title not in supported_types:\n+            raise exceptions.PermissionDenied(\n+                f\"Auth type '{auth_type_title}' not allowed for this endpoint. \"\n+                f\"Supported types: {', '.join(supported_types)}\"\n+            )\n+\n+    def _validate_installation_id_match(self, request: Request, view) -> None:\n+        \"\"\"Validate that JWT installation_id matches URL parameter\"\"\"\n+        jwt_payload = self._get_jwt_payload(request)\n+\n+        # installation_id when going through vercel_installation ViewSet,\n+        # or parent_lookup_installation_id when going through vercel_resource\n+        installation_id = view.kwargs.get(\"installation_id\") or view.kwargs.get(\"parent_lookup_installation_id\")\n+\n+        if jwt_payload.get(\"installation_id\") != installation_id:\n+            raise exceptions.PermissionDenied(\"Installation ID mismatch\")\n+\n+    def _get_jwt_payload(self, request: Request) -> dict[str, Any]:\n+        \"\"\"Extract JWT payload from authenticated request\"\"\"\n+        if hasattr(request, \"auth\") and isinstance(request.auth, dict) and request.auth:\n+            return request.auth\n+        raise exceptions.AuthenticationFailed(\"No valid JWT authentication found\")\n+\n+\n+class VercelCredentialsSerializer(serializers.Serializer):\n+    access_token = serializers.CharField(help_text=\"Access token authorizes marketplace and integration APIs.\")\n+    token_type = serializers.CharField(help_text=\"The type of token (default: Bearer).\")\n+\n+\n+class VercelContactSerializer(serializers.Serializer):\n+    email = serializers.EmailField(help_text=\"Contact email address for the account.\")\n+    name = serializers.CharField(required=False, allow_blank=True, help_text=\"Contact name for the account (optional).\")\n+\n+\n+class VercelAccountSerializer(serializers.Serializer):\n+    name = serializers.CharField(required=False, allow_blank=True, help_text=\"Account name (optional).\")\n+    url = serializers.URLField(help_text=\"URL of the account.\")\n+    contact = VercelContactSerializer(help_text=\"Contact information for the account.\")\n+\n+\n+class UpsertInstallationPayloadSerializer(serializers.Serializer):\n+    scopes = serializers.ListField(\n+        child=serializers.CharField(), min_length=1, help_text=\"Array of scopes, must have at least one. Min Length: 1\"\n+    )\n+    acceptedPolicies = serializers.DictField(\n+        child=serializers.JSONField(),\n+        help_text='Policies accepted by the customer. Example: { \"toc\": \"2024-02-28T10:00:00Z\" }',\n+    )\n+    credentials = VercelCredentialsSerializer(\n+        help_text=\"The service-account access token to access marketplace and integration APIs on behalf of a customer's installation.\"\n+    )\n+    account = VercelAccountSerializer(\n+        help_text=\"The account information for this installation. Use Get Account Info API to re-fetch this data post installation.\"\n+    )\n+\n+\n+class VercelInstallationSerializer(serializers.ModelSerializer):\n+    class Meta:\n+        model = VercelInstallation\n+        fields = \"__all__\"\n+\n+\n+class VercelInstallationViewSet(\n+    mixins.RetrieveModelMixin, mixins.UpdateModelMixin, mixins.DestroyModelMixin, viewsets.GenericViewSet\n+):\n+    queryset = VercelInstallation.objects.all()\n+    serializer_class = VercelInstallationSerializer\n+    lookup_field = \"installation_id\"\n+    authentication_classes = [VercelAuthentication]\n+    permission_classes = [VercelInstallationPermission]\n+\n+    supported_auth_types = {\n+        \"update\": [\"User\"],\n+        \"partial_update\": [\"User\"],\n+        \"destroy\": [\"User\", \"System\"],\n+        \"retrieve\": [\"System\"],\n+        \"plans\": [\"System\"],\n+    }\n+\n+    def update(self, request: Request, *args: Any, **kwargs: Any) -> Response:\n+        \"\"\"\n+        Implements: https://vercel.com/docs/integrations/create-integration/marketplace-api#upsert-installation\n+        \"\"\"\n+        serializer: UpsertInstallationPayloadSerializer = UpsertInstallationPayloadSerializer(data=request.data)\n+        if not serializer.is_valid():\n+            raise exceptions.ValidationError(detail=serializer.errors)\n+\n+        installation_id = self.kwargs[\"installation_id\"]\n+\n+        try:\n+            # TODO: Not sure if this is the best move because users might be confused\n+            # by the default project created here and their \"Resource\" project.\n+            organization, _, user = User.objects.bootstrap(\n+                is_staff=False,\n+                is_email_verified=True,",
        "comment_created_at": "2025-08-12T11:49:20+00:00",
        "comment_author": "JonathanLab",
        "comment_body": "Agree, have set it to False",
        "pr_file_module": null
      }
    ]
  },
  {
    "discussion_id": "2204227072",
    "pr_number": 33948,
    "pr_file": "posthog/api/survey.py",
    "created_at": "2025-07-14T08:38:36+00:00",
    "commented_code": "),\n         )\n \n+    # If survey_id is provided, return individual survey\n+    if survey_id:\n+        try:\n+            survey = Survey.objects.select_related(\"linked_flag\", \"targeting_flag\", \"internal_targeting_flag\").get(\n+                id=survey_id, team=team\n+            )\n+        except Survey.DoesNotExist:\n+            return cors_response(\n+                request,\n+                generate_exception_response(\n+                    \"surveys\",\n+                    \"Survey not found.\",\n+                    type=\"not_found\",\n+                    code=\"survey_not_found\",\n+                    status_code=status.HTTP_404_NOT_FOUND,\n+                ),\n+            )\n+\n+        # Check if survey is archived\n+        if survey.archived:\n+            return cors_response(\n+                request,\n+                generate_exception_response(\n+                    \"surveys\",\n+                    \"This survey is no longer available.\",\n+                    type=\"not_found\",\n+                    code=\"survey_archived\",\n+                    status_code=status.HTTP_404_NOT_FOUND,\n+                ),\n+            )\n+\n+        # Return individual survey response\n+        serialized_survey = SurveyAPISerializer(survey).data\n+        response_data = {\n+            \"survey\": serialized_survey,\n+            \"project_config\": {\n+                \"api_host\": request.build_absolute_uri(\"/\").rstrip(\"/\"),\n+                \"token\": team.api_token,\n+            },\n+        }\n+        return cors_response(request, JsonResponse(response_data))\n+\n+    # Return all surveys (existing behavior)\n     return cors_response(request, JsonResponse(get_surveys_response(team)))\n \n \n+# Constants for better maintainability\n+logger = structlog.get_logger(__name__)\n+SURVEY_ID_MAX_LENGTH = 50\n+CACHE_TIMEOUT_SECONDS = 300\n+\n+\n+def is_valid_uuid(uuid_string: str) -> bool:\n+    \"\"\"Validate if a string is a valid UUID format.\"\"\"\n+    try:\n+        uuid.UUID(uuid_string)\n+        return True\n+    except (ValueError, TypeError):\n+        return False\n+\n+\n+@csrf_exempt\n+@axes_dispatch\n+def public_survey_page(request, survey_id: str):\n+    \"\"\"\n+    Server-side rendered public survey page with security and performance optimizations\n+    \"\"\"\n+    if request.method == \"OPTIONS\":\n+        return cors_response(request, HttpResponse(\"\"))\n+\n+    # Input validation\n+    if not is_valid_uuid(survey_id) or len(survey_id) > SURVEY_ID_MAX_LENGTH:\n+        logger.warning(\"survey_page_invalid_id\", survey_id=survey_id)\n+        return render(\n+            request,\n+            \"surveys/error.html\",\n+            {\n+                \"error_title\": \"Invalid Request\",\n+                \"error_message\": \"The requested survey is not available.\",\n+            },\n+            status=400,\n+        )\n+\n+    # Database query with minimal fields and timeout protection\n+    try:\n+        survey = (\n+            Survey.objects.select_related(\"team\")\n+            .only(\"id\", \"name\", \"appearance\", \"archived\", \"is_publicly_shareable\", \"team__id\", \"team__api_token\")\n+            .get(id=survey_id)\n+        )\n+    except Survey.DoesNotExist:\n+        logger.info(\"survey_page_not_found\", survey_id=survey_id)\n+        # Use generic error message to prevent survey ID enumeration\n+        return render(\n+            request,\n+            \"surveys/error.html\",\n+            {\n+                \"error_title\": \"Survey Not Available\",\n+                \"error_message\": \"The requested survey is not available.\",\n+            },\n+            status=404,\n+        )\n+    except Exception as e:\n+        logger.exception(\"survey_page_db_error\", error=str(e), survey_id=survey_id)\n+        capture_exception(e)\n+        return render(\n+            request,\n+            \"surveys/error.html\",\n+            {\n+                \"error_title\": \"Service Unavailable\",\n+                \"error_message\": \"The service is temporarily unavailable. Please try again later.\",\n+            },\n+            status=503,\n+        )\n+\n+    survey_is_running = (\n+        survey.start_date is not None and survey.start_date <= datetime.now(UTC) and survey.end_date is None\n+    )\n+\n+    # Check survey availability (combine checks for consistent error message)\n+    if survey.archived or not survey.is_publicly_shareable or not survey_is_running:\n+        logger.info(\n+            \"survey_page_access_denied\",\n+            survey_id=survey_id,\n+            archived=survey.archived,\n+            publicly_shareable=survey.is_publicly_shareable,\n+        )\n+        return render(\n+            request,\n+            \"surveys/error.html\",\n+            {\n+                \"error_title\": \"Survey not receiving responses\",\n+                \"error_message\": \"The requested survey is not receiving responses.\",\n+            },\n+            status=404,  # Use 404 instead of 403 to prevent information leakage\n+        )\n+\n+    # Build project config\n+    project_config = {\n+        \"api_host\": request.build_absolute_uri(\"/\").rstrip(\"/\"),\n+        \"token\": survey.team.api_token,\n+    }\n+\n+    if hasattr(survey.team, \"ui_host\") and survey.team.ui_host:\n+        project_config[\"ui_host\"] = survey.team.ui_host\n+\n+    context = {\n+        \"name\": survey.name,\n+        \"id\": survey.id,\n+        \"appearance\": json.dumps(survey.appearance),\n+        \"project_config_json\": json.dumps(project_config),\n+        \"debug\": settings.DEBUG,\n+    }\n+\n+    logger.info(\"survey_page_rendered\", survey_id=survey_id, team_id=survey.team.id)\n+\n+    response = render(request, \"surveys/public_survey.html\", context)\n+\n+    # Security headers\n+    response[\"X-Frame-Options\"] = \"DENY\"\n+    response[\"X-Content-Type-Options\"] = \"nosniff\"\n+    response[\"Referrer-Policy\"] = \"strict-origin-when-cross-origin\"\n+    response[\"Permissions-Policy\"] = \"accelerometer=(), camera=(), microphone=(), geolocation=()\"\n+    response[\"X-XSS-Protection\"] = \"1; mode=block\"\n+\n+    # Cache headers\n+    response[\"Cache-Control\"] = f\"public, max-age={CACHE_TIMEOUT_SECONDS}\"\n+    response[\"Vary\"] = \"Accept-Encoding\"  # Enable compression caching",
    "repo_full_name": "PostHog/posthog",
    "discussion_comments": [
      {
        "comment_id": "2204227072",
        "repo_full_name": "PostHog/posthog",
        "pr_number": 33948,
        "pr_file": "posthog/api/survey.py",
        "discussion_id": "2204227072",
        "commented_code": "@@ -1386,9 +1394,178 @@ def surveys(request: Request):\n             ),\n         )\n \n+    # If survey_id is provided, return individual survey\n+    if survey_id:\n+        try:\n+            survey = Survey.objects.select_related(\"linked_flag\", \"targeting_flag\", \"internal_targeting_flag\").get(\n+                id=survey_id, team=team\n+            )\n+        except Survey.DoesNotExist:\n+            return cors_response(\n+                request,\n+                generate_exception_response(\n+                    \"surveys\",\n+                    \"Survey not found.\",\n+                    type=\"not_found\",\n+                    code=\"survey_not_found\",\n+                    status_code=status.HTTP_404_NOT_FOUND,\n+                ),\n+            )\n+\n+        # Check if survey is archived\n+        if survey.archived:\n+            return cors_response(\n+                request,\n+                generate_exception_response(\n+                    \"surveys\",\n+                    \"This survey is no longer available.\",\n+                    type=\"not_found\",\n+                    code=\"survey_archived\",\n+                    status_code=status.HTTP_404_NOT_FOUND,\n+                ),\n+            )\n+\n+        # Return individual survey response\n+        serialized_survey = SurveyAPISerializer(survey).data\n+        response_data = {\n+            \"survey\": serialized_survey,\n+            \"project_config\": {\n+                \"api_host\": request.build_absolute_uri(\"/\").rstrip(\"/\"),\n+                \"token\": team.api_token,\n+            },\n+        }\n+        return cors_response(request, JsonResponse(response_data))\n+\n+    # Return all surveys (existing behavior)\n     return cors_response(request, JsonResponse(get_surveys_response(team)))\n \n \n+# Constants for better maintainability\n+logger = structlog.get_logger(__name__)\n+SURVEY_ID_MAX_LENGTH = 50\n+CACHE_TIMEOUT_SECONDS = 300\n+\n+\n+def is_valid_uuid(uuid_string: str) -> bool:\n+    \"\"\"Validate if a string is a valid UUID format.\"\"\"\n+    try:\n+        uuid.UUID(uuid_string)\n+        return True\n+    except (ValueError, TypeError):\n+        return False\n+\n+\n+@csrf_exempt\n+@axes_dispatch\n+def public_survey_page(request, survey_id: str):\n+    \"\"\"\n+    Server-side rendered public survey page with security and performance optimizations\n+    \"\"\"\n+    if request.method == \"OPTIONS\":\n+        return cors_response(request, HttpResponse(\"\"))\n+\n+    # Input validation\n+    if not is_valid_uuid(survey_id) or len(survey_id) > SURVEY_ID_MAX_LENGTH:\n+        logger.warning(\"survey_page_invalid_id\", survey_id=survey_id)\n+        return render(\n+            request,\n+            \"surveys/error.html\",\n+            {\n+                \"error_title\": \"Invalid Request\",\n+                \"error_message\": \"The requested survey is not available.\",\n+            },\n+            status=400,\n+        )\n+\n+    # Database query with minimal fields and timeout protection\n+    try:\n+        survey = (\n+            Survey.objects.select_related(\"team\")\n+            .only(\"id\", \"name\", \"appearance\", \"archived\", \"is_publicly_shareable\", \"team__id\", \"team__api_token\")\n+            .get(id=survey_id)\n+        )\n+    except Survey.DoesNotExist:\n+        logger.info(\"survey_page_not_found\", survey_id=survey_id)\n+        # Use generic error message to prevent survey ID enumeration\n+        return render(\n+            request,\n+            \"surveys/error.html\",\n+            {\n+                \"error_title\": \"Survey Not Available\",\n+                \"error_message\": \"The requested survey is not available.\",\n+            },\n+            status=404,\n+        )\n+    except Exception as e:\n+        logger.exception(\"survey_page_db_error\", error=str(e), survey_id=survey_id)\n+        capture_exception(e)\n+        return render(\n+            request,\n+            \"surveys/error.html\",\n+            {\n+                \"error_title\": \"Service Unavailable\",\n+                \"error_message\": \"The service is temporarily unavailable. Please try again later.\",\n+            },\n+            status=503,\n+        )\n+\n+    survey_is_running = (\n+        survey.start_date is not None and survey.start_date <= datetime.now(UTC) and survey.end_date is None\n+    )\n+\n+    # Check survey availability (combine checks for consistent error message)\n+    if survey.archived or not survey.is_publicly_shareable or not survey_is_running:\n+        logger.info(\n+            \"survey_page_access_denied\",\n+            survey_id=survey_id,\n+            archived=survey.archived,\n+            publicly_shareable=survey.is_publicly_shareable,\n+        )\n+        return render(\n+            request,\n+            \"surveys/error.html\",\n+            {\n+                \"error_title\": \"Survey not receiving responses\",\n+                \"error_message\": \"The requested survey is not receiving responses.\",\n+            },\n+            status=404,  # Use 404 instead of 403 to prevent information leakage\n+        )\n+\n+    # Build project config\n+    project_config = {\n+        \"api_host\": request.build_absolute_uri(\"/\").rstrip(\"/\"),\n+        \"token\": survey.team.api_token,\n+    }\n+\n+    if hasattr(survey.team, \"ui_host\") and survey.team.ui_host:\n+        project_config[\"ui_host\"] = survey.team.ui_host\n+\n+    context = {\n+        \"name\": survey.name,\n+        \"id\": survey.id,\n+        \"appearance\": json.dumps(survey.appearance),\n+        \"project_config_json\": json.dumps(project_config),\n+        \"debug\": settings.DEBUG,\n+    }\n+\n+    logger.info(\"survey_page_rendered\", survey_id=survey_id, team_id=survey.team.id)\n+\n+    response = render(request, \"surveys/public_survey.html\", context)\n+\n+    # Security headers\n+    response[\"X-Frame-Options\"] = \"DENY\"\n+    response[\"X-Content-Type-Options\"] = \"nosniff\"\n+    response[\"Referrer-Policy\"] = \"strict-origin-when-cross-origin\"\n+    response[\"Permissions-Policy\"] = \"accelerometer=(), camera=(), microphone=(), geolocation=()\"\n+    response[\"X-XSS-Protection\"] = \"1; mode=block\"\n+\n+    # Cache headers\n+    response[\"Cache-Control\"] = f\"public, max-age={CACHE_TIMEOUT_SECONDS}\"\n+    response[\"Vary\"] = \"Accept-Encoding\"  # Enable compression caching",
        "comment_created_at": "2025-07-14T08:38:36+00:00",
        "comment_author": "marandaneto",
        "comment_body": "do we need all of that? do we set this elsewhere or how did we come up with those headers? just curious since i dont know much about cors, etc",
        "pr_file_module": null
      },
      {
        "comment_id": "2206040319",
        "repo_full_name": "PostHog/posthog",
        "pr_number": 33948,
        "pr_file": "posthog/api/survey.py",
        "discussion_id": "2204227072",
        "commented_code": "@@ -1386,9 +1394,178 @@ def surveys(request: Request):\n             ),\n         )\n \n+    # If survey_id is provided, return individual survey\n+    if survey_id:\n+        try:\n+            survey = Survey.objects.select_related(\"linked_flag\", \"targeting_flag\", \"internal_targeting_flag\").get(\n+                id=survey_id, team=team\n+            )\n+        except Survey.DoesNotExist:\n+            return cors_response(\n+                request,\n+                generate_exception_response(\n+                    \"surveys\",\n+                    \"Survey not found.\",\n+                    type=\"not_found\",\n+                    code=\"survey_not_found\",\n+                    status_code=status.HTTP_404_NOT_FOUND,\n+                ),\n+            )\n+\n+        # Check if survey is archived\n+        if survey.archived:\n+            return cors_response(\n+                request,\n+                generate_exception_response(\n+                    \"surveys\",\n+                    \"This survey is no longer available.\",\n+                    type=\"not_found\",\n+                    code=\"survey_archived\",\n+                    status_code=status.HTTP_404_NOT_FOUND,\n+                ),\n+            )\n+\n+        # Return individual survey response\n+        serialized_survey = SurveyAPISerializer(survey).data\n+        response_data = {\n+            \"survey\": serialized_survey,\n+            \"project_config\": {\n+                \"api_host\": request.build_absolute_uri(\"/\").rstrip(\"/\"),\n+                \"token\": team.api_token,\n+            },\n+        }\n+        return cors_response(request, JsonResponse(response_data))\n+\n+    # Return all surveys (existing behavior)\n     return cors_response(request, JsonResponse(get_surveys_response(team)))\n \n \n+# Constants for better maintainability\n+logger = structlog.get_logger(__name__)\n+SURVEY_ID_MAX_LENGTH = 50\n+CACHE_TIMEOUT_SECONDS = 300\n+\n+\n+def is_valid_uuid(uuid_string: str) -> bool:\n+    \"\"\"Validate if a string is a valid UUID format.\"\"\"\n+    try:\n+        uuid.UUID(uuid_string)\n+        return True\n+    except (ValueError, TypeError):\n+        return False\n+\n+\n+@csrf_exempt\n+@axes_dispatch\n+def public_survey_page(request, survey_id: str):\n+    \"\"\"\n+    Server-side rendered public survey page with security and performance optimizations\n+    \"\"\"\n+    if request.method == \"OPTIONS\":\n+        return cors_response(request, HttpResponse(\"\"))\n+\n+    # Input validation\n+    if not is_valid_uuid(survey_id) or len(survey_id) > SURVEY_ID_MAX_LENGTH:\n+        logger.warning(\"survey_page_invalid_id\", survey_id=survey_id)\n+        return render(\n+            request,\n+            \"surveys/error.html\",\n+            {\n+                \"error_title\": \"Invalid Request\",\n+                \"error_message\": \"The requested survey is not available.\",\n+            },\n+            status=400,\n+        )\n+\n+    # Database query with minimal fields and timeout protection\n+    try:\n+        survey = (\n+            Survey.objects.select_related(\"team\")\n+            .only(\"id\", \"name\", \"appearance\", \"archived\", \"is_publicly_shareable\", \"team__id\", \"team__api_token\")\n+            .get(id=survey_id)\n+        )\n+    except Survey.DoesNotExist:\n+        logger.info(\"survey_page_not_found\", survey_id=survey_id)\n+        # Use generic error message to prevent survey ID enumeration\n+        return render(\n+            request,\n+            \"surveys/error.html\",\n+            {\n+                \"error_title\": \"Survey Not Available\",\n+                \"error_message\": \"The requested survey is not available.\",\n+            },\n+            status=404,\n+        )\n+    except Exception as e:\n+        logger.exception(\"survey_page_db_error\", error=str(e), survey_id=survey_id)\n+        capture_exception(e)\n+        return render(\n+            request,\n+            \"surveys/error.html\",\n+            {\n+                \"error_title\": \"Service Unavailable\",\n+                \"error_message\": \"The service is temporarily unavailable. Please try again later.\",\n+            },\n+            status=503,\n+        )\n+\n+    survey_is_running = (\n+        survey.start_date is not None and survey.start_date <= datetime.now(UTC) and survey.end_date is None\n+    )\n+\n+    # Check survey availability (combine checks for consistent error message)\n+    if survey.archived or not survey.is_publicly_shareable or not survey_is_running:\n+        logger.info(\n+            \"survey_page_access_denied\",\n+            survey_id=survey_id,\n+            archived=survey.archived,\n+            publicly_shareable=survey.is_publicly_shareable,\n+        )\n+        return render(\n+            request,\n+            \"surveys/error.html\",\n+            {\n+                \"error_title\": \"Survey not receiving responses\",\n+                \"error_message\": \"The requested survey is not receiving responses.\",\n+            },\n+            status=404,  # Use 404 instead of 403 to prevent information leakage\n+        )\n+\n+    # Build project config\n+    project_config = {\n+        \"api_host\": request.build_absolute_uri(\"/\").rstrip(\"/\"),\n+        \"token\": survey.team.api_token,\n+    }\n+\n+    if hasattr(survey.team, \"ui_host\") and survey.team.ui_host:\n+        project_config[\"ui_host\"] = survey.team.ui_host\n+\n+    context = {\n+        \"name\": survey.name,\n+        \"id\": survey.id,\n+        \"appearance\": json.dumps(survey.appearance),\n+        \"project_config_json\": json.dumps(project_config),\n+        \"debug\": settings.DEBUG,\n+    }\n+\n+    logger.info(\"survey_page_rendered\", survey_id=survey_id, team_id=survey.team.id)\n+\n+    response = render(request, \"surveys/public_survey.html\", context)\n+\n+    # Security headers\n+    response[\"X-Frame-Options\"] = \"DENY\"\n+    response[\"X-Content-Type-Options\"] = \"nosniff\"\n+    response[\"Referrer-Policy\"] = \"strict-origin-when-cross-origin\"\n+    response[\"Permissions-Policy\"] = \"accelerometer=(), camera=(), microphone=(), geolocation=()\"\n+    response[\"X-XSS-Protection\"] = \"1; mode=block\"\n+\n+    # Cache headers\n+    response[\"Cache-Control\"] = f\"public, max-age={CACHE_TIMEOUT_SECONDS}\"\n+    response[\"Vary\"] = \"Accept-Encoding\"  # Enable compression caching",
        "comment_created_at": "2025-07-15T00:29:28+00:00",
        "comment_author": "lucasheriques",
        "comment_body": "we actually only need the `X-Frame-Options` to prevent our survey to be shown on iframes, since it's a potential liability and we don't have a need for that. will remove the others",
        "pr_file_module": null
      }
    ]
  }
]