mirror of https://github.com/baz-scm/awesome-reviewers.git, synced 2025-08-20 18:58:52 +03:00
[
{
"discussion_id": "2267627113",
"pr_number": 35962,
"pr_file": "ee/hogai/notebook/notebook_serializer.py",
"created_at": "2025-08-11T18:21:57+00:00",
"commented_code": "+import re\n+import logging\n+from urllib.parse import urlparse, unquote\n+from typing import Optional\n+\n+from posthog.schema import ProsemirrorJSONContent, Mark\n+\n+logger = logging.getLogger(__name__)\n+\n+\n+class MarkdownTokenizer:\n+ \"\"\"Simple markdown tokenizer that handles the most common markdown elements.\"\"\"\n+\n+ def __init__(self):\n+ self.tokens = []\n+ self.pos = 0\n+ self.text = \"\"\n+\n+ def tokenize(self, text: str) -> list[dict]:\n+ \"\"\"Tokenize markdown text into a list of tokens.\"\"\"\n+ self.text = text\n+ self.pos = 0\n+ self.tokens = []\n+\n+ while self.pos < len(self.text):\n+ if not self._try_parse_block_element():\n+ # If no block element found, parse as paragraph\n+ self._parse_paragraph()\n+\n+ return self.tokens\n+\n+ def _try_parse_block_element(self) -> bool:\n+ \"\"\"Try to parse a block-level element. Returns True if successful.\"\"\"\n+ # Skip empty lines\n+ if self._at_line_start() and self._current_line().strip() == \"\":\n+ self._skip_line()\n+ return True\n+\n+ # Try different block elements\n+ if self._try_parse_heading():\n+ return True\n+ if self._try_parse_code_block():\n+ return True\n+ if self._try_parse_blockquote():\n+ return True\n+ if self._try_parse_horizontal_rule():\n+ return True\n+ if self._try_parse_list():\n+ return True\n+\n+ return False\n+\n+ def _try_parse_heading(self) -> bool:\n+ \"\"\"Parse heading (# ## ### etc).\"\"\"\n+ if not self._at_line_start():\n+ return False\n+\n+ line = self._current_line()\n+ match = re.match(r\"^(#{1,6})\\s+(.+)$\", line)\n+ if match:\n+ level = len(match.group(1))\n+ content = match.group(2).strip()\n+ self.tokens.append({\"type\": \"heading\", \"level\": level, \"content\": content})\n+ self._skip_line()\n+ return True\n+ return False\n+\n+ def _try_parse_code_block(self) -> bool:\n+ \"\"\"Parse fenced code block (``` language).\"\"\"\n+ if not self._at_line_start():\n+ return False\n+\n+ line = self._current_line()\n+ match = re.match(r\"^```(\\w*)\\s*$\", line)\n+ if match:\n+ language = match.group(1) or None\n+ self._skip_line()\n+\n+ # Collect code lines until closing ```\n+ code_lines = []\n+ while self.pos < len(self.text):\n+ line = self._current_line()\n+ if line.strip() == \"```\":\n+ self._skip_line()\n+ break\n+ code_lines.append(line)\n+ self._skip_line()\n+\n+ self.tokens.append({\"type\": \"code_block\", \"language\": language, \"content\": \"\n\".join(code_lines)})\n+ return True\n+ return False\n+\n+ def _try_parse_blockquote(self) -> bool:\n+ \"\"\"Parse blockquote (> text).\"\"\"\n+ if not self._at_line_start():\n+ return False\n+\n+ line = self._current_line()\n+ if line.startswith(\"> \"):\n+ # Collect all consecutive blockquote lines\n+ quote_lines = []\n+ while self.pos < len(self.text) and self._current_line().startswith(\"> \"):\n+ quote_lines.append(self._current_line()[2:]) # Remove \"> \"\n+ self._skip_line()\n+\n+ self.tokens.append({\"type\": \"blockquote\", \"content\": \"\n\".join(quote_lines)})\n+ return True\n+ return False\n+\n+ def _try_parse_horizontal_rule(self) -> bool:\n+ \"\"\"Parse horizontal rule (--- or ***).\"\"\"\n+ if not self._at_line_start():\n+ return False\n+\n+ line = self._current_line().strip()\n+ if re.match(r\"^(-{3,}|\\*{3,}|_{3,})$\", line):\n+ self.tokens.append({\"type\": \"horizontal_rule\"})\n+ self._skip_line()\n+ return True\n+ return False\n+\n+ def _try_parse_list(self) -> bool:\n+ \"\"\"Parse ordered or unordered list.\"\"\"\n+ if not self._at_line_start():\n+ return False\n+\n+ line = 
self._current_line()\n+\n+ # Check for unordered list (- * +)\n+ unordered_match = re.match(r\"^(\\s*)([-*+])\\s+(.+)$\", line)\n+ if unordered_match:\n+ return self._parse_list_items(\"unordered\", unordered_match.group(1))\n+\n+ # Check for ordered list (1. 2. etc)\n+ ordered_match = re.match(r\"^(\\s*)(\\d+)\\.\\s+(.+)$\", line)\n+ if ordered_match:\n+ start_num = int(ordered_match.group(2))\n+ return self._parse_list_items(\"ordered\", ordered_match.group(1), start_num)\n+\n+ return False\n+\n+ def _parse_list_items(self, list_type: str, base_indent: str, start: int = 1) -> bool:\n+ \"\"\"Parse consecutive list items.\"\"\"\n+ items: list[str] = []\n+\n+ while self.pos < len(self.text):\n+ line = self._current_line()\n+\n+ if list_type == \"unordered\":\n+ match = re.match(rf\"^{re.escape(base_indent)}[-*+]\\s+(.+)$\", line)\n+ else:\n+ match = re.match(rf\"^{re.escape(base_indent)}\\d+\\.\\s+(.+)$\", line)\n+\n+ if match:\n+ items.append(match.group(1))\n+ self._skip_line()\n+ else:\n+ break\n+\n+ if items:\n+ token: dict[str, str | int | list[str]] = {\"type\": list_type + \"_list\", \"items\": items}\n+ if list_type == \"ordered\":\n+ token[\"start\"] = start\n+ self.tokens.append(token)\n+ return True\n+\n+ return False\n+\n+ def _parse_paragraph(self) -> None:\n+ \"\"\"Parse a paragraph (everything else).\"\"\"\n+ if self.pos >= len(self.text):\n+ return\n+\n+ # Collect lines until we hit a blank line or end\n+ para_lines = []\n+\n+ while self.pos < len(self.text):\n+ line = self._current_line()\n+\n+ # Stop at blank line\n+ if line.strip() == \"\":\n+ break\n+\n+ # Stop if we hit a block element at line start\n+ if self._at_line_start() and self._looks_like_block_element(line):\n+ break\n+\n+ para_lines.append(line)\n+ self._skip_line()\n+\n+ if para_lines:\n+ content = \" \".join(line.strip() for line in para_lines).strip()\n+ if content:\n+ self.tokens.append({\"type\": \"paragraph\", \"content\": content})\n+\n+ def _looks_like_block_element(self, line: str) -> bool:\n+ \"\"\"Check if a line looks like the start of a block element.\"\"\"\n+ line = line.strip()\n+ return (\n+ bool(re.match(r\"^#{1,6}\\s+\", line)) # heading\n+ or line.startswith(\"```\") # code block\n+ or line.startswith(\"> \") # blockquote\n+ or bool(re.match(r\"^(-{3,}|\\*{3,}|_{3,})$\", line)) # horizontal rule\n+ or bool(re.match(r\"^(\\s*)([-*+]|\\d+\\.)\\s+\", line)) # list\n+ )\n+\n+ def _current_line(self) -> str:\n+ \"\"\"Get the current line from position.\"\"\"\n+ if self.pos >= len(self.text):\n+ return \"\"\n+\n+ end = self.text.find(\"\n\", self.pos)\n+ if end == -1:\n+ return self.text[self.pos :]\n+ return self.text[self.pos : end]\n+\n+ def _skip_line(self) -> None:\n+ \"\"\"Move to the next line.\"\"\"\n+ end = self.text.find(\"\n\", self.pos)\n+ if end == -1:\n+ self.pos = len(self.text)\n+ else:\n+ self.pos = end + 1\n+\n+ def _at_line_start(self) -> bool:\n+ \"\"\"Check if we're at the start of a line.\"\"\"\n+ return self.pos == 0 or (self.pos > 0 and self.text[self.pos - 1] == \"\n\")\n+\n+\n+class NotebookSerializer:\n+ # Allowed URL schemes for security\n+ ALLOWED_SCHEMES = {\"http\", \"https\", \"mailto\", \"tel\"}\n+\n+ # Tags that map to marks - only officially supported marks in @tiptap/starter-kit\n+ MARK_TAGS = {\n+ \"strong\": \"bold\",\n+ \"b\": \"bold\",\n+ \"em\": \"italic\",\n+ \"i\": \"italic\",\n+ \"u\": \"underline\",\n+ \"s\": \"strike\",\n+ \"del\": \"strike\",\n+ \"strike\": \"strike\",\n+ \"code\": \"code\",\n+ }\n+\n+ def to_json_paragraph(self, input: str | 
list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(\n+ type=\"paragraph\",\n+ content=input if isinstance(input, list) else [ProsemirrorJSONContent(type=\"text\", text=input)],\n+ )\n+\n+ def to_json_heading(self, input: str | list[ProsemirrorJSONContent], level: int) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(\n+ type=\"heading\",\n+ attrs={\"level\": level},\n+ content=input if isinstance(input, list) else [ProsemirrorJSONContent(type=\"text\", text=input)],\n+ )\n+\n+ def to_json_bullet_list(self, items: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(type=\"bulletList\", content=items)\n+\n+ def to_json_ordered_list(self, items: list[ProsemirrorJSONContent], start: int = 1) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(type=\"orderedList\", attrs={\"start\": start}, content=items)\n+\n+ def to_json_list_item(self, content: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(type=\"listItem\", content=content)\n+\n+ def to_json_code_block(self, code: str, language: str | None = None) -> ProsemirrorJSONContent:\n+ attrs = {\"language\": language} if language else {}\n+ return ProsemirrorJSONContent(\n+ type=\"codeBlock\", attrs=attrs, content=[ProsemirrorJSONContent(type=\"text\", text=code)]\n+ )\n+\n+ def to_json_blockquote(self, content: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(type=\"blockquote\", content=content)\n+\n+ def to_json_horizontal_rule(self) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(type=\"horizontalRule\")\n+\n+ def from_markdown_to_json(self, input: str) -> ProsemirrorJSONContent:\n+ \"\"\"\n+ Parse markdown and convert to TipTap notebook schema.\n+ \"\"\"\n+ # Tokenize the markdown\n+ tokenizer = MarkdownTokenizer()\n+ tokens = tokenizer.tokenize(input)\n+\n+ # Convert tokens to ProsemirrorJSONContent\n+ json_result: list[ProsemirrorJSONContent] = []\n+ for token in tokens:\n+ nodes = self._convert_markdown_token(token)\n+ json_result.extend(nodes)\n+\n+ return ProsemirrorJSONContent(type=\"doc\", content=json_result)\n+\n+ def _convert_markdown_token(self, token: dict) -> list[ProsemirrorJSONContent]:\n+ \"\"\"Convert a markdown token to ProsemirrorJSONContent nodes.\"\"\"\n+ token_type = token[\"type\"]\n+\n+ if token_type == \"paragraph\":\n+ content = self._parse_markdown_inline_content(token[\"content\"])\n+ return [self.to_json_paragraph(content)]\n+\n+ elif token_type == \"heading\":\n+ content = self._parse_markdown_inline_content(token[\"content\"])\n+ return [self.to_json_heading(content, token[\"level\"])]\n+\n+ elif token_type == \"code_block\":\n+ return [self.to_json_code_block(token[\"content\"], token.get(\"language\"))]\n+\n+ elif token_type == \"blockquote\":\n+ # Parse blockquote content as markdown and convert to block content\n+ quote_content = self._parse_blockquote_content(token[\"content\"])\n+ return [self.to_json_blockquote(quote_content)]\n+\n+ elif token_type == \"horizontal_rule\":\n+ return [self.to_json_horizontal_rule()]\n+\n+ elif token_type == \"unordered_list\":\n+ items = []\n+ for item_text in token[\"items\"]:\n+ item_content = self._parse_markdown_inline_content(item_text)\n+ items.append(self.to_json_list_item([self.to_json_paragraph(item_content)]))\n+ return [self.to_json_bullet_list(items)]\n+\n+ elif token_type == \"ordered_list\":\n+ items = []\n+ for item_text in token[\"items\"]:\n+ item_content = 
self._parse_markdown_inline_content(item_text)\n+ items.append(self.to_json_list_item([self.to_json_paragraph(item_content)]))\n+ start = token.get(\"start\", 1)\n+ return [self.to_json_ordered_list(items, start)]\n+\n+ return []\n+\n+ def _parse_markdown_inline_content(self, text: str) -> list[ProsemirrorJSONContent]:\n+ \"\"\"Parse inline markdown content (bold, italic, links, etc.).\"\"\"\n+ if not text:\n+ return []\n+\n+ # This is a simplified inline parser - handles basic formatting\n+ content = []\n+ pos = 0\n+\n+ while pos < len(text):\n+ # Look for markdown patterns\n+ next_match = self._find_next_markdown_pattern(text, pos)\n+\n+ if next_match is None:\n+ # No more patterns, add remaining text\n+ remaining = text[pos:].rstrip()\n+ if remaining:\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=remaining))\n+ break\n+\n+ match_start, match_end, pattern_type, pattern_data = next_match\n+\n+ # Add text before the pattern\n+ if match_start > pos:\n+ before_text = text[pos:match_start]\n+ if before_text:\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=before_text))\n+\n+ # Add the formatted content\n+ if pattern_type == \"bold\":\n+ inner_text = pattern_data[\"text\"]\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"bold\")]))\n+ elif pattern_type == \"italic\":\n+ inner_text = pattern_data[\"text\"]\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"italic\")]))\n+ elif pattern_type == \"code\":\n+ inner_text = pattern_data[\"text\"]\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"code\")]))\n+ elif pattern_type == \"strikethrough\":\n+ inner_text = pattern_data[\"text\"]\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"strike\")]))\n+ elif pattern_type == \"link\":\n+ link_text = pattern_data[\"text\"]\n+ href = pattern_data[\"href\"]\n+ if self._is_safe_url(href):\n+ content.append(\n+ ProsemirrorJSONContent(\n+ type=\"text\",\n+ text=link_text,\n+ marks=[Mark(type=\"link\", attrs={\"href\": href, \"target\": \"_blank\"})],\n+ )\n+ )\n+ else:\n+ # Unsafe URL, just add as text\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=link_text))\n+\n+ pos = match_end\n+\n+ return content if content else [ProsemirrorJSONContent(type=\"text\", text=text)]\n+\n+ def _find_next_markdown_pattern(self, text: str, start_pos: int) -> Optional[tuple[int, int, str, dict]]:\n+ \"\"\"Find the next markdown formatting pattern in text.\"\"\"\n+ patterns = [\n+ # Bold: **text** or __text__ - check these first to prioritize over italic\n+ (r\"\\*\\*(.+?)\\*\\*\", \"bold\"),\n+ (r\"__(.*?)__\", \"bold\"),\n+ # Italic: *text* or _text_\n+ (r\"\\*(.*?)\\*\", \"italic\"),\n+ (r\"_(.*?)_\", \"italic\"),\n+ # Code: `text`\n+ (r\"`(.*?)`\", \"code\"),\n+ # Strikethrough: ~~text~~\n+ (r\"~~(.*?)~~\", \"strikethrough\"),\n+ # Link: [text](url)\n+ (r\"\\[([^\\]]*)\\]\\(([^)]*)\\)\", \"link\"),\n+ ]\n+\n+ earliest_match = None\n+ earliest_pos = len(text)\n+\n+ for pattern, pattern_type in patterns:\n+ match = re.search(pattern, text[start_pos:])\n+ if match:\n+ match_start = start_pos + match.start()\n+ match_end = start_pos + match.end()\n+\n+ if match_start < earliest_pos:\n+ earliest_pos = match_start\n+ if pattern_type == \"link\":\n+ earliest_match = (\n+ match_start,\n+ match_end,\n+ pattern_type,\n+ {\"text\": match.group(1), \"href\": match.group(2)},\n+ )\n+ else:\n+ earliest_match = (match_start, 
match_end, pattern_type, {\"text\": match.group(1)})\n+\n+ return earliest_match\n+\n+ def _parse_blockquote_content(self, content: str) -> list[ProsemirrorJSONContent]:\n+ \"\"\"Parse blockquote content as nested markdown.\"\"\"\n+ # Recursively parse the blockquote content as markdown\n+ tokenizer = MarkdownTokenizer()\n+ tokens = tokenizer.tokenize(content)\n+\n+ result = []\n+ for token in tokens:\n+ nodes = self._convert_markdown_token(token)\n+ result.extend(nodes)\n+\n+ return result if result else [self.to_json_paragraph(\"\")]\n+\n+ def _is_safe_url(self, url: str) -> bool:",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2267627113",
"repo_full_name": "PostHog/posthog",
"pr_number": 35962,
"pr_file": "ee/hogai/notebook/notebook_serializer.py",
"discussion_id": "2267627113",
"commented_code": "@@ -0,0 +1,610 @@\n+import re\n+import logging\n+from urllib.parse import urlparse, unquote\n+from typing import Optional\n+\n+from posthog.schema import ProsemirrorJSONContent, Mark\n+\n+logger = logging.getLogger(__name__)\n+\n+\n+class MarkdownTokenizer:\n+ \"\"\"Simple markdown tokenizer that handles the most common markdown elements.\"\"\"\n+\n+ def __init__(self):\n+ self.tokens = []\n+ self.pos = 0\n+ self.text = \"\"\n+\n+ def tokenize(self, text: str) -> list[dict]:\n+ \"\"\"Tokenize markdown text into a list of tokens.\"\"\"\n+ self.text = text\n+ self.pos = 0\n+ self.tokens = []\n+\n+ while self.pos < len(self.text):\n+ if not self._try_parse_block_element():\n+ # If no block element found, parse as paragraph\n+ self._parse_paragraph()\n+\n+ return self.tokens\n+\n+ def _try_parse_block_element(self) -> bool:\n+ \"\"\"Try to parse a block-level element. Returns True if successful.\"\"\"\n+ # Skip empty lines\n+ if self._at_line_start() and self._current_line().strip() == \"\":\n+ self._skip_line()\n+ return True\n+\n+ # Try different block elements\n+ if self._try_parse_heading():\n+ return True\n+ if self._try_parse_code_block():\n+ return True\n+ if self._try_parse_blockquote():\n+ return True\n+ if self._try_parse_horizontal_rule():\n+ return True\n+ if self._try_parse_list():\n+ return True\n+\n+ return False\n+\n+ def _try_parse_heading(self) -> bool:\n+ \"\"\"Parse heading (# ## ### etc).\"\"\"\n+ if not self._at_line_start():\n+ return False\n+\n+ line = self._current_line()\n+ match = re.match(r\"^(#{1,6})\\s+(.+)$\", line)\n+ if match:\n+ level = len(match.group(1))\n+ content = match.group(2).strip()\n+ self.tokens.append({\"type\": \"heading\", \"level\": level, \"content\": content})\n+ self._skip_line()\n+ return True\n+ return False\n+\n+ def _try_parse_code_block(self) -> bool:\n+ \"\"\"Parse fenced code block (``` language).\"\"\"\n+ if not self._at_line_start():\n+ return False\n+\n+ line = self._current_line()\n+ match = re.match(r\"^```(\\w*)\\s*$\", line)\n+ if match:\n+ language = match.group(1) or None\n+ self._skip_line()\n+\n+ # Collect code lines until closing ```\n+ code_lines = []\n+ while self.pos < len(self.text):\n+ line = self._current_line()\n+ if line.strip() == \"```\":\n+ self._skip_line()\n+ break\n+ code_lines.append(line)\n+ self._skip_line()\n+\n+ self.tokens.append({\"type\": \"code_block\", \"language\": language, \"content\": \"\\n\".join(code_lines)})\n+ return True\n+ return False\n+\n+ def _try_parse_blockquote(self) -> bool:\n+ \"\"\"Parse blockquote (> text).\"\"\"\n+ if not self._at_line_start():\n+ return False\n+\n+ line = self._current_line()\n+ if line.startswith(\"> \"):\n+ # Collect all consecutive blockquote lines\n+ quote_lines = []\n+ while self.pos < len(self.text) and self._current_line().startswith(\"> \"):\n+ quote_lines.append(self._current_line()[2:]) # Remove \"> \"\n+ self._skip_line()\n+\n+ self.tokens.append({\"type\": \"blockquote\", \"content\": \"\\n\".join(quote_lines)})\n+ return True\n+ return False\n+\n+ def _try_parse_horizontal_rule(self) -> bool:\n+ \"\"\"Parse horizontal rule (--- or ***).\"\"\"\n+ if not self._at_line_start():\n+ return False\n+\n+ line = self._current_line().strip()\n+ if re.match(r\"^(-{3,}|\\*{3,}|_{3,})$\", line):\n+ self.tokens.append({\"type\": \"horizontal_rule\"})\n+ self._skip_line()\n+ return True\n+ return False\n+\n+ def _try_parse_list(self) -> bool:\n+ \"\"\"Parse ordered or unordered list.\"\"\"\n+ if not self._at_line_start():\n+ return False\n+\n+ 
line = self._current_line()\n+\n+ # Check for unordered list (- * +)\n+ unordered_match = re.match(r\"^(\\s*)([-*+])\\s+(.+)$\", line)\n+ if unordered_match:\n+ return self._parse_list_items(\"unordered\", unordered_match.group(1))\n+\n+ # Check for ordered list (1. 2. etc)\n+ ordered_match = re.match(r\"^(\\s*)(\\d+)\\.\\s+(.+)$\", line)\n+ if ordered_match:\n+ start_num = int(ordered_match.group(2))\n+ return self._parse_list_items(\"ordered\", ordered_match.group(1), start_num)\n+\n+ return False\n+\n+ def _parse_list_items(self, list_type: str, base_indent: str, start: int = 1) -> bool:\n+ \"\"\"Parse consecutive list items.\"\"\"\n+ items: list[str] = []\n+\n+ while self.pos < len(self.text):\n+ line = self._current_line()\n+\n+ if list_type == \"unordered\":\n+ match = re.match(rf\"^{re.escape(base_indent)}[-*+]\\s+(.+)$\", line)\n+ else:\n+ match = re.match(rf\"^{re.escape(base_indent)}\\d+\\.\\s+(.+)$\", line)\n+\n+ if match:\n+ items.append(match.group(1))\n+ self._skip_line()\n+ else:\n+ break\n+\n+ if items:\n+ token: dict[str, str | int | list[str]] = {\"type\": list_type + \"_list\", \"items\": items}\n+ if list_type == \"ordered\":\n+ token[\"start\"] = start\n+ self.tokens.append(token)\n+ return True\n+\n+ return False\n+\n+ def _parse_paragraph(self) -> None:\n+ \"\"\"Parse a paragraph (everything else).\"\"\"\n+ if self.pos >= len(self.text):\n+ return\n+\n+ # Collect lines until we hit a blank line or end\n+ para_lines = []\n+\n+ while self.pos < len(self.text):\n+ line = self._current_line()\n+\n+ # Stop at blank line\n+ if line.strip() == \"\":\n+ break\n+\n+ # Stop if we hit a block element at line start\n+ if self._at_line_start() and self._looks_like_block_element(line):\n+ break\n+\n+ para_lines.append(line)\n+ self._skip_line()\n+\n+ if para_lines:\n+ content = \" \".join(line.strip() for line in para_lines).strip()\n+ if content:\n+ self.tokens.append({\"type\": \"paragraph\", \"content\": content})\n+\n+ def _looks_like_block_element(self, line: str) -> bool:\n+ \"\"\"Check if a line looks like the start of a block element.\"\"\"\n+ line = line.strip()\n+ return (\n+ bool(re.match(r\"^#{1,6}\\s+\", line)) # heading\n+ or line.startswith(\"```\") # code block\n+ or line.startswith(\"> \") # blockquote\n+ or bool(re.match(r\"^(-{3,}|\\*{3,}|_{3,})$\", line)) # horizontal rule\n+ or bool(re.match(r\"^(\\s*)([-*+]|\\d+\\.)\\s+\", line)) # list\n+ )\n+\n+ def _current_line(self) -> str:\n+ \"\"\"Get the current line from position.\"\"\"\n+ if self.pos >= len(self.text):\n+ return \"\"\n+\n+ end = self.text.find(\"\\n\", self.pos)\n+ if end == -1:\n+ return self.text[self.pos :]\n+ return self.text[self.pos : end]\n+\n+ def _skip_line(self) -> None:\n+ \"\"\"Move to the next line.\"\"\"\n+ end = self.text.find(\"\\n\", self.pos)\n+ if end == -1:\n+ self.pos = len(self.text)\n+ else:\n+ self.pos = end + 1\n+\n+ def _at_line_start(self) -> bool:\n+ \"\"\"Check if we're at the start of a line.\"\"\"\n+ return self.pos == 0 or (self.pos > 0 and self.text[self.pos - 1] == \"\\n\")\n+\n+\n+class NotebookSerializer:\n+ # Allowed URL schemes for security\n+ ALLOWED_SCHEMES = {\"http\", \"https\", \"mailto\", \"tel\"}\n+\n+ # Tags that map to marks - only officially supported marks in @tiptap/starter-kit\n+ MARK_TAGS = {\n+ \"strong\": \"bold\",\n+ \"b\": \"bold\",\n+ \"em\": \"italic\",\n+ \"i\": \"italic\",\n+ \"u\": \"underline\",\n+ \"s\": \"strike\",\n+ \"del\": \"strike\",\n+ \"strike\": \"strike\",\n+ \"code\": \"code\",\n+ }\n+\n+ def to_json_paragraph(self, input: str 
| list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(\n+ type=\"paragraph\",\n+ content=input if isinstance(input, list) else [ProsemirrorJSONContent(type=\"text\", text=input)],\n+ )\n+\n+ def to_json_heading(self, input: str | list[ProsemirrorJSONContent], level: int) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(\n+ type=\"heading\",\n+ attrs={\"level\": level},\n+ content=input if isinstance(input, list) else [ProsemirrorJSONContent(type=\"text\", text=input)],\n+ )\n+\n+ def to_json_bullet_list(self, items: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(type=\"bulletList\", content=items)\n+\n+ def to_json_ordered_list(self, items: list[ProsemirrorJSONContent], start: int = 1) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(type=\"orderedList\", attrs={\"start\": start}, content=items)\n+\n+ def to_json_list_item(self, content: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(type=\"listItem\", content=content)\n+\n+ def to_json_code_block(self, code: str, language: str | None = None) -> ProsemirrorJSONContent:\n+ attrs = {\"language\": language} if language else {}\n+ return ProsemirrorJSONContent(\n+ type=\"codeBlock\", attrs=attrs, content=[ProsemirrorJSONContent(type=\"text\", text=code)]\n+ )\n+\n+ def to_json_blockquote(self, content: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(type=\"blockquote\", content=content)\n+\n+ def to_json_horizontal_rule(self) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(type=\"horizontalRule\")\n+\n+ def from_markdown_to_json(self, input: str) -> ProsemirrorJSONContent:\n+ \"\"\"\n+ Parse markdown and convert to TipTap notebook schema.\n+ \"\"\"\n+ # Tokenize the markdown\n+ tokenizer = MarkdownTokenizer()\n+ tokens = tokenizer.tokenize(input)\n+\n+ # Convert tokens to ProsemirrorJSONContent\n+ json_result: list[ProsemirrorJSONContent] = []\n+ for token in tokens:\n+ nodes = self._convert_markdown_token(token)\n+ json_result.extend(nodes)\n+\n+ return ProsemirrorJSONContent(type=\"doc\", content=json_result)\n+\n+ def _convert_markdown_token(self, token: dict) -> list[ProsemirrorJSONContent]:\n+ \"\"\"Convert a markdown token to ProsemirrorJSONContent nodes.\"\"\"\n+ token_type = token[\"type\"]\n+\n+ if token_type == \"paragraph\":\n+ content = self._parse_markdown_inline_content(token[\"content\"])\n+ return [self.to_json_paragraph(content)]\n+\n+ elif token_type == \"heading\":\n+ content = self._parse_markdown_inline_content(token[\"content\"])\n+ return [self.to_json_heading(content, token[\"level\"])]\n+\n+ elif token_type == \"code_block\":\n+ return [self.to_json_code_block(token[\"content\"], token.get(\"language\"))]\n+\n+ elif token_type == \"blockquote\":\n+ # Parse blockquote content as markdown and convert to block content\n+ quote_content = self._parse_blockquote_content(token[\"content\"])\n+ return [self.to_json_blockquote(quote_content)]\n+\n+ elif token_type == \"horizontal_rule\":\n+ return [self.to_json_horizontal_rule()]\n+\n+ elif token_type == \"unordered_list\":\n+ items = []\n+ for item_text in token[\"items\"]:\n+ item_content = self._parse_markdown_inline_content(item_text)\n+ items.append(self.to_json_list_item([self.to_json_paragraph(item_content)]))\n+ return [self.to_json_bullet_list(items)]\n+\n+ elif token_type == \"ordered_list\":\n+ items = []\n+ for item_text in token[\"items\"]:\n+ item_content = 
self._parse_markdown_inline_content(item_text)\n+ items.append(self.to_json_list_item([self.to_json_paragraph(item_content)]))\n+ start = token.get(\"start\", 1)\n+ return [self.to_json_ordered_list(items, start)]\n+\n+ return []\n+\n+ def _parse_markdown_inline_content(self, text: str) -> list[ProsemirrorJSONContent]:\n+ \"\"\"Parse inline markdown content (bold, italic, links, etc.).\"\"\"\n+ if not text:\n+ return []\n+\n+ # This is a simplified inline parser - handles basic formatting\n+ content = []\n+ pos = 0\n+\n+ while pos < len(text):\n+ # Look for markdown patterns\n+ next_match = self._find_next_markdown_pattern(text, pos)\n+\n+ if next_match is None:\n+ # No more patterns, add remaining text\n+ remaining = text[pos:].rstrip()\n+ if remaining:\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=remaining))\n+ break\n+\n+ match_start, match_end, pattern_type, pattern_data = next_match\n+\n+ # Add text before the pattern\n+ if match_start > pos:\n+ before_text = text[pos:match_start]\n+ if before_text:\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=before_text))\n+\n+ # Add the formatted content\n+ if pattern_type == \"bold\":\n+ inner_text = pattern_data[\"text\"]\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"bold\")]))\n+ elif pattern_type == \"italic\":\n+ inner_text = pattern_data[\"text\"]\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"italic\")]))\n+ elif pattern_type == \"code\":\n+ inner_text = pattern_data[\"text\"]\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"code\")]))\n+ elif pattern_type == \"strikethrough\":\n+ inner_text = pattern_data[\"text\"]\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"strike\")]))\n+ elif pattern_type == \"link\":\n+ link_text = pattern_data[\"text\"]\n+ href = pattern_data[\"href\"]\n+ if self._is_safe_url(href):\n+ content.append(\n+ ProsemirrorJSONContent(\n+ type=\"text\",\n+ text=link_text,\n+ marks=[Mark(type=\"link\", attrs={\"href\": href, \"target\": \"_blank\"})],\n+ )\n+ )\n+ else:\n+ # Unsafe URL, just add as text\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=link_text))\n+\n+ pos = match_end\n+\n+ return content if content else [ProsemirrorJSONContent(type=\"text\", text=text)]\n+\n+ def _find_next_markdown_pattern(self, text: str, start_pos: int) -> Optional[tuple[int, int, str, dict]]:\n+ \"\"\"Find the next markdown formatting pattern in text.\"\"\"\n+ patterns = [\n+ # Bold: **text** or __text__ - check these first to prioritize over italic\n+ (r\"\\*\\*(.+?)\\*\\*\", \"bold\"),\n+ (r\"__(.*?)__\", \"bold\"),\n+ # Italic: *text* or _text_\n+ (r\"\\*(.*?)\\*\", \"italic\"),\n+ (r\"_(.*?)_\", \"italic\"),\n+ # Code: `text`\n+ (r\"`(.*?)`\", \"code\"),\n+ # Strikethrough: ~~text~~\n+ (r\"~~(.*?)~~\", \"strikethrough\"),\n+ # Link: [text](url)\n+ (r\"\\[([^\\]]*)\\]\\(([^)]*)\\)\", \"link\"),\n+ ]\n+\n+ earliest_match = None\n+ earliest_pos = len(text)\n+\n+ for pattern, pattern_type in patterns:\n+ match = re.search(pattern, text[start_pos:])\n+ if match:\n+ match_start = start_pos + match.start()\n+ match_end = start_pos + match.end()\n+\n+ if match_start < earliest_pos:\n+ earliest_pos = match_start\n+ if pattern_type == \"link\":\n+ earliest_match = (\n+ match_start,\n+ match_end,\n+ pattern_type,\n+ {\"text\": match.group(1), \"href\": match.group(2)},\n+ )\n+ else:\n+ earliest_match = (match_start, 
match_end, pattern_type, {\"text\": match.group(1)})\n+\n+ return earliest_match\n+\n+ def _parse_blockquote_content(self, content: str) -> list[ProsemirrorJSONContent]:\n+ \"\"\"Parse blockquote content as nested markdown.\"\"\"\n+ # Recursively parse the blockquote content as markdown\n+ tokenizer = MarkdownTokenizer()\n+ tokens = tokenizer.tokenize(content)\n+\n+ result = []\n+ for token in tokens:\n+ nodes = self._convert_markdown_token(token)\n+ result.extend(nodes)\n+\n+ return result if result else [self.to_json_paragraph(\"\")]\n+\n+ def _is_safe_url(self, url: str) -> bool:",
"comment_created_at": "2025-08-11T18:21:57+00:00",
"comment_author": "sortafreel",
"comment_body": "If to be paranoid - it seems it could still be unsafe \ud83d\ude05 Like, if we add something like `%256Aavascript:` it would decode to `%6Aavascript:` if we do just a single unquote, so there's a space for XSS attack, right now something like `'javascript%253Aalert(1)` could pass through.\r\n\r\nCould be skipped, but if we want to be double sure it would probably make sense to have a recursive checj. Not sure how much do we care though, or where the URL can come from.",
"pr_file_module": null
},
{
"comment_id": "2267948914",
"repo_full_name": "PostHog/posthog",
"pr_number": 35962,
"pr_file": "ee/hogai/notebook/notebook_serializer.py",
"discussion_id": "2267627113",
"commented_code": "@@ -0,0 +1,610 @@\n+import re\n+import logging\n+from urllib.parse import urlparse, unquote\n+from typing import Optional\n+\n+from posthog.schema import ProsemirrorJSONContent, Mark\n+\n+logger = logging.getLogger(__name__)\n+\n+\n+class MarkdownTokenizer:\n+ \"\"\"Simple markdown tokenizer that handles the most common markdown elements.\"\"\"\n+\n+ def __init__(self):\n+ self.tokens = []\n+ self.pos = 0\n+ self.text = \"\"\n+\n+ def tokenize(self, text: str) -> list[dict]:\n+ \"\"\"Tokenize markdown text into a list of tokens.\"\"\"\n+ self.text = text\n+ self.pos = 0\n+ self.tokens = []\n+\n+ while self.pos < len(self.text):\n+ if not self._try_parse_block_element():\n+ # If no block element found, parse as paragraph\n+ self._parse_paragraph()\n+\n+ return self.tokens\n+\n+ def _try_parse_block_element(self) -> bool:\n+ \"\"\"Try to parse a block-level element. Returns True if successful.\"\"\"\n+ # Skip empty lines\n+ if self._at_line_start() and self._current_line().strip() == \"\":\n+ self._skip_line()\n+ return True\n+\n+ # Try different block elements\n+ if self._try_parse_heading():\n+ return True\n+ if self._try_parse_code_block():\n+ return True\n+ if self._try_parse_blockquote():\n+ return True\n+ if self._try_parse_horizontal_rule():\n+ return True\n+ if self._try_parse_list():\n+ return True\n+\n+ return False\n+\n+ def _try_parse_heading(self) -> bool:\n+ \"\"\"Parse heading (# ## ### etc).\"\"\"\n+ if not self._at_line_start():\n+ return False\n+\n+ line = self._current_line()\n+ match = re.match(r\"^(#{1,6})\\s+(.+)$\", line)\n+ if match:\n+ level = len(match.group(1))\n+ content = match.group(2).strip()\n+ self.tokens.append({\"type\": \"heading\", \"level\": level, \"content\": content})\n+ self._skip_line()\n+ return True\n+ return False\n+\n+ def _try_parse_code_block(self) -> bool:\n+ \"\"\"Parse fenced code block (``` language).\"\"\"\n+ if not self._at_line_start():\n+ return False\n+\n+ line = self._current_line()\n+ match = re.match(r\"^```(\\w*)\\s*$\", line)\n+ if match:\n+ language = match.group(1) or None\n+ self._skip_line()\n+\n+ # Collect code lines until closing ```\n+ code_lines = []\n+ while self.pos < len(self.text):\n+ line = self._current_line()\n+ if line.strip() == \"```\":\n+ self._skip_line()\n+ break\n+ code_lines.append(line)\n+ self._skip_line()\n+\n+ self.tokens.append({\"type\": \"code_block\", \"language\": language, \"content\": \"\\n\".join(code_lines)})\n+ return True\n+ return False\n+\n+ def _try_parse_blockquote(self) -> bool:\n+ \"\"\"Parse blockquote (> text).\"\"\"\n+ if not self._at_line_start():\n+ return False\n+\n+ line = self._current_line()\n+ if line.startswith(\"> \"):\n+ # Collect all consecutive blockquote lines\n+ quote_lines = []\n+ while self.pos < len(self.text) and self._current_line().startswith(\"> \"):\n+ quote_lines.append(self._current_line()[2:]) # Remove \"> \"\n+ self._skip_line()\n+\n+ self.tokens.append({\"type\": \"blockquote\", \"content\": \"\\n\".join(quote_lines)})\n+ return True\n+ return False\n+\n+ def _try_parse_horizontal_rule(self) -> bool:\n+ \"\"\"Parse horizontal rule (--- or ***).\"\"\"\n+ if not self._at_line_start():\n+ return False\n+\n+ line = self._current_line().strip()\n+ if re.match(r\"^(-{3,}|\\*{3,}|_{3,})$\", line):\n+ self.tokens.append({\"type\": \"horizontal_rule\"})\n+ self._skip_line()\n+ return True\n+ return False\n+\n+ def _try_parse_list(self) -> bool:\n+ \"\"\"Parse ordered or unordered list.\"\"\"\n+ if not self._at_line_start():\n+ return False\n+\n+ 
line = self._current_line()\n+\n+ # Check for unordered list (- * +)\n+ unordered_match = re.match(r\"^(\\s*)([-*+])\\s+(.+)$\", line)\n+ if unordered_match:\n+ return self._parse_list_items(\"unordered\", unordered_match.group(1))\n+\n+ # Check for ordered list (1. 2. etc)\n+ ordered_match = re.match(r\"^(\\s*)(\\d+)\\.\\s+(.+)$\", line)\n+ if ordered_match:\n+ start_num = int(ordered_match.group(2))\n+ return self._parse_list_items(\"ordered\", ordered_match.group(1), start_num)\n+\n+ return False\n+\n+ def _parse_list_items(self, list_type: str, base_indent: str, start: int = 1) -> bool:\n+ \"\"\"Parse consecutive list items.\"\"\"\n+ items: list[str] = []\n+\n+ while self.pos < len(self.text):\n+ line = self._current_line()\n+\n+ if list_type == \"unordered\":\n+ match = re.match(rf\"^{re.escape(base_indent)}[-*+]\\s+(.+)$\", line)\n+ else:\n+ match = re.match(rf\"^{re.escape(base_indent)}\\d+\\.\\s+(.+)$\", line)\n+\n+ if match:\n+ items.append(match.group(1))\n+ self._skip_line()\n+ else:\n+ break\n+\n+ if items:\n+ token: dict[str, str | int | list[str]] = {\"type\": list_type + \"_list\", \"items\": items}\n+ if list_type == \"ordered\":\n+ token[\"start\"] = start\n+ self.tokens.append(token)\n+ return True\n+\n+ return False\n+\n+ def _parse_paragraph(self) -> None:\n+ \"\"\"Parse a paragraph (everything else).\"\"\"\n+ if self.pos >= len(self.text):\n+ return\n+\n+ # Collect lines until we hit a blank line or end\n+ para_lines = []\n+\n+ while self.pos < len(self.text):\n+ line = self._current_line()\n+\n+ # Stop at blank line\n+ if line.strip() == \"\":\n+ break\n+\n+ # Stop if we hit a block element at line start\n+ if self._at_line_start() and self._looks_like_block_element(line):\n+ break\n+\n+ para_lines.append(line)\n+ self._skip_line()\n+\n+ if para_lines:\n+ content = \" \".join(line.strip() for line in para_lines).strip()\n+ if content:\n+ self.tokens.append({\"type\": \"paragraph\", \"content\": content})\n+\n+ def _looks_like_block_element(self, line: str) -> bool:\n+ \"\"\"Check if a line looks like the start of a block element.\"\"\"\n+ line = line.strip()\n+ return (\n+ bool(re.match(r\"^#{1,6}\\s+\", line)) # heading\n+ or line.startswith(\"```\") # code block\n+ or line.startswith(\"> \") # blockquote\n+ or bool(re.match(r\"^(-{3,}|\\*{3,}|_{3,})$\", line)) # horizontal rule\n+ or bool(re.match(r\"^(\\s*)([-*+]|\\d+\\.)\\s+\", line)) # list\n+ )\n+\n+ def _current_line(self) -> str:\n+ \"\"\"Get the current line from position.\"\"\"\n+ if self.pos >= len(self.text):\n+ return \"\"\n+\n+ end = self.text.find(\"\\n\", self.pos)\n+ if end == -1:\n+ return self.text[self.pos :]\n+ return self.text[self.pos : end]\n+\n+ def _skip_line(self) -> None:\n+ \"\"\"Move to the next line.\"\"\"\n+ end = self.text.find(\"\\n\", self.pos)\n+ if end == -1:\n+ self.pos = len(self.text)\n+ else:\n+ self.pos = end + 1\n+\n+ def _at_line_start(self) -> bool:\n+ \"\"\"Check if we're at the start of a line.\"\"\"\n+ return self.pos == 0 or (self.pos > 0 and self.text[self.pos - 1] == \"\\n\")\n+\n+\n+class NotebookSerializer:\n+ # Allowed URL schemes for security\n+ ALLOWED_SCHEMES = {\"http\", \"https\", \"mailto\", \"tel\"}\n+\n+ # Tags that map to marks - only officially supported marks in @tiptap/starter-kit\n+ MARK_TAGS = {\n+ \"strong\": \"bold\",\n+ \"b\": \"bold\",\n+ \"em\": \"italic\",\n+ \"i\": \"italic\",\n+ \"u\": \"underline\",\n+ \"s\": \"strike\",\n+ \"del\": \"strike\",\n+ \"strike\": \"strike\",\n+ \"code\": \"code\",\n+ }\n+\n+ def to_json_paragraph(self, input: str 
| list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(\n+ type=\"paragraph\",\n+ content=input if isinstance(input, list) else [ProsemirrorJSONContent(type=\"text\", text=input)],\n+ )\n+\n+ def to_json_heading(self, input: str | list[ProsemirrorJSONContent], level: int) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(\n+ type=\"heading\",\n+ attrs={\"level\": level},\n+ content=input if isinstance(input, list) else [ProsemirrorJSONContent(type=\"text\", text=input)],\n+ )\n+\n+ def to_json_bullet_list(self, items: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(type=\"bulletList\", content=items)\n+\n+ def to_json_ordered_list(self, items: list[ProsemirrorJSONContent], start: int = 1) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(type=\"orderedList\", attrs={\"start\": start}, content=items)\n+\n+ def to_json_list_item(self, content: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(type=\"listItem\", content=content)\n+\n+ def to_json_code_block(self, code: str, language: str | None = None) -> ProsemirrorJSONContent:\n+ attrs = {\"language\": language} if language else {}\n+ return ProsemirrorJSONContent(\n+ type=\"codeBlock\", attrs=attrs, content=[ProsemirrorJSONContent(type=\"text\", text=code)]\n+ )\n+\n+ def to_json_blockquote(self, content: list[ProsemirrorJSONContent]) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(type=\"blockquote\", content=content)\n+\n+ def to_json_horizontal_rule(self) -> ProsemirrorJSONContent:\n+ return ProsemirrorJSONContent(type=\"horizontalRule\")\n+\n+ def from_markdown_to_json(self, input: str) -> ProsemirrorJSONContent:\n+ \"\"\"\n+ Parse markdown and convert to TipTap notebook schema.\n+ \"\"\"\n+ # Tokenize the markdown\n+ tokenizer = MarkdownTokenizer()\n+ tokens = tokenizer.tokenize(input)\n+\n+ # Convert tokens to ProsemirrorJSONContent\n+ json_result: list[ProsemirrorJSONContent] = []\n+ for token in tokens:\n+ nodes = self._convert_markdown_token(token)\n+ json_result.extend(nodes)\n+\n+ return ProsemirrorJSONContent(type=\"doc\", content=json_result)\n+\n+ def _convert_markdown_token(self, token: dict) -> list[ProsemirrorJSONContent]:\n+ \"\"\"Convert a markdown token to ProsemirrorJSONContent nodes.\"\"\"\n+ token_type = token[\"type\"]\n+\n+ if token_type == \"paragraph\":\n+ content = self._parse_markdown_inline_content(token[\"content\"])\n+ return [self.to_json_paragraph(content)]\n+\n+ elif token_type == \"heading\":\n+ content = self._parse_markdown_inline_content(token[\"content\"])\n+ return [self.to_json_heading(content, token[\"level\"])]\n+\n+ elif token_type == \"code_block\":\n+ return [self.to_json_code_block(token[\"content\"], token.get(\"language\"))]\n+\n+ elif token_type == \"blockquote\":\n+ # Parse blockquote content as markdown and convert to block content\n+ quote_content = self._parse_blockquote_content(token[\"content\"])\n+ return [self.to_json_blockquote(quote_content)]\n+\n+ elif token_type == \"horizontal_rule\":\n+ return [self.to_json_horizontal_rule()]\n+\n+ elif token_type == \"unordered_list\":\n+ items = []\n+ for item_text in token[\"items\"]:\n+ item_content = self._parse_markdown_inline_content(item_text)\n+ items.append(self.to_json_list_item([self.to_json_paragraph(item_content)]))\n+ return [self.to_json_bullet_list(items)]\n+\n+ elif token_type == \"ordered_list\":\n+ items = []\n+ for item_text in token[\"items\"]:\n+ item_content = 
self._parse_markdown_inline_content(item_text)\n+ items.append(self.to_json_list_item([self.to_json_paragraph(item_content)]))\n+ start = token.get(\"start\", 1)\n+ return [self.to_json_ordered_list(items, start)]\n+\n+ return []\n+\n+ def _parse_markdown_inline_content(self, text: str) -> list[ProsemirrorJSONContent]:\n+ \"\"\"Parse inline markdown content (bold, italic, links, etc.).\"\"\"\n+ if not text:\n+ return []\n+\n+ # This is a simplified inline parser - handles basic formatting\n+ content = []\n+ pos = 0\n+\n+ while pos < len(text):\n+ # Look for markdown patterns\n+ next_match = self._find_next_markdown_pattern(text, pos)\n+\n+ if next_match is None:\n+ # No more patterns, add remaining text\n+ remaining = text[pos:].rstrip()\n+ if remaining:\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=remaining))\n+ break\n+\n+ match_start, match_end, pattern_type, pattern_data = next_match\n+\n+ # Add text before the pattern\n+ if match_start > pos:\n+ before_text = text[pos:match_start]\n+ if before_text:\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=before_text))\n+\n+ # Add the formatted content\n+ if pattern_type == \"bold\":\n+ inner_text = pattern_data[\"text\"]\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"bold\")]))\n+ elif pattern_type == \"italic\":\n+ inner_text = pattern_data[\"text\"]\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"italic\")]))\n+ elif pattern_type == \"code\":\n+ inner_text = pattern_data[\"text\"]\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"code\")]))\n+ elif pattern_type == \"strikethrough\":\n+ inner_text = pattern_data[\"text\"]\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=inner_text, marks=[Mark(type=\"strike\")]))\n+ elif pattern_type == \"link\":\n+ link_text = pattern_data[\"text\"]\n+ href = pattern_data[\"href\"]\n+ if self._is_safe_url(href):\n+ content.append(\n+ ProsemirrorJSONContent(\n+ type=\"text\",\n+ text=link_text,\n+ marks=[Mark(type=\"link\", attrs={\"href\": href, \"target\": \"_blank\"})],\n+ )\n+ )\n+ else:\n+ # Unsafe URL, just add as text\n+ content.append(ProsemirrorJSONContent(type=\"text\", text=link_text))\n+\n+ pos = match_end\n+\n+ return content if content else [ProsemirrorJSONContent(type=\"text\", text=text)]\n+\n+ def _find_next_markdown_pattern(self, text: str, start_pos: int) -> Optional[tuple[int, int, str, dict]]:\n+ \"\"\"Find the next markdown formatting pattern in text.\"\"\"\n+ patterns = [\n+ # Bold: **text** or __text__ - check these first to prioritize over italic\n+ (r\"\\*\\*(.+?)\\*\\*\", \"bold\"),\n+ (r\"__(.*?)__\", \"bold\"),\n+ # Italic: *text* or _text_\n+ (r\"\\*(.*?)\\*\", \"italic\"),\n+ (r\"_(.*?)_\", \"italic\"),\n+ # Code: `text`\n+ (r\"`(.*?)`\", \"code\"),\n+ # Strikethrough: ~~text~~\n+ (r\"~~(.*?)~~\", \"strikethrough\"),\n+ # Link: [text](url)\n+ (r\"\\[([^\\]]*)\\]\\(([^)]*)\\)\", \"link\"),\n+ ]\n+\n+ earliest_match = None\n+ earliest_pos = len(text)\n+\n+ for pattern, pattern_type in patterns:\n+ match = re.search(pattern, text[start_pos:])\n+ if match:\n+ match_start = start_pos + match.start()\n+ match_end = start_pos + match.end()\n+\n+ if match_start < earliest_pos:\n+ earliest_pos = match_start\n+ if pattern_type == \"link\":\n+ earliest_match = (\n+ match_start,\n+ match_end,\n+ pattern_type,\n+ {\"text\": match.group(1), \"href\": match.group(2)},\n+ )\n+ else:\n+ earliest_match = (match_start, 
match_end, pattern_type, {\"text\": match.group(1)})\n+\n+ return earliest_match\n+\n+ def _parse_blockquote_content(self, content: str) -> list[ProsemirrorJSONContent]:\n+ \"\"\"Parse blockquote content as nested markdown.\"\"\"\n+ # Recursively parse the blockquote content as markdown\n+ tokenizer = MarkdownTokenizer()\n+ tokens = tokenizer.tokenize(content)\n+\n+ result = []\n+ for token in tokens:\n+ nodes = self._convert_markdown_token(token)\n+ result.extend(nodes)\n+\n+ return result if result else [self.to_json_paragraph(\"\")]\n+\n+ def _is_safe_url(self, url: str) -> bool:",
"comment_created_at": "2025-08-11T20:26:20+00:00",
"comment_author": "kappa90",
"comment_body": "I added recursive decoding so this doesn't happen, better safe than sorry. The URL comes from the LLM but we could reuse this class somewhere else.",
"pr_file_module": null
}
]
},
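The thread above resolves with recursive percent-decoding being added before the scheme check, since a single `unquote` pass lets double-encoded payloads like `%256Aavascript:` survive. The diff is cut off at the `_is_safe_url` signature, so the merged body isn't shown; below is a minimal sketch of the recursive-decode idea under that assumption, with `max_decodes` as a hypothetical loop cap rather than anything from the PR:

```python
from urllib.parse import unquote, urlparse

# Mirrors NotebookSerializer.ALLOWED_SCHEMES from the diff above.
ALLOWED_SCHEMES = {"http", "https", "mailto", "tel"}


def is_safe_url(url: str, max_decodes: int = 10) -> bool:
    """Sketch: accept a URL only if its scheme is allowed even after repeated percent-decoding."""
    candidate = url.strip()
    # Decode until the string stops changing, so %256Aavascript: -> %6Aavascript: -> javascript:
    for _ in range(max_decodes):
        decoded = unquote(candidate)
        if decoded == candidate:
            break
        candidate = decoded
    scheme = urlparse(candidate).scheme
    if scheme:
        return scheme.lower() in ALLOWED_SCHEMES
    # Scheme-less (relative) links pass here; tighten if that's not desired.
    return True
```

With this, `javascript%253Aalert(1)` decodes down to `javascript:alert(1)` and is rejected by the allow-list, which is exactly the bypass the first comment worries about.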
{
"discussion_id": "2269240589",
"pr_number": 36394,
"pr_file": "ee/api/vercel/vercel_installation.py",
"created_at": "2025-08-12T09:20:40+00:00",
"commented_code": "+\"\"\"\n+Implements the Vercel Marketplace API server for managing marketplace installations.\n+\n+Biggest problem here is that we don't yet conform to Vercel's response schema.\n+\n+See:\n+https://vercel.com/docs/integrations/create-integration/marketplace-api\n+\"\"\"\n+\n+from typing import Any\n+from django.conf import settings\n+from django.db import IntegrityError\n+from rest_framework import serializers, viewsets, exceptions\n+from rest_framework.request import Request\n+from rest_framework.response import Response\n+from rest_framework import mixins\n+from rest_framework.permissions import BasePermission\n+from ee.api.authentication import VercelAuthentication\n+from posthog.event_usage import report_user_signed_up\n+from posthog.models.user import User\n+from ee.models.vercel.vercel_installation import VercelInstallation\n+from rest_framework import decorators\n+\n+\n+def get_vercel_plans() -> list[dict[str, Any]]:\n+ \"\"\"Get PostHog plans formatted for Vercel Marketplace\"\"\"\n+ return [\n+ {\n+ \"id\": \"free\",\n+ \"type\": \"subscription\",\n+ \"name\": \"Free\",\n+ \"description\": \"No credit card required\",\n+ \"scope\": \"installation\",\n+ \"paymentMethodRequired\": False,\n+ \"details\": [\n+ {\"label\": \"Data retention\", \"value\": \"1 year\"},\n+ {\"label\": \"Projects\", \"value\": \"1\"},\n+ {\"label\": \"Team members\", \"value\": \"Unlimited\"},\n+ {\"label\": \"API Access\", \"value\": \"\u2713\"},\n+ {\"label\": \"No limits on tracked users\", \"value\": \"\u2713\"},\n+ {\"label\": \"Community support\", \"value\": \"Support via community forum\"},\n+ ],\n+ \"highlightedDetails\": [\n+ {\"label\": \"Feature Flags\", \"value\": \"1 million free requests\"},\n+ {\"label\": \"Experiments\", \"value\": \"1 million free requests\"},\n+ ],\n+ },\n+ {\n+ \"id\": \"pay_as_you_go\",\n+ \"type\": \"subscription\",\n+ \"name\": \"Pay-as-you-go\",\n+ \"description\": \"Usage-based pricing after free tier\",\n+ \"scope\": \"installation\",\n+ \"paymentMethodRequired\": True,\n+ \"details\": [\n+ {\"label\": \"Data retention\", \"value\": \"7 years\"},\n+ {\"label\": \"Projects\", \"value\": \"6\"},\n+ {\"label\": \"Team members\", \"value\": \"Unlimited\"},\n+ {\"label\": \"API Access\", \"value\": \"\u2713\"},\n+ {\"label\": \"No limits on tracked users\", \"value\": \"\u2713\"},\n+ {\"label\": \"Standard support\", \"value\": \"Support via email, Slack-based over $2k/mo\"},\n+ ],\n+ \"highlightedDetails\": [\n+ {\"label\": \"Feature flags\", \"value\": \"1 million requests for free, then from $0.0001/request\"},\n+ {\"label\": \"Experiments\", \"value\": \"Billed with feature flags\"},\n+ ],\n+ },\n+ ]\n+\n+\n+class VercelInstallationPermission(BasePermission):\n+ \"\"\"\n+ Custom permission that validates Vercel auth type and installation ID match.\n+ Vercel auth type is determined by the X-Vercel-Auth header, and can differ per endpoint.\n+ See Marketplace API spec.\n+ \"\"\"\n+\n+ def has_permission(self, request: Request, view) -> bool:\n+ self._validate_auth_type_allowed(request, view)\n+ return True\n+\n+ def has_object_permission(self, request: Request, view, obj) -> bool:\n+ self._validate_installation_id_match(request, view)\n+ return True\n+\n+ def _get_supported_auth_types(self, view) -> list[str]:\n+ \"\"\"\n+ Get supported auth types for the current action from the viewset.\n+ Supported auth type is specified by the marketplace API spec.\n+ \"\"\"\n+ return getattr(view, \"supported_auth_types\", {}).get(view.action, [\"User\", 
\"System\"])\n+\n+ def _validate_auth_type_allowed(self, request: Request, view) -> None:\n+ \"\"\"Validate that the auth type from X-Vercel-Auth header is allowed for this endpoint\"\"\"\n+ auth_type = request.headers.get(\"X-Vercel-Auth\", \"\").lower()\n+ if not auth_type:\n+ raise exceptions.AuthenticationFailed(\"Missing X-Vercel-Auth header\")\n+\n+ auth_type_title = auth_type.title()\n+ supported_types = self._get_supported_auth_types(view)\n+\n+ if auth_type_title not in supported_types:\n+ raise exceptions.PermissionDenied(\n+ f\"Auth type '{auth_type_title}' not allowed for this endpoint. \"\n+ f\"Supported types: {', '.join(supported_types)}\"\n+ )\n+\n+ def _validate_installation_id_match(self, request: Request, view) -> None:\n+ \"\"\"Validate that JWT installation_id matches URL parameter\"\"\"\n+ jwt_payload = self._get_jwt_payload(request)\n+\n+ # installation_id when going through vercel_installation ViewSet,\n+ # or parent_lookup_installation_id when going through vercel_resource\n+ installation_id = view.kwargs.get(\"installation_id\") or view.kwargs.get(\"parent_lookup_installation_id\")\n+\n+ if jwt_payload.get(\"installation_id\") != installation_id:\n+ raise exceptions.PermissionDenied(\"Installation ID mismatch\")\n+\n+ def _get_jwt_payload(self, request: Request) -> dict[str, Any]:\n+ \"\"\"Extract JWT payload from authenticated request\"\"\"\n+ if hasattr(request, \"auth\") and isinstance(request.auth, dict) and request.auth:\n+ return request.auth\n+ raise exceptions.AuthenticationFailed(\"No valid JWT authentication found\")\n+\n+\n+class VercelCredentialsSerializer(serializers.Serializer):\n+ access_token = serializers.CharField(help_text=\"Access token authorizes marketplace and integration APIs.\")\n+ token_type = serializers.CharField(help_text=\"The type of token (default: Bearer).\")\n+\n+\n+class VercelContactSerializer(serializers.Serializer):\n+ email = serializers.EmailField(help_text=\"Contact email address for the account.\")\n+ name = serializers.CharField(required=False, allow_blank=True, help_text=\"Contact name for the account (optional).\")\n+\n+\n+class VercelAccountSerializer(serializers.Serializer):\n+ name = serializers.CharField(required=False, allow_blank=True, help_text=\"Account name (optional).\")\n+ url = serializers.URLField(help_text=\"URL of the account.\")\n+ contact = VercelContactSerializer(help_text=\"Contact information for the account.\")\n+\n+\n+class UpsertInstallationPayloadSerializer(serializers.Serializer):\n+ scopes = serializers.ListField(\n+ child=serializers.CharField(), min_length=1, help_text=\"Array of scopes, must have at least one. Min Length: 1\"\n+ )\n+ acceptedPolicies = serializers.DictField(\n+ child=serializers.JSONField(),\n+ help_text='Policies accepted by the customer. Example: { \"toc\": \"2024-02-28T10:00:00Z\" }',\n+ )\n+ credentials = VercelCredentialsSerializer(\n+ help_text=\"The service-account access token to access marketplace and integration APIs on behalf of a customer's installation.\"\n+ )\n+ account = VercelAccountSerializer(\n+ help_text=\"The account information for this installation. 
Use Get Account Info API to re-fetch this data post installation.\"\n+ )\n+\n+\n+class VercelInstallationSerializer(serializers.ModelSerializer):\n+ class Meta:\n+ model = VercelInstallation\n+ fields = \"__all__\"\n+\n+\n+class VercelInstallationViewSet(\n+ mixins.RetrieveModelMixin, mixins.UpdateModelMixin, mixins.DestroyModelMixin, viewsets.GenericViewSet\n+):\n+ queryset = VercelInstallation.objects.all()\n+ serializer_class = VercelInstallationSerializer\n+ lookup_field = \"installation_id\"\n+ authentication_classes = [VercelAuthentication]\n+ permission_classes = [VercelInstallationPermission]\n+\n+ supported_auth_types = {\n+ \"update\": [\"User\"],\n+ \"partial_update\": [\"User\"],\n+ \"destroy\": [\"User\", \"System\"],\n+ \"retrieve\": [\"System\"],\n+ \"plans\": [\"System\"],\n+ }\n+\n+ def update(self, request: Request, *args: Any, **kwargs: Any) -> Response:\n+ \"\"\"\n+ Implements: https://vercel.com/docs/integrations/create-integration/marketplace-api#upsert-installation\n+ \"\"\"\n+ serializer: UpsertInstallationPayloadSerializer = UpsertInstallationPayloadSerializer(data=request.data)\n+ if not serializer.is_valid():\n+ raise exceptions.ValidationError(detail=serializer.errors)\n+\n+ installation_id = self.kwargs[\"installation_id\"]\n+\n+ try:\n+ # TODO: Not sure if this is the best move because users might be confused\n+ # by the default project created here and their \"Resource\" project.\n+ organization, _, user = User.objects.bootstrap(\n+ is_staff=False,\n+ is_email_verified=True,",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2269240589",
"repo_full_name": "PostHog/posthog",
"pr_number": 36394,
"pr_file": "ee/api/vercel/vercel_installation.py",
"discussion_id": "2269240589",
"commented_code": "@@ -0,0 +1,321 @@\n+\"\"\"\n+Implements the Vercel Marketplace API server for managing marketplace installations.\n+\n+Biggest problem here is that we don't yet conform to Vercel's response schema.\n+\n+See:\n+https://vercel.com/docs/integrations/create-integration/marketplace-api\n+\"\"\"\n+\n+from typing import Any\n+from django.conf import settings\n+from django.db import IntegrityError\n+from rest_framework import serializers, viewsets, exceptions\n+from rest_framework.request import Request\n+from rest_framework.response import Response\n+from rest_framework import mixins\n+from rest_framework.permissions import BasePermission\n+from ee.api.authentication import VercelAuthentication\n+from posthog.event_usage import report_user_signed_up\n+from posthog.models.user import User\n+from ee.models.vercel.vercel_installation import VercelInstallation\n+from rest_framework import decorators\n+\n+\n+def get_vercel_plans() -> list[dict[str, Any]]:\n+ \"\"\"Get PostHog plans formatted for Vercel Marketplace\"\"\"\n+ return [\n+ {\n+ \"id\": \"free\",\n+ \"type\": \"subscription\",\n+ \"name\": \"Free\",\n+ \"description\": \"No credit card required\",\n+ \"scope\": \"installation\",\n+ \"paymentMethodRequired\": False,\n+ \"details\": [\n+ {\"label\": \"Data retention\", \"value\": \"1 year\"},\n+ {\"label\": \"Projects\", \"value\": \"1\"},\n+ {\"label\": \"Team members\", \"value\": \"Unlimited\"},\n+ {\"label\": \"API Access\", \"value\": \"\u2713\"},\n+ {\"label\": \"No limits on tracked users\", \"value\": \"\u2713\"},\n+ {\"label\": \"Community support\", \"value\": \"Support via community forum\"},\n+ ],\n+ \"highlightedDetails\": [\n+ {\"label\": \"Feature Flags\", \"value\": \"1 million free requests\"},\n+ {\"label\": \"Experiments\", \"value\": \"1 million free requests\"},\n+ ],\n+ },\n+ {\n+ \"id\": \"pay_as_you_go\",\n+ \"type\": \"subscription\",\n+ \"name\": \"Pay-as-you-go\",\n+ \"description\": \"Usage-based pricing after free tier\",\n+ \"scope\": \"installation\",\n+ \"paymentMethodRequired\": True,\n+ \"details\": [\n+ {\"label\": \"Data retention\", \"value\": \"7 years\"},\n+ {\"label\": \"Projects\", \"value\": \"6\"},\n+ {\"label\": \"Team members\", \"value\": \"Unlimited\"},\n+ {\"label\": \"API Access\", \"value\": \"\u2713\"},\n+ {\"label\": \"No limits on tracked users\", \"value\": \"\u2713\"},\n+ {\"label\": \"Standard support\", \"value\": \"Support via email, Slack-based over $2k/mo\"},\n+ ],\n+ \"highlightedDetails\": [\n+ {\"label\": \"Feature flags\", \"value\": \"1 million requests for free, then from $0.0001/request\"},\n+ {\"label\": \"Experiments\", \"value\": \"Billed with feature flags\"},\n+ ],\n+ },\n+ ]\n+\n+\n+class VercelInstallationPermission(BasePermission):\n+ \"\"\"\n+ Custom permission that validates Vercel auth type and installation ID match.\n+ Vercel auth type is determined by the X-Vercel-Auth header, and can differ per endpoint.\n+ See Marketplace API spec.\n+ \"\"\"\n+\n+ def has_permission(self, request: Request, view) -> bool:\n+ self._validate_auth_type_allowed(request, view)\n+ return True\n+\n+ def has_object_permission(self, request: Request, view, obj) -> bool:\n+ self._validate_installation_id_match(request, view)\n+ return True\n+\n+ def _get_supported_auth_types(self, view) -> list[str]:\n+ \"\"\"\n+ Get supported auth types for the current action from the viewset.\n+ Supported auth type is specified by the marketplace API spec.\n+ \"\"\"\n+ return getattr(view, \"supported_auth_types\", {}).get(view.action, 
[\"User\", \"System\"])\n+\n+ def _validate_auth_type_allowed(self, request: Request, view) -> None:\n+ \"\"\"Validate that the auth type from X-Vercel-Auth header is allowed for this endpoint\"\"\"\n+ auth_type = request.headers.get(\"X-Vercel-Auth\", \"\").lower()\n+ if not auth_type:\n+ raise exceptions.AuthenticationFailed(\"Missing X-Vercel-Auth header\")\n+\n+ auth_type_title = auth_type.title()\n+ supported_types = self._get_supported_auth_types(view)\n+\n+ if auth_type_title not in supported_types:\n+ raise exceptions.PermissionDenied(\n+ f\"Auth type '{auth_type_title}' not allowed for this endpoint. \"\n+ f\"Supported types: {', '.join(supported_types)}\"\n+ )\n+\n+ def _validate_installation_id_match(self, request: Request, view) -> None:\n+ \"\"\"Validate that JWT installation_id matches URL parameter\"\"\"\n+ jwt_payload = self._get_jwt_payload(request)\n+\n+ # installation_id when going through vercel_installation ViewSet,\n+ # or parent_lookup_installation_id when going through vercel_resource\n+ installation_id = view.kwargs.get(\"installation_id\") or view.kwargs.get(\"parent_lookup_installation_id\")\n+\n+ if jwt_payload.get(\"installation_id\") != installation_id:\n+ raise exceptions.PermissionDenied(\"Installation ID mismatch\")\n+\n+ def _get_jwt_payload(self, request: Request) -> dict[str, Any]:\n+ \"\"\"Extract JWT payload from authenticated request\"\"\"\n+ if hasattr(request, \"auth\") and isinstance(request.auth, dict) and request.auth:\n+ return request.auth\n+ raise exceptions.AuthenticationFailed(\"No valid JWT authentication found\")\n+\n+\n+class VercelCredentialsSerializer(serializers.Serializer):\n+ access_token = serializers.CharField(help_text=\"Access token authorizes marketplace and integration APIs.\")\n+ token_type = serializers.CharField(help_text=\"The type of token (default: Bearer).\")\n+\n+\n+class VercelContactSerializer(serializers.Serializer):\n+ email = serializers.EmailField(help_text=\"Contact email address for the account.\")\n+ name = serializers.CharField(required=False, allow_blank=True, help_text=\"Contact name for the account (optional).\")\n+\n+\n+class VercelAccountSerializer(serializers.Serializer):\n+ name = serializers.CharField(required=False, allow_blank=True, help_text=\"Account name (optional).\")\n+ url = serializers.URLField(help_text=\"URL of the account.\")\n+ contact = VercelContactSerializer(help_text=\"Contact information for the account.\")\n+\n+\n+class UpsertInstallationPayloadSerializer(serializers.Serializer):\n+ scopes = serializers.ListField(\n+ child=serializers.CharField(), min_length=1, help_text=\"Array of scopes, must have at least one. Min Length: 1\"\n+ )\n+ acceptedPolicies = serializers.DictField(\n+ child=serializers.JSONField(),\n+ help_text='Policies accepted by the customer. Example: { \"toc\": \"2024-02-28T10:00:00Z\" }',\n+ )\n+ credentials = VercelCredentialsSerializer(\n+ help_text=\"The service-account access token to access marketplace and integration APIs on behalf of a customer's installation.\"\n+ )\n+ account = VercelAccountSerializer(\n+ help_text=\"The account information for this installation. 
Use Get Account Info API to re-fetch this data post installation.\"\n+ )\n+\n+\n+class VercelInstallationSerializer(serializers.ModelSerializer):\n+ class Meta:\n+ model = VercelInstallation\n+ fields = \"__all__\"\n+\n+\n+class VercelInstallationViewSet(\n+ mixins.RetrieveModelMixin, mixins.UpdateModelMixin, mixins.DestroyModelMixin, viewsets.GenericViewSet\n+):\n+ queryset = VercelInstallation.objects.all()\n+ serializer_class = VercelInstallationSerializer\n+ lookup_field = \"installation_id\"\n+ authentication_classes = [VercelAuthentication]\n+ permission_classes = [VercelInstallationPermission]\n+\n+ supported_auth_types = {\n+ \"update\": [\"User\"],\n+ \"partial_update\": [\"User\"],\n+ \"destroy\": [\"User\", \"System\"],\n+ \"retrieve\": [\"System\"],\n+ \"plans\": [\"System\"],\n+ }\n+\n+ def update(self, request: Request, *args: Any, **kwargs: Any) -> Response:\n+ \"\"\"\n+ Implements: https://vercel.com/docs/integrations/create-integration/marketplace-api#upsert-installation\n+ \"\"\"\n+ serializer: UpsertInstallationPayloadSerializer = UpsertInstallationPayloadSerializer(data=request.data)\n+ if not serializer.is_valid():\n+ raise exceptions.ValidationError(detail=serializer.errors)\n+\n+ installation_id = self.kwargs[\"installation_id\"]\n+\n+ try:\n+ # TODO: Not sure if this is the best move because users might be confused\n+ # by the default project created here and their \"Resource\" project.\n+ organization, _, user = User.objects.bootstrap(\n+ is_staff=False,\n+ is_email_verified=True,",
"comment_created_at": "2025-08-12T09:20:40+00:00",
"comment_author": "joshsny",
"comment_body": "This is dodgy, as it makes our email verification dependent on Vercel's - we probably need to keep their email unverified",
"pr_file_module": null
},
{
"comment_id": "2269591556",
"repo_full_name": "PostHog/posthog",
"pr_number": 36394,
"pr_file": "ee/api/vercel/vercel_installation.py",
"discussion_id": "2269240589",
"commented_code": "@@ -0,0 +1,321 @@\n+\"\"\"\n+Implements the Vercel Marketplace API server for managing marketplace installations.\n+\n+Biggest problem here is that we don't yet conform to Vercel's response schema.\n+\n+See:\n+https://vercel.com/docs/integrations/create-integration/marketplace-api\n+\"\"\"\n+\n+from typing import Any\n+from django.conf import settings\n+from django.db import IntegrityError\n+from rest_framework import serializers, viewsets, exceptions\n+from rest_framework.request import Request\n+from rest_framework.response import Response\n+from rest_framework import mixins\n+from rest_framework.permissions import BasePermission\n+from ee.api.authentication import VercelAuthentication\n+from posthog.event_usage import report_user_signed_up\n+from posthog.models.user import User\n+from ee.models.vercel.vercel_installation import VercelInstallation\n+from rest_framework import decorators\n+\n+\n+def get_vercel_plans() -> list[dict[str, Any]]:\n+ \"\"\"Get PostHog plans formatted for Vercel Marketplace\"\"\"\n+ return [\n+ {\n+ \"id\": \"free\",\n+ \"type\": \"subscription\",\n+ \"name\": \"Free\",\n+ \"description\": \"No credit card required\",\n+ \"scope\": \"installation\",\n+ \"paymentMethodRequired\": False,\n+ \"details\": [\n+ {\"label\": \"Data retention\", \"value\": \"1 year\"},\n+ {\"label\": \"Projects\", \"value\": \"1\"},\n+ {\"label\": \"Team members\", \"value\": \"Unlimited\"},\n+ {\"label\": \"API Access\", \"value\": \"\u2713\"},\n+ {\"label\": \"No limits on tracked users\", \"value\": \"\u2713\"},\n+ {\"label\": \"Community support\", \"value\": \"Support via community forum\"},\n+ ],\n+ \"highlightedDetails\": [\n+ {\"label\": \"Feature Flags\", \"value\": \"1 million free requests\"},\n+ {\"label\": \"Experiments\", \"value\": \"1 million free requests\"},\n+ ],\n+ },\n+ {\n+ \"id\": \"pay_as_you_go\",\n+ \"type\": \"subscription\",\n+ \"name\": \"Pay-as-you-go\",\n+ \"description\": \"Usage-based pricing after free tier\",\n+ \"scope\": \"installation\",\n+ \"paymentMethodRequired\": True,\n+ \"details\": [\n+ {\"label\": \"Data retention\", \"value\": \"7 years\"},\n+ {\"label\": \"Projects\", \"value\": \"6\"},\n+ {\"label\": \"Team members\", \"value\": \"Unlimited\"},\n+ {\"label\": \"API Access\", \"value\": \"\u2713\"},\n+ {\"label\": \"No limits on tracked users\", \"value\": \"\u2713\"},\n+ {\"label\": \"Standard support\", \"value\": \"Support via email, Slack-based over $2k/mo\"},\n+ ],\n+ \"highlightedDetails\": [\n+ {\"label\": \"Feature flags\", \"value\": \"1 million requests for free, then from $0.0001/request\"},\n+ {\"label\": \"Experiments\", \"value\": \"Billed with feature flags\"},\n+ ],\n+ },\n+ ]\n+\n+\n+class VercelInstallationPermission(BasePermission):\n+ \"\"\"\n+ Custom permission that validates Vercel auth type and installation ID match.\n+ Vercel auth type is determined by the X-Vercel-Auth header, and can differ per endpoint.\n+ See Marketplace API spec.\n+ \"\"\"\n+\n+ def has_permission(self, request: Request, view) -> bool:\n+ self._validate_auth_type_allowed(request, view)\n+ return True\n+\n+ def has_object_permission(self, request: Request, view, obj) -> bool:\n+ self._validate_installation_id_match(request, view)\n+ return True\n+\n+ def _get_supported_auth_types(self, view) -> list[str]:\n+ \"\"\"\n+ Get supported auth types for the current action from the viewset.\n+ Supported auth type is specified by the marketplace API spec.\n+ \"\"\"\n+ return getattr(view, \"supported_auth_types\", {}).get(view.action, 
[\"User\", \"System\"])\n+\n+ def _validate_auth_type_allowed(self, request: Request, view) -> None:\n+ \"\"\"Validate that the auth type from X-Vercel-Auth header is allowed for this endpoint\"\"\"\n+ auth_type = request.headers.get(\"X-Vercel-Auth\", \"\").lower()\n+ if not auth_type:\n+ raise exceptions.AuthenticationFailed(\"Missing X-Vercel-Auth header\")\n+\n+ auth_type_title = auth_type.title()\n+ supported_types = self._get_supported_auth_types(view)\n+\n+ if auth_type_title not in supported_types:\n+ raise exceptions.PermissionDenied(\n+ f\"Auth type '{auth_type_title}' not allowed for this endpoint. \"\n+ f\"Supported types: {', '.join(supported_types)}\"\n+ )\n+\n+ def _validate_installation_id_match(self, request: Request, view) -> None:\n+ \"\"\"Validate that JWT installation_id matches URL parameter\"\"\"\n+ jwt_payload = self._get_jwt_payload(request)\n+\n+ # installation_id when going through vercel_installation ViewSet,\n+ # or parent_lookup_installation_id when going through vercel_resource\n+ installation_id = view.kwargs.get(\"installation_id\") or view.kwargs.get(\"parent_lookup_installation_id\")\n+\n+ if jwt_payload.get(\"installation_id\") != installation_id:\n+ raise exceptions.PermissionDenied(\"Installation ID mismatch\")\n+\n+ def _get_jwt_payload(self, request: Request) -> dict[str, Any]:\n+ \"\"\"Extract JWT payload from authenticated request\"\"\"\n+ if hasattr(request, \"auth\") and isinstance(request.auth, dict) and request.auth:\n+ return request.auth\n+ raise exceptions.AuthenticationFailed(\"No valid JWT authentication found\")\n+\n+\n+class VercelCredentialsSerializer(serializers.Serializer):\n+ access_token = serializers.CharField(help_text=\"Access token authorizes marketplace and integration APIs.\")\n+ token_type = serializers.CharField(help_text=\"The type of token (default: Bearer).\")\n+\n+\n+class VercelContactSerializer(serializers.Serializer):\n+ email = serializers.EmailField(help_text=\"Contact email address for the account.\")\n+ name = serializers.CharField(required=False, allow_blank=True, help_text=\"Contact name for the account (optional).\")\n+\n+\n+class VercelAccountSerializer(serializers.Serializer):\n+ name = serializers.CharField(required=False, allow_blank=True, help_text=\"Account name (optional).\")\n+ url = serializers.URLField(help_text=\"URL of the account.\")\n+ contact = VercelContactSerializer(help_text=\"Contact information for the account.\")\n+\n+\n+class UpsertInstallationPayloadSerializer(serializers.Serializer):\n+ scopes = serializers.ListField(\n+ child=serializers.CharField(), min_length=1, help_text=\"Array of scopes, must have at least one. Min Length: 1\"\n+ )\n+ acceptedPolicies = serializers.DictField(\n+ child=serializers.JSONField(),\n+ help_text='Policies accepted by the customer. Example: { \"toc\": \"2024-02-28T10:00:00Z\" }',\n+ )\n+ credentials = VercelCredentialsSerializer(\n+ help_text=\"The service-account access token to access marketplace and integration APIs on behalf of a customer's installation.\"\n+ )\n+ account = VercelAccountSerializer(\n+ help_text=\"The account information for this installation. 
Use Get Account Info API to re-fetch this data post installation.\"\n+ )\n+\n+\n+class VercelInstallationSerializer(serializers.ModelSerializer):\n+ class Meta:\n+ model = VercelInstallation\n+ fields = \"__all__\"\n+\n+\n+class VercelInstallationViewSet(\n+ mixins.RetrieveModelMixin, mixins.UpdateModelMixin, mixins.DestroyModelMixin, viewsets.GenericViewSet\n+):\n+ queryset = VercelInstallation.objects.all()\n+ serializer_class = VercelInstallationSerializer\n+ lookup_field = \"installation_id\"\n+ authentication_classes = [VercelAuthentication]\n+ permission_classes = [VercelInstallationPermission]\n+\n+ supported_auth_types = {\n+ \"update\": [\"User\"],\n+ \"partial_update\": [\"User\"],\n+ \"destroy\": [\"User\", \"System\"],\n+ \"retrieve\": [\"System\"],\n+ \"plans\": [\"System\"],\n+ }\n+\n+ def update(self, request: Request, *args: Any, **kwargs: Any) -> Response:\n+ \"\"\"\n+ Implements: https://vercel.com/docs/integrations/create-integration/marketplace-api#upsert-installation\n+ \"\"\"\n+ serializer: UpsertInstallationPayloadSerializer = UpsertInstallationPayloadSerializer(data=request.data)\n+ if not serializer.is_valid():\n+ raise exceptions.ValidationError(detail=serializer.errors)\n+\n+ installation_id = self.kwargs[\"installation_id\"]\n+\n+ try:\n+ # TODO: Not sure if this is the best move because users might be confused\n+ # by the default project created here and their \"Resource\" project.\n+ organization, _, user = User.objects.bootstrap(\n+ is_staff=False,\n+ is_email_verified=True,",
"comment_created_at": "2025-08-12T11:49:20+00:00",
"comment_author": "JonathanLab",
"comment_body": "Agree, have set it to False",
"pr_file_module": null
}
]
},
{
"discussion_id": "2204227072",
"pr_number": 33948,
"pr_file": "posthog/api/survey.py",
"created_at": "2025-07-14T08:38:36+00:00",
"commented_code": "),\n )\n \n+ # If survey_id is provided, return individual survey\n+ if survey_id:\n+ try:\n+ survey = Survey.objects.select_related(\"linked_flag\", \"targeting_flag\", \"internal_targeting_flag\").get(\n+ id=survey_id, team=team\n+ )\n+ except Survey.DoesNotExist:\n+ return cors_response(\n+ request,\n+ generate_exception_response(\n+ \"surveys\",\n+ \"Survey not found.\",\n+ type=\"not_found\",\n+ code=\"survey_not_found\",\n+ status_code=status.HTTP_404_NOT_FOUND,\n+ ),\n+ )\n+\n+ # Check if survey is archived\n+ if survey.archived:\n+ return cors_response(\n+ request,\n+ generate_exception_response(\n+ \"surveys\",\n+ \"This survey is no longer available.\",\n+ type=\"not_found\",\n+ code=\"survey_archived\",\n+ status_code=status.HTTP_404_NOT_FOUND,\n+ ),\n+ )\n+\n+ # Return individual survey response\n+ serialized_survey = SurveyAPISerializer(survey).data\n+ response_data = {\n+ \"survey\": serialized_survey,\n+ \"project_config\": {\n+ \"api_host\": request.build_absolute_uri(\"/\").rstrip(\"/\"),\n+ \"token\": team.api_token,\n+ },\n+ }\n+ return cors_response(request, JsonResponse(response_data))\n+\n+ # Return all surveys (existing behavior)\n return cors_response(request, JsonResponse(get_surveys_response(team)))\n \n \n+# Constants for better maintainability\n+logger = structlog.get_logger(__name__)\n+SURVEY_ID_MAX_LENGTH = 50\n+CACHE_TIMEOUT_SECONDS = 300\n+\n+\n+def is_valid_uuid(uuid_string: str) -> bool:\n+ \"\"\"Validate if a string is a valid UUID format.\"\"\"\n+ try:\n+ uuid.UUID(uuid_string)\n+ return True\n+ except (ValueError, TypeError):\n+ return False\n+\n+\n+@csrf_exempt\n+@axes_dispatch\n+def public_survey_page(request, survey_id: str):\n+ \"\"\"\n+ Server-side rendered public survey page with security and performance optimizations\n+ \"\"\"\n+ if request.method == \"OPTIONS\":\n+ return cors_response(request, HttpResponse(\"\"))\n+\n+ # Input validation\n+ if not is_valid_uuid(survey_id) or len(survey_id) > SURVEY_ID_MAX_LENGTH:\n+ logger.warning(\"survey_page_invalid_id\", survey_id=survey_id)\n+ return render(\n+ request,\n+ \"surveys/error.html\",\n+ {\n+ \"error_title\": \"Invalid Request\",\n+ \"error_message\": \"The requested survey is not available.\",\n+ },\n+ status=400,\n+ )\n+\n+ # Database query with minimal fields and timeout protection\n+ try:\n+ survey = (\n+ Survey.objects.select_related(\"team\")\n+ .only(\"id\", \"name\", \"appearance\", \"archived\", \"is_publicly_shareable\", \"team__id\", \"team__api_token\")\n+ .get(id=survey_id)\n+ )\n+ except Survey.DoesNotExist:\n+ logger.info(\"survey_page_not_found\", survey_id=survey_id)\n+ # Use generic error message to prevent survey ID enumeration\n+ return render(\n+ request,\n+ \"surveys/error.html\",\n+ {\n+ \"error_title\": \"Survey Not Available\",\n+ \"error_message\": \"The requested survey is not available.\",\n+ },\n+ status=404,\n+ )\n+ except Exception as e:\n+ logger.exception(\"survey_page_db_error\", error=str(e), survey_id=survey_id)\n+ capture_exception(e)\n+ return render(\n+ request,\n+ \"surveys/error.html\",\n+ {\n+ \"error_title\": \"Service Unavailable\",\n+ \"error_message\": \"The service is temporarily unavailable. 
Please try again later.\",\n+ },\n+ status=503,\n+ )\n+\n+ survey_is_running = (\n+ survey.start_date is not None and survey.start_date <= datetime.now(UTC) and survey.end_date is None\n+ )\n+\n+ # Check survey availability (combine checks for consistent error message)\n+ if survey.archived or not survey.is_publicly_shareable or not survey_is_running:\n+ logger.info(\n+ \"survey_page_access_denied\",\n+ survey_id=survey_id,\n+ archived=survey.archived,\n+ publicly_shareable=survey.is_publicly_shareable,\n+ )\n+ return render(\n+ request,\n+ \"surveys/error.html\",\n+ {\n+ \"error_title\": \"Survey not receiving responses\",\n+ \"error_message\": \"The requested survey is not receiving responses.\",\n+ },\n+ status=404, # Use 404 instead of 403 to prevent information leakage\n+ )\n+\n+ # Build project config\n+ project_config = {\n+ \"api_host\": request.build_absolute_uri(\"/\").rstrip(\"/\"),\n+ \"token\": survey.team.api_token,\n+ }\n+\n+ if hasattr(survey.team, \"ui_host\") and survey.team.ui_host:\n+ project_config[\"ui_host\"] = survey.team.ui_host\n+\n+ context = {\n+ \"name\": survey.name,\n+ \"id\": survey.id,\n+ \"appearance\": json.dumps(survey.appearance),\n+ \"project_config_json\": json.dumps(project_config),\n+ \"debug\": settings.DEBUG,\n+ }\n+\n+ logger.info(\"survey_page_rendered\", survey_id=survey_id, team_id=survey.team.id)\n+\n+ response = render(request, \"surveys/public_survey.html\", context)\n+\n+ # Security headers\n+ response[\"X-Frame-Options\"] = \"DENY\"\n+ response[\"X-Content-Type-Options\"] = \"nosniff\"\n+ response[\"Referrer-Policy\"] = \"strict-origin-when-cross-origin\"\n+ response[\"Permissions-Policy\"] = \"accelerometer=(), camera=(), microphone=(), geolocation=()\"\n+ response[\"X-XSS-Protection\"] = \"1; mode=block\"\n+\n+ # Cache headers\n+ response[\"Cache-Control\"] = f\"public, max-age={CACHE_TIMEOUT_SECONDS}\"\n+ response[\"Vary\"] = \"Accept-Encoding\" # Enable compression caching",
"repo_full_name": "PostHog/posthog",
"discussion_comments": [
{
"comment_id": "2204227072",
"repo_full_name": "PostHog/posthog",
"pr_number": 33948,
"pr_file": "posthog/api/survey.py",
"discussion_id": "2204227072",
"commented_code": "@@ -1386,9 +1394,178 @@ def surveys(request: Request):\n ),\n )\n \n+ # If survey_id is provided, return individual survey\n+ if survey_id:\n+ try:\n+ survey = Survey.objects.select_related(\"linked_flag\", \"targeting_flag\", \"internal_targeting_flag\").get(\n+ id=survey_id, team=team\n+ )\n+ except Survey.DoesNotExist:\n+ return cors_response(\n+ request,\n+ generate_exception_response(\n+ \"surveys\",\n+ \"Survey not found.\",\n+ type=\"not_found\",\n+ code=\"survey_not_found\",\n+ status_code=status.HTTP_404_NOT_FOUND,\n+ ),\n+ )\n+\n+ # Check if survey is archived\n+ if survey.archived:\n+ return cors_response(\n+ request,\n+ generate_exception_response(\n+ \"surveys\",\n+ \"This survey is no longer available.\",\n+ type=\"not_found\",\n+ code=\"survey_archived\",\n+ status_code=status.HTTP_404_NOT_FOUND,\n+ ),\n+ )\n+\n+ # Return individual survey response\n+ serialized_survey = SurveyAPISerializer(survey).data\n+ response_data = {\n+ \"survey\": serialized_survey,\n+ \"project_config\": {\n+ \"api_host\": request.build_absolute_uri(\"/\").rstrip(\"/\"),\n+ \"token\": team.api_token,\n+ },\n+ }\n+ return cors_response(request, JsonResponse(response_data))\n+\n+ # Return all surveys (existing behavior)\n return cors_response(request, JsonResponse(get_surveys_response(team)))\n \n \n+# Constants for better maintainability\n+logger = structlog.get_logger(__name__)\n+SURVEY_ID_MAX_LENGTH = 50\n+CACHE_TIMEOUT_SECONDS = 300\n+\n+\n+def is_valid_uuid(uuid_string: str) -> bool:\n+ \"\"\"Validate if a string is a valid UUID format.\"\"\"\n+ try:\n+ uuid.UUID(uuid_string)\n+ return True\n+ except (ValueError, TypeError):\n+ return False\n+\n+\n+@csrf_exempt\n+@axes_dispatch\n+def public_survey_page(request, survey_id: str):\n+ \"\"\"\n+ Server-side rendered public survey page with security and performance optimizations\n+ \"\"\"\n+ if request.method == \"OPTIONS\":\n+ return cors_response(request, HttpResponse(\"\"))\n+\n+ # Input validation\n+ if not is_valid_uuid(survey_id) or len(survey_id) > SURVEY_ID_MAX_LENGTH:\n+ logger.warning(\"survey_page_invalid_id\", survey_id=survey_id)\n+ return render(\n+ request,\n+ \"surveys/error.html\",\n+ {\n+ \"error_title\": \"Invalid Request\",\n+ \"error_message\": \"The requested survey is not available.\",\n+ },\n+ status=400,\n+ )\n+\n+ # Database query with minimal fields and timeout protection\n+ try:\n+ survey = (\n+ Survey.objects.select_related(\"team\")\n+ .only(\"id\", \"name\", \"appearance\", \"archived\", \"is_publicly_shareable\", \"team__id\", \"team__api_token\")\n+ .get(id=survey_id)\n+ )\n+ except Survey.DoesNotExist:\n+ logger.info(\"survey_page_not_found\", survey_id=survey_id)\n+ # Use generic error message to prevent survey ID enumeration\n+ return render(\n+ request,\n+ \"surveys/error.html\",\n+ {\n+ \"error_title\": \"Survey Not Available\",\n+ \"error_message\": \"The requested survey is not available.\",\n+ },\n+ status=404,\n+ )\n+ except Exception as e:\n+ logger.exception(\"survey_page_db_error\", error=str(e), survey_id=survey_id)\n+ capture_exception(e)\n+ return render(\n+ request,\n+ \"surveys/error.html\",\n+ {\n+ \"error_title\": \"Service Unavailable\",\n+ \"error_message\": \"The service is temporarily unavailable. 
Please try again later.\",\n+ },\n+ status=503,\n+ )\n+\n+ survey_is_running = (\n+ survey.start_date is not None and survey.start_date <= datetime.now(UTC) and survey.end_date is None\n+ )\n+\n+ # Check survey availability (combine checks for consistent error message)\n+ if survey.archived or not survey.is_publicly_shareable or not survey_is_running:\n+ logger.info(\n+ \"survey_page_access_denied\",\n+ survey_id=survey_id,\n+ archived=survey.archived,\n+ publicly_shareable=survey.is_publicly_shareable,\n+ )\n+ return render(\n+ request,\n+ \"surveys/error.html\",\n+ {\n+ \"error_title\": \"Survey not receiving responses\",\n+ \"error_message\": \"The requested survey is not receiving responses.\",\n+ },\n+ status=404, # Use 404 instead of 403 to prevent information leakage\n+ )\n+\n+ # Build project config\n+ project_config = {\n+ \"api_host\": request.build_absolute_uri(\"/\").rstrip(\"/\"),\n+ \"token\": survey.team.api_token,\n+ }\n+\n+ if hasattr(survey.team, \"ui_host\") and survey.team.ui_host:\n+ project_config[\"ui_host\"] = survey.team.ui_host\n+\n+ context = {\n+ \"name\": survey.name,\n+ \"id\": survey.id,\n+ \"appearance\": json.dumps(survey.appearance),\n+ \"project_config_json\": json.dumps(project_config),\n+ \"debug\": settings.DEBUG,\n+ }\n+\n+ logger.info(\"survey_page_rendered\", survey_id=survey_id, team_id=survey.team.id)\n+\n+ response = render(request, \"surveys/public_survey.html\", context)\n+\n+ # Security headers\n+ response[\"X-Frame-Options\"] = \"DENY\"\n+ response[\"X-Content-Type-Options\"] = \"nosniff\"\n+ response[\"Referrer-Policy\"] = \"strict-origin-when-cross-origin\"\n+ response[\"Permissions-Policy\"] = \"accelerometer=(), camera=(), microphone=(), geolocation=()\"\n+ response[\"X-XSS-Protection\"] = \"1; mode=block\"\n+\n+ # Cache headers\n+ response[\"Cache-Control\"] = f\"public, max-age={CACHE_TIMEOUT_SECONDS}\"\n+ response[\"Vary\"] = \"Accept-Encoding\" # Enable compression caching",
"comment_created_at": "2025-07-14T08:38:36+00:00",
"comment_author": "marandaneto",
"comment_body": "do we need all of that? do we set this elsewhere or how did we come up with those headers? just curious since i dont know much about cors, etc",
"pr_file_module": null
},
{
"comment_id": "2206040319",
"repo_full_name": "PostHog/posthog",
"pr_number": 33948,
"pr_file": "posthog/api/survey.py",
"discussion_id": "2204227072",
"commented_code": "@@ -1386,9 +1394,178 @@ def surveys(request: Request):\n ),\n )\n \n+ # If survey_id is provided, return individual survey\n+ if survey_id:\n+ try:\n+ survey = Survey.objects.select_related(\"linked_flag\", \"targeting_flag\", \"internal_targeting_flag\").get(\n+ id=survey_id, team=team\n+ )\n+ except Survey.DoesNotExist:\n+ return cors_response(\n+ request,\n+ generate_exception_response(\n+ \"surveys\",\n+ \"Survey not found.\",\n+ type=\"not_found\",\n+ code=\"survey_not_found\",\n+ status_code=status.HTTP_404_NOT_FOUND,\n+ ),\n+ )\n+\n+ # Check if survey is archived\n+ if survey.archived:\n+ return cors_response(\n+ request,\n+ generate_exception_response(\n+ \"surveys\",\n+ \"This survey is no longer available.\",\n+ type=\"not_found\",\n+ code=\"survey_archived\",\n+ status_code=status.HTTP_404_NOT_FOUND,\n+ ),\n+ )\n+\n+ # Return individual survey response\n+ serialized_survey = SurveyAPISerializer(survey).data\n+ response_data = {\n+ \"survey\": serialized_survey,\n+ \"project_config\": {\n+ \"api_host\": request.build_absolute_uri(\"/\").rstrip(\"/\"),\n+ \"token\": team.api_token,\n+ },\n+ }\n+ return cors_response(request, JsonResponse(response_data))\n+\n+ # Return all surveys (existing behavior)\n return cors_response(request, JsonResponse(get_surveys_response(team)))\n \n \n+# Constants for better maintainability\n+logger = structlog.get_logger(__name__)\n+SURVEY_ID_MAX_LENGTH = 50\n+CACHE_TIMEOUT_SECONDS = 300\n+\n+\n+def is_valid_uuid(uuid_string: str) -> bool:\n+ \"\"\"Validate if a string is a valid UUID format.\"\"\"\n+ try:\n+ uuid.UUID(uuid_string)\n+ return True\n+ except (ValueError, TypeError):\n+ return False\n+\n+\n+@csrf_exempt\n+@axes_dispatch\n+def public_survey_page(request, survey_id: str):\n+ \"\"\"\n+ Server-side rendered public survey page with security and performance optimizations\n+ \"\"\"\n+ if request.method == \"OPTIONS\":\n+ return cors_response(request, HttpResponse(\"\"))\n+\n+ # Input validation\n+ if not is_valid_uuid(survey_id) or len(survey_id) > SURVEY_ID_MAX_LENGTH:\n+ logger.warning(\"survey_page_invalid_id\", survey_id=survey_id)\n+ return render(\n+ request,\n+ \"surveys/error.html\",\n+ {\n+ \"error_title\": \"Invalid Request\",\n+ \"error_message\": \"The requested survey is not available.\",\n+ },\n+ status=400,\n+ )\n+\n+ # Database query with minimal fields and timeout protection\n+ try:\n+ survey = (\n+ Survey.objects.select_related(\"team\")\n+ .only(\"id\", \"name\", \"appearance\", \"archived\", \"is_publicly_shareable\", \"team__id\", \"team__api_token\")\n+ .get(id=survey_id)\n+ )\n+ except Survey.DoesNotExist:\n+ logger.info(\"survey_page_not_found\", survey_id=survey_id)\n+ # Use generic error message to prevent survey ID enumeration\n+ return render(\n+ request,\n+ \"surveys/error.html\",\n+ {\n+ \"error_title\": \"Survey Not Available\",\n+ \"error_message\": \"The requested survey is not available.\",\n+ },\n+ status=404,\n+ )\n+ except Exception as e:\n+ logger.exception(\"survey_page_db_error\", error=str(e), survey_id=survey_id)\n+ capture_exception(e)\n+ return render(\n+ request,\n+ \"surveys/error.html\",\n+ {\n+ \"error_title\": \"Service Unavailable\",\n+ \"error_message\": \"The service is temporarily unavailable. 
Please try again later.\",\n+ },\n+ status=503,\n+ )\n+\n+ survey_is_running = (\n+ survey.start_date is not None and survey.start_date <= datetime.now(UTC) and survey.end_date is None\n+ )\n+\n+ # Check survey availability (combine checks for consistent error message)\n+ if survey.archived or not survey.is_publicly_shareable or not survey_is_running:\n+ logger.info(\n+ \"survey_page_access_denied\",\n+ survey_id=survey_id,\n+ archived=survey.archived,\n+ publicly_shareable=survey.is_publicly_shareable,\n+ )\n+ return render(\n+ request,\n+ \"surveys/error.html\",\n+ {\n+ \"error_title\": \"Survey not receiving responses\",\n+ \"error_message\": \"The requested survey is not receiving responses.\",\n+ },\n+ status=404, # Use 404 instead of 403 to prevent information leakage\n+ )\n+\n+ # Build project config\n+ project_config = {\n+ \"api_host\": request.build_absolute_uri(\"/\").rstrip(\"/\"),\n+ \"token\": survey.team.api_token,\n+ }\n+\n+ if hasattr(survey.team, \"ui_host\") and survey.team.ui_host:\n+ project_config[\"ui_host\"] = survey.team.ui_host\n+\n+ context = {\n+ \"name\": survey.name,\n+ \"id\": survey.id,\n+ \"appearance\": json.dumps(survey.appearance),\n+ \"project_config_json\": json.dumps(project_config),\n+ \"debug\": settings.DEBUG,\n+ }\n+\n+ logger.info(\"survey_page_rendered\", survey_id=survey_id, team_id=survey.team.id)\n+\n+ response = render(request, \"surveys/public_survey.html\", context)\n+\n+ # Security headers\n+ response[\"X-Frame-Options\"] = \"DENY\"\n+ response[\"X-Content-Type-Options\"] = \"nosniff\"\n+ response[\"Referrer-Policy\"] = \"strict-origin-when-cross-origin\"\n+ response[\"Permissions-Policy\"] = \"accelerometer=(), camera=(), microphone=(), geolocation=()\"\n+ response[\"X-XSS-Protection\"] = \"1; mode=block\"\n+\n+ # Cache headers\n+ response[\"Cache-Control\"] = f\"public, max-age={CACHE_TIMEOUT_SECONDS}\"\n+ response[\"Vary\"] = \"Accept-Encoding\" # Enable compression caching",
"comment_created_at": "2025-07-15T00:29:28+00:00",
"comment_author": "lucasheriques",
"comment_body": "we actually only need the `X-Frame-Options` to prevent our survey to be shown on iframes, since it's a potential liability and we don't have a need for that. will remove the others",
"pr_file_module": null
}
]
}
]