mirror of
https://github.com/omnara-ai/omnara.git
synced 2025-08-12 20:39:09 +03:00
Git diffs (#25)
* git diffs
* cmdline arg for git diff
* git diff validation
* change structure
* add git diff to model
* nits
* revert webhook

---------

Co-authored-by: Kartik Sarangmath <kartiksarangmath@Kartiks-MacBook-Air.local>
This commit is contained in:
@@ -0,0 +1,31 @@
|
||||
"""Add git_diff field to agent_instances
|
||||
|
||||
Revision ID: 4e77ec2a7faa
|
||||
Revises: 84cd4a8c9a18
|
||||
Create Date: 2025-07-21 12:44:28.861822
|
||||
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# Alembic revision identifiers: this migration's id and the id of the
# migration it is applied on top of.
revision: str = "4e77ec2a7faa"
down_revision: Union[str, None] = "84cd4a8c9a18"

# No branch labels and no dependencies on other revisions are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the nullable ``git_diff`` text column to ``agent_instances``."""
    # Nullable so the column can be added without supplying a value for
    # rows that already exist.
    git_diff_column = sa.Column("git_diff", sa.Text(), nullable=True)
    op.add_column("agent_instances", git_diff_column)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the ``git_diff`` column from ``agent_instances`` again."""
    table_name, column_name = "agent_instances", "git_diff"
    op.drop_column(table_name, column_name)
|
||||
@@ -9,9 +9,11 @@ from sqlalchemy.orm import (
|
||||
Mapped, # type: ignore[attr-defined]
|
||||
mapped_column, # type: ignore[attr-defined]
|
||||
relationship,
|
||||
validates,
|
||||
)
|
||||
|
||||
from .enums import AgentStatus
|
||||
from .utils import is_valid_git_diff
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .subscription_models import (
|
||||
@@ -119,6 +121,7 @@ class AgentInstance(Base):
|
||||
status: Mapped[AgentStatus] = mapped_column(default=AgentStatus.ACTIVE)
|
||||
started_at: Mapped[datetime] = mapped_column(default=lambda: datetime.now(UTC))
|
||||
ended_at: Mapped[datetime | None] = mapped_column(default=None)
|
||||
git_diff: Mapped[str | None] = mapped_column(Text, default=None)
|
||||
|
||||
# Relationships
|
||||
user_agent: Mapped["UserAgent"] = relationship(
|
||||
@@ -137,6 +140,20 @@ class AgentInstance(Base):
|
||||
order_by="AgentUserFeedback.created_at",
|
||||
)
|
||||
|
||||
@validates("git_diff")
def validate_git_diff(self, key, value):
    """Validate a value being assigned to the ``git_diff`` attribute.

    Args:
        key: Name of the attribute being validated (``"git_diff"``); unused.
        value: The new value for the attribute.

    Returns:
        ``value`` unchanged when it is acceptable.

    Raises:
        ValueError: If ``value`` is neither None, an empty string, nor a
            string accepted by ``is_valid_git_diff``.
    """
    # None means "no diff stored". An empty string is what
    # sanitize_git_diff() returns to clear a stored diff, so it must be
    # accepted here too; is_valid_git_diff("") is False and would
    # otherwise make clearing the diff raise.
    if value is None or value == "":
        return value

    if not is_valid_git_diff(value):
        raise ValueError("Invalid git diff format. Must be a valid unified diff.")

    return value
|
||||
|
||||
|
||||
class AgentStep(Base):
|
||||
__tablename__ = "agent_steps"
|
||||
|
||||
88
shared/database/utils.py
Normal file
88
shared/database/utils.py
Normal file
@@ -0,0 +1,88 @@
|
||||
"""Database utility functions."""
|
||||
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def is_valid_git_diff(diff: Optional[str]) -> bool:
    """Return whether a string is structured like ``git diff`` output.

    This is a structural check, not a full parse. A string is accepted
    when it contains all of:

    1. a ``diff --git`` header line,
    2. an ``index <hash>..<hash>`` line OR a ``new file mode`` line
       (new/untracked files might not have an index line),
    3. both ``--- `` and ``+++ `` file marker lines,
    4. at least one ``@@ ... @@`` hunk header, and
    5. at least one hunk body line (starting with ``+``, ``-`` or a
       space) after a hunk header.

    Args:
        diff: The string to validate as a git diff.

    Returns:
        True if ``diff`` has valid git diff structure. False for None,
        non-string values, the empty string, and any other text.
    """
    if not diff or not isinstance(diff, str):
        return False

    def has_line(pattern: str) -> bool:
        # All patterns are anchored with ^ and matched per line.
        return re.search(pattern, diff, re.MULTILINE) is not None

    hunk_header = r"@@[ \-\+,0-9]+@@"

    has_structure = (
        has_line(r"^diff --git")
        and (
            has_line(r"^index [a-f0-9]+\.\.[a-f0-9]+")
            or has_line(r"^new file mode")
        )
        and has_line(r"^--- ")
        and has_line(r"^\+\+\+ ")
        and has_line("^" + hunk_header)
    )
    if not has_structure:
        return False

    # Require real hunk content. Testing for *any* line starting with
    # "+", "-" or " " is not enough: the mandatory "--- " / "+++ " file
    # markers always satisfy it. Only lines that follow a hunk header
    # (and precede the next file's "diff --git" header) are counted.
    in_hunk = False
    for line in diff.split("\n"):
        if re.match(hunk_header, line):
            in_hunk = True
        elif line.startswith("diff --git"):
            in_hunk = False
        elif in_hunk and line[:1] in (" ", "+", "-"):
            return True

    return False
|
||||
|
||||
|
||||
def sanitize_git_diff(diff: Optional[str]) -> Optional[str]:
    """Normalize a git diff string before it is stored.

    Args:
        diff: Raw diff text, or None when no update is requested.

    Returns:
        - None if ``diff`` is None or is rejected by ``is_valid_git_diff``
        - Empty string if ``diff`` is empty or whitespace-only
          (clears the git diff)
        - Otherwise the whitespace-stripped diff, cut short and suffixed
          with a truncation marker when it is longer than 1MB
          (measured with ``len``, i.e. in characters)
    """
    if diff is None:
        return None

    cleaned = diff.strip()

    # Nothing left after stripping: a valid request to clear the diff.
    if cleaned == "":
        return ""

    if not is_valid_git_diff(cleaned):
        return None

    # Cap the stored size to prevent abuse.
    size_cap = 1024 * 1024  # 1MB
    if len(cleaned) <= size_cap:
        return cleaned

    # Leave headroom below the cap for the marker appended here.
    return cleaned[: size_cap - 100] + "\n\n... [TRUNCATED - DIFF TOO LARGE] ..."
|
||||
Reference in New Issue
Block a user