Backtracking unknown escape sequences, various tests for XTermParser

2025-10-17 02:38:12 +03:00 · 2022-06-09 16:27:53 +01:00
parent fe151a7f25
commit bfb962bacf
4 changed files with 190 additions and 6 deletions
--- a/src/textual/_parser.py
+++ b/src/textual/_parser.py
@@ -166,7 +166,6 @@ if __name__ == "__main__":
        def parse(
            self, on_token: Callable[[str], None]
        ) -> Generator[Awaitable, str, None]:
-            data = yield self.read1()
            while True:
                data = yield self.read1()
                if not data:
--- a/src/textual/_xterm_parser.py
+++ b/src/textual/_xterm_parser.py
@@ -2,6 +2,7 @@ from __future__ import annotations


 import re
+from collections import deque
 from typing import Any, Callable, Generator, Iterable

 from . import messages
@@ -10,6 +11,11 @@ from ._types import MessageTarget
 from ._parser import Awaitable, Parser, TokenCallback
 from ._ansi_sequences import ANSI_SEQUENCES_KEYS

+# When trying to determine whether the current sequence is a supported/valid
+# escape sequence, at which length should we give up and consider our search
+# to be unsuccessful?
+_MAX_SEQUENCE_SEARCH_THRESHOLD = 20
+
 _re_mouse_event = re.compile("^" + re.escape("\x1b[") + r"(<?[\d;]+[mM]|M...)\Z")
 _re_terminal_mode_response = re.compile(
    "^" + re.escape("\x1b[") + r"\?(?P<mode_id>\d+);(?P<setting_parameter>\d)\$y"
@@ -30,7 +36,7 @@ class XTermParser(Parser[events.Event]):
        self.last_x = 0
        self.last_y = 0

-        self._debug_log_file = open("keys.log", "wt") if debug else None
+        self._debug_log_file = open("keys.log", "wt")

        super().__init__()

@@ -105,7 +111,6 @@ class XTermParser(Parser[events.Event]):

            character = yield read1()

-            # If we're currently performing a bracketed paste,
            if bracketed_paste:
                paste_buffer.append(character)
                self.debug_log(f"paste_buffer={paste_buffer!r}")
@@ -130,7 +135,28 @@ class XTermParser(Parser[events.Event]):
                            continue

                while True:
-                    sequence += yield read1()
+                    # If we look ahead and see an escape, or the sequence is too long,
+                    # we've failed to find an escape sequence and should reissue the
+                    # characters up till this point as individual key presses.
+                    buffer = yield self.peek_buffer()
+
+                    if (
+                        buffer
+                        and buffer[0] == ESC
+                        or len(sequence) > _MAX_SEQUENCE_SEARCH_THRESHOLD
+                    ):
+                        for character in sequence:
+                            keys = get_key_ansi_sequence(character, None)
+                            if keys is not None:
+                                for key in keys:
+                                    on_token(events.Key(self.sender, key=key))
+                            else:
+                                on_token(events.Key(self.sender, key=character))
+                        break
+
+                    sequence_character = yield read1()
+                    sequence += sequence_character
+
                    self.debug_log(f"sequence={sequence!r}")

                    # Firstly, check if it's a bracketed paste escape code
@@ -161,6 +187,7 @@ class XTermParser(Parser[events.Event]):
                        mouse_match = _re_mouse_event.match(sequence)
                        if mouse_match is not None:
                            mouse_code = mouse_match.group(0)
+                            print(mouse_code)
                            event = self.parse_mouse_code(mouse_code, self.sender)
                            if event:
                                on_token(event)
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -13,12 +13,10 @@ def test_read1():
                on_token(data)

    test_parser = TestParser()
-
    test_data = "Where there is a Will there is a way!"

    for size in range(1, len(test_data) + 1):
        # Feed the parser in pieces, first 1 character at a time, then 2, etc
-        test_parser = TestParser()
        data = []
        for offset in range(0, len(test_data), size):
            for chunk in test_parser.feed(test_data[offset : offset + size]):
--- a/tests/test_xterm_parser.py
+++ b/tests/test_xterm_parser.py
@@ -0,0 +1,160 @@
+from unittest import mock
+
+import pytest
+
+from textual._xterm_parser import XTermParser
+from textual.events import Paste, Key, MouseDown, MouseUp, MouseMove
+
+
+@pytest.fixture
+def parser():
+    return XTermParser(sender=mock.sentinel, more_data=lambda: False)
+
+
+def test_bracketed_paste(parser):
+    """ When bracketed paste mode is enabled in the terminal emulator and
+    the user pastes in some text, it will surround the pasted input
+    with the escape codes "\x1b[200~" and "\x1b[201~". The text between
+    these codes corresponds to a single `Paste` event in Textual.
+    """
+    pasted_text = "PASTED"
+    events = list(parser.feed(f"\x1b[200~{pasted_text}\x1b[201~"))
+
+    assert len(events) == 1
+    assert isinstance(events[0], Paste)
+    assert events[0].text == pasted_text
+    assert events[0].sender == mock.sentinel
+
+
+def test_bracketed_paste_content_contains_escape_codes(parser):
+    """When performing a bracketed paste, if the pasted content contains
+    supported ANSI escape sequences, they should not interfere with the paste,
+    and no escape sequences within the bracketed paste should be converted
+    into Textual events.
+    """
+    pasted_text = "PAS\x0fTED"
+    events = list(parser.feed(f"\x1b[200~{pasted_text}\x1b[201~"))
+    assert len(events) == 1
+    assert events[0].text == pasted_text
+
+
+def test_cant_match_escape_sequence_too_long(parser):
+    """ The sequence did not match, and we hit the maximum sequence search
+    length threshold, so each character should be issued as a key-press instead.
+    """
+    sequence = "\x1b[123456789123456789123"
+    events = list(parser.feed(sequence))
+
+    # Every character in the sequence is converted to a key press
+    assert len(events) == len(sequence)
+    assert all(isinstance(event, Key) for event in events)
+
+    # '\x1b' is translated to 'escape'
+    assert events[0].key == "escape"
+
+    # The rest of the characters correspond to the expected key presses
+    events = events[1:]
+    for index, character in enumerate(sequence[1:]):
+        assert events[index].key == character
+
+
+def test_unknown_sequence_followed_by_known_sequence(parser):
+    """When we feed the parser an unknown sequence followed by a known
+    sequence, the characters in the unknown sequence are delivered as keys,
+    and the known escape sequence that follows is delivered as expected.
+    """
+    unknown_sequence = "\x1b[?"
+    known_sequence = "\x1b[8~"  # key = 'end'
+
+    sequence = unknown_sequence + known_sequence
+    events = parser.feed(sequence)
+
+    assert next(events).key == "escape"
+    assert next(events).key == "["
+    assert next(events).key == "?"
+    assert next(events).key == "end"
+
+    with pytest.raises(StopIteration):
+        next(events)
+
+
+def test_simple_key_presses_all_delivered_correct_order(parser):
+    sequence = "123abc"
+    events = parser.feed(sequence)
+    assert "".join(event.key for event in events) == sequence
+
+
+def test_key_presses_and_escape_sequence_mixed(parser):
+    sequence = "abc\x1b[13~123"
+    events = list(parser.feed(sequence))
+
+    assert len(events) == 7
+    assert "".join(event.key for event in events) == "abcf3123"
+
+
+def test_single_escape(parser):
+    """A single \x1b should be interpreted as a single press of the Escape key"""
+    events = parser.feed("\x1b")
+    assert [event.key for event in events] == ["escape"]
+
+
+def test_double_escape(parser):
+    """Windows Terminal writes double ESC when the user presses the Escape key once."""
+    events = parser.feed("\x1b\x1b")
+    assert [event.key for event in events] == ["escape"]
+
+
+@pytest.mark.parametrize("sequence, event_type, shift, meta", [
+    # Mouse down, with and without modifiers
+    ("\x1b[<0;50;25M", MouseDown, False, False),
+    ("\x1b[<4;50;25M", MouseDown, True, False),
+    ("\x1b[<8;50;25M", MouseDown, False, True),
+    # Mouse up, with and without modifiers
+    ("\x1b[<0;50;25m", MouseUp, False, False),
+    ("\x1b[<4;50;25m", MouseUp, True, False),
+    ("\x1b[<8;50;25m", MouseUp, False, True),
+])
+def test_mouse_click(parser, sequence, event_type, shift, meta):
+    """ANSI codes for mouse should be converted to Textual events"""
+    events = list(parser.feed(sequence))
+
+    assert len(events) == 1
+
+    event = events[0]
+
+    assert isinstance(event, event_type)
+    assert event.x == 49
+    assert event.y == 24
+    assert event.screen_x == 49
+    assert event.screen_y == 24
+    assert event.meta is meta
+    assert event.shift is shift
+
+
+@pytest.mark.parametrize("sequence, shift, meta, button", [
+    ("\x1b[<32;15;38M", False, False, 1),  # Click and drag
+    ("\x1b[<35;15;38M", False, False, 0),  # Basic cursor movement
+    ("\x1b[<39;15;38M", True, False, 0),   # Shift held down
+    ("\x1b[<43;15;38M", False, True, 0),   # Meta held down
+])
+def test_mouse_move(parser, sequence, shift, meta, button):
+    events = list(parser.feed(sequence))
+
+    assert len(events) == 1
+
+    event = events[0]
+
+    assert isinstance(event, MouseMove)
+    assert event.x == 14
+    assert event.y == 37
+    assert event.shift is shift
+    assert event.meta is meta
+    assert event.button == button
+
+
+def test_escape_sequence_resulting_in_multiple_keypresses(parser):
+    """Some sequences are interpreted as more than 1 keypress"""
+    events = list(parser.feed("\x1b[2;4~"))
+    assert len(events) == 2
+    assert events[0].key == "escape"
+    assert events[1].key == "shift+insert"