Merge pull request #250 from Textualize/css-variables-tokenise

Tokenise CSS variables
This commit is contained in:
Darren Burns
2022-02-02 09:16:20 +00:00
committed by GitHub
4 changed files with 200 additions and 34 deletions

View File

@@ -6,23 +6,62 @@ from typing import Iterable
from textual.css.tokenizer import Expect, Tokenizer, Token
# Regular-expression fragments shared by the tokenizer expectations below.

# Opening delimiter of a comment: the two characters "/*".
COMMENT_START = r"\/\*"
# Optionally negative number immediately followed by a unit
# (fr, %, w, h, vw or vh).
SCALAR = r"\-?\d+\.?\d*(?:fr|%|w|h|vw|vh)"
# Unsigned number immediately followed by a time unit (ms or s).
DURATION = r"\d+\.?\d*(?:ms|s)"
# Optionally negative integer or decimal number with no unit.
NUMBER = r"\-?\d+\.?\d*"
# Six-digit hex colour ("#rrggbb"), "color(N)" with one to three digits,
# or "rgb(r, g, b)" with at most one whitespace character after each comma.
COLOR = r"\#[0-9a-fA-F]{6}|color\([0-9]{1,3}\)|rgb\(\d{1,3}\,\s?\d{1,3}\,\s?\d{1,3}\)"
# "name=value" pair; the value may contain letters, digits, "_", "-" and "/".
KEY_VALUE = r"[a-zA-Z_-][a-zA-Z0-9_-]*=[0-9a-zA-Z_\-\/]+"
# Bare word made only of letters, underscores and hyphens.
TOKEN = "[a-zA-Z_-]+"
# Double-quoted string, matched non-greedily up to the next double quote.
STRING = r"\".*?\""
# Variable reference: "$" followed by letters, digits, "_" or "-".
VARIABLE_REF = r"\$[a-zA-Z0-9_-]+"
expect_selector = Expect(
# Token name -> pattern for each kind of value permitted in a declaration.
# Entries keep the order written here: "scalar" first, "variable_ref" last.
DECLARATION_VALUES = dict(
    scalar=SCALAR,
    duration=DURATION,
    number=NUMBER,
    color=COLOR,
    key_value=KEY_VALUE,
    token=TOKEN,
    string=STRING,
    variable_ref=VARIABLE_REF,
)
# The tokenizer's "expectation" while at the root/highest level of scope
# in the CSS file. At this level we might expect to see selectors, comments,
# variable definitions etc.
expect_root_scope = Expect(
    whitespace=r"\s+",
    # Passed exactly once: repeating a keyword argument is a SyntaxError.
    comment_start=COMMENT_START,
    selector_start_id=r"\#[a-zA-Z_\-][a-zA-Z0-9_\-]*",
    selector_start_class=r"\.[a-zA-Z_\-][a-zA-Z0-9_\-]*",
    selector_start_universal=r"\*",
    selector_start=r"[a-zA-Z_\-]+",
    # A variable definition starts with a variable reference followed by ":".
    variable_name=f"{VARIABLE_REF}:",
).expect_eof(True)
# After a variable declaration e.g. "$warning-text: TOKENS;"
#              for tokenizing variable value ------^~~~~~^
expect_variable_value = Expect(
    comment_start=COMMENT_START,
    whitespace=r"\s+",
    # Consume everything up to a ";", a newline or the start of a comment.
    # COMMENT_START is a two-character sequence, so it is excluded with a
    # negative lookahead. Interpolating it into the negated character class
    # would instead reject every lone "/" or "*" inside a value.
    variable_value=rf"(?:(?!{COMMENT_START})[^;\n])+",
)
# What may follow a variable value: a ";" or a line break.
# expect_eof(True) is also set on this expectation.
expect_variable_value_end = Expect(variable_value_end=r"\n|;").expect_eof(True)

# The closing delimiter of a comment ("*/"), escaped so it is matched literally.
expect_comment_end = Expect(comment_end=re.escape("*/"))
# After we come across a selector in CSS e.g. ".my-class", we may
# find other selectors, pseudo-classes... e.g. ".my-class :hover"
expect_selector_continue = Expect(
whitespace=r"\s+",
comment_start=r"\/\*",
comment_start=COMMENT_START,
pseudo_class=r"\:[a-zA-Z_-]+",
selector_id=r"\#[a-zA-Z_\-][a-zA-Z0-9_\-]*",
selector_class=r"\.[a-zA-Z_\-][a-zA-Z0-9_\-]*",
@@ -33,31 +72,29 @@ expect_selector_continue = Expect(
declaration_set_start=r"\{",
)
# A declaration e.g. "text: red;"
#                     ^---^
expect_declaration = Expect(
    whitespace=r"\s+",
    # Passed exactly once: repeating a keyword argument is a SyntaxError.
    comment_start=COMMENT_START,
    # The declaration name includes its trailing colon.
    declaration_name=r"[a-zA-Z_\-]+\:",
    declaration_set_end=r"\}",
)
# Same token patterns as a declaration inside a rule set, with
# expect_eof(True) additionally set on the expectation.
expect_declaration_solo = Expect(
    whitespace=r"\s+",
    # Passed exactly once: repeating a keyword argument is a SyntaxError.
    comment_start=COMMENT_START,
    # The declaration name includes its trailing colon.
    declaration_name=r"[a-zA-Z_\-]+\:",
    declaration_set_end=r"\}",
).expect_eof(True)
# The value(s)/content from a declaration e.g. "text: red;"
# ^---^
expect_declaration_content = Expect(
declaration_end=r"\n|;",
whitespace=r"\s+",
comment_start=r"\/\*",
scalar=r"\-?\d+\.?\d*(?:fr|%|w|h|vw|vh)",
duration=r"\d+\.?\d*(?:ms|s)",
number=r"\-?\d+\.?\d*",
color=r"\#[0-9a-fA-F]{6}|color\([0-9]{1,3}\)|rgb\(\d{1,3}\,\s?\d{1,3}\,\s?\d{1,3}\)",
key_value=r"[a-zA-Z_-][a-zA-Z0-9_-]*=[0-9a-zA-Z_\-\/]+",
token="[a-zA-Z_-]+",
string=r"\".*?\"",
comment_start=COMMENT_START,
**DECLARATION_VALUES,
important=r"\!important",
comma=",",
declaration_set_end=r"\}",
@@ -65,8 +102,22 @@ expect_declaration_content = Expect(
class TokenizerState:
EXPECT = expect_selector
"""State machine for the tokenizer.
Attributes:
EXPECT: The initial expectation of the tokenizer. Since we start tokenizing
at the root scope, we might expect to see either a variable or selector, for example.
STATE_MAP: Maps token names to Expects, defines the sets of valid tokens
that we'd expect to see next, given the current token. For example, if
we've just processed a variable declaration name, we next expect to see
the value of that variable.
"""
EXPECT = expect_root_scope
STATE_MAP = {
"variable_name": expect_variable_value,
"variable_value": expect_variable_value_end,
"variable_value_end": expect_root_scope,
"selector_start": expect_selector_continue,
"selector_start_id": expect_selector_continue,
"selector_start_class": expect_selector_continue,
@@ -77,7 +128,7 @@ class TokenizerState:
"declaration_set_start": expect_declaration,
"declaration_name": expect_declaration_content,
"declaration_end": expect_declaration,
"declaration_set_end": expect_selector,
"declaration_set_end": expect_root_scope,
}
def __call__(self, code: str, path: str) -> Iterable[Token]:
@@ -108,25 +159,6 @@ class DeclarationTokenizerState(TokenizerState):
tokenize = TokenizerState()
tokenize_declarations = DeclarationTokenizerState()
# def tokenize(
# code: str, path: str, *, expect: Expect = expect_selector
# ) -> Iterable[Token]:
# tokenizer = Tokenizer(code, path=path)
# # expect = expect_selector
# get_token = tokenizer.get_token
# get_state = _STATES.get
# while True:
# token = get_token(expect)
# name = token.name
# if name == "comment_start":
# tokenizer.skip_to(expect_comment_end)
# continue
# elif name == "eof":
# break
# expect = get_state(name, expect)
# yield token
if __name__ == "__main__":
css = """#something {
text: on red;

0
tests/css/__init__.py Normal file
View File

134
tests/css/test_tokenize.py Normal file
View File

@@ -0,0 +1,134 @@
import pytest
import textual.css.tokenizer
from textual.css.tokenize import tokenize
from textual.css.tokenizer import Token, TokenizeError
# Variable names (without the leading "$") that the tokenizer must accept.
# Together they place a hyphen, an underscore and a digit at the start, in the
# middle or at the end of the name, and include mixed-case letters.
VALID_VARIABLE_NAMES = [
"warning-text",
"warning_text",
"warningtext1",
"1warningtext",
"WarningText1",
"warningtext_",
"warningtext-",
"_warningtext",
"-warningtext",
]
@pytest.mark.parametrize("name", VALID_VARIABLE_NAMES)
def test_variable_declaration_valid_names(name):
    """Each permitted variable name tokenizes as a ``variable_name`` token."""
    css = f"${name}: black on red;"
    value = "black on red"
    # Columns are derived from the name so the expectations hold for names of
    # any length, not just when every parametrized name is equally long.
    whitespace_column = len(name) + 2  # "$" + name + ":"
    value_column = whitespace_column + 1  # one space follows the colon
    end_column = value_column + len(value)
    assert list(tokenize(css, "")) == [
        Token(
            name="variable_name", value=f"${name}:", path="", code=css, location=(0, 0)
        ),
        Token(
            name="whitespace",
            value=" ",
            path="",
            code=css,
            location=(0, whitespace_column),
        ),
        Token(
            name="variable_value",
            value=value,
            path="",
            code=css,
            location=(0, value_column),
        ),
        Token(
            name="variable_value_end",
            value=";",
            path="",
            code=css,
            location=(0, end_column),
        ),
    ]
def test_variable_declaration_comment_ignored():
    """A comment trailing a variable declaration contributes no tokens."""
    css = "$x: red; /* comment */"

    def tok(name, value, column):
        # Every expected token is on line 0 of the same source string.
        return Token(name=name, value=value, path="", code=css, location=(0, column))

    expected = [
        tok("variable_name", "$x:", 0),
        tok("whitespace", " ", 3),
        tok("variable_value", "red", 4),
        tok("variable_value_end", ";", 7),
        tok("whitespace", " ", 8),
    ]
    assert list(tokenize(css, "")) == expected
def test_variable_declaration_comment_interspersed_raises():
    """A comment in the middle of a variable value is a tokenization error."""
    css = "$x: re/* comment */d;"
    with pytest.raises(TokenizeError):
        # list() drives the tokenizer to completion. No assert is wanted here:
        # the check is that the call raises, not that its result is truthy.
        list(tokenize(css, ""))
def test_variable_declaration_invalid_value_eof():
    """Input that ends before a variable value is supplied raises EOFError."""
    source = "$x:\n"
    # The tokenizer module's own EOFError, not the builtin of the same name.
    expected_error = textual.css.tokenizer.EOFError
    with pytest.raises(expected_error):
        list(tokenize(source, ""))
def test_variable_declaration_no_semicolon():
    """A newline, or the end of input, can end a variable value instead of ';'."""
    css = "$x: 1\n$y: 2"

    def tok(name, value, line, column):
        return Token(
            name=name, value=value, code=css, path="", location=(line, column)
        )

    expected = [
        # First line: "$x: 1" terminated by the newline.
        tok("variable_name", "$x:", 0, 0),
        tok("whitespace", " ", 0, 3),
        tok("variable_value", "1", 0, 4),
        tok("variable_value_end", "\n", 0, 5),
        # Second line: "$y: 2" runs to the end of the input.
        tok("variable_name", "$y:", 1, 0),
        tok("whitespace", " ", 1, 3),
        tok("variable_value", "2", 1, 4),
    ]
    assert list(tokenize(css, "")) == expected
def test_variables_declarations_amongst_rulesets():
    """Variable declarations may appear both before and after a rule set."""
    css = "$x:1; .thing{text:red;} $y:2;"

    def tok(name, value, column):
        # The whole input is a single line, so only the column varies.
        return Token(name=name, value=value, path="", code=css, location=(0, column))

    expected = [
        # "$x:1;"
        tok("variable_name", "$x:", 0),
        tok("variable_value", "1", 3),
        tok("variable_value_end", ";", 4),
        tok("whitespace", " ", 5),
        # ".thing{text:red;}"
        tok("selector_start_class", ".thing", 6),
        tok("declaration_set_start", "{", 12),
        tok("declaration_name", "text:", 13),
        tok("token", "red", 18),
        tok("declaration_end", ";", 21),
        tok("declaration_set_end", "}", 22),
        tok("whitespace", " ", 23),
        # "$y:2;"
        tok("variable_name", "$y:", 24),
        tok("variable_value", "2", 27),
        tok("variable_value_end", ";", 28),
    ]
    assert list(tokenize(css, "")) == expected
def test_variables_reference_in_rule_declaration_value():
    """A ``$name`` inside a declaration value becomes a ``variable_ref`` token."""
    css = ".warn{text: $warning;}"

    def tok(name, value, column):
        return Token(name=name, value=value, path="", code=css, location=(0, column))

    expected = [
        tok("selector_start_class", ".warn", 0),
        tok("declaration_set_start", "{", 5),
        tok("declaration_name", "text:", 6),
        tok("whitespace", " ", 11),
        tok("variable_ref", "$warning", 12),
        tok("declaration_end", ";", 20),
        tok("declaration_set_end", "}", 21),
    ]
    assert list(tokenize(css, "")) == expected
def test_variables_reference_in_rule_declaration_value_multiple():
    """Several references in one declaration value are tokenized separately."""
    css = ".card{padding: $pad-y $pad-x;}"

    def tok(name, value, column):
        return Token(name=name, value=value, path="", code=css, location=(0, column))

    expected = [
        tok("selector_start_class", ".card", 0),
        tok("declaration_set_start", "{", 5),
        tok("declaration_name", "padding:", 6),
        tok("whitespace", " ", 14),
        tok("variable_ref", "$pad-y", 15),
        tok("whitespace", " ", 21),
        tok("variable_ref", "$pad-x", 22),
        tok("declaration_end", ";", 28),
        tok("declaration_set_end", "}", 29),
    ]
    assert list(tokenize(css, "")) == expected
def test_variables_reference_in_variable_declaration():
    """A reference used as a variable's value stays inside ``variable_value``."""
    css = "$x: $y;"

    def tok(name, value, column):
        return Token(name=name, value=value, path="", code=css, location=(0, column))

    expected = [
        tok("variable_name", "$x:", 0),
        tok("whitespace", " ", 3),
        tok("variable_value", "$y", 4),
        tok("variable_value_end", ";", 6),
    ]
    assert list(tokenize(css, "")) == expected
def test_variable_references_in_variable_declaration_multiple():
    """Several references in a variable's value form one ``variable_value``."""
    css = "$x: $y $z\n"
    # Column layout of the input: "$x:" 0-2, " " 3, "$y $z" 4-8, "\n" 9.
    assert list(tokenize(css, "")) == [
        Token(name="variable_name", value="$x:", path="", code=css, location=(0, 0)),
        Token(name="whitespace", value=" ", path="", code=css, location=(0, 3)),
        Token(
            name="variable_value", value="$y $z", path="", code=css, location=(0, 4)
        ),
        # The newline directly follows the five-character value, so it sits at
        # column 9 (4 + 5), not 10.
        Token(
            name="variable_value_end", value="\n", path="", code=css, location=(0, 9)
        ),
    ]