Allow conversion to and from Unicode names.

Throughout the code base, there are a couple of places (noted in #1830) where we take a Unicode name and make it more friendly, or take a 'friendly' name and try to recover the original Unicode name. We add a function to go from 'friendly' to the Unicode name, which tackles the issue highlighted in #1815.

Related issues: #1815, #1830.
This commit is contained in:
Rodrigo Girão Serrão
2023-03-02 17:31:55 +00:00
parent b6b76025d0
commit 7acacf0746

View File

@@ -209,6 +209,37 @@ KEY_NAME_REPLACEMENTS = {
}
# Inverse of KEY_NAME_REPLACEMENTS: maps each replacement name back to the
# name it stands in for.
REPLACED_KEYS = {
    replacement: original for original, replacement in KEY_NAME_REPLACEMENTS.items()
}
# Maps the friendly version of a character key name back to its original
# Unicode name.
# A friendly name is the Unicode name with spaces and dashes both replaced by
# underscores. That step is lossy (an underscore may have been either a space
# or a dash), so the names that contain such characters are listed explicitly
# and the friendly keys are derived from them.
KEY_TO_UNICODE_NAME = {
    unicode_name.lower().replace("-", "_").replace(" ", "_"): unicode_name
    for unicode_name in (
        "EXCLAMATION MARK",
        "QUOTATION MARK",
        "NUMBER SIGN",
        "DOLLAR SIGN",
        "PERCENT SIGN",
        "LEFT PARENTHESIS",
        "RIGHT PARENTHESIS",
        "PLUS SIGN",
        "HYPHEN-MINUS",
        "FULL STOP",
        "LESS-THAN SIGN",
        "EQUALS SIGN",
        "GREATER-THAN SIGN",
        "QUESTION MARK",
        "COMMERCIAL AT",
        "LEFT SQUARE BRACKET",
        "REVERSE SOLIDUS",
        "RIGHT SQUARE BRACKET",
        "CIRCUMFLEX ACCENT",
        "LOW LINE",
        "GRAVE ACCENT",
        "LEFT CURLY BRACKET",
        "VERTICAL LINE",
        "RIGHT CURLY BRACKET",
    )
}
# Some keys have aliases. For example, if you press `ctrl+m` on your keyboard,
# it's treated the same way as if you press `enter`. Key handlers `key_ctrl_m` and
# `key_enter` are both valid in this case.
@@ -233,6 +264,14 @@ KEY_DISPLAY_ALIASES = {
}
def _get_unicode_name_from_key(key: str) -> str:
    """Return the best guess for the Unicode name of the character behind a key.

    Acts as a pseudo-inverse of `_character_to_key`.

    Args:
        key: The friendly key name to convert.

    Returns:
        The entry for `key` in `KEY_TO_UNICODE_NAME` when there is one,
        otherwise `key` converted to upper case.
    """
    try:
        return KEY_TO_UNICODE_NAME[key]
    except KeyError:
        # No explicit mapping for this key: fall back to upper-casing it.
        return key.upper()
def _get_key_aliases(key: str) -> list[str]:
    """Collect every name the given key is known by.

    Args:
        key: The key to look up.

    Returns:
        A new list starting with `key` itself, followed by any aliases
        registered for it in `KEY_ALIASES`.
    """
    registered_aliases = KEY_ALIASES.get(key, [])
    return [key] + registered_aliases
@@ -261,8 +300,10 @@ def _get_key_display(key: str) -> str:
def _character_to_key(character: str) -> str:
"""Convert a single character to a key value."""
assert len(character) == 1
"""Convert a single character to a key value.
This transformation can be undone by the function `_get_unicode_name_from_key`.
"""
if not character.isalnum():
key = unicodedata.name(character).lower().replace("-", "_").replace(" ", "_")
else: