1
0
mirror of https://github.com/jfilter/clean-text.git synced 2021-09-19 22:32:58 +03:00

improve phone regex (fix #10)

This commit is contained in:
Johannes Filter
2020-10-16 00:18:06 +02:00
parent d32aa94ad2
commit 335ab64820
2 changed files with 24 additions and 4 deletions

View File

@@ -42,8 +42,9 @@ EMAIL_REGEX = re.compile(
flags=re.IGNORECASE | re.UNICODE,
)
# for more information: https://github.com/jfilter/clean-text/issues/10
# Compiled pattern used to find phone numbers in free text.
# NOTE(review): two pattern literals are visible here. The first looks like
# the earlier, US-style pattern and the second like its replacement -- confirm
# that only the second remains in the real module, because Python joins
# adjacent string literals into a single pattern.
PHONE_REGEX = re.compile(
    r"(?:^|(?<=[^\w)]))(\+?1[ .-]?)?(\(?\d{3}\)?[ .-]?)?(\d{3}[ .-]?\d{4})(\s?(?:ext\.?|[#x-])\s?\d{2,6})?(?:$|(?=\W))"
    # Second literal: optional "+"/0/1 or "+NN" prefix, optional 3-4 digit
    # area part (optionally parenthesised, optionally followed by "/"),
    # a 3+4 digit number and an optional extension; OR the fallback
    # alternative "4-5 digits, one separator, 6-9 digits".
    # In the fallback the hyphen is placed last in the class so it is a
    # literal "-"; written as "[ .-/]" it formed the range "." to "/" and
    # never matched a hyphen-separated number.
    r"((?:^|(?<=[^\w)]))(((\+?[01])|(\+\d{2}))[ .-]?)?(\(?\d{3,4}\)?/?[ .-]?)?(\d{3}[ .-]?\d{4})(\s?(?:ext\.?|[#x-])\s?\d{2,6})?(?:$|(?=\W)))|\+?\d{4,5}[ ./-]\d{6,9}"
)
NUMBERS_REGEX = re.compile(

View File

@@ -36,10 +36,29 @@ def test_replace_emails():
assert cleantext.replace_emails(text, "*EMAIL*") == proc_text
# Sample inputs for the phone-number replacement test; each entry is expected
# to be replaced completely (no digit may survive the replacement).
phone_numbers = [
    # "+" prefix with space-separated groups
    "+49 123 1548690",
    # hyphen-separated groups
    "555-123-4567",
    # same digits without a "+" prefix: with/without leading zero,
    # unseparated, space-separated and slash-separated
    "2404 9099130",
    "024049099130",
    "02404 9099130",
    "02404/9099130",
    # same digits behind a "+49" prefix
    "+492404 9099130",
    "+4924049099130",
    "+492404/9099130",
    # four-digit leading part followed by a nine-digit part
    "0160 123456789",
    "0160/123456789",
    "+32160 123456789",
    # number preceded by a textual label
    "Tel.: 0160 123456789",
]
def test_replace_phone_numbers():
    """Verify that ``cleantext.replace_phone_numbers`` masks phone numbers.

    First checks a number embedded in a sentence against the exact expected
    output, then checks every entry of ``phone_numbers``: the result must
    contain the placeholder text and must not contain any digit.
    """
    sentence = "I can be reached at 555-123-4567 through next Friday."
    expected = "I can be reached at *PHONE* through next Friday."
    assert cleantext.replace_phone_numbers(sentence, "*PHONE*") == expected

    for number in phone_numbers:
        replaced = cleantext.replace_phone_numbers(number, "*PHONE*")
        has_marker = "PHONE" in replaced
        # A leftover digit means the number was only partially matched.
        digits_left = any(ch.isdigit() for ch in replaced)
        # The failure message shows the input next to what came back.
        assert has_marker and not digits_left, number + " / " + replaced
def test_replace_numbers():