mirror of
https://github.com/ViralLab/TurkishBERTweet.git
synced 2023-12-19 18:19:59 +03:00
prep bug fix
This commit is contained in:
@@ -28,9 +28,10 @@ url_extractor = URLExtract()
|
||||
|
||||
def url_handler(text: str):
    """Replace every URL in *text* with a ``<http> domain </http>`` marker.

    URLs are discovered with the module-level ``url_extractor``. Each
    distinct URL is mapped to the network location (``netloc``) of that
    same URL, and all occurrences are substituted in a single pass.

    Args:
        text: Raw input text that may contain zero or more URLs.

    Returns:
        The text with each URL replaced by ``" <http> {domain} </http> "``;
        the input is returned unchanged when no URL is found.
    """
    # Function-scope import: the file's import block is not part of this
    # block, and ``re`` was not used by the previous implementation.
    import re

    # dict.fromkeys de-duplicates while keeping first-seen order. Using
    # set() here loses the ordering and pairs URLs with the wrong domain.
    urls = list(dict.fromkeys(url_extractor.gen_urls(text)))
    if not urls:
        return text

    replacements = {}
    for url in urls:
        # urlparse only fills ``netloc`` when the URL carries a scheme, so
        # scheme-less matches such as "example.com/page" get one prepended.
        # The test is anchored at the start so that "http" appearing
        # elsewhere in the URL is not mistaken for a scheme.
        if re.match(r"[A-Za-z][A-Za-z0-9+.-]*://", url):
            with_scheme = url
        else:
            with_scheme = f"https://{url}"
        domain = urllib.parse.urlparse(with_scheme).netloc
        replacements[url] = f" <http> {domain} </http> "

    # One regex pass instead of repeated str.replace: text that has already
    # been substituted is never rescanned, so a bare domain cannot match
    # inside a marker inserted earlier. Longest alternatives come first so
    # a URL that is a prefix of another cannot pre-empt the longer one.
    longest_first = sorted(urls, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(url) for url in longest_first))
    return pattern.sub(lambda match: replacements[match.group(0)], text)
|
||||
|
||||
Reference in New Issue
Block a user