Add language detection for chesscom profiles.

main
Joshua Potter 2023-12-06 20:53:54 -07:00
parent f2fd289225
commit 44a18fc59c
9 changed files with 296 additions and 149 deletions

View File

@ -4,6 +4,7 @@ from typing import List
import aiohttp import aiohttp
import psycopg2 import psycopg2
from lingua import LanguageDetector, LanguageDetectorBuilder
from app.chesscom import Pipeline as ChesscomPipeline from app.chesscom import Pipeline as ChesscomPipeline
from app.database import backup_database, load_languages from app.database import backup_database, load_languages
@ -14,21 +15,31 @@ from app.types import Site
WORKER_COUNT = 10 WORKER_COUNT = 10
async def _process(site: Site, conn, session: aiohttp.ClientSession): async def _process(
site: Site, conn, detector: LanguageDetector, session: aiohttp.ClientSession
):
if site == Site.CHESSCOM: if site == Site.CHESSCOM:
await ChesscomPipeline(worker_count=WORKER_COUNT).process(conn, session) await ChesscomPipeline(worker_count=WORKER_COUNT).process(
conn, detector, session
)
elif site == Site.LICHESS: elif site == Site.LICHESS:
await LichessPipeline(worker_count=WORKER_COUNT).process(conn, session) await LichessPipeline(worker_count=WORKER_COUNT).process(
conn, detector, session
)
else: else:
assert False, f"Encountered unknown site: {site}." assert False, f"Encountered unknown site: {site}."
async def _entrypoint(conn, user_agent: str, sites: List[Site]): async def _entrypoint(
conn, detector: LanguageDetector, user_agent: str, sites: List[Site]
):
"""Top-level entrypoint that dispatches a pipeline per requested site.""" """Top-level entrypoint that dispatches a pipeline per requested site."""
async with aiohttp.ClientSession( async with aiohttp.ClientSession(
headers={"User-Agent": f"BoardWise coach-scraper ({user_agent})"} headers={"User-Agent": f"BoardWise coach-scraper ({user_agent})"}
) as session: ) as session:
await asyncio.gather(*[_process(site, conn, session) for site in sites]) await asyncio.gather(
*[_process(site, conn, detector, session) for site in sites]
)
def main(): def main():
@ -58,6 +69,8 @@ def main():
args = parser.parse_args() args = parser.parse_args()
detector = LanguageDetectorBuilder.from_all_languages().build()
conn = None conn = None
try: try:
conn = psycopg2.connect( conn = psycopg2.connect(
@ -72,6 +85,7 @@ def main():
asyncio.run( asyncio.run(
_entrypoint( _entrypoint(
conn=conn, conn=conn,
detector=detector,
user_agent=args.user_agent, user_agent=args.user_agent,
sites=list(map(Site, set(args.site))), sites=list(map(Site, set(args.site))),
) )

View File

@ -6,7 +6,9 @@ from typing import List
import aiohttp import aiohttp
from bs4 import BeautifulSoup, SoupStrainer, Tag from bs4 import BeautifulSoup, SoupStrainer, Tag
from lingua import LanguageDetector
from app.locale import Locale, lang_to_locale
from app.pipeline import Extractor as BaseExtractor from app.pipeline import Extractor as BaseExtractor
from app.pipeline import Fetcher as BaseFetcher from app.pipeline import Fetcher as BaseFetcher
from app.pipeline import Pipeline as BasePipeline from app.pipeline import Pipeline as BasePipeline
@ -97,16 +99,19 @@ class Fetcher(BaseFetcher):
def _profile_filter(elem: Tag | str | None, attrs={}) -> bool: def _profile_filter(elem: Tag | str | None, attrs={}) -> bool:
if "profile-header-info" in attrs.get("class", ""): for className in [
return True "profile-header-info",
if "profile-card-info" in attrs.get("class", ""): "profile-card-info",
"profile-about",
]:
if className in attrs.get("class", ""):
return True return True
return False return False
class Extractor(BaseExtractor): class Extractor(BaseExtractor):
def __init__(self, fetcher: BaseFetcher, username: str): def __init__(self, fetcher: BaseFetcher, detector: LanguageDetector, username: str):
super().__init__(fetcher, username) super().__init__(fetcher, detector, username)
self.profile_soup = None self.profile_soup = None
try: try:
@ -164,9 +169,19 @@ class Extractor(BaseExtractor):
except ValueError: except ValueError:
return None return None
def get_languages(self) -> List[str] | None: def get_languages(self) -> List[Locale] | None:
# TODO: Extract using huggingface model. if self.profile_soup is None:
return None return None
about = self.profile_soup.find("div", class_="profile-about")
if not isinstance(about, Tag):
return None
detected = self.detector.detect_language_of(about.text)
if detected is None:
return None
code = lang_to_locale.get(detected)
if code is None:
return None
return [code]
def get_rapid(self) -> int | None: def get_rapid(self) -> int | None:
return self.stats_json.get("rapid", {}).get("rating") return self.stats_json.get("rapid", {}).get("rating")
@ -182,5 +197,7 @@ class Pipeline(BasePipeline):
def get_fetcher(self, session: aiohttp.ClientSession): def get_fetcher(self, session: aiohttp.ClientSession):
return Fetcher(session) return Fetcher(session)
def get_extractor(self, fetcher: BaseFetcher, username: str): def get_extractor(
return Extractor(fetcher, username) self, fetcher: BaseFetcher, detector: LanguageDetector, username: str
):
return Extractor(fetcher, detector, username)

View File

@ -4,7 +4,8 @@ from typing import List, Literal
from typing_extensions import TypedDict from typing_extensions import TypedDict
from app.types import Site, Title, code_to_lang from app.locale import Locale, locale_to_str, native_to_locale
from app.types import Site, Title
SCHEMA_NAME = "coach_scraper" SCHEMA_NAME = "coach_scraper"
MAIN_TABLE_NAME = "export" MAIN_TABLE_NAME = "export"
@ -41,7 +42,7 @@ class Row(TypedDict, total=False):
# The FIDE title assigned to the coach on the source site. # The FIDE title assigned to the coach on the source site.
title: Title title: Title
# The list of languages the coach is fluent in. # The list of languages the coach is fluent in.
languages: List[str] languages: List[Locale]
# Rapid rating relative to the site they were sourced from. # Rapid rating relative to the site they were sourced from.
rapid: int rapid: int
# Blitz rating relative to the site they were sourced from. # Blitz rating relative to the site they were sourced from.
@ -55,7 +56,7 @@ def load_languages(conn):
cursor = None cursor = None
try: try:
cursor = conn.cursor() cursor = conn.cursor()
for pos, (code, name) in enumerate(list(code_to_lang.items())): for pos, (name, loc) in enumerate(list(native_to_locale.items())):
cursor.execute( cursor.execute(
f""" f"""
INSERT INTO {SCHEMA_NAME}.{LANG_TABLE_NAME} INSERT INTO {SCHEMA_NAME}.{LANG_TABLE_NAME}
@ -67,7 +68,7 @@ def load_languages(conn):
DO UPDATE SET DO UPDATE SET
name = EXCLUDED.name; name = EXCLUDED.name;
""", """,
[code, name, pos], [locale_to_str(loc), name, pos],
) )
conn.commit() conn.commit()
finally: finally:
@ -157,7 +158,7 @@ def upsert_row(conn, row: Row):
row.get("name"), row.get("name"),
row.get("image_url"), row.get("image_url"),
row["title"].value if "title" in row else None, row["title"].value if "title" in row else None,
row.get("languages", []), list(map(locale_to_str, row.get("languages", []))),
row.get("rapid"), row.get("rapid"),
row.get("blitz"), row.get("blitz"),
row.get("bullet"), row.get("bullet"),

View File

@ -5,11 +5,13 @@ from typing import List
import aiohttp import aiohttp
from bs4 import BeautifulSoup, SoupStrainer, Tag from bs4 import BeautifulSoup, SoupStrainer, Tag
from lingua import LanguageDetector
from app.locale import Locale, native_to_locale
from app.pipeline import Extractor as BaseExtractor from app.pipeline import Extractor as BaseExtractor
from app.pipeline import Fetcher as BaseFetcher from app.pipeline import Fetcher as BaseFetcher
from app.pipeline import Pipeline as BasePipeline from app.pipeline import Pipeline as BasePipeline
from app.types import Site, Title, lang_to_code from app.types import Site, Title
# The number of pages we will at most iterate through. This number was # The number of pages we will at most iterate through. This number was
# determined by going to https://lichess.org/coach/all/all/alphabetical # determined by going to https://lichess.org/coach/all/all/alphabetical
@ -113,8 +115,8 @@ def _stats_filter(elem: Tag | str | None, attrs={}) -> bool:
class Extractor(BaseExtractor): class Extractor(BaseExtractor):
def __init__(self, fetcher: BaseFetcher, username: str): def __init__(self, fetcher: BaseFetcher, detector: LanguageDetector, username: str):
super().__init__(fetcher, username) super().__init__(fetcher, detector, username)
self.profile_soup = None self.profile_soup = None
try: try:
@ -175,7 +177,7 @@ class Extractor(BaseExtractor):
except ValueError: except ValueError:
return None return None
def get_languages(self) -> List[str] | None: def get_languages(self) -> List[Locale] | None:
if self.profile_soup is None: if self.profile_soup is None:
return None return None
tr = self.profile_soup.find("tr", class_="languages") tr = self.profile_soup.find("tr", class_="languages")
@ -187,8 +189,8 @@ class Extractor(BaseExtractor):
codes = [] codes = []
for lang in [s.strip() for s in tr.get_text().split(",")]: for lang in [s.strip() for s in tr.get_text().split(",")]:
if lang in lang_to_code: if lang in native_to_locale:
codes.append(lang_to_code[lang]) codes.append(native_to_locale[lang])
return codes return codes
def get_rapid(self) -> int | None: def get_rapid(self) -> int | None:
@ -225,5 +227,7 @@ class Pipeline(BasePipeline):
def get_fetcher(self, session: aiohttp.ClientSession): def get_fetcher(self, session: aiohttp.ClientSession):
return Fetcher(session) return Fetcher(session)
def get_extractor(self, fetcher: BaseFetcher, username: str): def get_extractor(
return Extractor(fetcher, username) self, fetcher: BaseFetcher, detector: LanguageDetector, username: str
):
return Extractor(fetcher, detector, username)

154
app/locale.py Normal file
View File

@ -0,0 +1,154 @@
import enum
from collections import OrderedDict
from typing import Dict
from lingua import Language
class Locale(enum.Enum):
    """Maps {language}_{country} codes to the language's own name (autonym).

    Member names are `{language}_{country}` identifiers (rendered as
    hyphenated codes by `locale_to_str`); member values are the name of the
    language written in that language. Definition order is significant:
    `native_to_locale` (and, downstream, the database language table built
    from it) preserves this order.
    """

    # NOTE(review): en_GB is listed first, ahead of the otherwise-alphabetical
    # ordering — presumably so English gets position 0 downstream; confirm.
    en_GB = "English"
    af_ZA = "Afrikaans"
    an_ES = "Aragonés"
    ar_SA = "العربية"
    as_IN = "অসমীয়া"
    # NOTE(review): "DA" is not an obvious country code for Avar — confirm.
    av_DA = "авар мацӀ"
    az_AZ = "Azərbaycanca"
    be_BY = "Беларуская"
    bg_BG = "български език"
    bn_BD = "বাংলা"
    br_FR = "Brezhoneg"
    bs_BA = "Bosanski"
    ca_ES = "Català, valencià"
    ckb_IR = "کوردی سۆرانی"
    co_FR = "Corsu"
    cs_CZ = "Čeština"
    cv_CU = "чӑваш чӗлхи"
    cy_GB = "Cymraeg"
    da_DK = "Dansk"
    de_DE = "Deutsch"
    el_GR = "Ελληνικά"
    en_US = "English (US)"
    eo_UY = "Esperanto"
    es_ES = "Español"
    et_EE = "Eesti keel"
    eu_ES = "Euskara"
    fa_IR = "فارسی"
    fi_FI = "Suomen kieli"
    fo_FO = "Føroyskt"
    fr_FR = "Français"
    frp_IT = "Arpitan"
    fy_NL = "Frysk"
    ga_IE = "Gaeilge"
    gd_GB = "Gàidhlig"
    gl_ES = "Galego"
    gsw_CH = "Schwizerdütsch"
    gu_IN = "ગુજરાતી"
    he_IL = "עִבְרִית"
    hi_IN = "हिन्दी, हिंदी"
    hr_HR = "Hrvatski"
    hu_HU = "Magyar"
    hy_AM = "Հայերեն"
    ia_IA = "Interlingua"
    id_ID = "Bahasa Indonesia"
    io_EN = "Ido"
    is_IS = "Íslenska"
    it_IT = "Italiano"
    ja_JP = "日本語"
    jbo_EN = "Lojban"
    jv_ID = "Basa Jawa"
    ka_GE = "ქართული"
    kab_DZ = "Taqvaylit"
    kk_KZ = "қазақша"
    kmr_TR = "Kurdî (Kurmancî)"
    kn_IN = "ಕನ್ನಡ"
    ko_KR = "한국어"
    ky_KG = "кыргызча"
    la_LA = "Lingua Latina"
    lb_LU = "Lëtzebuergesch"
    lt_LT = "Lietuvių kalba"
    lv_LV = "Latviešu valoda"
    mg_MG = "Fiteny malagasy"
    # NOTE(review): "јази" looks truncated — the Macedonian autonym is
    # "македонски јазик"; confirm against the site this was sourced from.
    mk_MK = "македонски јази"
    ml_IN = "മലയാളം"
    mn_MN = "монгол"
    mr_IN = "मराठी"
    ms_MY = "Melayu"
    nb_NO = "Norsk bokmål"
    ne_NP = "नेपाली"
    nl_NL = "Nederlands"
    nn_NO = "Norsk nynorsk"
    pi_IN = "पालि"
    pl_PL = "Polski"
    ps_AF = "پښتو"
    pt_PT = "Português"
    pt_BR = "Português (BR)"
    ro_RO = "Română"
    ru_RU = "русский язык"
    ry_UA = "Русинська бисїда"
    sa_IN = "संस्कृत"
    sk_SK = "Slovenčina"
    sl_SI = "Slovenščina"
    sq_AL = "Shqip"
    sr_SP = "Српски језик"
    sv_SE = "Svenska"
    sw_KE = "Kiswahili"
    ta_IN = "தமிழ்"
    tg_TJ = "тоҷикӣ"
    th_TH = "ไทย"
    tk_TM = "Türkmençe"
    tl_PH = "Tagalog"
    tp_TP = "Toki pona"
    tr_TR = "Türkçe"
    uk_UA = "українська"
    ur_PK = "اُردُو"
    uz_UZ = "oʻzbekcha"
    vi_VN = "Tiếng Việt"
    yo_NG = "Yorùbá"
    zh_CN = "中文"
    zh_TW = "繁體中文"
    zu_ZA = "isiZulu"
def locale_to_str(loc: Locale) -> str:
return loc.name.replace("_", "-")
# Keyed by each language's autonym (the Locale member's value). Definition
# order of the enum is preserved, which downstream consumers rely on when
# assigning positions.
native_to_locale: OrderedDict[str, Locale] = OrderedDict(
    (locale.value, locale) for locale in Locale
)
# Uses an inferred/detected language as the key. Mapping was manually created
# using https://github.com/pemistahl/lingua-rs/blob/main/src/isocode.rs#L40 as
# a reference.
#
# NOTE(review): this covers only a subset of the languages lingua can detect;
# detections with no entry here are simply dropped by callers (e.g. the
# chesscom extractor returns None when `.get(detected)` misses). For languages
# spanning several locales (Chinese, English, Portuguese) a single variant was
# chosen — presumably the most common one; confirm the choices are intended.
lang_to_locale: Dict[Language, Locale] = {
    Language.CHINESE: Locale.zh_CN,
    Language.CROATIAN: Locale.hr_HR,
    Language.DANISH: Locale.da_DK,
    Language.DUTCH: Locale.nl_NL,
    Language.ENGLISH: Locale.en_GB,
    Language.FINNISH: Locale.fi_FI,
    Language.FRENCH: Locale.fr_FR,
    Language.GERMAN: Locale.de_DE,
    Language.HUNGARIAN: Locale.hu_HU,
    Language.ITALIAN: Locale.it_IT,
    Language.KOREAN: Locale.ko_KR,
    Language.LATIN: Locale.la_LA,
    Language.MALAY: Locale.ms_MY,
    Language.PERSIAN: Locale.fa_IR,
    Language.POLISH: Locale.pl_PL,
    Language.PORTUGUESE: Locale.pt_PT,
    Language.ROMANIAN: Locale.ro_RO,
    Language.RUSSIAN: Locale.ru_RU,
    Language.SLOVENE: Locale.sl_SI,
    Language.SPANISH: Locale.es_ES,
    Language.SWAHILI: Locale.sw_KE,
    Language.SWEDISH: Locale.sv_SE,
    Language.TAGALOG: Locale.tl_PH,
    Language.TURKISH: Locale.tr_TR,
    Language.UKRAINIAN: Locale.uk_UA,
    Language.VIETNAMESE: Locale.vi_VN,
    Language.YORUBA: Locale.yo_NG,
}

View File

@ -3,8 +3,10 @@ import os.path
from typing import Any, List, Tuple from typing import Any, List, Tuple
import aiohttp import aiohttp
from lingua import LanguageDetector
from app.database import Row, RowKey, upsert_row from app.database import Row, RowKey, upsert_row
from app.locale import Locale
from app.types import Site, Title from app.types import Site, Title
@ -94,8 +96,9 @@ def _insert(row: Row, key: RowKey, value: Any):
class Extractor: class Extractor:
def __init__(self, fetcher: Fetcher, username: str): def __init__(self, fetcher: Fetcher, detector: LanguageDetector, username: str):
self.fetcher = fetcher self.fetcher = fetcher
self.detector = detector
self.username = username self.username = username
def get_name(self) -> str | None: def get_name(self) -> str | None:
@ -107,7 +110,7 @@ class Extractor:
def get_title(self) -> Title | None: def get_title(self) -> Title | None:
raise NotImplementedError() raise NotImplementedError()
def get_languages(self) -> List[str] | None: def get_languages(self) -> List[Locale] | None:
raise NotImplementedError() raise NotImplementedError()
def get_rapid(self) -> int | None: def get_rapid(self) -> int | None:
@ -157,10 +160,14 @@ class Pipeline:
def get_fetcher(self, session: aiohttp.ClientSession) -> Fetcher: def get_fetcher(self, session: aiohttp.ClientSession) -> Fetcher:
raise NotImplementedError() raise NotImplementedError()
def get_extractor(self, fetcher: Fetcher, username: str) -> Extractor: def get_extractor(
self, fetcher: Fetcher, detector: LanguageDetector, username: str
) -> Extractor:
raise NotImplementedError() raise NotImplementedError()
async def process(self, conn, session: aiohttp.ClientSession): async def process(
self, conn, detector: LanguageDetector, session: aiohttp.ClientSession
):
fetcher = self.get_fetcher(session) fetcher = self.get_fetcher(session)
queue: asyncio.Queue = asyncio.Queue() queue: asyncio.Queue = asyncio.Queue()
@ -180,7 +187,7 @@ class Pipeline:
page_no += 1 page_no += 1
for username in usernames or []: for username in usernames or []:
await fetcher._download_user_files(username) await fetcher._download_user_files(username)
extractor = self.get_extractor(fetcher, username) extractor = self.get_extractor(fetcher, detector, username)
queue.put_nowait((conn, extractor)) queue.put_nowait((conn, extractor))
# Wait until the queue is fully processed. # Wait until the queue is fully processed.

View File

@ -1,5 +1,4 @@
import enum import enum
from collections import OrderedDict
class Site(enum.Enum): class Site(enum.Enum):
@ -18,115 +17,3 @@ class Title(enum.Enum):
WFM = "WFM" WFM = "WFM"
WCM = "WCM" WCM = "WCM"
WNM = "WNM" WNM = "WNM"
class Language(enum.Enum):
en_GB = "English"
af_ZA = "Afrikaans"
an_ES = "Aragonés"
ar_SA = "العربية"
as_IN = "অসমীয়া"
av_DA = "авар мацӀ"
az_AZ = "Azərbaycanca"
be_BY = "Беларуская"
bg_BG = "български език"
bn_BD = "বাংলা"
br_FR = "Brezhoneg"
bs_BA = "Bosanski"
ca_ES = "Català, valencià"
ckb_IR = "کوردی سۆرانی"
co_FR = "Corsu"
cs_CZ = "Čeština"
cv_CU = "чӑваш чӗлхи"
cy_GB = "Cymraeg"
da_DK = "Dansk"
de_DE = "Deutsch"
el_GR = "Ελληνικά"
en_US = "English (US)"
eo_UY = "Esperanto"
es_ES = "Español"
et_EE = "Eesti keel"
eu_ES = "Euskara"
fa_IR = "فارسی"
fi_FI = "Suomen kieli"
fo_FO = "Føroyskt"
fr_FR = "Français"
frp_IT = "Arpitan"
fy_NL = "Frysk"
ga_IE = "Gaeilge"
gd_GB = "Gàidhlig"
gl_ES = "Galego"
gsw_CH = "Schwizerdütsch"
gu_IN = "ગુજરાતી"
he_IL = "עִבְרִית"
hi_IN = "हिन्दी, हिंदी"
hr_HR = "Hrvatski"
hu_HU = "Magyar"
hy_AM = "Հայերեն"
ia_IA = "Interlingua"
id_ID = "Bahasa Indonesia"
io_EN = "Ido"
is_IS = "Íslenska"
it_IT = "Italiano"
ja_JP = "日本語"
jbo_EN = "Lojban"
jv_ID = "Basa Jawa"
ka_GE = "ქართული"
kab_DZ = "Taqvaylit"
kk_KZ = "қазақша"
kmr_TR = "Kurdî (Kurmancî)"
kn_IN = "ಕನ್ನಡ"
ko_KR = "한국어"
ky_KG = "кыргызча"
la_LA = "Lingua Latina"
lb_LU = "Lëtzebuergesch"
lt_LT = "Lietuvių kalba"
lv_LV = "Latviešu valoda"
mg_MG = "Fiteny malagasy"
mk_MK = "македонски јази"
ml_IN = "മലയാളം"
mn_MN = "монгол"
mr_IN = "मराठी"
nb_NO = "Norsk bokmål"
ne_NP = "नेपाली"
nl_NL = "Nederlands"
nn_NO = "Norsk nynorsk"
pi_IN = "पालि"
pl_PL = "Polski"
ps_AF = "پښتو"
pt_PT = "Português"
pt_BR = "Português (BR)"
ro_RO = "Română"
ru_RU = "русский язык"
ry_UA = "Русинська бисїда"
sa_IN = "संस्कृत"
sk_SK = "Slovenčina"
sl_SI = "Slovenščina"
sq_AL = "Shqip"
sr_SP = "Српски језик"
sv_SE = "Svenska"
sw_KE = "Kiswahili"
ta_IN = "தமிழ்"
tg_TJ = "тоҷикӣ"
th_TH = "ไทย"
tk_TM = "Türkmençe"
tl_PH = "Tagalog"
tp_TP = "Toki pona"
tr_TR = "Türkçe"
uk_UA = "українська"
ur_PK = "اُردُو"
uz_UZ = "oʻzbekcha"
vi_VN = "Tiếng Việt"
yo_NG = "Yorùbá"
zh_CN = "中文"
zh_TW = "繁體中文"
zu_ZA = "isiZulu"
code_to_lang = OrderedDict(
[(lang.name.replace("_", "-"), lang.value) for lang in Language]
)
lang_to_code = OrderedDict(
[(lang.value, lang.name.replace("_", "-")) for lang in Language]
)

64
poetry.lock generated
View File

@ -226,6 +226,68 @@ files = [
{file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"},
] ]
[[package]]
name = "lingua-language-detector"
version = "2.0.1"
description = "An accurate natural language detection library, suitable for short text and mixed-language text"
optional = false
python-versions = ">=3.8"
files = [
{file = "lingua_language_detector-2.0.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:6a8d473de427e7eff54a5eb68fd38d75053dfc7b59e256a7233cc7409435d8ce"},
{file = "lingua_language_detector-2.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1b4d67769fe25d903dbc41a63a5d1707913ddd3e574b072c84001cef8472ead0"},
{file = "lingua_language_detector-2.0.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:bcb9c8d273637ca69fa242184c8525c4bc56075d4b174d4adc68d81b11b814be"},
{file = "lingua_language_detector-2.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c42e66f3b20e97f4e4b442b3f2977b5aefb04968535fc0a78ccd4137db5ef34"},
{file = "lingua_language_detector-2.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9563cc878cf9b45d78d9effe9b3b101cb3098b932f2921023e92f984bd5f5120"},
{file = "lingua_language_detector-2.0.1-cp310-none-win32.whl", hash = "sha256:2cb95b8e8abb40703b0705321cd3394033812812bc18d559a9a26604b241a663"},
{file = "lingua_language_detector-2.0.1-cp310-none-win_amd64.whl", hash = "sha256:b89d995ac9974b9190036585cbd0b70e6117a2d09f2150a898b332abd1a57636"},
{file = "lingua_language_detector-2.0.1-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:06db94dbb5b492924a536dbf97c247b228a3fcb00fe5bef9ca83b6b1aa959ca8"},
{file = "lingua_language_detector-2.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4d9dcf69a304819d5d2716943de980cccf140168e7d0243925bb98dd0c661600"},
{file = "lingua_language_detector-2.0.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:7f66ed5cd572d07a1a853017f41bfd94e84e3081cc39690188adfa97337b199f"},
{file = "lingua_language_detector-2.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e89c39a2a324d265c44f8ee7c3ffc499506d6307bb484ab1d9565f2d5857697b"},
{file = "lingua_language_detector-2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae66cae403e36134558e929f8ba0d076be45e968f9fc7bab63869b19b34ddac1"},
{file = "lingua_language_detector-2.0.1-cp311-none-win32.whl", hash = "sha256:d92f1e0b30233dc1091cb28fe00e0dba8255be609b2337c0dab7f71a2f9b5086"},
{file = "lingua_language_detector-2.0.1-cp311-none-win_amd64.whl", hash = "sha256:60c1d0136c242097c58874a74d55b26e0c98ed81e44724d6426411b4bf585566"},
{file = "lingua_language_detector-2.0.1-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:5531fbc6881149ce905e511e278ac97ed0e92d64b91b99910204058abe057769"},
{file = "lingua_language_detector-2.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6193a0eac1534593427548d03cbcd82bcac040b3344a2abe67654c15a023c196"},
{file = "lingua_language_detector-2.0.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:32644b03f107b9cee4a0884096acc21f1900dbec7951ede7c154d19d2a1a6f04"},
{file = "lingua_language_detector-2.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9890f5227a7a767253248dd64fb651e4405256d8376f39216a6ff6e2702a0ee"},
{file = "lingua_language_detector-2.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d286e14105794300217513326e2d8e82911c5efe773c6a37991336fbd63f9e2"},
{file = "lingua_language_detector-2.0.1-cp312-none-win32.whl", hash = "sha256:ec88318a16467fea3457208ff1052dbd72cc486f9f07caeb4325fa76cab9044c"},
{file = "lingua_language_detector-2.0.1-cp312-none-win_amd64.whl", hash = "sha256:5fcd53f1a2dc84a00a79c56ca59e4580cfbbb829e5e56249835d31444cf1f9ea"},
{file = "lingua_language_detector-2.0.1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:8efa10f188c40e10016e4f0d2a34d743e1555ddf4bd47553f6dd420f673c0e78"},
{file = "lingua_language_detector-2.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ca3f11f372658cda18b309998596de6ffceaf6461e376da9c2861ac9d8b7efa3"},
{file = "lingua_language_detector-2.0.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5599752ded097132d2a59b2764287240e72e2e9859bb69f43b2957f12d69ac6f"},
{file = "lingua_language_detector-2.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27c1e422e5355d6931f82a1da52f822048be68c5f74d8b997c7d9f9617002e6a"},
{file = "lingua_language_detector-2.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e489cb648684c5bb5a0d6cab2f602fe0bda7e72921f327ba7350e30c60edaa43"},
{file = "lingua_language_detector-2.0.1-cp38-none-win32.whl", hash = "sha256:5d9f7f4b47c5bde5ff85089bcc4625f2f1a17e7677ec15dadb272b6e4b42c274"},
{file = "lingua_language_detector-2.0.1-cp38-none-win_amd64.whl", hash = "sha256:493908b45ff237c8c776d1d2b688b113999667b841f284bae862af5f7f526b4f"},
{file = "lingua_language_detector-2.0.1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:e0c269a6099e2a637e2b8a312870792be2bb047abc6e7646122e498a159cc0b4"},
{file = "lingua_language_detector-2.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7fcd61ec1c771f452346bc57d756fd079a89e9134b93ef94f5f166e27cda6385"},
{file = "lingua_language_detector-2.0.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:3728012ace5b8e799df27d1e9272396bc5829a609f1aa799109a22e07e1034c2"},
{file = "lingua_language_detector-2.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0619216bed8746501f0c3c4294ebe1bd55a2d9d72083e68dc0d954f9af5ab12e"},
{file = "lingua_language_detector-2.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cde1651655cac14a3b9ea9c3319a700de66a1f73cd07d63e9e653fc84d02f11e"},
{file = "lingua_language_detector-2.0.1-cp39-none-win32.whl", hash = "sha256:dc1cfcaea24d71d657cf75fb54b593d3db14cf3a19aef6cd1017d0982a407b4e"},
{file = "lingua_language_detector-2.0.1-cp39-none-win_amd64.whl", hash = "sha256:71d0662458e025aae7a6cbb0cc4e8169ea75e5b615a85e3310964748449896dd"},
{file = "lingua_language_detector-2.0.1-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:42907729b8a343e91fd3d6e61b6add116c513354b6a88072245a4a1d21a18fb9"},
{file = "lingua_language_detector-2.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:ed60cb9b9fead792ec97544a3888e7aeda2ae34503c6252a92df599e006d7253"},
{file = "lingua_language_detector-2.0.1-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:fa1a6ab7bec65695650e801e23648742e92651315cf1443e9002a1b88ea2ac41"},
{file = "lingua_language_detector-2.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be1bf4faa1e645876c8dfc29b37f8f567451b48d43f45c916aba13b946d5069c"},
{file = "lingua_language_detector-2.0.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31c7e52f4e0ca47b787fb5e05560a9e3d7a6bc10488a35a31248f82647314957"},
{file = "lingua_language_detector-2.0.1-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:c91fedd566b07ac1dc6c091bde6d69dae5c12c90e3e3c1d74fe29f76852e775a"},
{file = "lingua_language_detector-2.0.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:16f5fecb6eba86bc094a734ba4bd0603b5bcc580a70c07d659ac2aec14f018ac"},
{file = "lingua_language_detector-2.0.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f21abb598efa1de408275e640a22e8b967a43a9fbb0f32302a206efabf3ca0bc"},
{file = "lingua_language_detector-2.0.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8906262b3866ae4473b5d5f08703addf0b6b12bc9c9aefcf2e2d855c1496d47a"},
{file = "lingua_language_detector-2.0.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc9a6d87e766bf6d2d014986fda13fb8aa6c5602811e856f5555dd8128bd4f2e"},
{file = "lingua_language_detector-2.0.1-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:650a6df0c1c7c7d650c9872be1f8e4b6ba32ff363d8184f60ee80441cffad779"},
{file = "lingua_language_detector-2.0.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:62d7e29b9e85289bdf80a02a6ef9fcc7b943a5d123eaafa313aad4cfe7b48767"},
{file = "lingua_language_detector-2.0.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ee213e9c0c4a256c2bbe2c1f074c9ac122073045049b6a7f999b507da185dea3"},
{file = "lingua_language_detector-2.0.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06787b555ad9ddae613c0850b2aff991ea3e87c1ef714166d9691f90d1ad366c"},
{file = "lingua_language_detector-2.0.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d983db8f8f0afbf2b4a796e6c8790490868c024492e59ec8b2fca30599c84760"},
]
[package.extras]
test = ["pytest (==7.4.3)"]
[[package]] [[package]]
name = "lxml" name = "lxml"
version = "4.9.3" version = "4.9.3"
@ -555,4 +617,4 @@ multidict = ">=4.0"
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.11" python-versions = "^3.11"
content-hash = "4a667c2246018c259f23d7fe07102b3b680693beccbc685467b1e29c2c3e2db6" content-hash = "d6e80d9fee5ef164d85b7b32db698755af31847470468e3c35d655d5f44f95b4"

View File

@ -11,6 +11,7 @@ beautifulsoup4 = "^4.12.2"
aiohttp = "^3.8.6" aiohttp = "^3.8.6"
lxml = "^4.9.3" lxml = "^4.9.3"
psycopg2 = "^2.9.9" psycopg2 = "^2.9.9"
lingua-language-detector = "^2.0.1"
[build-system] [build-system]
requires = ["poetry-core"] requires = ["poetry-core"]
@ -20,5 +21,5 @@ build-backend = "poetry.core.masonry.api"
app = "app.__main__:main" app = "app.__main__:main"
[[tool.mypy.overrides]] [[tool.mypy.overrides]]
module = "aiohttp" module = ["aiohttp", "lingua"]
ignore_missing_imports = true ignore_missing_imports = true