Add language detection for chesscom profiles.

Branch: main
Joshua Potter, 2023-12-06 20:53:54 -07:00
parent f2fd289225
commit 44a18fc59c
9 changed files with 296 additions and 149 deletions
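
Overview: a lingua LanguageDetector is now built once at startup and threaded through every pipeline. For chess.com, where a coach's languages are not listed explicitly, the profile's "About" text is run through the detector and the detected lingua Language is mapped onto one of our Locale values. A minimal sketch of that flow, with a hypothetical about_text sample (lang_to_locale and locale_to_str come from the new app/locale.py below):

    from lingua import LanguageDetectorBuilder

    from app.locale import lang_to_locale, locale_to_str

    # Built once (see app/__main__.py below); rebuilding per profile would
    # needlessly re-load lingua's language models.
    detector = LanguageDetectorBuilder.from_all_languages().build()

    # Hypothetical "About" text scraped from a chess.com profile.
    about_text = "Entraîneur FIDE, je propose des cours pour tous les niveaux."

    detected = detector.detect_language_of(about_text)  # Language.FRENCH, or None
    if detected is not None:
        locale = lang_to_locale.get(detected)  # Locale.fr_FR
        if locale is not None:
            print(locale_to_str(locale))  # "fr-FR"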

app/__main__.py

@@ -4,6 +4,7 @@ from typing import List
 
 import aiohttp
 import psycopg2
+from lingua import LanguageDetector, LanguageDetectorBuilder
 
 from app.chesscom import Pipeline as ChesscomPipeline
 from app.database import backup_database, load_languages
@@ -14,21 +15,31 @@ from app.types import Site
 WORKER_COUNT = 10
 
 
-async def _process(site: Site, conn, session: aiohttp.ClientSession):
+async def _process(
+    site: Site, conn, detector: LanguageDetector, session: aiohttp.ClientSession
+):
     if site == Site.CHESSCOM:
-        await ChesscomPipeline(worker_count=WORKER_COUNT).process(conn, session)
+        await ChesscomPipeline(worker_count=WORKER_COUNT).process(
+            conn, detector, session
+        )
     elif site == Site.LICHESS:
-        await LichessPipeline(worker_count=WORKER_COUNT).process(conn, session)
+        await LichessPipeline(worker_count=WORKER_COUNT).process(
+            conn, detector, session
+        )
     else:
         assert False, f"Encountered unknown site: {site}."
 
 
-async def _entrypoint(conn, user_agent: str, sites: List[Site]):
+async def _entrypoint(
+    conn, detector: LanguageDetector, user_agent: str, sites: List[Site]
+):
     """Top-level entrypoint that dispatches a pipeline per requested site."""
     async with aiohttp.ClientSession(
         headers={"User-Agent": f"BoardWise coach-scraper ({user_agent})"}
     ) as session:
-        await asyncio.gather(*[_process(site, conn, session) for site in sites])
+        await asyncio.gather(
+            *[_process(site, conn, detector, session) for site in sites]
+        )
 
 
 def main():
@@ -58,6 +69,8 @@ def main():
     args = parser.parse_args()
 
+    detector = LanguageDetectorBuilder.from_all_languages().build()
+
     conn = None
     try:
         conn = psycopg2.connect(
@@ -72,6 +85,7 @@ def main():
         asyncio.run(
             _entrypoint(
                 conn=conn,
+                detector=detector,
                 user_agent=args.user_agent,
                 sites=list(map(Site, set(args.site))),
             )

app/chesscom.py

@@ -6,7 +6,9 @@ from typing import List
 
 import aiohttp
 from bs4 import BeautifulSoup, SoupStrainer, Tag
+from lingua import LanguageDetector
 
+from app.locale import Locale, lang_to_locale
 from app.pipeline import Extractor as BaseExtractor
 from app.pipeline import Fetcher as BaseFetcher
 from app.pipeline import Pipeline as BasePipeline
@@ -97,16 +99,19 @@ class Fetcher(BaseFetcher):
 
 def _profile_filter(elem: Tag | str | None, attrs={}) -> bool:
-    if "profile-header-info" in attrs.get("class", ""):
-        return True
-    if "profile-card-info" in attrs.get("class", ""):
-        return True
+    for className in [
+        "profile-header-info",
+        "profile-card-info",
+        "profile-about",
+    ]:
+        if className in attrs.get("class", ""):
+            return True
     return False
 
 
 class Extractor(BaseExtractor):
-    def __init__(self, fetcher: BaseFetcher, username: str):
-        super().__init__(fetcher, username)
+    def __init__(self, fetcher: BaseFetcher, detector: LanguageDetector, username: str):
+        super().__init__(fetcher, detector, username)
         self.profile_soup = None
 
         try:
@@ -164,9 +169,19 @@ class Extractor(BaseExtractor):
         except ValueError:
             return None
 
-    def get_languages(self) -> List[str] | None:
-        # TODO: Extract using huggingface model.
-        return None
+    def get_languages(self) -> List[Locale] | None:
+        if self.profile_soup is None:
+            return None
+        about = self.profile_soup.find("div", class_="profile-about")
+        if not isinstance(about, Tag):
+            return None
+        detected = self.detector.detect_language_of(about.text)
+        if detected is None:
+            return None
+        code = lang_to_locale.get(detected)
+        if code is None:
+            return None
+        return [code]
 
     def get_rapid(self) -> int | None:
         return self.stats_json.get("rapid", {}).get("rating")
@@ -182,5 +197,7 @@ class Pipeline(BasePipeline):
     def get_fetcher(self, session: aiohttp.ClientSession):
         return Fetcher(session)
 
-    def get_extractor(self, fetcher: BaseFetcher, username: str):
-        return Extractor(fetcher, username)
+    def get_extractor(
+        self, fetcher: BaseFetcher, detector: LanguageDetector, username: str
+    ):
+        return Extractor(fetcher, detector, username)

app/database.py

@@ -4,7 +4,8 @@ from typing import List, Literal
 
 from typing_extensions import TypedDict
 
-from app.types import Site, Title, code_to_lang
+from app.locale import Locale, locale_to_str, native_to_locale
+from app.types import Site, Title
 
 SCHEMA_NAME = "coach_scraper"
 MAIN_TABLE_NAME = "export"
@@ -41,7 +42,7 @@ class Row(TypedDict, total=False):
     # The FIDE title assigned to the coach on the source site.
     title: Title
     # The list of languages the coach is fluent in.
-    languages: List[str]
+    languages: List[Locale]
     # Rapid rating relative to the site they were sourced from.
     rapid: int
     # Blitz rating relative to the site they were sourced from.
@@ -55,7 +56,7 @@ def load_languages(conn):
     cursor = None
     try:
         cursor = conn.cursor()
-        for pos, (code, name) in enumerate(list(code_to_lang.items())):
+        for pos, (name, loc) in enumerate(list(native_to_locale.items())):
             cursor.execute(
                 f"""
                 INSERT INTO {SCHEMA_NAME}.{LANG_TABLE_NAME}
@@ -67,7 +68,7 @@ def load_languages(conn):
                 DO UPDATE SET
                     name = EXCLUDED.name;
                 """,
-                [code, name, pos],
+                [locale_to_str(loc), name, pos],
             )
         conn.commit()
     finally:
@@ -157,7 +158,7 @@ def upsert_row(conn, row: Row):
             row.get("name"),
             row.get("image_url"),
             row["title"].value if "title" in row else None,
-            row.get("languages", []),
+            list(map(locale_to_str, row.get("languages", []))),
             row.get("rapid"),
             row.get("blitz"),
             row.get("bullet"),

app/lichess.py

@@ -5,11 +5,13 @@ from typing import List
 
 import aiohttp
 from bs4 import BeautifulSoup, SoupStrainer, Tag
+from lingua import LanguageDetector
 
+from app.locale import Locale, native_to_locale
 from app.pipeline import Extractor as BaseExtractor
 from app.pipeline import Fetcher as BaseFetcher
 from app.pipeline import Pipeline as BasePipeline
-from app.types import Site, Title, lang_to_code
+from app.types import Site, Title
 
 # The number of pages we will at most iterate through. This number was
 # determined by going to https://lichess.org/coach/all/all/alphabetical
@@ -113,8 +115,8 @@ def _stats_filter(elem: Tag | str | None, attrs={}) -> bool:
 
 
 class Extractor(BaseExtractor):
-    def __init__(self, fetcher: BaseFetcher, username: str):
-        super().__init__(fetcher, username)
+    def __init__(self, fetcher: BaseFetcher, detector: LanguageDetector, username: str):
+        super().__init__(fetcher, detector, username)
         self.profile_soup = None
 
         try:
@@ -175,7 +177,7 @@ class Extractor(BaseExtractor):
         except ValueError:
             return None
 
-    def get_languages(self) -> List[str] | None:
+    def get_languages(self) -> List[Locale] | None:
         if self.profile_soup is None:
             return None
         tr = self.profile_soup.find("tr", class_="languages")
@@ -187,8 +189,8 @@ class Extractor(BaseExtractor):
         codes = []
         for lang in [s.strip() for s in tr.get_text().split(",")]:
-            if lang in lang_to_code:
-                codes.append(lang_to_code[lang])
+            if lang in native_to_locale:
+                codes.append(native_to_locale[lang])
         return codes
 
     def get_rapid(self) -> int | None:
@@ -225,5 +227,7 @@ class Pipeline(BasePipeline):
     def get_fetcher(self, session: aiohttp.ClientSession):
         return Fetcher(session)
 
-    def get_extractor(self, fetcher: BaseFetcher, username: str):
-        return Extractor(fetcher, username)
+    def get_extractor(
+        self, fetcher: BaseFetcher, detector: LanguageDetector, username: str
+    ):
+        return Extractor(fetcher, detector, username)

app/locale.py (new file)

@@ -0,0 +1,154 @@
+import enum
+from collections import OrderedDict
+from typing import Dict
+
+from lingua import Language
+
+
+class Locale(enum.Enum):
+    """Maps {language}_{country} to the name of the language in said language."""
+
+    en_GB = "English"
+    af_ZA = "Afrikaans"
+    an_ES = "Aragonés"
+    ar_SA = "العربية"
+    as_IN = "অসমীয়া"
+    av_DA = "авар мацӀ"
+    az_AZ = "Azərbaycanca"
+    be_BY = "Беларуская"
+    bg_BG = "български език"
+    bn_BD = "বাংলা"
+    br_FR = "Brezhoneg"
+    bs_BA = "Bosanski"
+    ca_ES = "Català, valencià"
+    ckb_IR = "کوردی سۆرانی"
+    co_FR = "Corsu"
+    cs_CZ = "Čeština"
+    cv_CU = "чӑваш чӗлхи"
+    cy_GB = "Cymraeg"
+    da_DK = "Dansk"
+    de_DE = "Deutsch"
+    el_GR = "Ελληνικά"
+    en_US = "English (US)"
+    eo_UY = "Esperanto"
+    es_ES = "Español"
+    et_EE = "Eesti keel"
+    eu_ES = "Euskara"
+    fa_IR = "فارسی"
+    fi_FI = "Suomen kieli"
+    fo_FO = "Føroyskt"
+    fr_FR = "Français"
+    frp_IT = "Arpitan"
+    fy_NL = "Frysk"
+    ga_IE = "Gaeilge"
+    gd_GB = "Gàidhlig"
+    gl_ES = "Galego"
+    gsw_CH = "Schwizerdütsch"
+    gu_IN = "ગુજરાતી"
+    he_IL = "עִבְרִית"
+    hi_IN = "हिन्दी, हिंदी"
+    hr_HR = "Hrvatski"
+    hu_HU = "Magyar"
+    hy_AM = "Հայերեն"
+    ia_IA = "Interlingua"
+    id_ID = "Bahasa Indonesia"
+    io_EN = "Ido"
+    is_IS = "Íslenska"
+    it_IT = "Italiano"
+    ja_JP = "日本語"
+    jbo_EN = "Lojban"
+    jv_ID = "Basa Jawa"
+    ka_GE = "ქართული"
+    kab_DZ = "Taqvaylit"
+    kk_KZ = "қазақша"
+    kmr_TR = "Kurdî (Kurmancî)"
+    kn_IN = "ಕನ್ನಡ"
+    ko_KR = "한국어"
+    ky_KG = "кыргызча"
+    la_LA = "Lingua Latina"
+    lb_LU = "Lëtzebuergesch"
+    lt_LT = "Lietuvių kalba"
+    lv_LV = "Latviešu valoda"
+    mg_MG = "Fiteny malagasy"
+    mk_MK = "македонски јази"
+    ml_IN = "മലയാളം"
+    mn_MN = "монгол"
+    mr_IN = "मराठी"
+    ms_MY = "Melayu"
+    nb_NO = "Norsk bokmål"
+    ne_NP = "नेपाली"
+    nl_NL = "Nederlands"
+    nn_NO = "Norsk nynorsk"
+    pi_IN = "पालि"
+    pl_PL = "Polski"
+    ps_AF = "پښتو"
+    pt_PT = "Português"
+    pt_BR = "Português (BR)"
+    ro_RO = "Română"
+    ru_RU = "русский язык"
+    ry_UA = "Русинська бисїда"
+    sa_IN = "संस्कृत"
+    sk_SK = "Slovenčina"
+    sl_SI = "Slovenščina"
+    sq_AL = "Shqip"
+    sr_SP = "Српски језик"
+    sv_SE = "Svenska"
+    sw_KE = "Kiswahili"
+    ta_IN = "தமிழ்"
+    tg_TJ = "тоҷикӣ"
+    th_TH = "ไทย"
+    tk_TM = "Türkmençe"
+    tl_PH = "Tagalog"
+    tp_TP = "Toki pona"
+    tr_TR = "Türkçe"
+    uk_UA = "українська"
+    ur_PK = "اُردُو"
+    uz_UZ = "oʻzbekcha"
+    vi_VN = "Tiếng Việt"
+    yo_NG = "Yorùbá"
+    zh_CN = "中文"
+    zh_TW = "繁體中文"
+    zu_ZA = "isiZulu"
+
+
+def locale_to_str(loc: Locale) -> str:
+    return loc.name.replace("_", "-")
+
+
+# Uses the name of the language (in said language) as the key.
+native_to_locale: OrderedDict[str, Locale] = OrderedDict(
+    [(loc.value, loc) for loc in Locale]
+)
+
+# Uses an inferred/detected language as the key. Mapping was manually created
+# using https://github.com/pemistahl/lingua-rs/blob/main/src/isocode.rs#L40 as
+# a reference.
+lang_to_locale: Dict[Language, Locale] = {
+    Language.CHINESE: Locale.zh_CN,
+    Language.CROATIAN: Locale.hr_HR,
+    Language.DANISH: Locale.da_DK,
+    Language.DUTCH: Locale.nl_NL,
+    Language.ENGLISH: Locale.en_GB,
+    Language.FINNISH: Locale.fi_FI,
+    Language.FRENCH: Locale.fr_FR,
+    Language.GERMAN: Locale.de_DE,
+    Language.HUNGARIAN: Locale.hu_HU,
+    Language.ITALIAN: Locale.it_IT,
+    Language.KOREAN: Locale.ko_KR,
+    Language.LATIN: Locale.la_LA,
+    Language.MALAY: Locale.ms_MY,
+    Language.PERSIAN: Locale.fa_IR,
+    Language.POLISH: Locale.pl_PL,
+    Language.PORTUGUESE: Locale.pt_PT,
+    Language.ROMANIAN: Locale.ro_RO,
+    Language.RUSSIAN: Locale.ru_RU,
+    Language.SLOVENE: Locale.sl_SI,
+    Language.SPANISH: Locale.es_ES,
+    Language.SWAHILI: Locale.sw_KE,
+    Language.SWEDISH: Locale.sv_SE,
+    Language.TAGALOG: Locale.tl_PH,
+    Language.TURKISH: Locale.tr_TR,
+    Language.UKRAINIAN: Locale.uk_UA,
+    Language.VIETNAMESE: Locale.vi_VN,
+    Language.YORUBA: Locale.yo_NG,
+}
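
A quick sanity check of the helpers this module exposes (a sketch; assumes app.locale is importable as defined above):

    from lingua import Language

    from app.locale import Locale, lang_to_locale, locale_to_str, native_to_locale

    # Locale names encode {language}_{country}; locale_to_str swaps the
    # underscore for a hyphen to produce the usual hyphenated tag.
    assert locale_to_str(Locale.pt_BR) == "pt-BR"

    # native_to_locale is keyed by the language's name for itself, which is
    # exactly how lichess lists a coach's languages.
    assert native_to_locale["Português (BR)"] is Locale.pt_BR

    # lang_to_locale bridges lingua's detected Language to a Locale; this is
    # how the chesscom extractor resolves free-form "About" text.
    assert lang_to_locale[Language.PORTUGUESE] is Locale.pt_PT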

app/pipeline.py

@@ -3,8 +3,10 @@ import os.path
 from typing import Any, List, Tuple
 
 import aiohttp
+from lingua import LanguageDetector
 
 from app.database import Row, RowKey, upsert_row
+from app.locale import Locale
 from app.types import Site, Title
@@ -94,8 +96,9 @@ def _insert(row: Row, key: RowKey, value: Any):
 
 class Extractor:
-    def __init__(self, fetcher: Fetcher, username: str):
+    def __init__(self, fetcher: Fetcher, detector: LanguageDetector, username: str):
         self.fetcher = fetcher
+        self.detector = detector
         self.username = username
 
     def get_name(self) -> str | None:
@@ -107,7 +110,7 @@ class Extractor:
     def get_title(self) -> Title | None:
         raise NotImplementedError()
 
-    def get_languages(self) -> List[str] | None:
+    def get_languages(self) -> List[Locale] | None:
         raise NotImplementedError()
 
     def get_rapid(self) -> int | None:
@@ -157,10 +160,14 @@ class Pipeline:
     def get_fetcher(self, session: aiohttp.ClientSession) -> Fetcher:
         raise NotImplementedError()
 
-    def get_extractor(self, fetcher: Fetcher, username: str) -> Extractor:
+    def get_extractor(
+        self, fetcher: Fetcher, detector: LanguageDetector, username: str
+    ) -> Extractor:
         raise NotImplementedError()
 
-    async def process(self, conn, session: aiohttp.ClientSession):
+    async def process(
+        self, conn, detector: LanguageDetector, session: aiohttp.ClientSession
+    ):
         fetcher = self.get_fetcher(session)
         queue: asyncio.Queue = asyncio.Queue()
@@ -180,7 +187,7 @@ class Pipeline:
             page_no += 1
             for username in usernames or []:
                 await fetcher._download_user_files(username)
-                extractor = self.get_extractor(fetcher, username)
+                extractor = self.get_extractor(fetcher, detector, username)
                 queue.put_nowait((conn, extractor))
 
         # Wait until the queue is fully processed.
# Wait until the queue is fully processed.

app/types.py

@@ -1,5 +1,4 @@
 import enum
-from collections import OrderedDict
 
 
 class Site(enum.Enum):
@@ -18,115 +17,3 @@ class Title(enum.Enum):
     WFM = "WFM"
     WCM = "WCM"
     WNM = "WNM"
-
-
-class Language(enum.Enum):
-    en_GB = "English"
-    af_ZA = "Afrikaans"
-    an_ES = "Aragonés"
-    ar_SA = "العربية"
-    as_IN = "অসমীয়া"
-    av_DA = "авар мацӀ"
-    az_AZ = "Azərbaycanca"
-    be_BY = "Беларуская"
-    bg_BG = "български език"
-    bn_BD = "বাংলা"
-    br_FR = "Brezhoneg"
-    bs_BA = "Bosanski"
-    ca_ES = "Català, valencià"
-    ckb_IR = "کوردی سۆرانی"
-    co_FR = "Corsu"
-    cs_CZ = "Čeština"
-    cv_CU = "чӑваш чӗлхи"
-    cy_GB = "Cymraeg"
-    da_DK = "Dansk"
-    de_DE = "Deutsch"
-    el_GR = "Ελληνικά"
-    en_US = "English (US)"
-    eo_UY = "Esperanto"
-    es_ES = "Español"
-    et_EE = "Eesti keel"
-    eu_ES = "Euskara"
-    fa_IR = "فارسی"
-    fi_FI = "Suomen kieli"
-    fo_FO = "Føroyskt"
-    fr_FR = "Français"
-    frp_IT = "Arpitan"
-    fy_NL = "Frysk"
-    ga_IE = "Gaeilge"
-    gd_GB = "Gàidhlig"
-    gl_ES = "Galego"
-    gsw_CH = "Schwizerdütsch"
-    gu_IN = "ગુજરાતી"
-    he_IL = "עִבְרִית"
-    hi_IN = "हिन्दी, हिंदी"
-    hr_HR = "Hrvatski"
-    hu_HU = "Magyar"
-    hy_AM = "Հայերեն"
-    ia_IA = "Interlingua"
-    id_ID = "Bahasa Indonesia"
-    io_EN = "Ido"
-    is_IS = "Íslenska"
-    it_IT = "Italiano"
-    ja_JP = "日本語"
-    jbo_EN = "Lojban"
-    jv_ID = "Basa Jawa"
-    ka_GE = "ქართული"
-    kab_DZ = "Taqvaylit"
-    kk_KZ = "қазақша"
-    kmr_TR = "Kurdî (Kurmancî)"
-    kn_IN = "ಕನ್ನಡ"
-    ko_KR = "한국어"
-    ky_KG = "кыргызча"
-    la_LA = "Lingua Latina"
-    lb_LU = "Lëtzebuergesch"
-    lt_LT = "Lietuvių kalba"
-    lv_LV = "Latviešu valoda"
-    mg_MG = "Fiteny malagasy"
-    mk_MK = "македонски јази"
-    ml_IN = "മലയാളം"
-    mn_MN = "монгол"
-    mr_IN = "मराठी"
-    ms_MY = "Melayu"
-    nb_NO = "Norsk bokmål"
-    ne_NP = "नेपाली"
-    nl_NL = "Nederlands"
-    nn_NO = "Norsk nynorsk"
-    pi_IN = "पालि"
-    pl_PL = "Polski"
-    ps_AF = "پښتو"
-    pt_PT = "Português"
-    pt_BR = "Português (BR)"
-    ro_RO = "Română"
-    ru_RU = "русский язык"
-    ry_UA = "Русинська бисїда"
-    sa_IN = "संस्कृत"
-    sk_SK = "Slovenčina"
-    sl_SI = "Slovenščina"
-    sq_AL = "Shqip"
-    sr_SP = "Српски језик"
-    sv_SE = "Svenska"
-    sw_KE = "Kiswahili"
-    ta_IN = "தமிழ்"
-    tg_TJ = "тоҷикӣ"
-    th_TH = "ไทย"
-    tk_TM = "Türkmençe"
-    tl_PH = "Tagalog"
-    tp_TP = "Toki pona"
-    tr_TR = "Türkçe"
-    uk_UA = "українська"
-    ur_PK = "اُردُو"
-    uz_UZ = "oʻzbekcha"
-    vi_VN = "Tiếng Việt"
-    yo_NG = "Yorùbá"
-    zh_CN = "中文"
-    zh_TW = "繁體中文"
-    zu_ZA = "isiZulu"
-
-
-code_to_lang = OrderedDict(
-    [(lang.name.replace("_", "-"), lang.value) for lang in Language]
-)
-lang_to_code = OrderedDict(
-    [(lang.value, lang.name.replace("_", "-")) for lang in Language]
-)

poetry.lock (generated)

@@ -226,6 +226,68 @@ files = [
     {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"},
 ]
 
+[[package]]
+name = "lingua-language-detector"
+version = "2.0.1"
+description = "An accurate natural language detection library, suitable for short text and mixed-language text"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "lingua_language_detector-2.0.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:6a8d473de427e7eff54a5eb68fd38d75053dfc7b59e256a7233cc7409435d8ce"},
+    {file = "lingua_language_detector-2.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1b4d67769fe25d903dbc41a63a5d1707913ddd3e574b072c84001cef8472ead0"},
+    {file = "lingua_language_detector-2.0.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:bcb9c8d273637ca69fa242184c8525c4bc56075d4b174d4adc68d81b11b814be"},
+    {file = "lingua_language_detector-2.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c42e66f3b20e97f4e4b442b3f2977b5aefb04968535fc0a78ccd4137db5ef34"},
+    {file = "lingua_language_detector-2.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9563cc878cf9b45d78d9effe9b3b101cb3098b932f2921023e92f984bd5f5120"},
+    {file = "lingua_language_detector-2.0.1-cp310-none-win32.whl", hash = "sha256:2cb95b8e8abb40703b0705321cd3394033812812bc18d559a9a26604b241a663"},
+    {file = "lingua_language_detector-2.0.1-cp310-none-win_amd64.whl", hash = "sha256:b89d995ac9974b9190036585cbd0b70e6117a2d09f2150a898b332abd1a57636"},
+    {file = "lingua_language_detector-2.0.1-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:06db94dbb5b492924a536dbf97c247b228a3fcb00fe5bef9ca83b6b1aa959ca8"},
+    {file = "lingua_language_detector-2.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4d9dcf69a304819d5d2716943de980cccf140168e7d0243925bb98dd0c661600"},
+    {file = "lingua_language_detector-2.0.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:7f66ed5cd572d07a1a853017f41bfd94e84e3081cc39690188adfa97337b199f"},
+    {file = "lingua_language_detector-2.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e89c39a2a324d265c44f8ee7c3ffc499506d6307bb484ab1d9565f2d5857697b"},
+    {file = "lingua_language_detector-2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae66cae403e36134558e929f8ba0d076be45e968f9fc7bab63869b19b34ddac1"},
+    {file = "lingua_language_detector-2.0.1-cp311-none-win32.whl", hash = "sha256:d92f1e0b30233dc1091cb28fe00e0dba8255be609b2337c0dab7f71a2f9b5086"},
+    {file = "lingua_language_detector-2.0.1-cp311-none-win_amd64.whl", hash = "sha256:60c1d0136c242097c58874a74d55b26e0c98ed81e44724d6426411b4bf585566"},
+    {file = "lingua_language_detector-2.0.1-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:5531fbc6881149ce905e511e278ac97ed0e92d64b91b99910204058abe057769"},
+    {file = "lingua_language_detector-2.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6193a0eac1534593427548d03cbcd82bcac040b3344a2abe67654c15a023c196"},
+    {file = "lingua_language_detector-2.0.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:32644b03f107b9cee4a0884096acc21f1900dbec7951ede7c154d19d2a1a6f04"},
+    {file = "lingua_language_detector-2.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9890f5227a7a767253248dd64fb651e4405256d8376f39216a6ff6e2702a0ee"},
+    {file = "lingua_language_detector-2.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d286e14105794300217513326e2d8e82911c5efe773c6a37991336fbd63f9e2"},
+    {file = "lingua_language_detector-2.0.1-cp312-none-win32.whl", hash = "sha256:ec88318a16467fea3457208ff1052dbd72cc486f9f07caeb4325fa76cab9044c"},
+    {file = "lingua_language_detector-2.0.1-cp312-none-win_amd64.whl", hash = "sha256:5fcd53f1a2dc84a00a79c56ca59e4580cfbbb829e5e56249835d31444cf1f9ea"},
+    {file = "lingua_language_detector-2.0.1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:8efa10f188c40e10016e4f0d2a34d743e1555ddf4bd47553f6dd420f673c0e78"},
+    {file = "lingua_language_detector-2.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ca3f11f372658cda18b309998596de6ffceaf6461e376da9c2861ac9d8b7efa3"},
+    {file = "lingua_language_detector-2.0.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5599752ded097132d2a59b2764287240e72e2e9859bb69f43b2957f12d69ac6f"},
+    {file = "lingua_language_detector-2.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27c1e422e5355d6931f82a1da52f822048be68c5f74d8b997c7d9f9617002e6a"},
+    {file = "lingua_language_detector-2.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e489cb648684c5bb5a0d6cab2f602fe0bda7e72921f327ba7350e30c60edaa43"},
+    {file = "lingua_language_detector-2.0.1-cp38-none-win32.whl", hash = "sha256:5d9f7f4b47c5bde5ff85089bcc4625f2f1a17e7677ec15dadb272b6e4b42c274"},
+    {file = "lingua_language_detector-2.0.1-cp38-none-win_amd64.whl", hash = "sha256:493908b45ff237c8c776d1d2b688b113999667b841f284bae862af5f7f526b4f"},
+    {file = "lingua_language_detector-2.0.1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:e0c269a6099e2a637e2b8a312870792be2bb047abc6e7646122e498a159cc0b4"},
+    {file = "lingua_language_detector-2.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7fcd61ec1c771f452346bc57d756fd079a89e9134b93ef94f5f166e27cda6385"},
+    {file = "lingua_language_detector-2.0.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:3728012ace5b8e799df27d1e9272396bc5829a609f1aa799109a22e07e1034c2"},
+    {file = "lingua_language_detector-2.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0619216bed8746501f0c3c4294ebe1bd55a2d9d72083e68dc0d954f9af5ab12e"},
+    {file = "lingua_language_detector-2.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cde1651655cac14a3b9ea9c3319a700de66a1f73cd07d63e9e653fc84d02f11e"},
+    {file = "lingua_language_detector-2.0.1-cp39-none-win32.whl", hash = "sha256:dc1cfcaea24d71d657cf75fb54b593d3db14cf3a19aef6cd1017d0982a407b4e"},
+    {file = "lingua_language_detector-2.0.1-cp39-none-win_amd64.whl", hash = "sha256:71d0662458e025aae7a6cbb0cc4e8169ea75e5b615a85e3310964748449896dd"},
+    {file = "lingua_language_detector-2.0.1-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:42907729b8a343e91fd3d6e61b6add116c513354b6a88072245a4a1d21a18fb9"},
+    {file = "lingua_language_detector-2.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:ed60cb9b9fead792ec97544a3888e7aeda2ae34503c6252a92df599e006d7253"},
+    {file = "lingua_language_detector-2.0.1-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:fa1a6ab7bec65695650e801e23648742e92651315cf1443e9002a1b88ea2ac41"},
+    {file = "lingua_language_detector-2.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be1bf4faa1e645876c8dfc29b37f8f567451b48d43f45c916aba13b946d5069c"},
+    {file = "lingua_language_detector-2.0.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31c7e52f4e0ca47b787fb5e05560a9e3d7a6bc10488a35a31248f82647314957"},
+    {file = "lingua_language_detector-2.0.1-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:c91fedd566b07ac1dc6c091bde6d69dae5c12c90e3e3c1d74fe29f76852e775a"},
+    {file = "lingua_language_detector-2.0.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:16f5fecb6eba86bc094a734ba4bd0603b5bcc580a70c07d659ac2aec14f018ac"},
+    {file = "lingua_language_detector-2.0.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f21abb598efa1de408275e640a22e8b967a43a9fbb0f32302a206efabf3ca0bc"},
+    {file = "lingua_language_detector-2.0.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8906262b3866ae4473b5d5f08703addf0b6b12bc9c9aefcf2e2d855c1496d47a"},
+    {file = "lingua_language_detector-2.0.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc9a6d87e766bf6d2d014986fda13fb8aa6c5602811e856f5555dd8128bd4f2e"},
+    {file = "lingua_language_detector-2.0.1-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:650a6df0c1c7c7d650c9872be1f8e4b6ba32ff363d8184f60ee80441cffad779"},
+    {file = "lingua_language_detector-2.0.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:62d7e29b9e85289bdf80a02a6ef9fcc7b943a5d123eaafa313aad4cfe7b48767"},
+    {file = "lingua_language_detector-2.0.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ee213e9c0c4a256c2bbe2c1f074c9ac122073045049b6a7f999b507da185dea3"},
+    {file = "lingua_language_detector-2.0.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06787b555ad9ddae613c0850b2aff991ea3e87c1ef714166d9691f90d1ad366c"},
+    {file = "lingua_language_detector-2.0.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d983db8f8f0afbf2b4a796e6c8790490868c024492e59ec8b2fca30599c84760"},
+]
+
+[package.extras]
+test = ["pytest (==7.4.3)"]
+
 [[package]]
 name = "lxml"
 version = "4.9.3"
@@ -555,4 +617,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "4a667c2246018c259f23d7fe07102b3b680693beccbc685467b1e29c2c3e2db6"
+content-hash = "d6e80d9fee5ef164d85b7b32db698755af31847470468e3c35d655d5f44f95b4"

pyproject.toml

@@ -11,6 +11,7 @@ beautifulsoup4 = "^4.12.2"
 aiohttp = "^3.8.6"
 lxml = "^4.9.3"
 psycopg2 = "^2.9.9"
+lingua-language-detector = "^2.0.1"
 
 [build-system]
 requires = ["poetry-core"]
@@ -20,5 +21,5 @@ build-backend = "poetry.core.masonry.api"
 app = "app.__main__:main"
 
 [[tool.mypy.overrides]]
-module = "aiohttp"
+module = ["aiohttp", "lingua"]
 ignore_missing_imports = true