Add language detection for chesscom profiles.

2023-12-06 20:53:54 -07:00 · 2023-12-06 20:53:54 -07:00 · 44a18fc59c
parent f2fd289225
commit 44a18fc59c
9 changed files with 296 additions and 149 deletions
--- a/app/main.py
+++ b/app/main.py
@ -4,6 +4,7 @@ from typing import List
 import aiohttp
 import psycopg2
 from lingua import LanguageDetector, LanguageDetectorBuilder
 from app.chesscom import Pipeline as ChesscomPipeline
 from app.database import backup_database, load_languages
@ -14,21 +15,31 @@ from app.types import Site
 WORKER_COUNT = 10
-async def _process(site: Site, conn, session: aiohttp.ClientSession):
+async def _process(
    site: Site, conn, detector: LanguageDetector, session: aiohttp.ClientSession
 ):
    if site == Site.CHESSCOM:
-        await ChesscomPipeline(worker_count=WORKER_COUNT).process(conn, session)
+        await ChesscomPipeline(worker_count=WORKER_COUNT).process(
            conn, detector, session
        )
    elif site == Site.LICHESS:
-        await LichessPipeline(worker_count=WORKER_COUNT).process(conn, session)
+        await LichessPipeline(worker_count=WORKER_COUNT).process(
            conn, detector, session
        )
    else:
        assert False, f"Encountered unknown site: {site}."
-async def _entrypoint(conn, user_agent: str, sites: List[Site]):
+async def _entrypoint(
    conn, detector: LanguageDetector, user_agent: str, sites: List[Site]
 ):
    """Top-level entrypoint that dispatches a pipeline per requested site."""
    async with aiohttp.ClientSession(
        headers={"User-Agent": f"BoardWise coach-scraper ({user_agent})"}
    ) as session:
-        await asyncio.gather(*[_process(site, conn, session) for site in sites])
+        await asyncio.gather(
            *[_process(site, conn, detector, session) for site in sites]
        )
 def main():
@ -58,6 +69,8 @@ def main():
    args = parser.parse_args()
    detector = LanguageDetectorBuilder.from_all_languages().build()
    conn = None
    try:
        conn = psycopg2.connect(
@ -72,6 +85,7 @@ def main():
        asyncio.run(
            _entrypoint(
                conn=conn,
                detector=detector,
                user_agent=args.user_agent,
                sites=list(map(Site, set(args.site))),
            )
--- a/app/chesscom.py
+++ b/app/chesscom.py
@ -6,7 +6,9 @@ from typing import List
 import aiohttp
 from bs4 import BeautifulSoup, SoupStrainer, Tag
 from lingua import LanguageDetector
 from app.locale import Locale, lang_to_locale
 from app.pipeline import Extractor as BaseExtractor
 from app.pipeline import Fetcher as BaseFetcher
 from app.pipeline import Pipeline as BasePipeline
@ -97,16 +99,19 @@ class Fetcher(BaseFetcher):
 def _profile_filter(elem: Tag | str | None, attrs={}) -> bool:
-    if "profile-header-info" in attrs.get("class", ""):
+    for className in [
-        return True
+        "profile-header-info",
-    if "profile-card-info" in attrs.get("class", ""):
+        "profile-card-info",
        "profile-about",
    ]:
        if className in attrs.get("class", ""):
            return True
    return False
 class Extractor(BaseExtractor):
-    def __init__(self, fetcher: BaseFetcher, username: str):
+    def __init__(self, fetcher: BaseFetcher, detector: LanguageDetector, username: str):
-        super().__init__(fetcher, username)
+        super().__init__(fetcher, detector, username)
        self.profile_soup = None
        try:
@ -164,9 +169,19 @@ class Extractor(BaseExtractor):
        except ValueError:
            return None
-    def get_languages(self) -> List[str] | None:
+    def get_languages(self) -> List[Locale] | None:
-        # TODO: Extract using huggingface model.
+        if self.profile_soup is None:
            return None
        about = self.profile_soup.find("div", class_="profile-about")
        if not isinstance(about, Tag):
            return None
        detected = self.detector.detect_language_of(about.text)
        if detected is None:
            return None
        code = lang_to_locale.get(detected)
        if code is None:
            return None
        return [code]
    def get_rapid(self) -> int | None:
        return self.stats_json.get("rapid", {}).get("rating")
@ -182,5 +197,7 @@ class Pipeline(BasePipeline):
    def get_fetcher(self, session: aiohttp.ClientSession):
        return Fetcher(session)
-    def get_extractor(self, fetcher: BaseFetcher, username: str):
+    def get_extractor(
-        return Extractor(fetcher, username)
+        self, fetcher: BaseFetcher, detector: LanguageDetector, username: str
    ):
        return Extractor(fetcher, detector, username)
--- a/app/database.py
+++ b/app/database.py
@ -4,7 +4,8 @@ from typing import List, Literal
 from typing_extensions import TypedDict
-from app.types import Site, Title, code_to_lang
+from app.locale import Locale, locale_to_str, native_to_locale
 from app.types import Site, Title
 SCHEMA_NAME = "coach_scraper"
 MAIN_TABLE_NAME = "export"
@ -41,7 +42,7 @@ class Row(TypedDict, total=False):
    # The FIDE title assigned to the coach on the source site.
    title: Title
    # The list of languages the coach is fluent in.
-    languages: List[str]
+    languages: List[Locale]
    # Rapid rating relative to the site they were sourced from.
    rapid: int
    # Blitz rating relative to the site they were sourced from.
@ -55,7 +56,7 @@ def load_languages(conn):
    cursor = None
    try:
        cursor = conn.cursor()
-        for pos, (code, name) in enumerate(list(code_to_lang.items())):
+        for pos, (name, loc) in enumerate(list(native_to_locale.items())):
            cursor.execute(
                f"""
                INSERT INTO {SCHEMA_NAME}.{LANG_TABLE_NAME}
@ -67,7 +68,7 @@ def load_languages(conn):
                DO UPDATE SET
                  name = EXCLUDED.name;
                """,
-                [code, name, pos],
+                [locale_to_str(loc), name, pos],
            )
        conn.commit()
    finally:
@ -157,7 +158,7 @@ def upsert_row(conn, row: Row):
                row.get("name"),
                row.get("image_url"),
                row["title"].value if "title" in row else None,
-                row.get("languages", []),
+                list(map(locale_to_str, row.get("languages", []))),
                row.get("rapid"),
                row.get("blitz"),
                row.get("bullet"),
--- a/app/lichess.py
+++ b/app/lichess.py
@ -5,11 +5,13 @@ from typing import List
 import aiohttp
 from bs4 import BeautifulSoup, SoupStrainer, Tag
 from lingua import LanguageDetector
 from app.locale import Locale, native_to_locale
 from app.pipeline import Extractor as BaseExtractor
 from app.pipeline import Fetcher as BaseFetcher
 from app.pipeline import Pipeline as BasePipeline
-from app.types import Site, Title, lang_to_code
+from app.types import Site, Title
 # The number of pages we will at most iterate through. This number was
 # determined by going to https://lichess.org/coach/all/all/alphabetical
@ -113,8 +115,8 @@ def _stats_filter(elem: Tag | str | None, attrs={}) -> bool:
 class Extractor(BaseExtractor):
-    def __init__(self, fetcher: BaseFetcher, username: str):
+    def __init__(self, fetcher: BaseFetcher, detector: LanguageDetector, username: str):
-        super().__init__(fetcher, username)
+        super().__init__(fetcher, detector, username)
        self.profile_soup = None
        try:
@ -175,7 +177,7 @@ class Extractor(BaseExtractor):
        except ValueError:
            return None
-    def get_languages(self) -> List[str] | None:
+    def get_languages(self) -> List[Locale] | None:
        if self.profile_soup is None:
            return None
        tr = self.profile_soup.find("tr", class_="languages")
@ -187,8 +189,8 @@ class Extractor(BaseExtractor):
        codes = []
        for lang in [s.strip() for s in tr.get_text().split(",")]:
-            if lang in lang_to_code:
+            if lang in native_to_locale:
-                codes.append(lang_to_code[lang])
+                codes.append(native_to_locale[lang])
        return codes
    def get_rapid(self) -> int | None:
@ -225,5 +227,7 @@ class Pipeline(BasePipeline):
    def get_fetcher(self, session: aiohttp.ClientSession):
        return Fetcher(session)
-    def get_extractor(self, fetcher: BaseFetcher, username: str):
+    def get_extractor(
-        return Extractor(fetcher, username)
+        self, fetcher: BaseFetcher, detector: LanguageDetector, username: str
    ):
        return Extractor(fetcher, detector, username)
--- a/app/locale.py
+++ b/app/locale.py
@ -0,0 +1,154 @@
 import enum
 from collections import OrderedDict
 from typing import Dict
 from lingua import Language
 class Locale(enum.Enum):
    """Maps {language}_{country} to the name of the language in said language.

    Each member's name is the locale identifier (written with an underscore)
    and each member's value is the native-language name of that language.
    """
    en_GB = "English"
    af_ZA = "Afrikaans"
    an_ES = "Aragonés"
    ar_SA = "العربية"
    as_IN = "অসমীয়া"
    av_DA = "авар мацӀ"
    az_AZ = "Azərbaycanca"
    be_BY = "Беларуская"
    bg_BG = "български език"
    bn_BD = "বাংলা"
    br_FR = "Brezhoneg"
    bs_BA = "Bosanski"
    ca_ES = "Català, valencià"
    ckb_IR = "کوردی سۆرانی"
    co_FR = "Corsu"
    cs_CZ = "Čeština"
    cv_CU = "чӑваш чӗлхи"
    cy_GB = "Cymraeg"
    da_DK = "Dansk"
    de_DE = "Deutsch"
    el_GR = "Ελληνικά"
    en_US = "English (US)"
    eo_UY = "Esperanto"
    es_ES = "Español"
    et_EE = "Eesti keel"
    eu_ES = "Euskara"
    fa_IR = "فارسی"
    fi_FI = "Suomen kieli"
    fo_FO = "Føroyskt"
    fr_FR = "Français"
    frp_IT = "Arpitan"
    fy_NL = "Frysk"
    ga_IE = "Gaeilge"
    gd_GB = "Gàidhlig"
    gl_ES = "Galego"
    gsw_CH = "Schwizerdütsch"
    gu_IN = "ગુજરાતી"
    he_IL = "עִבְרִית"
    hi_IN = "हिन्दी, हिंदी"
    hr_HR = "Hrvatski"
    hu_HU = "Magyar"
    hy_AM = "Հայերեն"
    ia_IA = "Interlingua"
    id_ID = "Bahasa Indonesia"
    io_EN = "Ido"
    is_IS = "Íslenska"
    it_IT = "Italiano"
    ja_JP = "日本語"
    jbo_EN = "Lojban"
    jv_ID = "Basa Jawa"
    ka_GE = "ქართული"
    kab_DZ = "Taqvaylit"
    kk_KZ = "қазақша"
    kmr_TR = "Kurdî (Kurmancî)"
    kn_IN = "ಕನ್ನಡ"
    ko_KR = "한국어"
    ky_KG = "кыргызча"
    la_LA = "Lingua Latina"
    lb_LU = "Lëtzebuergesch"
    lt_LT = "Lietuvių kalba"
    lv_LV = "Latviešu valoda"
    mg_MG = "Fiteny malagasy"
    mk_MK = "македонски јази"
    ml_IN = "മലയാളം"
    mn_MN = "монгол"
    mr_IN = "मराठी"
    ms_MY = "Melayu"
    nb_NO = "Norsk bokmål"
    ne_NP = "नेपाली"
    nl_NL = "Nederlands"
    nn_NO = "Norsk nynorsk"
    pi_IN = "पालि"
    pl_PL = "Polski"
    ps_AF = "پښتو"
    pt_PT = "Português"
    pt_BR = "Português (BR)"
    ro_RO = "Română"
    ru_RU = "русский язык"
    ry_UA = "Русинська бисїда"
    sa_IN = "संस्कृत"
    sk_SK = "Slovenčina"
    sl_SI = "Slovenščina"
    sq_AL = "Shqip"
    sr_SP = "Српски језик"
    sv_SE = "Svenska"
    sw_KE = "Kiswahili"
    ta_IN = "தமிழ்"
    tg_TJ = "тоҷикӣ"
    th_TH = "ไทย"
    tk_TM = "Türkmençe"
    tl_PH = "Tagalog"
    tp_TP = "Toki pona"
    tr_TR = "Türkçe"
    uk_UA = "українська"
    ur_PK = "اُردُو"
    uz_UZ = "oʻzbekcha"
    vi_VN = "Tiếng Việt"
    yo_NG = "Yorùbá"
    zh_CN = "中文"
    zh_TW = "繁體中文"
    zu_ZA = "isiZulu"
 def locale_to_str(loc: Locale) -> str:
    """Return the member name of `loc` with underscores replaced by hyphens.

    For example, `Locale.en_GB` is rendered as "en-GB".
    """
    return loc.name.replace("_", "-")
 # Uses the name of the language (in said language) as the key. Built from
 # every `Locale` member in definition order, so each member's value maps back
 # to that member.
 native_to_locale: OrderedDict[str, Locale] = OrderedDict(
    [(loc.value, loc) for loc in Locale]
 )
 # Uses an inferred/detected language as the key. Mapping was manually created
 # using https://github.com/pemistahl/lingua-rs/blob/main/src/isocode.rs#L40 as
 # a reference.
 #
 # Only the languages listed below are mapped; a lookup for any other detected
 # language finds no entry, so callers must handle a miss.
 # NOTE(review): several `Locale` members (e.g. ja_JP, ar_SA, el_GR) have no
 # entry here -- confirm whether the detector can report those languages and
 # whether they should be added.
 lang_to_locale: Dict[Language, Locale] = {
    Language.CHINESE: Locale.zh_CN,
    Language.CROATIAN: Locale.hr_HR,
    Language.DANISH: Locale.da_DK,
    Language.DUTCH: Locale.nl_NL,
    Language.ENGLISH: Locale.en_GB,
    Language.FINNISH: Locale.fi_FI,
    Language.FRENCH: Locale.fr_FR,
    Language.GERMAN: Locale.de_DE,
    Language.HUNGARIAN: Locale.hu_HU,
    Language.ITALIAN: Locale.it_IT,
    Language.KOREAN: Locale.ko_KR,
    Language.LATIN: Locale.la_LA,
    Language.MALAY: Locale.ms_MY,
    Language.PERSIAN: Locale.fa_IR,
    Language.POLISH: Locale.pl_PL,
    Language.PORTUGUESE: Locale.pt_PT,
    Language.ROMANIAN: Locale.ro_RO,
    Language.RUSSIAN: Locale.ru_RU,
    Language.SLOVENE: Locale.sl_SI,
    Language.SPANISH: Locale.es_ES,
    Language.SWAHILI: Locale.sw_KE,
    Language.SWEDISH: Locale.sv_SE,
    Language.TAGALOG: Locale.tl_PH,
    Language.TURKISH: Locale.tr_TR,
    Language.UKRAINIAN: Locale.uk_UA,
    Language.VIETNAMESE: Locale.vi_VN,
    Language.YORUBA: Locale.yo_NG,
 }
--- a/app/pipeline.py
+++ b/app/pipeline.py
@ -3,8 +3,10 @@ import os.path
 from typing import Any, List, Tuple
 import aiohttp
 from lingua import LanguageDetector
 from app.database import Row, RowKey, upsert_row
 from app.locale import Locale
 from app.types import Site, Title
@ -94,8 +96,9 @@ def _insert(row: Row, key: RowKey, value: Any):
 class Extractor:
-    def __init__(self, fetcher: Fetcher, username: str):
+    def __init__(self, fetcher: Fetcher, detector: LanguageDetector, username: str):
        self.fetcher = fetcher
        self.detector = detector
        self.username = username
    def get_name(self) -> str | None:
@ -107,7 +110,7 @@ class Extractor:
    def get_title(self) -> Title | None:
        raise NotImplementedError()
-    def get_languages(self) -> List[str] | None:
+    def get_languages(self) -> List[Locale] | None:
        raise NotImplementedError()
    def get_rapid(self) -> int | None:
@ -157,10 +160,14 @@ class Pipeline:
    def get_fetcher(self, session: aiohttp.ClientSession) -> Fetcher:
        raise NotImplementedError()
-    def get_extractor(self, fetcher: Fetcher, username: str) -> Extractor:
+    def get_extractor(
        self, fetcher: Fetcher, detector: LanguageDetector, username: str
    ) -> Extractor:
        raise NotImplementedError()
-    async def process(self, conn, session: aiohttp.ClientSession):
+    async def process(
        self, conn, detector: LanguageDetector, session: aiohttp.ClientSession
    ):
        fetcher = self.get_fetcher(session)
        queue: asyncio.Queue = asyncio.Queue()
@ -180,7 +187,7 @@ class Pipeline:
            page_no += 1
            for username in usernames or []:
                await fetcher._download_user_files(username)
-                extractor = self.get_extractor(fetcher, username)
+                extractor = self.get_extractor(fetcher, detector, username)
                queue.put_nowait((conn, extractor))
        # Wait until the queue is fully processed.
--- a/app/types.py
+++ b/app/types.py
@ -1,5 +1,4 @@
 import enum
 from collections import OrderedDict
 class Site(enum.Enum):
@ -18,115 +17,3 @@ class Title(enum.Enum):
    WFM = "WFM"
    WCM = "WCM"
    WNM = "WNM"
 class Language(enum.Enum):
    en_GB = "English"
    af_ZA = "Afrikaans"
    an_ES = "Aragonés"
    ar_SA = "العربية"
    as_IN = "অসমীয়া"
    av_DA = "авар мацӀ"
    az_AZ = "Azərbaycanca"
    be_BY = "Беларуская"
    bg_BG = "български език"
    bn_BD = "বাংলা"
    br_FR = "Brezhoneg"
    bs_BA = "Bosanski"
    ca_ES = "Català, valencià"
    ckb_IR = "کوردی سۆرانی"
    co_FR = "Corsu"
    cs_CZ = "Čeština"
    cv_CU = "чӑваш чӗлхи"
    cy_GB = "Cymraeg"
    da_DK = "Dansk"
    de_DE = "Deutsch"
    el_GR = "Ελληνικά"
    en_US = "English (US)"
    eo_UY = "Esperanto"
    es_ES = "Español"
    et_EE = "Eesti keel"
    eu_ES = "Euskara"
    fa_IR = "فارسی"
    fi_FI = "Suomen kieli"
    fo_FO = "Føroyskt"
    fr_FR = "Français"
    frp_IT = "Arpitan"
    fy_NL = "Frysk"
    ga_IE = "Gaeilge"
    gd_GB = "Gàidhlig"
    gl_ES = "Galego"
    gsw_CH = "Schwizerdütsch"
    gu_IN = "ગુજરાતી"
    he_IL = "עִבְרִית"
    hi_IN = "हिन्दी, हिंदी"
    hr_HR = "Hrvatski"
    hu_HU = "Magyar"
    hy_AM = "Հայերեն"
    ia_IA = "Interlingua"
    id_ID = "Bahasa Indonesia"
    io_EN = "Ido"
    is_IS = "Íslenska"
    it_IT = "Italiano"
    ja_JP = "日本語"
    jbo_EN = "Lojban"
    jv_ID = "Basa Jawa"
    ka_GE = "ქართული"
    kab_DZ = "Taqvaylit"
    kk_KZ = "қазақша"
    kmr_TR = "Kurdî (Kurmancî)"
    kn_IN = "ಕನ್ನಡ"
    ko_KR = "한국어"
    ky_KG = "кыргызча"
    la_LA = "Lingua Latina"
    lb_LU = "Lëtzebuergesch"
    lt_LT = "Lietuvių kalba"
    lv_LV = "Latviešu valoda"
    mg_MG = "Fiteny malagasy"
    mk_MK = "македонски јази"
    ml_IN = "മലയാളം"
    mn_MN = "монгол"
    mr_IN = "मराठी"
    nb_NO = "Norsk bokmål"
    ne_NP = "नेपाली"
    nl_NL = "Nederlands"
    nn_NO = "Norsk nynorsk"
    pi_IN = "पालि"
    pl_PL = "Polski"
    ps_AF = "پښتو"
    pt_PT = "Português"
    pt_BR = "Português (BR)"
    ro_RO = "Română"
    ru_RU = "русский язык"
    ry_UA = "Русинська бисїда"
    sa_IN = "संस्कृत"
    sk_SK = "Slovenčina"
    sl_SI = "Slovenščina"
    sq_AL = "Shqip"
    sr_SP = "Српски језик"
    sv_SE = "Svenska"
    sw_KE = "Kiswahili"
    ta_IN = "தமிழ்"
    tg_TJ = "тоҷикӣ"
    th_TH = "ไทย"
    tk_TM = "Türkmençe"
    tl_PH = "Tagalog"
    tp_TP = "Toki pona"
    tr_TR = "Türkçe"
    uk_UA = "українська"
    ur_PK = "اُردُو"
    uz_UZ = "oʻzbekcha"
    vi_VN = "Tiếng Việt"
    yo_NG = "Yorùbá"
    zh_CN = "中文"
    zh_TW = "繁體中文"
    zu_ZA = "isiZulu"
 code_to_lang = OrderedDict(
    [(lang.name.replace("_", "-"), lang.value) for lang in Language]
 )
 lang_to_code = OrderedDict(
    [(lang.value, lang.name.replace("_", "-")) for lang in Language]
 )
--- a/poetry.lock
+++ b/poetry.lock
@ -226,6 +226,68 @@ files = [
    {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"},
 ]
 [[package]]
 name = "lingua-language-detector"
 version = "2.0.1"
 description = "An accurate natural language detection library, suitable for short text and mixed-language text"
 optional = false
 python-versions = ">=3.8"
 files = [
    {file = "lingua_language_detector-2.0.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:6a8d473de427e7eff54a5eb68fd38d75053dfc7b59e256a7233cc7409435d8ce"},
    {file = "lingua_language_detector-2.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1b4d67769fe25d903dbc41a63a5d1707913ddd3e574b072c84001cef8472ead0"},
    {file = "lingua_language_detector-2.0.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:bcb9c8d273637ca69fa242184c8525c4bc56075d4b174d4adc68d81b11b814be"},
    {file = "lingua_language_detector-2.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c42e66f3b20e97f4e4b442b3f2977b5aefb04968535fc0a78ccd4137db5ef34"},
    {file = "lingua_language_detector-2.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9563cc878cf9b45d78d9effe9b3b101cb3098b932f2921023e92f984bd5f5120"},
    {file = "lingua_language_detector-2.0.1-cp310-none-win32.whl", hash = "sha256:2cb95b8e8abb40703b0705321cd3394033812812bc18d559a9a26604b241a663"},
    {file = "lingua_language_detector-2.0.1-cp310-none-win_amd64.whl", hash = "sha256:b89d995ac9974b9190036585cbd0b70e6117a2d09f2150a898b332abd1a57636"},
    {file = "lingua_language_detector-2.0.1-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:06db94dbb5b492924a536dbf97c247b228a3fcb00fe5bef9ca83b6b1aa959ca8"},
    {file = "lingua_language_detector-2.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4d9dcf69a304819d5d2716943de980cccf140168e7d0243925bb98dd0c661600"},
    {file = "lingua_language_detector-2.0.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:7f66ed5cd572d07a1a853017f41bfd94e84e3081cc39690188adfa97337b199f"},
    {file = "lingua_language_detector-2.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e89c39a2a324d265c44f8ee7c3ffc499506d6307bb484ab1d9565f2d5857697b"},
    {file = "lingua_language_detector-2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae66cae403e36134558e929f8ba0d076be45e968f9fc7bab63869b19b34ddac1"},
    {file = "lingua_language_detector-2.0.1-cp311-none-win32.whl", hash = "sha256:d92f1e0b30233dc1091cb28fe00e0dba8255be609b2337c0dab7f71a2f9b5086"},
    {file = "lingua_language_detector-2.0.1-cp311-none-win_amd64.whl", hash = "sha256:60c1d0136c242097c58874a74d55b26e0c98ed81e44724d6426411b4bf585566"},
    {file = "lingua_language_detector-2.0.1-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:5531fbc6881149ce905e511e278ac97ed0e92d64b91b99910204058abe057769"},
    {file = "lingua_language_detector-2.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6193a0eac1534593427548d03cbcd82bcac040b3344a2abe67654c15a023c196"},
    {file = "lingua_language_detector-2.0.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:32644b03f107b9cee4a0884096acc21f1900dbec7951ede7c154d19d2a1a6f04"},
    {file = "lingua_language_detector-2.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9890f5227a7a767253248dd64fb651e4405256d8376f39216a6ff6e2702a0ee"},
    {file = "lingua_language_detector-2.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d286e14105794300217513326e2d8e82911c5efe773c6a37991336fbd63f9e2"},
    {file = "lingua_language_detector-2.0.1-cp312-none-win32.whl", hash = "sha256:ec88318a16467fea3457208ff1052dbd72cc486f9f07caeb4325fa76cab9044c"},
    {file = "lingua_language_detector-2.0.1-cp312-none-win_amd64.whl", hash = "sha256:5fcd53f1a2dc84a00a79c56ca59e4580cfbbb829e5e56249835d31444cf1f9ea"},
    {file = "lingua_language_detector-2.0.1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:8efa10f188c40e10016e4f0d2a34d743e1555ddf4bd47553f6dd420f673c0e78"},
    {file = "lingua_language_detector-2.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ca3f11f372658cda18b309998596de6ffceaf6461e376da9c2861ac9d8b7efa3"},
    {file = "lingua_language_detector-2.0.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5599752ded097132d2a59b2764287240e72e2e9859bb69f43b2957f12d69ac6f"},
    {file = "lingua_language_detector-2.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27c1e422e5355d6931f82a1da52f822048be68c5f74d8b997c7d9f9617002e6a"},
    {file = "lingua_language_detector-2.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e489cb648684c5bb5a0d6cab2f602fe0bda7e72921f327ba7350e30c60edaa43"},
    {file = "lingua_language_detector-2.0.1-cp38-none-win32.whl", hash = "sha256:5d9f7f4b47c5bde5ff85089bcc4625f2f1a17e7677ec15dadb272b6e4b42c274"},
    {file = "lingua_language_detector-2.0.1-cp38-none-win_amd64.whl", hash = "sha256:493908b45ff237c8c776d1d2b688b113999667b841f284bae862af5f7f526b4f"},
    {file = "lingua_language_detector-2.0.1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:e0c269a6099e2a637e2b8a312870792be2bb047abc6e7646122e498a159cc0b4"},
    {file = "lingua_language_detector-2.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7fcd61ec1c771f452346bc57d756fd079a89e9134b93ef94f5f166e27cda6385"},
    {file = "lingua_language_detector-2.0.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:3728012ace5b8e799df27d1e9272396bc5829a609f1aa799109a22e07e1034c2"},
    {file = "lingua_language_detector-2.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0619216bed8746501f0c3c4294ebe1bd55a2d9d72083e68dc0d954f9af5ab12e"},
    {file = "lingua_language_detector-2.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cde1651655cac14a3b9ea9c3319a700de66a1f73cd07d63e9e653fc84d02f11e"},
    {file = "lingua_language_detector-2.0.1-cp39-none-win32.whl", hash = "sha256:dc1cfcaea24d71d657cf75fb54b593d3db14cf3a19aef6cd1017d0982a407b4e"},
    {file = "lingua_language_detector-2.0.1-cp39-none-win_amd64.whl", hash = "sha256:71d0662458e025aae7a6cbb0cc4e8169ea75e5b615a85e3310964748449896dd"},
    {file = "lingua_language_detector-2.0.1-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:42907729b8a343e91fd3d6e61b6add116c513354b6a88072245a4a1d21a18fb9"},
    {file = "lingua_language_detector-2.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:ed60cb9b9fead792ec97544a3888e7aeda2ae34503c6252a92df599e006d7253"},
    {file = "lingua_language_detector-2.0.1-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:fa1a6ab7bec65695650e801e23648742e92651315cf1443e9002a1b88ea2ac41"},
    {file = "lingua_language_detector-2.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be1bf4faa1e645876c8dfc29b37f8f567451b48d43f45c916aba13b946d5069c"},
    {file = "lingua_language_detector-2.0.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31c7e52f4e0ca47b787fb5e05560a9e3d7a6bc10488a35a31248f82647314957"},
    {file = "lingua_language_detector-2.0.1-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:c91fedd566b07ac1dc6c091bde6d69dae5c12c90e3e3c1d74fe29f76852e775a"},
    {file = "lingua_language_detector-2.0.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:16f5fecb6eba86bc094a734ba4bd0603b5bcc580a70c07d659ac2aec14f018ac"},
    {file = "lingua_language_detector-2.0.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f21abb598efa1de408275e640a22e8b967a43a9fbb0f32302a206efabf3ca0bc"},
    {file = "lingua_language_detector-2.0.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8906262b3866ae4473b5d5f08703addf0b6b12bc9c9aefcf2e2d855c1496d47a"},
    {file = "lingua_language_detector-2.0.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc9a6d87e766bf6d2d014986fda13fb8aa6c5602811e856f5555dd8128bd4f2e"},
    {file = "lingua_language_detector-2.0.1-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:650a6df0c1c7c7d650c9872be1f8e4b6ba32ff363d8184f60ee80441cffad779"},
    {file = "lingua_language_detector-2.0.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:62d7e29b9e85289bdf80a02a6ef9fcc7b943a5d123eaafa313aad4cfe7b48767"},
    {file = "lingua_language_detector-2.0.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ee213e9c0c4a256c2bbe2c1f074c9ac122073045049b6a7f999b507da185dea3"},
    {file = "lingua_language_detector-2.0.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06787b555ad9ddae613c0850b2aff991ea3e87c1ef714166d9691f90d1ad366c"},
    {file = "lingua_language_detector-2.0.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d983db8f8f0afbf2b4a796e6c8790490868c024492e59ec8b2fca30599c84760"},
 ]
 [package.extras]
 test = ["pytest (==7.4.3)"]
 [[package]]
 name = "lxml"
 version = "4.9.3"
@ -555,4 +617,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "4a667c2246018c259f23d7fe07102b3b680693beccbc685467b1e29c2c3e2db6"
+content-hash = "d6e80d9fee5ef164d85b7b32db698755af31847470468e3c35d655d5f44f95b4"
--- a/pyproject.toml
+++ b/pyproject.toml
@ -11,6 +11,7 @@ beautifulsoup4 = "^4.12.2"
 aiohttp = "^3.8.6"
 lxml = "^4.9.3"
 psycopg2 = "^2.9.9"
 lingua-language-detector = "^2.0.1"
 [build-system]
 requires = ["poetry-core"]
@ -20,5 +21,5 @@ build-backend = "poetry.core.masonry.api"
 app = "app.__main__:main"
 [[tool.mypy.overrides]]
-module = "aiohttp"
+module = ["aiohttp", "lingua"]
 ignore_missing_imports = true