anki-synonyms/__init__.py

import copy
import enum
import random
from dataclasses import dataclass
from typing import Union

from anki import hooks
from anki.template import TemplateRenderContext, TemplateRenderOutput
from aqt import mw

config = mw.addonManager.getConfig(__name__)
START_TAG: str = config["START_TAG"]
END_TAG: str = config["END_TAG"]
CHOICE_TAG: str = config["CHOICE_TAG"]

assert (
    len({START_TAG, END_TAG, CHOICE_TAG}) == 3
), "Must have unique start, end, and choice operators."


class Tag(enum.Enum):
    START = 0
    END = 1
    CHOICE = 2


# A token is either a structural tag or a literal piece of text.
Token = Union[Tag, str]


@dataclass
class ParserState:
    """Convenience class used when traversing our tokenized stream."""

    starts: int  # Number of START tags seen that have not yet been closed.
    pos: int  # Current index into the input string.
    tokens: list[Token]  # Tokens labeled so far.


def _matches_at(arg: str, target: str, pos: int = 0) -> bool:
    """Check whether the substring of @arg starting at @pos matches @target."""
    return arg[pos : pos + len(target)] == target
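
# Illustration, assuming the example tags from the docstrings below:
# _matches_at("hello'(", target="'(", pos=5) is True, while
# _matches_at("hello'(", target="'(", pos=0) is False.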


def _label_tokens(arg: str) -> ParserState:
    """Primary lexing function which traverses our stream and assigns initial
    token labels.

    Note this is a greedy algorithm, so it is possible we incorrectly label
    tokens as 'START'. For instance, consider a start tag of "'(". Then running

    >>> _label_tokens(arg="hello'(")

    will yield a token stream like ["hello", START] when we should have just a
    single entry "hello'(". This gets corrected in `_relabel_starts`.
    """
    state = ParserState(starts=0, pos=0, tokens=[])
    while state.pos < len(arg):
        if _matches_at(arg, target=START_TAG, pos=state.pos):
            state.tokens.append(Tag.START)
            state.starts += 1
            state.pos += len(START_TAG)
        elif state.starts and _matches_at(arg, target=END_TAG, pos=state.pos):
            state.tokens.append(Tag.END)
            state.starts -= 1
            state.pos += len(END_TAG)
        elif state.starts and _matches_at(arg, target=CHOICE_TAG, pos=state.pos):
            state.tokens.append(Tag.CHOICE)
            # Advance past the whole tag, which may be longer than one character.
            state.pos += len(CHOICE_TAG)
        else:
            state.tokens.append(arg[state.pos])
            state.pos += 1
    return state
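
# Example, assuming the "'(", ")", "|" tags from the docstrings: running
#
#     >>> _label_tokens("'(hi|yo)")
#
# yields a state with tokens [START, 'h', 'i', CHOICE, 'y', 'o', END] and
# starts == 0; characters stay separate until `_group_tokens` merges them.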


def _relabel_starts(state: ParserState) -> ParserState:
    """Relabels 'START' tags that may have been labeled incorrectly.

    Any START tags still unmatched after lexing were not real operators, so we
    demote them back to their literal text, scanning from the end of the stream.
    """
    new_state = copy.copy(state)
    # copy.copy is shallow, so give the copy its own token list before mutating.
    new_state.tokens = list(state.tokens)
    if not new_state.starts:
        return new_state
    for i, token in enumerate(reversed(new_state.tokens)):
        if token != Tag.START:
            continue
        # Convert the reversed offset back into a forward index.
        index = len(new_state.tokens) - i - 1
        new_state.tokens[index] = START_TAG
        new_state.starts -= 1
        if not new_state.starts:
            break
    return new_state
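
# Continuing the docstring example: lexing "hello'(" leaves starts == 1 with a
# trailing START token; relabeling turns that token back into the literal
# string "'(" so the text round-trips unchanged.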


def _group_tokens(state: ParserState) -> list[Token]:
    """Aggregate adjacent strings together into a single token."""
    new_tokens: list[Token] = []
    for token in state.tokens:
        if new_tokens and isinstance(token, str) and isinstance(new_tokens[-1], str):
            new_tokens[-1] += token
        else:
            new_tokens.append(token)
    return new_tokens
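
# Example: the labeled stream [START, 'h', 'i', CHOICE, 'y', 'o', END] from
# above groups into [START, 'hi', CHOICE, 'yo', END].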


def _tokenize(arg: str) -> list[Token]:
    """Break string into token stream for easier handling."""
    state = _label_tokens(arg)
    state = _relabel_starts(state)
    return _group_tokens(state)
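
# End to end: _tokenize("hello'(") collapses to the single literal token
# ["hello'("], since the stray START is demoted and regrouped with its prefix.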


def run_parser(arg: str) -> str:
    """Find all "choice" selections within the given @arg.

    For instance, assuming a START, END, and CHOICE of "'(", ")", and "|"
    respectively, parsing "'(hello|world)" yields either "hello" or "world".
    """
    tokens = _tokenize(arg)
    # buffer[-1] accumulates text at the current nesting depth, while
    # stack[-1] collects the completed alternatives for the innermost
    # open choice.
    buffer: list[str] = [""]
    stack: list[list[str]] = []
    for token in tokens:
        if token is Tag.START:
            buffer.append("")
            stack.append([])
        elif token is Tag.END:
            stack[-1].append(buffer.pop())
            choices = stack.pop()
            buffer[-1] += random.choice(choices)
        elif token is Tag.CHOICE:
            stack[-1].append(buffer[-1])
            buffer[-1] = ""
        else:
            buffer[-1] += token
    assert not stack, "Stack should be empty."
    assert len(buffer) == 1, "Buffer should only have one element."
    return buffer[0]
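
# Choices may nest; with the same example tags, run_parser("'(a|'(b|c))")
# returns "a" half the time, and otherwise "b" or "c" with equal odds.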


def on_card_render(
    output: TemplateRenderOutput,
    _unused_context: TemplateRenderContext,
) -> None:
    """Run the parser over both sides of every rendered card."""
    output.question_text = run_parser(output.question_text)
    output.answer_text = run_parser(output.answer_text)


hooks.card_did_render.append(on_card_render)
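
# With the example tags, a card whose template renders to
# "What is '(big|large)?" is shown as either "What is big?" or
# "What is large?", re-randomized on each render.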