diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5eb7759
--- /dev/null
+++ b/README.md
@@ -0,0 +1,36 @@
+# anki-synonyms
+
+A simple [Anki](https://apps.ankiweb.net/) plugin that allows randomly choosing
+different options for parts of prompts. It was designed to handle synonyms in
+a clean way.
+
+Consider a [total order](https://en.wikipedia.org/wiki/Total_order). What a
+total order *is* does not matter here; what it is also *called* does. What
+some people call a "total order", others call a "linear order". Though this
+example is simple, it highlights a real issue: remembering the various
+synonyms used to describe a concept is important for fluency.
+
+Without this plugin, the best way to handle this situation is probably to use
+two flashcards, one with the prompt "Total Order" and another with the prompt
+"Linear Order". In some cases though, it would be nice if the flashcard could
+*choose* which term to show each time it is displayed. That is, it would be
+nice to have a single card and let Anki randomly choose to show "Total Order"
+*or* "Linear Order".
+
+To do so, we can install this plugin and write the following:
+
+```
+'(Total|Linear) Order
+```
+
+Here, `'(` indicates the start of a set of choices Anki can display, `|`
+separates the different options, and `)` indicates the end of the set. The
+result is either "Total Order" or "Linear Order" at prompt time.
+
+## Configuration
+
+TODO
+
+## Nesting
+
+TODO
diff --git a/parser.py b/parser.py
new file mode 100644
index 0000000..843762e
--- /dev/null
+++ b/parser.py
@@ -0,0 +1,121 @@
+from dataclasses import dataclass
+from typing import Union
+
+import copy
+import enum
+import random
+
+
+START_TAG = "'("
+END_TAG = ")"
+CHOICE_TAG = "|"
+
+
+class Tag(enum.Enum):
+    START = 0
+    END = 1
+    CHOICE = 2
+
+
+Token = Union[Tag, str]
+
+
+@dataclass
+class ParserState:
+    """Convenience class used when traversing our tokenized stream."""
+
+    starts: int  # number of START tags seen but not yet matched by an END
+    pos: int  # current scan position within the input string
+    tokens: list[Token]  # labeled token stream built up so far
+
+
+def _matches_at(arg: str, target: str, pos: int = 0) -> bool:
+    """Check whether the substring of @arg starting at @pos matches @target."""
+    return arg[pos : pos + len(target)] == target
+
+
+def _label_tokens(arg: str) -> ParserState:
+    """Primary lexing function which traverses our stream and assigns initial
+    token labels.
+
+    Note this is a greedy algorithm, so it is possible we incorrectly label
+    tokens as 'START'. For instance, consider a start tag of "'(". Then
+    running
+
+    >>> _label_tokens(arg="hello'(")
+
+    will label the trailing "'(" as START, yielding (after grouping) a token
+    stream like ["hello", START] when we should have just the single entry
+    "hello'(". This gets corrected in `_relabel_starts`.
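+
+    As a concrete sketch of the happy path (the expected output below assumes
+    Tag's default enum repr), a well-formed input is labeled per character:
+
+    >>> _label_tokens("'(a|b)").tokens
+    [<Tag.START: 0>, 'a', <Tag.CHOICE: 2>, 'b', <Tag.END: 1>]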
+ """ + state = ParserState(starts=0, pos=0, tokens=[]) + while state.pos < len(arg): + if _matches_at(arg, target=START_TAG, pos=state.pos): + state.tokens.append(Tag.START) + state.starts += 1 + state.pos += len(START_TAG) + elif state.starts and _matches_at(arg, target=END_TAG, pos=state.pos): + state.tokens.append(Tag.END) + state.starts -= 1 + state.pos += len(END_TAG) + elif state.starts and _matches_at(arg, target=CHOICE_TAG, pos=state.pos): + state.tokens.append(Tag.CHOICE) + state.pos += 1 + else: + state.tokens.append(arg[state.pos]) + state.pos += 1 + return state + + +def _relabel_starts(arg: str, state: ParserState) -> ParserState: + """Relabels 'START' tags that may have been labeled incorrectly.""" + new_state = copy.copy(state) + if not new_state.starts: + return new_state + for i, token in enumerate(reversed(new_state.tokens)): + if token != Tag.START: + continue + index = len(new_state.tokens) - i - 1 + new_state.tokens[index] = START_TAG + new_state.starts -= 1 + if not new_state.starts: + break + return new_state + + +def _group_tokens(state: ParserState) -> list[Token]: + """Aggregate adjacent strings together into a single token.""" + new_tokens: list[Token] = [] + for token in state.tokens: + if new_tokens and isinstance(token, str) and isinstance(new_tokens[-1], str): + new_tokens[-1] += token + else: + new_tokens.append(token) + return new_tokens + + +def _tokenize(arg: str) -> list[Token]: + """Break string into token stream for easier handling.""" + state = _label_tokens(arg) + state = _relabel_starts(arg, state) + return _group_tokens(state) + + +def run_parser(arg: str) -> str: + """Find all "choice" selections within the given @arg. + + For instance, assuming a START, END, and CHOICE of "'(", ")", and "|" + respectively, parsing "'(hello|world)" yields either "hello" or "world". + """ + tokens = _tokenize(arg) + stack: list[list[str]] = [[]] + for token in tokens: + if token is Tag.START: + stack.append([]) + elif token is Tag.END: + ts = stack.pop() + stack[-1].append(random.choice(ts)) + elif token is Tag.CHOICE: + pass + else: + stack[-1].append(token) + assert len(stack) == 1, "Stack is larger than a single element" + return "".join(stack[0])