Partial state.

main
Joshua Potter 2022-07-02 19:01:12 -06:00
parent 54411fbf70
commit 0b629044a8
2 changed files with 157 additions and 0 deletions

README.md Normal file

@@ -0,0 +1,36 @@
# anki-synonyms
A simple [Anki](https://apps.ankiweb.net/) plugin that randomly chooses
between different options for parts of a prompt. It was designed to handle
synonyms in a clean way.

Consider a [total order](https://en.wikipedia.org/wiki/Total_order). What the
term means does not matter here; what else it is called does. What some people
call a "total order", others call a "linear order". Though this example is
simple, it highlights a real issue: remembering the various synonyms used to
describe a concept is important for fluency.

As of now, the best way to handle this situation is probably to use two
flashcards, one with prompt "Total Order" and another with prompt "Linear
Order". In some cases though, it'd be nice if a single flashcard could
*choose* which term to show each time it is reviewed. That is, it'd be nice to
have one card and let Anki randomly choose to display "Total Order" *or*
"Linear Order".

To do so, we can install this plugin and write the following:
```
'(Total|Linear) Order
```
Here, `'(` indicates the start of a set of choices Anki can display, `|`
separates the different options, and `)` indicates the end of the set. At
prompt time, the result is either "Total Order" or "Linear Order".
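
Under the hood, choices are resolved by `run_parser` in `parser.py`. As a
rough sketch (assuming `parser.py` is importable from the working directory),
the parser can be exercised directly:

```
from parser import run_parser

# Each call picks one of the options at random.
print(run_parser("'(Total|Linear) Order"))  # "Total Order" or "Linear Order"
```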

## Configuration

TODO

## Nesting

TODO
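
Though marked TODO, the stack-based parser already hints at the intended
behavior: a choice set may itself appear inside another choice set. A
speculative example (assuming nesting ends up supported as the parser
suggests):

```
'(a|'(b|c)) d
```

would yield "a d", "b d", or "c d".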

parser.py Normal file

@@ -0,0 +1,121 @@
from dataclasses import dataclass
from typing import Union
import copy
import enum
import random

START_TAG = "'("
END_TAG = ")"
CHOICE_TAG = "|"


class Tag(enum.Enum):
    START = 0
    END = 1
    CHOICE = 2


Token = Union[Tag, str]


@dataclass
class ParserState:
    """Convenience class used when traversing our tokenized stream."""

    starts: int  # Number of START tags not yet matched by an END.
    pos: int  # Current position within the input string.
    tokens: list[Token]  # Tokens labeled so far.


def _matches_at(arg: str, target: str, pos: int = 0) -> bool:
    """Check whether the substring of @arg starting at @pos matches @target."""
    return arg[pos : pos + len(target)] == target


def _label_tokens(arg: str) -> ParserState:
    """Primary lexing function which traverses our stream and assigns initial
    token labels.

    Note this is a greedy algorithm so it is possible we incorrectly label
    tokens as 'START'. For instance, consider a start tag of "'(". Then running

    >>> _label_tokens(arg="hello'(")

    will yield a token stream like ["hello", START] when we should have just a
    single entry "hello'(". This gets corrected in `_relabel_starts`.
    """
    state = ParserState(starts=0, pos=0, tokens=[])
    while state.pos < len(arg):
        if _matches_at(arg, target=START_TAG, pos=state.pos):
            state.tokens.append(Tag.START)
            state.starts += 1
            state.pos += len(START_TAG)
        elif state.starts and _matches_at(arg, target=END_TAG, pos=state.pos):
            state.tokens.append(Tag.END)
            state.starts -= 1
            state.pos += len(END_TAG)
        elif state.starts and _matches_at(arg, target=CHOICE_TAG, pos=state.pos):
            state.tokens.append(Tag.CHOICE)
            state.pos += len(CHOICE_TAG)
        else:
            state.tokens.append(arg[state.pos])
            state.pos += 1
    return state


def _relabel_starts(state: ParserState) -> ParserState:
    """Relabels 'START' tags that may have been labeled incorrectly.

    Walks the token stream in reverse, converting unmatched 'START' tags back
    into their literal text.
    """
    new_state = copy.copy(state)
    if not new_state.starts:
        return new_state
    for i, token in enumerate(reversed(new_state.tokens)):
        if token != Tag.START:
            continue
        # Map the reversed index back onto the original token stream.
        index = len(new_state.tokens) - i - 1
        new_state.tokens[index] = START_TAG
        new_state.starts -= 1
        if not new_state.starts:
            break
    return new_state


def _group_tokens(state: ParserState) -> list[Token]:
    """Aggregate adjacent strings together into a single token."""
    new_tokens: list[Token] = []
    for token in state.tokens:
        if new_tokens and isinstance(token, str) and isinstance(new_tokens[-1], str):
            new_tokens[-1] += token
        else:
            new_tokens.append(token)
    return new_tokens


def _tokenize(arg: str) -> list[Token]:
    """Break string into token stream for easier handling."""
    state = _label_tokens(arg)
    state = _relabel_starts(state)
    return _group_tokens(state)
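
# For instance (illustrative, not in the original commit):
#   _tokenize("'(hello|world)")
#   == [Tag.START, "hello", Tag.CHOICE, "world", Tag.END]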


def run_parser(arg: str) -> str:
    """Find all "choice" selections within the given @arg.

    For instance, assuming a START, END, and CHOICE of "'(", ")", and "|"
    respectively, parsing "'(hello|world)" yields either "hello" or "world".
    """
    tokens = _tokenize(arg)
    # Each stack frame is a list of options; each option is a list of parts
    # that are concatenated once its frame is resolved. Tracking parts (rather
    # than a flat token list) keeps multi-token options intact when groups
    # nest, e.g. "'(foo '(a|b)|bar)".
    stack: list[list[list[str]]] = [[[]]]
    for token in tokens:
        if token is Tag.START:
            stack.append([[]])
        elif token is Tag.END:
            options = stack.pop()
            stack[-1][-1].append("".join(random.choice(options)))
        elif token is Tag.CHOICE:
            if len(stack) > 1:
                stack[-1].append([])
            else:
                # A stray separator outside any group is kept literally.
                stack[-1][-1].append(CHOICE_TAG)
        else:
            stack[-1][-1].append(token)
    assert len(stack) == 1, "Stack is larger than a single element"
    return "".join(stack[0][0])