# anki-synonyms

A simple [Anki](https://apps.ankiweb.net/) plugin that allows randomly
choosing between different options for parts of a prompt. It was designed to
handle synonyms in a clean way.

Consider a [total order](https://en.wikipedia.org/wiki/Total_order). What the
term means does not matter here; what else it can be called does. What some
people call a "total order", others call a "linear order". Though this example
is simple, it highlights a real issue: remembering the various synonyms used
to describe a concept is important for fluency.

As of now, the best way to handle this situation is probably to use two
flashcards, one with the prompt "Total Order" and another with the prompt
"Linear Order". In some cases though, it'd be nice if a single flashcard could
*choose* which term to show each time it comes up. That is, it'd be nice to
have one card and let Anki randomly choose to show "Total Order" *or*
"Linear Order".

To do so, we can install this plugin and write the following:

```
'(Total|Linear) Order
```

Here, `'(` marks the start of a set of choices Anki can display, `|` separates
the different options, and `)` marks the end of the set. The result is either
"Total Order" or "Linear Order" at the time of prompting.
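
For a quick feel of the behavior, here is a doctest-style sketch of the
plugin's parser, `run_parser` (defined in the source below); calling it by
hand like this is just for illustration:

```
>>> run_parser("'(Total|Linear) Order")
'Total Order'
>>> run_parser("'(Total|Linear) Order")
'Linear Order'
```

Each render picks independently at random, so repeated reviews of the same
card can surface different phrasings.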

## Configuration

TODO

## Nesting

TODO
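
Though undocumented as of yet, the stack-based parser below suggests that
choice sets can nest. A sketch of the expected behavior (an assumption drawn
from the code, not a documented guarantee):

```
'(a '(big|large) dog|cat)
```

This should resolve to "a big dog", "a large dog", or "cat".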

import copy
import enum
import random
from dataclasses import dataclass
from typing import Union


START_TAG = "'("
END_TAG = ")"
CHOICE_TAG = "|"


class Tag(enum.Enum):
    START = 0
    END = 1
    CHOICE = 2


Token = Union[Tag, str]


@dataclass
class ParserState:
    """Convenience class used when traversing our tokenized stream."""

    starts: int
    pos: int
    tokens: list[Token]


def _matches_at(arg: str, target: str, pos: int = 0) -> bool:
    """Check whether the substring of @arg starting at @pos matches @target."""
    return arg[pos : pos + len(target)] == target
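
# For example, _matches_at("'(a|b)", target="'(") and
# _matches_at("'(a|b)", target="|", pos=3) both return True.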


def _label_tokens(arg: str) -> ParserState:
    """Primary lexing function which traverses our stream and assigns initial
    token labels.

    Note this is a greedy algorithm, so it is possible we incorrectly label
    tokens as 'START'. For instance, consider a start tag of "'(". Then running

    >>> _label_tokens(arg="hello'(")

    will yield the token stream ["h", "e", "l", "l", "o", START] when the
    trailing "'(" should have stayed literal text. This gets corrected in
    `_relabel_starts`.
    """
    state = ParserState(starts=0, pos=0, tokens=[])
    while state.pos < len(arg):
        if _matches_at(arg, target=START_TAG, pos=state.pos):
            state.tokens.append(Tag.START)
            state.starts += 1
            state.pos += len(START_TAG)
        elif state.starts and _matches_at(arg, target=END_TAG, pos=state.pos):
            state.tokens.append(Tag.END)
            state.starts -= 1
            state.pos += len(END_TAG)
        elif state.starts and _matches_at(arg, target=CHOICE_TAG, pos=state.pos):
            state.tokens.append(Tag.CHOICE)
            state.pos += len(CHOICE_TAG)
        else:
            state.tokens.append(arg[state.pos])
            state.pos += 1
    return state


def _relabel_starts(state: ParserState) -> ParserState:
    """Relabels 'START' tags that may have been labeled incorrectly."""
    new_state = copy.copy(state)
    # `copy.copy` is shallow and the loop below mutates the token list, so
    # copy the list as well to leave the input state untouched.
    new_state.tokens = list(state.tokens)
    if not new_state.starts:
        return new_state
    for i, token in enumerate(reversed(new_state.tokens)):
        if token != Tag.START:
            continue
        # `i` counts from the end of the list; convert to a forward index.
        index = len(new_state.tokens) - i - 1
        new_state.tokens[index] = START_TAG
        new_state.starts -= 1
        if not new_state.starts:
            break
    return new_state
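
# For example, "hi'(" lexes to ["h", "i", Tag.START] with `starts == 1`; the
# pass above rewrites the dangling tag back to the literal ["h", "i", "'("].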


def _group_tokens(state: ParserState) -> list[Token]:
    """Aggregate adjacent strings together into a single token."""
    new_tokens: list[Token] = []
    for token in state.tokens:
        if new_tokens and isinstance(token, str) and isinstance(new_tokens[-1], str):
            new_tokens[-1] += token
        else:
            new_tokens.append(token)
    return new_tokens
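
# For example, ["h", "i", Tag.CHOICE, "y", "o"] groups into
# ["hi", Tag.CHOICE, "yo"].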


def _tokenize(arg: str) -> list[Token]:
    """Break string into token stream for easier handling."""
    state = _label_tokens(arg)
    state = _relabel_starts(state)
    return _group_tokens(state)
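
# For example, with the default tags:
#   _tokenize("'(Total|Linear) Order")
#   -> [Tag.START, "Total", Tag.CHOICE, "Linear", Tag.END, " Order"]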


def run_parser(arg: str) -> str:
    """Resolve all "choice" selections within the given @arg.

    For instance, assuming a START, END, and CHOICE of "'(", ")", and "|"
    respectively, parsing "'(hello|world)" yields either "hello" or "world".
    """
    tokens = _tokenize(arg)
    # Each stack frame is a list of options; each option is a list of string
    # parts. The outermost frame holds the final output as its sole option.
    stack: list[list[list[str]]] = [[[]]]
    for token in tokens:
        if token is Tag.START:
            stack.append([[]])
        elif token is Tag.END:
            options = stack.pop()
            # Pick one option and splice it into the enclosing frame so that
            # nested choices concatenate with their surrounding text.
            stack[-1][-1].append("".join(random.choice(options)))
        elif token is Tag.CHOICE:
            # A separator starts a new option within the current frame.
            stack[-1].append([])
        else:
            stack[-1][-1].append(token)
    assert len(stack) == 1, "Stack is larger than a single element"
    return "".join(part for option in stack[0] for part in option)