Partial state.

main
Joshua Potter 2022-07-02 19:01:12 -06:00
parent 54411fbf70
commit 0b629044a8
2 changed files with 157 additions and 0 deletions

README.md Normal file

@@ -0,0 +1,36 @@
# anki-synonyms
A simple [Anki](https://apps.ankiweb.net/) plugin that randomly chooses
between different options for parts of a prompt. It was designed to handle
synonyms in a clean way.

Consider a [total order](https://en.wikipedia.org/wiki/Total_order). What the
term means does not matter here; what else it is called does. What some people
call a "total order", others call a "linear order". Though this example is
simple, it highlights a real issue: remembering the various synonyms used to
describe a concept is important for fluency.

As of now, the best way to handle this situation is probably to use two
flashcards, one with prompt "Total Order" and another with prompt "Linear
Order". In some cases though, it'd be nice if a single flashcard could
*choose* which term to show each time it is reviewed. That is, it'd be nice to
have one card and let Anki randomly choose to display "Total Order" *or*
"Linear Order".

To do so, we can install this plugin and write the following:
```
'(Total|Linear) Order
```
Here, `'(` indicates the start of a set of choices Anki can display, `|`
separates the different options, and `)` indicates the end of the set. At
prompt time, the result is either "Total Order" or "Linear Order".
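
Under the hood, choices are resolved by `run_parser` in `parser.py`. As a
rough sketch (assuming `parser.py` is importable from the working directory),
the parser can be exercised directly:

```
from parser import run_parser

# Each call picks one of the options at random.
print(run_parser("'(Total|Linear) Order"))  # "Total Order" or "Linear Order"
```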

## Configuration

TODO

## Nesting

TODO
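
Though marked TODO, the stack-based parser already hints at the intended
behavior: a choice set may itself appear inside another choice set. A
speculative example (assuming nesting ends up supported as the parser
suggests):

```
'(a|'(b|c)) d
```

would yield "a d", "b d", or "c d".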

parser.py Normal file

@@ -0,0 +1,121 @@
from dataclasses import dataclass
from typing import Union
import copy
import enum
import random

START_TAG = "'("
END_TAG = ")"
CHOICE_TAG = "|"


class Tag(enum.Enum):
    START = 0
    END = 1
    CHOICE = 2


Token = Union[Tag, str]


@dataclass
class ParserState:
    """Convenience class used when traversing our tokenized stream."""

    starts: int  # Number of START tags not yet matched by an END.
    pos: int  # Current position within the input string.
    tokens: list[Token]  # Tokens labeled so far.


def _matches_at(arg: str, target: str, pos: int = 0) -> bool:
    """Check whether the substring of @arg starting at @pos matches @target."""
    return arg[pos : pos + len(target)] == target


def _label_tokens(arg: str) -> ParserState:
    """Primary lexing function which traverses our stream and assigns initial
    token labels.

    Note this is a greedy algorithm so it is possible we incorrectly label
    tokens as 'START'. For instance, consider a start tag of "'(". Then running

    >>> _label_tokens(arg="hello'(")

    will yield a token stream like ["hello", START] when we should have just a
    single entry "hello'(". This gets corrected in `_relabel_starts`.
    """
    state = ParserState(starts=0, pos=0, tokens=[])
    while state.pos < len(arg):
        if _matches_at(arg, target=START_TAG, pos=state.pos):
            state.tokens.append(Tag.START)
            state.starts += 1
            state.pos += len(START_TAG)
        elif state.starts and _matches_at(arg, target=END_TAG, pos=state.pos):
            state.tokens.append(Tag.END)
            state.starts -= 1
            state.pos += len(END_TAG)
        elif state.starts and _matches_at(arg, target=CHOICE_TAG, pos=state.pos):
            state.tokens.append(Tag.CHOICE)
            state.pos += len(CHOICE_TAG)
        else:
            state.tokens.append(arg[state.pos])
            state.pos += 1
    return state


def _relabel_starts(state: ParserState) -> ParserState:
    """Relabels 'START' tags that may have been labeled incorrectly.

    Walks the token stream in reverse, converting unmatched 'START' tags back
    into their literal text.
    """
    new_state = copy.copy(state)
    if not new_state.starts:
        return new_state
    for i, token in enumerate(reversed(new_state.tokens)):
        if token != Tag.START:
            continue
        # Map the reversed index back onto the original token stream.
        index = len(new_state.tokens) - i - 1
        new_state.tokens[index] = START_TAG
        new_state.starts -= 1
        if not new_state.starts:
            break
    return new_state


def _group_tokens(state: ParserState) -> list[Token]:
    """Aggregate adjacent strings together into a single token."""
    new_tokens: list[Token] = []
    for token in state.tokens:
        if new_tokens and isinstance(token, str) and isinstance(new_tokens[-1], str):
            new_tokens[-1] += token
        else:
            new_tokens.append(token)
    return new_tokens


def _tokenize(arg: str) -> list[Token]:
    """Break string into token stream for easier handling."""
    state = _label_tokens(arg)
    state = _relabel_starts(state)
    return _group_tokens(state)
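
# For instance (illustrative, not in the original commit):
#   _tokenize("'(hello|world)")
#   == [Tag.START, "hello", Tag.CHOICE, "world", Tag.END]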


def run_parser(arg: str) -> str:
    """Find all "choice" selections within the given @arg.

    For instance, assuming a START, END, and CHOICE of "'(", ")", and "|"
    respectively, parsing "'(hello|world)" yields either "hello" or "world".
    """
    tokens = _tokenize(arg)
    # Each stack frame is a list of options; each option is a list of parts
    # that are concatenated once its frame is resolved. Tracking parts (rather
    # than a flat token list) keeps multi-token options intact when groups
    # nest, e.g. "'(foo '(a|b)|bar)".
    stack: list[list[list[str]]] = [[[]]]
    for token in tokens:
        if token is Tag.START:
            stack.append([[]])
        elif token is Tag.END:
            options = stack.pop()
            stack[-1][-1].append("".join(random.choice(options)))
        elif token is Tag.CHOICE:
            if len(stack) > 1:
                stack[-1].append([])
            else:
                # A stray separator outside any group is kept literally.
                stack[-1][-1].append(CHOICE_TAG)
        else:
            stack[-1][-1].append(token)
    assert len(stack) == 1, "Stack is larger than a single element"
    return "".join(stack[0][0])