From 30edc2dbdcf8a7576fab5e26c4020377efaf3257 Mon Sep 17 00:00:00 2001 From: Joshua Potter Date: Sun, 4 Feb 2024 05:50:19 -0700 Subject: [PATCH] Add notes on POSIX regexps. --- .../plugins/obsidian-to-anki-plugin/data.json | 60 +++- notes/bash/robustness.md | 1 + notes/gawk/regexp.md | 25 ++ notes/journal/2024-02-04.md | 11 + notes/posix/regexp.md | 279 ++++++++++++++++++ 5 files changed, 361 insertions(+), 15 deletions(-) create mode 100644 notes/gawk/regexp.md create mode 100644 notes/journal/2024-02-04.md create mode 100644 notes/posix/regexp.md diff --git a/notes/.obsidian/plugins/obsidian-to-anki-plugin/data.json b/notes/.obsidian/plugins/obsidian-to-anki-plugin/data.json index 779999a..2c9b638 100644 --- a/notes/.obsidian/plugins/obsidian-to-anki-plugin/data.json +++ b/notes/.obsidian/plugins/obsidian-to-anki-plugin/data.json @@ -16,12 +16,39 @@ "Image Occlusion": "Occlusion", "Image Occlusion Enhanced": "ID (hidden)" }, - "CONTEXT_FIELDS": {}, - "FOLDER_DECKS": {}, - "FOLDER_TAGS": {}, + "CONTEXT_FIELDS": { + "Basic": "Context", + "Cloze": "Context" + }, + "FOLDER_DECKS": { + "algorithms": "", + "bash": "", + "c": "", + "gawk": "", + "journal": "", + "logic": "", + "lua": "", + "nix": "", + "posix": "", + "templates": "", + "algorithms/sorting": "" + }, + "FOLDER_TAGS": { + "algorithms": "", + "bash": "", + "c": "", + "gawk": "", + "journal": "", + "logic": "", + "lua": "", + "nix": "", + "posix": "", + "templates": "", + "algorithms/sorting": "" + }, "Syntax": { - "Begin Note": "START", - "End Note": "END", + "Begin Note": "%%ANKI", + "End Note": "END%%", "Begin Inline Note": "STARTI", "End Inline Note": "ENDI", "Target Deck Line": "TARGET DECK", @@ -31,13 +58,13 @@ }, "Defaults": { "Scan Directory": "", - "Tag": "Obsidian_to_Anki", - "Deck": "Default", + "Tag": "", + "Deck": "Obsidian", "Scheduling Interval": 0, "Add File Link": false, - "Add Context": false, - "CurlyCloze": false, - "CurlyCloze - Highlights to Clozes": false, + "Add Context": true, + 
"CurlyCloze": true, + "CurlyCloze - Highlights to Clozes": true, "ID Comments": true, "Add Obsidian Tags": false }, @@ -53,10 +80,10 @@ "bash/index.md": "3b5296277f095acdf16655adcdf524af", "bash/prompts.md": "61cb877e68da040a15b85af76b1f68ba", "bash/quoting.md": "b1d8869a91001f8b22f0cdc54d806f61", - "bash/robustness.md": "a1d0d334939b54cca4bdfd2fd8ca27f0", + "bash/robustness.md": "7ab094b95ba2bfa885adba8e9efedf68", "bash/shebang.md": "9006547710f9a079a3666169fbeda7aa", "c/escape-sequences.md": "07f0811b0fff14f54f78abc33f2e6606", - "c/index.md": "d41d8cd98f00b204e9800998ecf8427e", + "c/index.md": "aa8a34c62e7bc284ff589e28609222dc", "gawk/index.md": "0263448c8ae1ecfc0eacc4788f8402e9", "gawk/variables.md": "4482c297e7f4f5987f42f1926a880ca7", "index.md": "e48e895feeed7046425bb2ee15419770", @@ -64,14 +91,17 @@ "journal/2024-02-01.md": "3aa232387d2dc662384976fd116888eb", "journal/2024-02-02.md": "a3b222daee8a50bce4cbac699efc7180", "journal/2024-02-03.md": "c6d411f0e2e964270399dd3a99f48382", - "logic/index.md": "d41d8cd98f00b204e9800998ecf8427e", + "logic/index.md": "3084b41fe1451259a0cf3e54560c2e85", "logic/propositional.md": "45aee8bf688aa8fef4b136145085d38a", - "lua/index.md": "d41d8cd98f00b204e9800998ecf8427e", + "lua/index.md": "26632dae1f852519e2f1af11d65c34eb", "nix/callPackage.md": "59796c480e2856fa7491f62ceb7e3c9c", "nix/index.md": "dd5ddd19e95d9bdbe020c68974d77a33", "posix/index.md": "f7b1ae55f8f5e8f50f89738b1aca9111", "posix/signals.md": "2120ddd933fc0d57abb93c33f639afd8", - "templates/daily.md": "7866014e730e85683155207a02e367d8" + "templates/daily.md": "7866014e730e85683155207a02e367d8", + "posix/regexp.md": "d7effae06677d559b15180ce30f1d306", + "journal/2024-02-04.md": "e2b5678fc53d7284b71ed6820c02b954", + "gawk/regexp.md": "2dbc2548ed9212ddac8e8f66d979b5b7" }, "fields_dict": { "Basic": [ diff --git a/notes/bash/robustness.md b/notes/bash/robustness.md index d0d5688..67e5885 100644 --- a/notes/bash/robustness.md +++ b/notes/bash/robustness.md @@ -60,6 +60,7 
@@ It's interesting to think what else can be used as a measure of a command's robu * Perhaps a program waits a specified amount of time before input is available. The command's success is now externally determined. * Locale-aware functionality * Consider for instance [[gawk/index|gawk]]'s `\u` [[escape-sequences|sequence]] which targets characters in the current locale's character set as opposed to specifically Unicode. + * POSIX standard [[posix/regexp#Character Classes|character classes]] serve as another example. The above scenarios are what makes something like [[nix/index|nix]] so compelling. diff --git a/notes/gawk/regexp.md b/notes/gawk/regexp.md new file mode 100644 index 0000000..8218a95 --- /dev/null +++ b/notes/gawk/regexp.md @@ -0,0 +1,25 @@ +--- +title: Regular Expressions +TARGET DECK: Obsidian::STEM +FILE TAGS: linux::cli gawk regexp +tags: + - gawk +--- + +## Overview + +Most `awk` patterns are regular expressions delimited with `/`. We can use `~` and `!~` to perform more complicated regexp filtering: + +```bash +# Matches any line with `li` somewhere. +$ awk '/li/' data +$ awk '$0 ~ /li/' data +# Matches any line with `li` somewhere in the first field. +$ awk '$1 ~ /li/' data +``` + +`awk`'s implementation of regexps is a superset of [[posix/regexp|POSIX EREs]]. + +## References + +* Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf) \ No newline at end of file diff --git a/notes/journal/2024-02-04.md b/notes/journal/2024-02-04.md new file mode 100644 index 0000000..2dc9240 --- /dev/null +++ b/notes/journal/2024-02-04.md @@ -0,0 +1,11 @@ +--- +title: "2024-02-04" +--- + +- [ ] Anki Flashcards +- [x] KoL +- [ ] Sheet Music (10 min.) 
+- [ ] OGS (1 Life & Death Problem) +- [ ] Korean (Read 1 Story) +- [ ] Interview Prep (1 Practice Problem) +- [ ] Log Work Hours (Max 3 hours) \ No newline at end of file diff --git a/notes/posix/regexp.md b/notes/posix/regexp.md new file mode 100644 index 0000000..98dc519 --- /dev/null +++ b/notes/posix/regexp.md @@ -0,0 +1,279 @@ +--- +title: Regular Expressions +TARGET DECK: Obsidian::STEM +FILE TAGS: regexp +tags: + - regexp +--- + +## Overview + +The following ERE (**E**xtended **R**egular **E**xpression) operators were defined to achieve consistency between programs like `grep`, `sed`, and `awk`. + +%%ANKI +Basic +What is the POSIX ERE standard? +Back: The **E**xtended **R**egular **E**xpression standard. A standard based off of regexps accepted by `egrep`. +Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions). + +END%% + +* `.` matches any single character. + * There exist application-specific exclusions. For instance, newlines and the `NUL` character are often ignored. + +%%ANKI +Cloze +The {`.`} operator matches {any single character}. +Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions). + +END%% + +%%ANKI +Basic +What two common exclusions are made with `.`? +Back: Newlines and the `NUL` character. +Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf) + +END%% + +* `[...]`, the **bracket expression**, matches any enclosed character. + * An optional `-` can be included to denote a range. 
+ * `-` is treated literally if it's the first or last specified character. + * `]` is treated literally if it's the first specified character. + * `^` complements the set if it's the first specified character. + +%%ANKI +Basic +What name is given to the `[...]` operator? +Back: The bracket expression. +Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions). + +END%% + +%%ANKI +Basic +What three characters are interpreted specially in a bracket expression? +Back: `^`, `-`, and `]` +Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions). + +END%% + +%%ANKI +Basic +When is `-` interpreted literally in a bracket expression? +Back: When it is the first or last specified character. +Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions). + +END%% + +%%ANKI +Basic +When is `^` interpreted literally in a bracket expression? +Back: When it is not the first specified character. +Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions). + +END%% + +%%ANKI +Basic +When is `]` interpreted literally in a bracket expression? +Back: When it is the first specified character. 
+Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions). + +END%% + +* `^` is the leading anchor. It matches the starting position of a string. +* `$` is the trailing anchor. It matches the ending position of a string. + +%%ANKI +Cloze +The {`^`} operator matches {the starting position of a string}. +Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions). + +END%% + +%%ANKI +Cloze +The {`$`} operator matches {the ending position of a string}. +Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions). + +END%% + +%%ANKI +Basic +`^` and `$` belong to what operator category? +Back: Anchors +Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions). + +END%% + +* `*` matches the preceding element zero or more times. +* `+` matches the preceding element one or more times. +* `?` matches the preceding element zero or one times. + +%%ANKI +Basic +What does the `*` operator do? +Back: Matches the preceding element zero or more times. +Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions). + +END%% + +%%ANKI +Basic +What does the `+` operator do? 
+Back: Matches the preceding element one or more times. +Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions). + +END%% + +%%ANKI +Basic +What does the `?` operator do? +Back: Matches the preceding element zero or one times. +Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions). + +END%% + +* `{n}`, an **interval expression**, matches the preceding element `n` times. + * `{n,}` matches the preceding element at least `n` times. + * `{n,m}` matches the preceding element between `n` and `m` times. + * Interval expressions cannot contain repetition counts `> 255`. Results are otherwise undefined. + +%%ANKI +Basic +What name is given to the e.g. `{n,m}` operator? +Back: The interval expression. +Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf) + +END%% + +%%ANKI +Basic +What does the `{n}` operator do? +Back: Matches the preceding element `n` times. +Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf) + +END%% + +%%ANKI +Basic +What does the `{n,}` operator do? +Back: Matches the preceding element at least `n` times. +Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf) + +END%% + +%%ANKI +Basic +What does the `{n,m}` operator do? +Back: Matches the preceding element between `n` and `m` times. 
+Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf) + +END%% + +%%ANKI +Basic +What interval expression repetition counts lead to undefined behavior? +Back: Counts greater than `255`. +Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf) + +END%% + +* `|` is the **alternation operator**. It allows specifying match alternatives. + +%%ANKI +Basic +What name is given to the `|` operator? +Back: The alternation operator. +Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf) + +END%% + +%%ANKI +Basic +What does the `|` operator do? +Back: Matches different regexp alternatives. +Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf) + +END%% + +%%ANKI +Basic +Which regexp operator has the least precedence? +Back: `|` +Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf) + +END%% + +## Character Classes + +Notation for describing a class of characters specific to a given locale/character set. + +%%ANKI +Basic +What inconsistency do character classes introduce? +Back: Matching characters are dependent on locale/character set. +Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf) + +END%% + +%%ANKI +Basic +How are character classes denoted? +Back: `[:class:]` +Reference: Robbins, Arnold D. 
“GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf) + +END%% + +Class | Similar To | Meaning +------------ | --------------- | ------------------------------------------------ +`[:alnum:]` | `[A-Za-z0-9]` | Alphanumeric characters +`[:alpha:]` | `[A-Za-z]` | Alphabetic characters +`[:blank:]` | `[ \t]` | `' '` and `TAB` characters +`[:cntrl:]` | | Control characters +`[:digit:]` | `[0-9]` | Numeric characters +`[:graph:]` | `[^ [:cntrl:]]` | Printable and visible characters +`[:lower:]` | `[a-z]` | Lowercase alphabetic characters +`[:print:]` | `[ [:graph:]]` | Printable characters +`[:punct:]` | | All graphic characters except letters and digits +`[:space:]` | `[ \t\n\r\f\v]` | Whitespace characters +`[:upper:]` | `[A-Z]` | Uppercase alphabetic characters +`[:xdigit:]` | `[0-9A-Fa-f]` | Hexadecimal digits + +%%ANKI +Basic +Generally speaking, what is a printable character? +Back: Characters that can be displayed on screen or printed on paper. +Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf) + +END%% + +%%ANKI +Basic +Is `'a'` (i.e. the letter *a*) printable and/or visible? +Back: It is printable and visible. +Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf) + +END%% + +%%ANKI +Basic +Is `' '` (i.e. the space character) printable and/or visible? +Back: It is printable but not visible. +Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf) + +END%% + +%%ANKI +Basic +Is `'\t'` (i.e. the tab character) printable and/or visible? +Back: It is neither printable nor visible. 
+Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf) + +END%% + +## References + +* “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions). +* Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)