Add notes on POSIX regexps.
parent
14595b857d
commit
30edc2dbdc
|
@ -16,12 +16,39 @@
|
||||||
"Image Occlusion": "Occlusion",
|
"Image Occlusion": "Occlusion",
|
||||||
"Image Occlusion Enhanced": "ID (hidden)"
|
"Image Occlusion Enhanced": "ID (hidden)"
|
||||||
},
|
},
|
||||||
"CONTEXT_FIELDS": {},
|
"CONTEXT_FIELDS": {
|
||||||
"FOLDER_DECKS": {},
|
"Basic": "Context",
|
||||||
"FOLDER_TAGS": {},
|
"Cloze": "Context"
|
||||||
|
},
|
||||||
|
"FOLDER_DECKS": {
|
||||||
|
"algorithms": "",
|
||||||
|
"bash": "",
|
||||||
|
"c": "",
|
||||||
|
"gawk": "",
|
||||||
|
"journal": "",
|
||||||
|
"logic": "",
|
||||||
|
"lua": "",
|
||||||
|
"nix": "",
|
||||||
|
"posix": "",
|
||||||
|
"templates": "",
|
||||||
|
"algorithms/sorting": ""
|
||||||
|
},
|
||||||
|
"FOLDER_TAGS": {
|
||||||
|
"algorithms": "",
|
||||||
|
"bash": "",
|
||||||
|
"c": "",
|
||||||
|
"gawk": "",
|
||||||
|
"journal": "",
|
||||||
|
"logic": "",
|
||||||
|
"lua": "",
|
||||||
|
"nix": "",
|
||||||
|
"posix": "",
|
||||||
|
"templates": "",
|
||||||
|
"algorithms/sorting": ""
|
||||||
|
},
|
||||||
"Syntax": {
|
"Syntax": {
|
||||||
"Begin Note": "START",
|
"Begin Note": "%%ANKI",
|
||||||
"End Note": "END",
|
"End Note": "END%%",
|
||||||
"Begin Inline Note": "STARTI",
|
"Begin Inline Note": "STARTI",
|
||||||
"End Inline Note": "ENDI",
|
"End Inline Note": "ENDI",
|
||||||
"Target Deck Line": "TARGET DECK",
|
"Target Deck Line": "TARGET DECK",
|
||||||
|
@ -31,13 +58,13 @@
|
||||||
},
|
},
|
||||||
"Defaults": {
|
"Defaults": {
|
||||||
"Scan Directory": "",
|
"Scan Directory": "",
|
||||||
"Tag": "Obsidian_to_Anki",
|
"Tag": "",
|
||||||
"Deck": "Default",
|
"Deck": "Obsidian",
|
||||||
"Scheduling Interval": 0,
|
"Scheduling Interval": 0,
|
||||||
"Add File Link": false,
|
"Add File Link": false,
|
||||||
"Add Context": false,
|
"Add Context": true,
|
||||||
"CurlyCloze": false,
|
"CurlyCloze": true,
|
||||||
"CurlyCloze - Highlights to Clozes": false,
|
"CurlyCloze - Highlights to Clozes": true,
|
||||||
"ID Comments": true,
|
"ID Comments": true,
|
||||||
"Add Obsidian Tags": false
|
"Add Obsidian Tags": false
|
||||||
},
|
},
|
||||||
|
@ -53,10 +80,10 @@
|
||||||
"bash/index.md": "3b5296277f095acdf16655adcdf524af",
|
"bash/index.md": "3b5296277f095acdf16655adcdf524af",
|
||||||
"bash/prompts.md": "61cb877e68da040a15b85af76b1f68ba",
|
"bash/prompts.md": "61cb877e68da040a15b85af76b1f68ba",
|
||||||
"bash/quoting.md": "b1d8869a91001f8b22f0cdc54d806f61",
|
"bash/quoting.md": "b1d8869a91001f8b22f0cdc54d806f61",
|
||||||
"bash/robustness.md": "a1d0d334939b54cca4bdfd2fd8ca27f0",
|
"bash/robustness.md": "7ab094b95ba2bfa885adba8e9efedf68",
|
||||||
"bash/shebang.md": "9006547710f9a079a3666169fbeda7aa",
|
"bash/shebang.md": "9006547710f9a079a3666169fbeda7aa",
|
||||||
"c/escape-sequences.md": "07f0811b0fff14f54f78abc33f2e6606",
|
"c/escape-sequences.md": "07f0811b0fff14f54f78abc33f2e6606",
|
||||||
"c/index.md": "d41d8cd98f00b204e9800998ecf8427e",
|
"c/index.md": "aa8a34c62e7bc284ff589e28609222dc",
|
||||||
"gawk/index.md": "0263448c8ae1ecfc0eacc4788f8402e9",
|
"gawk/index.md": "0263448c8ae1ecfc0eacc4788f8402e9",
|
||||||
"gawk/variables.md": "4482c297e7f4f5987f42f1926a880ca7",
|
"gawk/variables.md": "4482c297e7f4f5987f42f1926a880ca7",
|
||||||
"index.md": "e48e895feeed7046425bb2ee15419770",
|
"index.md": "e48e895feeed7046425bb2ee15419770",
|
||||||
|
@ -64,14 +91,17 @@
|
||||||
"journal/2024-02-01.md": "3aa232387d2dc662384976fd116888eb",
|
"journal/2024-02-01.md": "3aa232387d2dc662384976fd116888eb",
|
||||||
"journal/2024-02-02.md": "a3b222daee8a50bce4cbac699efc7180",
|
"journal/2024-02-02.md": "a3b222daee8a50bce4cbac699efc7180",
|
||||||
"journal/2024-02-03.md": "c6d411f0e2e964270399dd3a99f48382",
|
"journal/2024-02-03.md": "c6d411f0e2e964270399dd3a99f48382",
|
||||||
"logic/index.md": "d41d8cd98f00b204e9800998ecf8427e",
|
"logic/index.md": "3084b41fe1451259a0cf3e54560c2e85",
|
||||||
"logic/propositional.md": "45aee8bf688aa8fef4b136145085d38a",
|
"logic/propositional.md": "45aee8bf688aa8fef4b136145085d38a",
|
||||||
"lua/index.md": "d41d8cd98f00b204e9800998ecf8427e",
|
"lua/index.md": "26632dae1f852519e2f1af11d65c34eb",
|
||||||
"nix/callPackage.md": "59796c480e2856fa7491f62ceb7e3c9c",
|
"nix/callPackage.md": "59796c480e2856fa7491f62ceb7e3c9c",
|
||||||
"nix/index.md": "dd5ddd19e95d9bdbe020c68974d77a33",
|
"nix/index.md": "dd5ddd19e95d9bdbe020c68974d77a33",
|
||||||
"posix/index.md": "f7b1ae55f8f5e8f50f89738b1aca9111",
|
"posix/index.md": "f7b1ae55f8f5e8f50f89738b1aca9111",
|
||||||
"posix/signals.md": "2120ddd933fc0d57abb93c33f639afd8",
|
"posix/signals.md": "2120ddd933fc0d57abb93c33f639afd8",
|
||||||
"templates/daily.md": "7866014e730e85683155207a02e367d8"
|
"templates/daily.md": "7866014e730e85683155207a02e367d8",
|
||||||
|
"posix/regexp.md": "d7effae06677d559b15180ce30f1d306",
|
||||||
|
"journal/2024-02-04.md": "e2b5678fc53d7284b71ed6820c02b954",
|
||||||
|
"gawk/regexp.md": "2dbc2548ed9212ddac8e8f66d979b5b7"
|
||||||
},
|
},
|
||||||
"fields_dict": {
|
"fields_dict": {
|
||||||
"Basic": [
|
"Basic": [
|
||||||
|
|
|
@ -60,6 +60,7 @@ It's interesting to think what else can be used as a measure of a command's robu
|
||||||
* Perhaps a program waits a specified amount of time before input is available. The command's success is now externally determined.
|
* Perhaps a program waits a specified amount of time before input is available. The command's success is now externally determined.
|
||||||
* Locale-aware functionality
|
* Locale-aware functionality
|
||||||
* Consider for instance [[gawk/index|gawk]]'s `\u` [[escape-sequences|sequence]] which targets characters in the current locale's character set as opposed to specifically Unicode.
|
* Consider for instance [[gawk/index|gawk]]'s `\u` [[escape-sequences|sequence]] which targets characters in the current locale's character set as opposed to specifically Unicode.
|
||||||
|
* POSIX standard [[posix/regexp#Character Classes|character classes]] serve as another example.
|
||||||
|
|
||||||
The above scenarios are what makes something like [[nix/index|nix]] so compelling.
|
The above scenarios are what makes something like [[nix/index|nix]] so compelling.
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
---
|
||||||
|
title: Regular Expressions
|
||||||
|
TARGET DECK: Obsidian::STEM
|
||||||
|
FILE TAGS: linux::cli gawk regexp
|
||||||
|
tags:
|
||||||
|
- gawk
|
||||||
|
---
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Most `awk` patterns are regular expressions delimited with `/`. We can use `~` and `!~` to perform more complicated regexp filtering:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Matches any line with `li` somewhere.
|
||||||
|
$ awk '/li/' data
|
||||||
|
$ awk '$0 ~ /li/' data
|
||||||
|
# Matches any line with `li` somewhere in the first field.
|
||||||
|
$ awk '$1 ~ /li/' data
|
||||||
|
```
|
||||||
|
|
||||||
|
`awk`'s implementation of regexps is a superset of [[posix/regexp|POSIX EREs]].
|
||||||
|
|
||||||
|
## References
|
||||||
|
|
||||||
|
* Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)
|
|
@ -0,0 +1,11 @@
|
||||||
|
---
|
||||||
|
title: "2024-02-04"
|
||||||
|
---
|
||||||
|
|
||||||
|
- [ ] Anki Flashcards
|
||||||
|
- [x] KoL
|
||||||
|
- [ ] Sheet Music (10 min.)
|
||||||
|
- [ ] OGS (1 Life & Death Problem)
|
||||||
|
- [ ] Korean (Read 1 Story)
|
||||||
|
- [ ] Interview Prep (1 Practice Problem)
|
||||||
|
- [ ] Log Work Hours (Max 3 hours)
|
|
@ -0,0 +1,279 @@
|
||||||
|
---
|
||||||
|
title: Regular Expressions
|
||||||
|
TARGET DECK: Obsidian::STEM
|
||||||
|
FILE TAGS: regexp
|
||||||
|
tags:
|
||||||
|
- regexp
|
||||||
|
---
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The following ERE (**E**xtended **R**egular **E**xpression) operators were defined to achieve consistency between programs like `grep`, `sed`, and `awk`.
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
What is the POSIX ERE standard?
|
||||||
|
Back: The **E**xtended **R**egular **E**xpression standard. A standard based on the regexps accepted by `egrep`.
|
||||||
|
Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions).
|
||||||
|
<!--ID: 1707050923589-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
* `.` matches any single character.
|
||||||
|
* There exist application-specific exclusions. For instance, newlines and the `NUL` character are often ignored.
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Cloze
|
||||||
|
The {`.`} operator matches {any single character}.
|
||||||
|
Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions).
|
||||||
|
<!--ID: 1707050923593-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
What two common exclusions are made with `.`?
|
||||||
|
Back: Newlines and the `NUL` character.
|
||||||
|
Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)
|
||||||
|
<!--ID: 1707050923596-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
* `[...]`, the **bracket expression**, matches any enclosed character.
|
||||||
|
* An optional `-` can be included to denote a range.
|
||||||
|
* `-` is treated literally if it's the first or last specified character.
|
||||||
|
* `]` is treated literally if it's the first specified character.
|
||||||
|
* `^` complements the set if it's the first specified character.
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
What name is given to the `[...]` operator?
|
||||||
|
Back: The bracket expression.
|
||||||
|
Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions).
|
||||||
|
<!--ID: 1707050923600-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
What three characters are interpreted specially in a bracket expression?
|
||||||
|
Back: `^`, `-`, and `]`
|
||||||
|
Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions).
|
||||||
|
<!--ID: 1707050923605-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
When is `-` interpreted literally in a bracket expression?
|
||||||
|
Back: When it is the first or last specified character.
|
||||||
|
Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions).
|
||||||
|
<!--ID: 1707050923610-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
When is `^` interpreted literally in a bracket expression?
|
||||||
|
Back: When it is not the first specified character.
|
||||||
|
Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions).
|
||||||
|
<!--ID: 1707050923615-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
When is `]` interpreted literally in a bracket expression?
|
||||||
|
Back: When it is the first specified character.
|
||||||
|
Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions).
|
||||||
|
<!--ID: 1707050923621-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
* `^` is the leading anchor. It matches the starting position of a string.
|
||||||
|
* `$` is the trailing anchor. It matches the ending position of a string.
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Cloze
|
||||||
|
The {`^`} operator matches {the starting position of a string}.
|
||||||
|
Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions).
|
||||||
|
<!--ID: 1707050923629-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Cloze
|
||||||
|
The {`$`} operator matches {the ending position of a string}.
|
||||||
|
Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions).
|
||||||
|
<!--ID: 1707050923635-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
`^` and `$` belong to what operator category?
|
||||||
|
Back: Anchors
|
||||||
|
Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions).
|
||||||
|
<!--ID: 1707050923643-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
* `*` matches the preceding element zero or more times.
|
||||||
|
* `+` matches the preceding element one or more times.
|
||||||
|
* `?` matches the preceding element zero or one times.
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
What does the `*` operator do?
|
||||||
|
Back: Matches the preceding element zero or more times.
|
||||||
|
Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions).
|
||||||
|
<!--ID: 1707050923650-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
What does the `+` operator do?
|
||||||
|
Back: Matches the preceding element one or more times.
|
||||||
|
Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions).
|
||||||
|
<!--ID: 1707050923656-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
What does the `?` operator do?
|
||||||
|
Back: Matches the preceding element zero or one times.
|
||||||
|
Reference: “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions).
|
||||||
|
<!--ID: 1707050923662-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
* `{n}`, an **interval expression**, matches the preceding element `n` times.
|
||||||
|
* `{n,}` matches the preceding element at least `n` times.
|
||||||
|
* `{n,m}` matches the preceding element between `n` and `m` times.
|
||||||
|
* Interval expressions should not contain repetition counts `> 255`; results for larger counts are undefined.
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
What name is given to an operator such as `{n,m}`?
|
||||||
|
Back: The interval expression.
|
||||||
|
Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)
|
||||||
|
<!--ID: 1707050923669-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
What does the `{n}` operator do?
|
||||||
|
Back: Matches the preceding element `n` times.
|
||||||
|
Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)
|
||||||
|
<!--ID: 1707050923676-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
What does the `{n,}` operator do?
|
||||||
|
Back: Matches the preceding element at least `n` times.
|
||||||
|
Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)
|
||||||
|
<!--ID: 1707050923683-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
What does the `{n,m}` operator do?
|
||||||
|
Back: Matches the preceding element between `n` and `m` times.
|
||||||
|
Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)
|
||||||
|
<!--ID: 1707050923689-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
What interval expression repetition counts lead to undefined behavior?
|
||||||
|
Back: Counts greater than `255`.
|
||||||
|
Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)
|
||||||
|
<!--ID: 1707050923695-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
* `|` is the **alternation operator**. It allows specifying match alternatives.
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
What name is given to the `|` operator?
|
||||||
|
Back: The alternation operator.
|
||||||
|
Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)
|
||||||
|
<!--ID: 1707050923701-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
What does the `|` operator do?
|
||||||
|
Back: Matches different regexp alternatives.
|
||||||
|
Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)
|
||||||
|
<!--ID: 1707050923708-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
Which regexp operator has the least precedence?
|
||||||
|
Back: `|`
|
||||||
|
Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)
|
||||||
|
<!--ID: 1707050923713-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
## Character Classes
|
||||||
|
|
||||||
|
Notation for describing a class of characters specific to a given locale/character set.
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
What inconsistency do character classes introduce?
|
||||||
|
Back: Matching characters are dependent on locale/character set.
|
||||||
|
Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)
|
||||||
|
<!--ID: 1707050923719-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
How are character classes denoted?
|
||||||
|
Back: `[:class:]`
|
||||||
|
Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)
|
||||||
|
<!--ID: 1707050923724-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
Class | Similar To | Meaning
|
||||||
|
------------ | --------------- | ------------------------------------------------
|
||||||
|
`[:alnum:]` | `[A-Za-z0-9]` | Alphanumeric characters
|
||||||
|
`[:alpha:]` | `[A-Za-z]` | Alphabetic characters
|
||||||
|
`[:blank:]` | `[ \t]` | `' '` and `TAB` characters
|
||||||
|
`[:cntrl:]` | | Control characters
|
||||||
|
`[:digit:]` | `[0-9]` | Numeric characters
|
||||||
|
`[:graph:]` | `[^ [:cntrl:]]` | Printable and visible characters
|
||||||
|
`[:lower:]` | `[a-z]` | Lowercase alphabetic characters
|
||||||
|
`[:print:]` | `[ [:graph:]]` | Printable characters
|
||||||
|
`[:punct:]` | | All graphic characters except letters and digits
|
||||||
|
`[:space:]` | `[ \t\n\r\f\v]` | Whitespace characters
|
||||||
|
`[:upper:]` | `[A-Z]` | Uppercase alphabetic characters
|
||||||
|
`[:xdigit:]` | `[0-9A-Fa-f]` | Hexadecimal digits
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
Generally speaking, what is a printable character?
|
||||||
|
Back: Characters that can be displayed on screen or printed on paper.
|
||||||
|
Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)
|
||||||
|
<!--ID: 1707050923728-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
Is `'a'` (i.e. the letter *a*) printable and/or visible?
|
||||||
|
Back: It is printable and visible.
|
||||||
|
Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)
|
||||||
|
<!--ID: 1707050923732-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
Is `' '` (i.e. the space character) printable and/or visible?
|
||||||
|
Back: It is printable but not visible.
|
||||||
|
Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)
|
||||||
|
<!--ID: 1707050923736-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
%%ANKI
|
||||||
|
Basic
|
||||||
|
Is `'\t'` (i.e. the tab character) printable and/or visible?
|
||||||
|
Back: It is neither printable nor visible.
|
||||||
|
Reference: Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)
|
||||||
|
<!--ID: 1707050923740-->
|
||||||
|
END%%
|
||||||
|
|
||||||
|
## References
|
||||||
|
|
||||||
|
* “POSIX Basic Regular Expressions,” accessed February 4, 2024, [https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Basic_Regular_Expressions).
|
||||||
|
* Robbins, Arnold D. “GAWK: Effective AWK Programming,” October 2023. [https://www.gnu.org/software/gawk/manual/gawk.pdf](https://www.gnu.org/software/gawk/manual/gawk.pdf)
|
Loading…
Reference in New Issue