Add Apache Airflow dev shell.

main
Joshua Potter 2024-01-21 10:59:38 -07:00
parent a4d83776c3
commit 0b6764b2fc
8 changed files with 304 additions and 0 deletions

55
specs/airflow/runner Executable file
View File

@ -0,0 +1,55 @@
#!/usr/bin/env bash
# Generates the Airflow template into the $OUT directory, staging all content
# in an intermediate build directory first.

# Exit immediately if the script encounters a non-zero status.
set -e
# If set, Bash includes filenames beginning with a `.` in the results of
# filename expansion. The filenames `.` and `..` must always be matched
# explicitly, even if dotglob is set.
shopt -s dotglob

# ============================================================
# PROLOGUE
# ============================================================

# Fail fast if the caller never provided a target directory; otherwise the
# final `cp` below would silently copy into the current directory.
if [ -z "$OUT" ]; then
  >&2 echo "\$OUT is not set."
  exit 1
fi

# Create a new top-level directory as fallback in case $BUILD (defined below)
# is ever empty. NOTE: previously `/tmp/bs.postgres`, a copy/paste leftover
# from the postgres spec; the airflow spec gets its own namespace so the two
# runners can never interfere with each other's temp directories.
mkdir -p "/tmp/bs.airflow"
# Create an intermediate build directory. The final step of this script will
# copy the content from this directory to $OUT.
BUILD=$(mktemp -d -p "/tmp/bs.airflow")
if [ -z "$BUILD" ]; then
  >&2 echo "Failed to create temp directory."
  exit 1
fi

# Deletes the intermediate build directory on exit. We use a concatenation of
# the intermediate directory with the basename of the generated temp directory
# to ensure we never evaluate to root (i.e. `/`). That should never actually
# happen but a good habit to establish nonetheless.
function cleanup {
  rm -r "/tmp/bs.airflow/$(basename "$BUILD")"
}
trap cleanup EXIT

# ============================================================
# BUILD
# ============================================================

# Copy template contents over to the intermediate build directory.
cp -r template/* "$BUILD"
# Explicitly set permissions on all copied files.
find "$BUILD" -type f -execdir chmod 644 {} +
find "$BUILD" -type d -execdir chmod 755 {} +
# The pre-commit hook must stay executable for git to be able to run it.
chmod 755 "$BUILD"/.githooks/pre-commit

# ============================================================
# EPILOGUE
# ============================================================

# Success! Copy contents to target directory.
cp -a "$BUILD"/* "$OUT"

View File

@ -0,0 +1,7 @@
#!/usr/bin/env bash
# direnv entry point for this template directory.
#
# `on_git_branch` is a direnv stdlib helper that succeeds only when the
# current directory is inside a git work tree, so the hook path is only
# configured once the template has actually been `git init`ed/cloned.
if command -v git > /dev/null && on_git_branch; then
  # Route git hooks to the versioned .githooks/ directory (pre-commit lives
  # there) instead of the default .git/hooks.
  git config --local core.hooksPath .githooks/
fi
# Enter the dev shell defined by ./flake.nix (direnv's `use flake` helper).
use flake

View File

@ -0,0 +1,21 @@
#!/usr/bin/env bash
# Pre-commit hook: reformat staged Python files with `black` and re-stage
# them so the commit contains the formatted versions.
set -e

# Collect staged (non-deleted) Python filenames. `-z` makes git emit raw,
# NUL-delimited paths, so names containing spaces, quotes, or non-ASCII bytes
# arrive verbatim. The previous approach stripped the surrounding quotes with
# sed but left git's C-style escapes (e.g. \303\251) inside the name, which
# produced paths that do not exist on disk.
TARGETS=()
while IFS= read -r -d '' FILENAME
do
  # Only reformat Python sources.
  if [[ "$FILENAME" =~ \.py$ ]]; then
    TARGETS+=("${FILENAME}")
  fi
done < <(git --no-pager diff --name-only -z --no-color --cached --diff-filter=d)

# Skip invoking black entirely when no Python files are staged.
if (( ${#TARGETS[@]} )); then
  black --quiet "${TARGETS[@]}"
  # Re-stage the (possibly rewritten) files.
  git add "${TARGETS[@]}"
fi

13
specs/airflow/template/.gitignore vendored Normal file
View File

@ -0,0 +1,13 @@
# The directory containing configuration files and SQLite database.
airflow/
# Directory used by `direnv` to hold `use flake`-generated profiles.
/.direnv/
# A symlink produced by default when running `nix build`.
/result
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

View File

@ -0,0 +1,22 @@
# Airflow Dev Shell
This is a small flake template for experimenting with [Apache Airflow](https://airflow.apache.org/)
(version 2.7.3). [direnv](https://direnv.net/) can be used to launch a dev
shell upon entering this directory (refer to `.envrc`). Otherwise run via:
```bash
$ nix develop
```
Once your environment has booted, you can get started with the UI by running:
```bash
$ airflow standalone
```
The webserver lives at `localhost:8080` by default. The username is `admin` and
the generated password can be found by running:
```bash
$ cat airflow/standalone_admin_password.txt
```
or looking through the console output.
A basic DAG is included in the top-level `dags/` directory. Airflow discovers
this DAG with the help of the `AIRFLOW__CORE__DAGS_FOLDER` environment variable,
set within the nix dev shell.

View File

@ -0,0 +1,82 @@
from __future__ import annotations
import textwrap
from datetime import datetime, timedelta
from airflow.models.dag import DAG
from airflow.operators.bash import BashOperator
# Example DAG demonstrating BashOperator tasks, a per-task override of
# `default_args`, inline task documentation (`doc_md`), and a Jinja-templated
# bash command. Registered under the DAG id "example_custom".
with DAG(
    "example_custom",
    # These args will get passed on to each operator. You can override them on
    # a per-task basis during operator initialization.
    default_args={
        "depends_on_past": False,
        "email": ["airflow@example.com"],
        "email_on_failure": False,
        "email_on_retry": False,
        "retries": 1,
        "retry_delay": timedelta(minutes=5),
        # 'queue': 'bash_queue',
        # 'pool': 'backfill',
        # 'priority_weight': 10,
        # 'end_date': datetime(2016, 1, 1),
        # 'wait_for_downstream': False,
        # 'sla': timedelta(hours=2),
        # 'execution_timeout': timedelta(seconds=300),
        # 'on_failure_callback': some_function, # or list of functions
        # 'on_success_callback': some_other_function, # or list of functions
        # 'on_retry_callback': another_function, # or list of functions
        # 'sla_miss_callback': yet_another_function, # or list of functions
        # 'trigger_rule': 'all_success'
    },
    description="An example DAG",
    # Run daily from `start_date`; `catchup=False` skips backfilling runs for
    # dates before the DAG was first enabled.
    schedule=timedelta(days=1),
    start_date=datetime(2021, 1, 1),
    catchup=False,
    tags=["example"],
) as dag:
    # t1, t2 and t3 are examples of tasks created by instantiating operators.
    t1 = BashOperator(
        task_id="print_date",
        bash_command="date",
    )

    t2 = BashOperator(
        task_id="sleep",
        depends_on_past=False,
        bash_command="sleep 5",
        # Overrides the `retries` value supplied via `default_args` above.
        retries=3,
    )

    # Markdown rendered on t1's Task Instance Details page in the UI.
    t1.doc_md = textwrap.dedent(
        """\
    #### Task Documentation
    You can document your task using the attributes `doc_md` (markdown),
    `doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml` which gets
    rendered in the UI's Task Instance Details page.
    ![img](http://montcs.bloomu.edu/~bobmon/Semesters/2012-01/491/import%20soul.png)
    **Image Credit:** Randall Munroe, [XKCD](https://xkcd.com/license.html)
    """
    )

    # DAG-level documentation rendered at the top of the DAG's page.
    dag.doc_md = """
    This is a documentation placed anywhere
    """

    # Jinja template expanded by Airflow at runtime; `ds` is the run's logical
    # date stamp and `macros.ds_add` shifts it by N days.
    templated_command = textwrap.dedent(
        """
    {% for i in range(5) %}
        echo "{{ ds }}"
        echo "{{ macros.ds_add(ds, 7)}}"
    {% endfor %}
    """
    )

    t3 = BashOperator(
        task_id="templated",
        depends_on_past=False,
        bash_command=templated_command,
    )

    # `t1` runs first; `t2` and `t3` both run after it (in parallel).
    t1 >> [t2, t3]

View File

@ -0,0 +1,61 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1694529238,
"narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "ff7b65b44d01cf9ba6a71320833626af21126384",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1700794826,
"narHash": "sha256-RyJTnTNKhO0yqRpDISk03I/4A67/dp96YRxc86YOPgU=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "5a09cb4b393d58f9ed0d9ca1555016a8543c2ac8",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

View File

@ -0,0 +1,43 @@
{
  description = ''
    An Airflow dev shell.

    To generate a copy of this template elsewhere, install
    [bootstrap](https://github.com/jrpotter/bootstrap) and run:

    ```bash
    $ bootstrap airflow
    ```
  '';

  inputs = {
    flake-utils.url = "github:numtide/flake-utils";
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
  };

  outputs = { nixpkgs, flake-utils, ... }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = nixpkgs.legacyPackages.${system};
      in
      {
        devShells.default = pkgs.mkShell {
          # Native libraries (libstdc++, etc.) needed by compiled Python
          # wheels that Airflow pulls in.
          LD_LIBRARY_PATH = "${pkgs.stdenv.cc.cc.lib}/lib";

          # NOTE: plain `mkShell` attributes are exported verbatim, so a value
          # of "$PWD/airflow" would remain the literal string `$PWD/airflow`
          # (it is never shell-expanded). `shellHook` IS evaluated by the
          # shell on entry, so these resolve relative to the project root.
          shellHook = ''
            export AIRFLOW_HOME="$PWD/airflow"
            export AIRFLOW__CORE__DAGS_FOLDER="$PWD/dags"
          '';

          packages = (with pkgs; [
            apache-airflow
          ] ++ (with python311Packages; [
            # Formatter invoked by the .githooks/pre-commit hook.
            black
            debugpy
            mccabe
            mypy
            pycodestyle
            pyflakes
            pyls-isort
            python-lsp-black
            python-lsp-server
          ]));
        };
      });
}