diff --git a/specs/airflow/runner b/specs/airflow/runner new file mode 100755 index 0000000..9eda5ea --- /dev/null +++ b/specs/airflow/runner @@ -0,0 +1,55 @@ +#!/usr/bin/env bash + +# Exit immediately if the script encounters a non-zero status. +set -e + +# If set, Bash includes filenames beginning with a `.` in the results of +# filename expansion. The filenames `.` and `..` must always be matched +# explicitly, even if dotglob is set. +shopt -s dotglob + +# ============================================================ +# PROLOGUE +# ============================================================ + +# Create a new top-level directory as fallback in case $BUILD (defined below) +# is ever empty. +mkdir -p "/tmp/bs.airflow" + +# Create an intermediate build directory. The final step of this script will +# copy the content from this directory to $OUT. +BUILD=$(mktemp -d -p "/tmp/bs.airflow") + +if [ -z "$BUILD" ]; then + >&2 echo "Failed to create temp directory." + exit 1 +fi + +# Deletes the intermediate build directory on exit. We use a concatenation of +# the intermediate directory with the basename of the generated temp directory +# to ensure we never evaluate to root (i.e. `/`). That should never actually +# happen but a good habit to establish nonetheless. +function cleanup { + rm -r "/tmp/bs.airflow/$(basename "$BUILD")" +} + +trap cleanup EXIT + +# ============================================================ +# BUILD +# ============================================================ + +# Copy template contents over to the intermediate build directory. +cp -r template/* "$BUILD" + +# Explicitly set permissions on all copied files. +find "$BUILD" -type f -execdir chmod 644 {} + +find "$BUILD" -type d -execdir chmod 755 {} + +chmod 755 "$BUILD"/.githooks/pre-commit + +# ============================================================ +# EPILOGUE +# ============================================================ + +# Success! Copy contents to target directory. 
+cp -a "$BUILD"/* "$OUT" diff --git a/specs/airflow/template/.envrc b/specs/airflow/template/.envrc new file mode 100644 index 0000000..817939c --- /dev/null +++ b/specs/airflow/template/.envrc @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +if command -v git > /dev/null && on_git_branch; then + git config --local core.hooksPath .githooks/ +fi + +use flake diff --git a/specs/airflow/template/.githooks/pre-commit b/specs/airflow/template/.githooks/pre-commit new file mode 100755 index 0000000..eecd520 --- /dev/null +++ b/specs/airflow/template/.githooks/pre-commit @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +set -e + +STAGED=$( + git --no-pager diff --name-only --no-color --cached --diff-filter=d | + # Remove quotations used to surround filenames with special characters. + sed -e "s/^\"//" -e "s/\"$//g" +) + +TARGETS=() +while IFS= read -r FILENAME +do + if [[ "$FILENAME" =~ .*\.py$ ]]; then + TARGETS+=("${FILENAME}") + fi +done <<< "$STAGED" + +if (( ${#TARGETS[@]} )); then + black --quiet "${TARGETS[@]}" + git add "${TARGETS[@]}" +fi diff --git a/specs/airflow/template/.gitignore b/specs/airflow/template/.gitignore new file mode 100644 index 0000000..f845a3d --- /dev/null +++ b/specs/airflow/template/.gitignore @@ -0,0 +1,13 @@ +# The directory containing configuration files and SQLite database. +airflow/ + +# Directory used by `direnv` to hold `use flake`-generated profiles. +/.direnv/ + +# A symlink produced by default when running `nix build`. +/result + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class diff --git a/specs/airflow/template/README.md b/specs/airflow/template/README.md new file mode 100644 index 0000000..f5a14d6 --- /dev/null +++ b/specs/airflow/template/README.md @@ -0,0 +1,22 @@ +# Airflow Dev Shell + +This is a small flake template for experimenting with [Apache Airflow](https://airflow.apache.org/) +(version 2.7.3). 
[direnv](https://direnv.net/) can be used to launch a dev +shell upon entering this directory (refer to `.envrc`). Otherwise run via: +```bash +$ nix develop +``` +Once your environment has booted, you can get started with the UI by running: +```bash +$ airflow standalone +``` +The webserver lives at `localhost:8080` by default. The username is `admin` and +the generated password can be found by running: +```bash +$ cat airflow/standalone_admin_password.txt +``` +or looking through the console output. + +A basic DAG is included in the top-level `dags/` directory. Airflow discovers +this DAG with the help of the `AIRFLOW__CORE__DAGS_FOLDER` environment variable, +set within the nix dev shell. diff --git a/specs/airflow/template/dags/example_custom.py b/specs/airflow/template/dags/example_custom.py new file mode 100644 index 0000000..0c8988d --- /dev/null +++ b/specs/airflow/template/dags/example_custom.py @@ -0,0 +1,82 @@ +from __future__ import annotations + +import textwrap +from datetime import datetime, timedelta + +from airflow.models.dag import DAG +from airflow.operators.bash import BashOperator + +with DAG( + "example_custom", + # These args will get passed on to each operator. You can override them on + # a per-task basis during operator initialization. 
+ default_args={ + "depends_on_past": False, + "email": ["airflow@example.com"], + "email_on_failure": False, + "email_on_retry": False, + "retries": 1, + "retry_delay": timedelta(minutes=5), + # 'queue': 'bash_queue', + # 'pool': 'backfill', + # 'priority_weight': 10, + # 'end_date': datetime(2016, 1, 1), + # 'wait_for_downstream': False, + # 'sla': timedelta(hours=2), + # 'execution_timeout': timedelta(seconds=300), + # 'on_failure_callback': some_function, # or list of functions + # 'on_success_callback': some_other_function, # or list of functions + # 'on_retry_callback': another_function, # or list of functions + # 'sla_miss_callback': yet_another_function, # or list of functions + # 'trigger_rule': 'all_success' + }, + description="An example DAG", + schedule=timedelta(days=1), + start_date=datetime(2021, 1, 1), + catchup=False, + tags=["example"], +) as dag: + # t1, t2 and t3 are examples of tasks created by instantiating operators. + t1 = BashOperator( + task_id="print_date", + bash_command="date", + ) + + t2 = BashOperator( + task_id="sleep", + depends_on_past=False, + bash_command="sleep 5", + retries=3, + ) + + t1.doc_md = textwrap.dedent( + """\ + #### Task Documentation + You can document your task using the attributes `doc_md` (markdown), + `doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml` which gets + rendered in the UI's Task Instance Details page. 
+ ![img](http://montcs.bloomu.edu/~bobmon/Semesters/2012-01/491/import%20soul.png) + **Image Credit:** Randall Munroe, [XKCD](https://xkcd.com/license.html) + """ + ) + + dag.doc_md = """ + This is a documentation placed anywhere + """ + + templated_command = textwrap.dedent( + """ + {% for i in range(5) %} + echo "{{ ds }}" + echo "{{ macros.ds_add(ds, 7)}}" + {% endfor %} + """ + ) + + t3 = BashOperator( + task_id="templated", + depends_on_past=False, + bash_command=templated_command, + ) + + t1 >> [t2, t3] diff --git a/specs/airflow/template/flake.lock b/specs/airflow/template/flake.lock new file mode 100644 index 0000000..b79aca5 --- /dev/null +++ b/specs/airflow/template/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1694529238, + "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "ff7b65b44d01cf9ba6a71320833626af21126384", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1700794826, + "narHash": "sha256-RyJTnTNKhO0yqRpDISk03I/4A67/dp96YRxc86YOPgU=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "5a09cb4b393d58f9ed0d9ca1555016a8543c2ac8", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/specs/airflow/template/flake.nix 
b/specs/airflow/template/flake.nix new file mode 100644 index 0000000..570e2f1 --- /dev/null +++ b/specs/airflow/template/flake.nix @@ -0,0 +1,43 @@ +{ + description = '' + An Airflow dev shell. + + To generate a copy of this template elsewhere, install + [bootstrap](https://github.com/jrpotter/bootstrap) and run: + ```bash + $ bootstrap airflow + ``` + ''; + + inputs = { + flake-utils.url = "github:numtide/flake-utils"; + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + }; + + outputs = { nixpkgs, flake-utils, ... }: + flake-utils.lib.eachDefaultSystem (system: + let + pkgs = nixpkgs.legacyPackages.${system}; + in + { + devShells.default = pkgs.mkShell { + AIRFLOW_HOME = "$PWD/airflow"; + AIRFLOW__CORE__DAGS_FOLDER = "$PWD/dags"; + LD_LIBRARY_PATH = "${pkgs.stdenv.cc.cc.lib}/lib"; + + packages = (with pkgs; [ + apache-airflow + ] ++ (with python311Packages; [ + black + debugpy + mccabe + mypy + pycodestyle + pyflakes + pyls-isort + python-lsp-black + python-lsp-server + ])); + }; + }); +}