From 8c763c3d631c68d7debd1bb401970fd1c2f7a820 Mon Sep 17 00:00:00 2001 From: Joshua Potter Date: Mon, 11 Dec 2023 08:18:52 -0700 Subject: [PATCH] Add swap partition to DO image. --- digital-ocean/configuration.nix | 199 +++++++++++++++++++++ digital-ocean/default.nix | 72 +++++++- digital-ocean/make-disk-image.nix | 279 ++++++++++++++++++++++++++++++ flake.lock | 12 +- 4 files changed, 550 insertions(+), 12 deletions(-) create mode 100644 digital-ocean/configuration.nix create mode 100644 digital-ocean/make-disk-image.nix diff --git a/digital-ocean/configuration.nix b/digital-ocean/configuration.nix new file mode 100644 index 0000000..1535035 --- /dev/null +++ b/digital-ocean/configuration.nix @@ -0,0 +1,199 @@ +{ config, pkgs, lib, modulesPath, ... }: +with lib; +{ + imports = [ + (modulesPath + "/profiles/qemu-guest.nix") + (modulesPath + "/virtualisation/digital-ocean-init.nix") + ]; + options.virtualisation.digitalOcean = with types; { + setRootPassword = mkOption { + type = bool; + default = false; + example = true; + description = lib.mdDoc "Whether to set the root password from the Digital Ocean metadata"; + }; + setSshKeys = mkOption { + type = bool; + default = true; + example = true; + description = lib.mdDoc "Whether to fetch ssh keys from Digital Ocean"; + }; + seedEntropy = mkOption { + type = bool; + default = true; + example = true; + description = lib.mdDoc "Whether to run the kernel RNG entropy seeding script from the Digital Ocean vendor data"; + }; + }; + config = + let + cfg = config.virtualisation.digitalOcean; + hostName = config.networking.hostName; + doMetadataFile = "/run/do-metadata/v1.json"; + in mkMerge [{ + fileSystems."/" = { + device = "/dev/disk/by-label/nixos"; + autoResize = true; + fsType = "ext4"; + }; + swapDevices = [ + # Configured in `make-disk-image.nix`. + { device = "/dev/disk/by-label/swap"; } + ]; + boot = { + growPartition = true; + kernelParams = [ "console=ttyS0" "panic=1" "boot.panic_on_fail" ]; + initrd.kernelModules = [ "virtio_scsi" ]; + kernelModules = [ "virtio_pci" "virtio_net" ]; + loader = { + grub.device = "/dev/vda"; + timeout = 0; + grub.configurationLimit = 0; + }; + }; + services.openssh = { + enable = mkDefault true; + settings.PasswordAuthentication = mkDefault false; + }; + services.do-agent.enable = mkDefault true; + networking = { + hostName = mkDefault ""; # use Digital Ocean metadata server + }; + + /* Check for and wait for the metadata server to become reachable. + * This serves as a dependency for all the other metadata services. */ + systemd.services.digitalocean-metadata = { + description = "Get host metadata provided by Digitalocean"; + path = [ pkgs.curl ]; + script = '' + set -eu + DO_DELAY_ATTEMPTS=0 + while ! curl -fsSL -o $RUNTIME_DIRECTORY/v1.json http://169.254.169.254/metadata/v1.json; do + DO_DELAY_ATTEMPTS=$((DO_DELAY_ATTEMPTS + 1)) + if (( $DO_DELAY_ATTEMPTS >= $DO_DELAY_ATTEMPTS_MAX )); then + echo "giving up" + exit 1 + fi + + echo "metadata unavailable, trying again in 1s..." 
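+          # Back off for one second between polls of the link-local metadata
+          # endpoint; after DO_DELAY_ATTEMPTS_MAX failed attempts (set to 10
+          # in the environment block below) the loop gives up and the unit
+          # fails instead of retrying forever.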
+ sleep 1 + done + chmod 600 $RUNTIME_DIRECTORY/v1.json + ''; + environment = { + DO_DELAY_ATTEMPTS_MAX = "10"; + }; + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + RuntimeDirectory = "do-metadata"; + RuntimeDirectoryPreserve = "yes"; + }; + unitConfig = { + ConditionPathExists = "!${doMetadataFile}"; + After = [ "network-pre.target" ] ++ + optional config.networking.dhcpcd.enable "dhcpcd.service" ++ + optional config.systemd.network.enable "systemd-networkd.service"; + }; + }; + + /* Fetch the root password from the digital ocean metadata. + * There is no specific route for this, so we use jq to get + * it from the One Big JSON metadata blob */ + systemd.services.digitalocean-set-root-password = mkIf cfg.setRootPassword { + description = "Set root password provided by Digitalocean"; + wantedBy = [ "multi-user.target" ]; + path = [ pkgs.shadow pkgs.jq ]; + script = '' + set -eo pipefail + ROOT_PASSWORD=$(jq -er '.auth_key' ${doMetadataFile}) + echo "root:$ROOT_PASSWORD" | chpasswd + mkdir -p /etc/do-metadata/set-root-password + ''; + serviceConfig = { + Type = "oneshot"; + }; + unitConfig = { + ConditionPathExists = "!/etc/do-metadata/set-root-password"; + Before = optional config.services.openssh.enable "sshd.service"; + After = [ "digitalocean-metadata.service" ]; + Requires = [ "digitalocean-metadata.service" ]; + }; + }; + + /* Set the hostname from Digital Ocean, unless the user configured it in + * the NixOS configuration. The cached metadata file isn't used here + * because the hostname is a mutable part of the droplet. */ + systemd.services.digitalocean-set-hostname = mkIf (hostName == "") { + description = "Set hostname provided by Digitalocean"; + wantedBy = [ "network.target" ]; + path = [ pkgs.curl pkgs.nettools ]; + script = '' + set -e + DIGITALOCEAN_HOSTNAME=$(curl -fsSL http://169.254.169.254/metadata/v1/hostname) + hostname "$DIGITALOCEAN_HOSTNAME" + if [[ ! 
-e /etc/hostname || -w /etc/hostname ]]; then + printf "%s\n" "$DIGITALOCEAN_HOSTNAME" > /etc/hostname + fi + ''; + serviceConfig = { + Type = "oneshot"; + }; + unitConfig = { + Before = [ "network.target" ]; + After = [ "digitalocean-metadata.service" ]; + Wants = [ "digitalocean-metadata.service" ]; + }; + }; + + /* Fetch the ssh keys for root from Digital Ocean */ + systemd.services.digitalocean-ssh-keys = mkIf cfg.setSshKeys { + description = "Set root ssh keys provided by Digital Ocean"; + wantedBy = [ "multi-user.target" ]; + path = [ pkgs.jq ]; + script = '' + set -e + mkdir -m 0700 -p /root/.ssh + jq -er '.public_keys[]' ${doMetadataFile} > /root/.ssh/authorized_keys + chmod 600 /root/.ssh/authorized_keys + ''; + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + }; + unitConfig = { + ConditionPathExists = "!/root/.ssh/authorized_keys"; + Before = optional config.services.openssh.enable "sshd.service"; + After = [ "digitalocean-metadata.service" ]; + Requires = [ "digitalocean-metadata.service" ]; + }; + }; + + /* Initialize the RNG by running the entropy-seed script from the + * Digital Ocean metadata + */ + systemd.services.digitalocean-entropy-seed = mkIf cfg.seedEntropy { + description = "Run the kernel RNG entropy seeding script from the Digital Ocean vendor data."; + wantedBy = [ "network.target" ]; + path = [ pkgs.jq pkgs.mpack ]; + script = '' + set -eo pipefail + TEMPDIR=$(mktemp -d) + jq -er '.vendor_data' ${doMetadataFile} | munpack -tC $TEMPDIR + ENTROPY_SEED=$(grep -rl "DigitalOcean Entropy Seed script" $TEMPDIR) + ${pkgs.runtimeShell} $ENTROPY_SEED + rm -rf $TEMPDIR + ''; + serviceConfig = { + Type = "oneshot"; + }; + unitConfig = { + Before = [ "network.target" ]; + After = [ "digitalocean-metadata.service" ]; + Requires = [ "digitalocean-metadata.service" ]; + }; + }; + } + ]; + meta.maintainers = with maintainers; [ arianvp eamsden ]; +} diff --git a/digital-ocean/default.nix b/digital-ocean/default.nix index 654d0a4..743d72f 100644 --- a/digital-ocean/default.nix +++ b/digital-ocean/default.nix @@ -1,11 +1,71 @@ { pkgs, stateVersion }: let - module = { modulesPath, ... }: { - imports = [ - (modulesPath + "/virtualisation/digital-ocean-image.nix") - ]; + module = { config, modulesPath, lib, ... }: + let + cfg = config.virtualisation.digitalOceanImage; + in + { + # This import must exist for all configurations to take effect the first + # time launching a droplet. Refer to `config.system.build.toplevel` in + # `make-disk-image.nix`. + imports = [ ./configuration.nix ]; - system.stateVersion = stateVersion; - }; + options = { + virtualisation.digitalOceanImage.diskSize = lib.mkOption { + type = with lib.types; either (enum [ "auto" ]) int; + default = "auto"; + example = 4096; + description = lib.mdDoc '' + Size of disk image. Unit is MB. + ''; + }; + + virtualisation.digitalOceanImage.configFile = lib.mkOption { + type = with lib.types; nullOr path; + default = null; + description = lib.mdDoc '' + A path to a configuration file which will be placed at + `/etc/nixos/configuration.nix` and be used when switching + to a new configuration. If set to `null`, a default + configuration is used that imports + `(modulesPath + "/virtualisation/digital-ocean-config.nix")`. + ''; + }; + + virtualisation.digitalOceanImage.compressionMethod = lib.mkOption { + type = lib.types.enum [ "gzip" "bzip2" ]; + default = "gzip"; + example = "bzip2"; + description = lib.mdDoc '' + Disk image compression method. 
Choose bzip2 to generate smaller + images that take longer to generate but will consume less metered + storage space on your Digital Ocean account. + ''; + }; + }; + + config = { + system.build.digitalOceanImage = lib.mkForce ( + import ./make-disk-image.nix { + name = "digital-ocean-image"; + format = "qcow2"; + postVM = let + compress = { + "gzip" = "${pkgs.gzip}/bin/gzip"; + "bzip2" = "${pkgs.bzip2}/bin/bzip2"; + }.${cfg.compressionMethod}; + in '' + ${compress} $diskImage + ''; + configFile = ./configuration.nix; + swapSize = 1024; + inherit (cfg) diskSize; + inherit config lib pkgs; + } + ); + + system.stateVersion = stateVersion; + }; + }; in (pkgs.nixos module).digitalOceanImage diff --git a/digital-ocean/make-disk-image.nix b/digital-ocean/make-disk-image.nix new file mode 100644 index 0000000..d62860e --- /dev/null +++ b/digital-ocean/make-disk-image.nix @@ -0,0 +1,279 @@ +{ pkgs +, lib +, name ? "nixos-disk-image" +, # The NixOS configuration to be installed onto the disk image. + config +, # The size of the disk, in megabytes. If "auto", the size is calculated based + # on the contents copied to it and `extraSize` is taken into account. + diskSize ? "auto" +, # Extra disk space, in megabytes. Added to the image if diskSize "auto" is + # used. + extraSize ? 512 +, # Swap space, in megabytes. Addeded to the image unless set to null. + swapSize ? 512 +, # The unique identifier for the swap partition. + swapUUID ? "44444444-4444-4444-8888-888888888887" +, # The label given to the swap partition. + swapLabel ? "swap" +, # Whether to invoke `switch-to-configuration boot` during image creation. + installBootLoader ? true +, # The filesystem label. + label ? "nixos" +, # The initial NixOS configuration file set at `/etc/nixos/configuration.nix`. + configFile ? null +, # Disk image format, one of qcow2, qcow2-compressed, vdi, vpc, raw. + format ? "raw" +, # The root Filesystem Unique Identifier. + rootFSUID ? "F222513B-DED1-49FA-B591-20CE86A2FE7F" +, # Whether a nix channel based on the current source tree should be + # made available inside the image. Useful for interactive use of nix + # utils, but changes the hash of the image when the sources are + # updated. + copyChannel ? true +, # Shell code executed after the VM has finished. + postVM ? "" +, # Guest memory size + memSize ? 1024 +}: +let + format' = if format == "qcow2-compressed" then "qcow2" else format; + + compress = lib.optionalString (format == "qcow2-compressed") "-c"; + + filename = "nixos." + { + qcow2 = "qcow2"; + vdi = "vdi"; + vpc = "vhd"; + raw = "img"; + }.${format'} or format'; + + swapPartition = "1"; + rootPartition = "2"; + swapEnd = toString (1 + swapSize); + + nixpkgs = lib.cleanSource pkgs.path; + + # FIXME: merge with channel.nix / make-channel.nix. + channelSources = pkgs.runCommand "nixos-${config.system.nixos.version}" {} '' + mkdir -p $out + cp -prd ${nixpkgs.outPath} $out/nixos + chmod -R u+w $out/nixos + if [ ! -e $out/nixos/nixpkgs ]; then + ln -s . 
$out/nixos/nixpkgs + fi + rm -rf $out/nixos/.git + echo -n ${config.system.nixos.versionSuffix} > $out/nixos/.version-suffix + ''; + + binPath = lib.makeBinPath (with pkgs; [ + rsync + util-linux + parted + e2fsprogs + lkl + config.system.build.nixos-install + config.system.build.nixos-enter + nix + systemdMinimal + gptfdisk + ] ++ stdenv.initialPath); + + closureInfo = pkgs.closureInfo { + rootPaths = [ config.system.build.toplevel ] ++ + lib.optional copyChannel channelSources; + }; + + # ext4fs block size (not block device sector size) + blockSize = toString (4 * 1024); + + prepareImage = '' + export PATH=${binPath} + + # Yes, mkfs.ext4 takes different units in different contexts. Fun. + sectorsToKilobytes() { + echo $(( ( "$1" * 512 ) / 1024 )) + } + + sectorsToBytes() { + echo $(( "$1" * 512 )) + } + + # Given lines of numbers, adds them together + sum_lines() { + local acc=0 + while read -r number; do + acc=$((acc+number)) + done + echo "$acc" + } + + mebibyte=$(( 1024 * 1024 )) + + # Approximative percentage of reserved space in an ext4 fs over 512MiB. + # 0.05208587646484375 × 1000, integer part: 52 + compute_fudge() { + echo $(( $1 * 52 / 1000 )) + } + + mkdir $out + + root="$PWD/root" + mkdir -p $root + + export HOME=$TMPDIR + + # Provide a Nix database so that nixos-install can copy closures. + export NIX_STATE_DIR=$TMPDIR/state + nix-store --load-db < ${closureInfo}/registration + + chmod 755 "$TMPDIR" + echo "running nixos-install..." + nixos-install --root $root --no-bootloader --no-root-passwd \ + --system ${config.system.build.toplevel} \ + ${if copyChannel then "--channel ${channelSources}" else "--no-channel-copy"} \ + --substituters "" + + diskImage=nixos.raw + + ${if diskSize == "auto" then '' + # Add the 1MiB aligned reserved space (includes MBR) + reservedSpace=$(( mebibyte )) + + swapSpace=$(( + $(numfmt --from=iec '${toString swapSize}M') + reservedSpace + )) + + extraSpace=$(( + $(numfmt --from=iec '${toString extraSize}M') + reservedSpace + )) + + # Compute required space in filesystem blocks + diskUsage=$( + find . ! -type d -print0 | + du --files0-from=- --apparent-size --block-size "${blockSize}" | + cut -f1 | + sum_lines + ) + # Each inode takes space! + numInodes=$(find . | wc -l) + # Convert to bytes, inodes take two blocks each! + diskUsage=$(( (diskUsage + 2 * numInodes) * ${blockSize} )) + # Then increase the required space to account for the reserved blocks. + fudge=$(compute_fudge $diskUsage) + requiredFilesystemSpace=$(( diskUsage + fudge )) + + diskSize=$(( requiredFilesystemSpace + swapSpace + extraSpace )) + + # Round up to the nearest mebibyte. This ensures whole 512 bytes sector + # sizes in the disk image and helps towards aligning partitions optimally. + if (( diskSize % mebibyte )); then + diskSize=$(( ( diskSize / mebibyte + 1) * mebibyte )) + fi + + truncate -s "$diskSize" $diskImage + + printf "Automatic disk size...\n" + printf " Closure space use: %d bytes\n" $diskUsage + printf " fudge: %d bytes\n" $fudge + printf " Filesystem size needed: %d bytes\n" $requiredFilesystemSpace + printf " Swap space: %d bytes\n" $swapSpace + printf " Extra space: %d bytes\n" $extraSpace + printf " Disk image size: %d bytes\n" $diskSize + '' else '' + truncate -s ${toString diskSize}M $diskImage + ''} + + parted --script $diskImage -- mklabel msdos + parted --script $diskImage -- \ + mkpart primary linux-swap 1MiB ${swapEnd}MiB \ + mkpart primary ext4 ${swapEnd}MiB -1 + + # Get start & length of the root partition in sectors to $START and + # $SECTORS. 
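+    # partx prints the requested columns as shell assignments
+    # (START="..." SECTORS="..."), so the eval below turns them into shell
+    # variables. mkfs.ext4 then creates the root filesystem directly at that
+    # byte offset inside the raw image, since no loop devices are available
+    # in the build sandbox.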
+ eval $(partx $diskImage -o START,SECTORS --nr ${rootPartition} --pairs) + + mkfs.ext4 -b ${blockSize} -F -L ${label} $diskImage -E \ + offset=$(sectorsToBytes $START) $(sectorsToKilobytes $SECTORS)K + + echo "copying staging root to image..." + cptofs -p -P ${rootPartition} -t ext4 -i $diskImage $root/* / || + (echo >&2 "ERROR: cptofs failed. diskSize might be too small for closure."; exit 1) + ''; + + moveOrConvertImage = '' + ${if format' == "raw" then "mv $diskImage $out/${filename}" else '' + ${pkgs.qemu-utils}/bin/qemu-img convert -f raw -O ${format'} ${compress} \ + $diskImage $out/${filename} + ''} + diskImage=$out/${filename} + ''; + + buildImage = pkgs.vmTools.runInLinuxVM ( + pkgs.runCommand name { + preVM = prepareImage; + buildInputs = with pkgs; [ util-linux e2fsprogs dosfstools ]; + postVM = moveOrConvertImage + postVM; + inherit memSize; + } '' + export PATH=${binPath}:$PATH + + rootDisk="/dev/vda${rootPartition}" + + # It is necessary to set root filesystem unique identifier in advance, + # otherwise the bootloader might get the wrong one and fail to boot. At + # the end, we reset again because we want deterministic timestamps. + tune2fs -T now -U ${rootFSUID} -c 0 -i 0 $rootDisk + # Make systemd-boot find ESP without udev. + mkdir /dev/block + ln -s /dev/vda1 /dev/block/254:1 + + mountPoint=/mnt + mkdir $mountPoint + mount $rootDisk $mountPoint + + # Create the swapspace without turning it on. + mkswap -U ${swapUUID} -L ${swapLabel} /dev/vda${swapPartition} + swapon /dev/vda${swapPartition} + + # Install a configuration.nix + mkdir -p /mnt/etc/nixos + ${lib.optionalString (configFile != null) '' + cp ${configFile} /mnt/etc/nixos/configuration.nix + ''} + + ${lib.optionalString installBootLoader '' + # In this throwaway resource, we only have `/dev/vda`, but the actual VM + # may refer to another disk for bootloader, e.g. `/dev/vdb`. Use this + # option to create a symlink from vda to any arbitrary device you want. + ${lib.optionalString ( + config.boot.loader.grub.enable && + config.boot.loader.grub.device != "/dev/vda" + ) '' + mkdir -p $(dirname ${config.boot.loader.grub.device}) + ln -s /dev/vda ${config.boot.loader.grub.device} + ''} + + # Set up core system link, bootloader (sd-boot, GRUB, uboot, etc.), etc. + NIXOS_INSTALL_BOOTLOADER=1 nixos-enter --root $mountPoint -- \ + /nix/var/nix/profiles/system/bin/switch-to-configuration boot + + # The above scripts will generate a random machine-id and we don't want + # to bake a single ID into all our images. + rm -f $mountPoint/etc/machine-id + ''} + + umount -R /mnt + + # Make sure resize2fs works. Note that resize2fs has stricter criteria for + # resizing than a normal mount, so the `-c 0` and `-i 0` don't affect it. + # Setting it to `now` doesn't produce deterministic output, of course, but + # we can fix that when/if we start making images deterministic. This is + # fixed to 1970-01-01 (UNIX timestamp 0). This two-step approach is + # necessary otherwise `tune2fs` will want a fresher filesystem to perform + # some changes. 
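+    # `-T` sets the filesystem's "last checked" timestamp; `-f` is required
+    # on the second call to force it backwards to the epoch.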
+ tune2fs -T now -U ${rootFSUID} -c 0 -i 0 $rootDisk + tune2fs -f -T 19700101 $rootDisk + '' + ); +in + buildImage diff --git a/flake.lock b/flake.lock index 7e8e5d4..1a9e8ce 100644 --- a/flake.lock +++ b/flake.lock @@ -23,11 +23,11 @@ }, "nixpkgs-23_05": { "locked": { - "lastModified": 1701805708, - "narHash": "sha256-hh0S14E816Img0tPaNQSEKFvSscSIrvu1ypubtfh6M4=", + "lastModified": 1702221085, + "narHash": "sha256-Br3GCSkkvkmw46cT6wCz6ro2H1WgDMWbKE0qctbdtL0=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "0561103cedb11e7554cf34cea81e5f5d578a4753", + "rev": "c2786e7084cbad90b4f9472d5b5e35ecb57958af", "type": "github" }, "original": { @@ -39,11 +39,11 @@ }, "nixpkgs-23_11": { "locked": { - "lastModified": 1701952659, - "narHash": "sha256-TJv2srXt6fYPUjxgLAL0cy4nuf1OZD4KuA1TrCiQqg0=", + "lastModified": 1702233072, + "narHash": "sha256-H5G2wgbim2Ku6G6w+NSaQaauv6B6DlPhY9fMvArKqRo=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "b4372c4924d9182034066c823df76d6eaf1f4ec4", + "rev": "781e2a9797ecf0f146e81425c822dca69fe4a348", "type": "github" }, "original": {
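For context, the image this patch produces can be built locally before being uploaded as a Digital Ocean custom image. The sketch below is illustrative only: it relies solely on the `{ pkgs, stateVersion }` signature of `digital-ocean/default.nix` shown above, and the wrapper file name, nixpkgs pin, and `stateVersion` value are assumptions rather than part of this repository.

    # build.nix -- hypothetical wrapper, not part of this patch.
    let
      pkgs = import <nixpkgs> { system = "x86_64-linux"; };
    in
      # `default.nix` evaluates the NixOS module and returns the derivation in
      # `config.system.build.digitalOceanImage`.
      import ./digital-ocean {
        inherit pkgs;
        stateVersion = "23.11";
      }

With the defaults above, `nix-build build.nix` should leave a gzip-compressed qcow2 (roughly `result/nixos.qcow2.gz`) whose size follows the `diskSize = "auto"` calculation in `make-disk-image.nix` plus the 1024 MB swap partition wired up in `default.nix`.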