#!/usr/bin/busybox sh
# shellcheck shell=busybox disable=SC3001,SC3003

# cmdline format:
#
# use a particular dataset:
#    root=zfs:pool/dataset
# use the bootfs property of a pool:
#    root=zfs:pool
# import all pools and use the first one with a bootfs property:
#    root=zfs

# Abort on the first failing command, and let a failure in any pipeline
# stage fail the whole pipeline.
set -eo pipefail

SCRIPT_NAME="zfs-root-generator"
# Directory holding the helper library and the helper executables referenced
# by the generated units. The default is applied only when LIB_DIR is unset,
# so it can be overridden from the environment.
# The expansion is quoted so that an inherited value is never word-split or
# glob-expanded as arguments to ':' (ShellCheck SC2223).
: "${LIB_DIR=/usr/lib/zfs/initcpio}"
# shellcheck source=zfs-functions
. "$LIB_DIR/zfs-functions"


#
# functions
#

# Print the full path under which a generated unit must be stored.
#   $1 - unit name (e.g. "foo.service")
# Reads GENERATOR_DIR and GENERATOR_EARLY_DIR.
place_unit() {
	local name="$1" base

	case "$name" in
	# HACK: we put sysroot.mount into generator.early/ to override
	#       the sysroot.mount generated by systemd-fstab-generator
	sysroot.mount) base="$GENERATOR_EARLY_DIR" ;;
	*) base="$GENERATOR_DIR" ;;
	esac

	printf '%s\n' "$base/$name"
}

# Pull a generated unit into another unit by symlinking it into that unit's
# dependency directory ("<target>.wants/" or "<target>.requires/").
#   $1 - unit to be pulled in
#   $2 - unit (usually a target) that should pull it in
#   $3 - dependency kind, used verbatim as the directory suffix;
#        defaults to "wants" when the argument is omitted
enable_unit() {
	local unit="$1" target="$2" mode="${3-wants}"
	local dep_dir

	log "adding $unit to $target"

	# the dependency directory lives next to wherever the target itself
	# would be placed
	dep_dir="$(place_unit "$target").$mode"
	install -dm755 "$dep_dir"
	# trailing slash: create the link inside the directory, named after the unit
	ln -sf "$(place_unit "$unit")" "$dep_dir/"
}

# Write a unit file from standard input, prefixed with a
# "generated by" banner and an empty line.
#   $1    - unit name; its destination is resolved by place_unit
#   stdin - unit file contents
write_unit() {
	local name="$1" path

	log "writing $name"
	path="$(place_unit "$name")"
	{
		printf '%s\n\n' '# Automatically generated by zfs-root-generator'
		# pass the caller-supplied unit body through unchanged
		cat
	} | install -Dm644 /dev/stdin "$path"
}

# Write the two helper services that are needed in every mode and hook them
# into the initrd targets:
#   zfs-initrd-prepare.service       - required by initrd-root-device.target
#   zfs-initrd-prepare-cache.service - wanted by initrd.target
# Reads STATE_FILE and LIB_DIR; both are expanded into the unit text now.
write_common_units() {
	# [Service] settings shared by both oneshot units; the lines carry no
	# leading tabs because <<- only strips tabs from the literal heredoc
	# text, not from expanded values
	local oneshot_service="\
Type=oneshot
RemainAfterExit=yes
EnvironmentFile=-/etc/default/zfs
EnvironmentFile=-${STATE_FILE}"

	write_unit zfs-initrd-prepare.service <<-EOF
	[Unit]
	Description=Prepare to mount ZFS rootfs
	DefaultDependencies=no
	IgnoreOnIsolate=yes
	Requires=zfs-initrd-import.target
	After=zfs-initrd-import.target
	Before=initrd-root-device.target

	[Service]
	${oneshot_service}
	ExecStart=${LIB_DIR}/zfs-prepare-rootfs
	EOF
	enable_unit zfs-initrd-prepare.service initrd-root-device.target requires

	write_unit zfs-initrd-prepare-cache.service <<-EOF
	[Unit]
	Description=Update /etc/zfs/zfs-list.cache
	DefaultDependencies=no
	IgnoreOnIsolate=yes
	After=initrd-root-fs.target
	After=initrd-fs.target
	Before=initrd.target

	[Service]
	${oneshot_service}
	ExecStart=${LIB_DIR}/zfs-prepare-cache
	EOF
	enable_unit zfs-initrd-prepare-cache.service initrd.target wants
}

# Write zfs-initrd-import-scan.service (the actual "zpool import") and
# zfs-initrd-import.target, then hook them into initrd-root-device.target.
#
# Reads ZFS_ROOT_MODE (must be set) and STATE_FILE:
#   all          - import every pool; the target only Wants= the service
#   pool|dataset - import the pool named by ${ZFS_ROOT_POOL}; the target
#                  Requires= the service
# Any other mode is reported through die.
write_import_unit() {
	local import_cmds import_mode
	# part of the import command line common to all modes
	local zpool_import="ExecStart=/usr/bin/zpool import -N -o cachefile=none"

	# NOTE: import_cmds deliberately ends with a newline in each branch
	case "${ZFS_ROOT_MODE?}" in
	all)
		# we are trying to import all pools opportunistically,
		# don't fail if some can't be imported
		import_mode=wants
		import_cmds="${zpool_import} -a
"
		;;
	pool|dataset)
		import_mode=requires
		# \${ZFS_ROOT_POOL} is written to the unit literally (not expanded
		# here); presumably resolved from the EnvironmentFile= entries
		import_cmds="${zpool_import} \${ZFS_ROOT_POOL}
"
		;;
	*)
		die "invalid \$ZFS_ROOT_MODE=\"$ZFS_ROOT_MODE\""
		;;
	esac

	write_unit zfs-initrd-import-scan.service <<-EOF
	[Unit]
	Description=Import ZFS rootfs pool(s) by device scanning
	Documentation=man:zpool(8)
	DefaultDependencies=no
	IgnoreOnIsolate=yes
	Wants=systemd-udev-settle.service
	After=systemd-udev-settle.service
	After=cryptsetup.target
	After=multipathd.service
	Before=zfs-initrd-import.target

	# HACK: systemd-gpt-auto-generator is completely crazy and I cannot wrap my head
	# around the dependency chain it creates, so add a bunch of extra dependencies
	# to order us specifically after the units it creates.
	After=systemd-cryptsetup@root.service

	# HACK: we are not ordered after local-fs-pre.target, so ensure that
	# we do not import any pools simultaneously with hibernation resume
	After=systemd-hibernate-resume.service

	[Service]
	Type=oneshot
	RemainAfterExit=yes
	EnvironmentFile=-/etc/default/zfs
	EnvironmentFile=-${STATE_FILE}
	${import_cmds}
	EOF

	write_unit zfs-initrd-import.target <<-EOF
	[Unit]
	Description=ZFS rootfs pool import target
	DefaultDependencies=no
	IgnoreOnIsolate=yes
	EOF

	enable_unit zfs-initrd-import-scan.service zfs-initrd-import.target "$import_mode"

	# since zfs-initrd-import.target is a materialized unit, we can create a symlink here
	# once ZFS gets rid of a single "import" serializing point and gains hotplug awareness
	# (e.g. via synthetic per-zpool chardevs), we'd have to write a drop-in with Requires=/After= instead
	# (see how systemd does it in shared/generator.c:generator_write_initrd_root_device_deps())
	enable_unit zfs-initrd-import.target initrd-root-device.target requires

	# TODO: all of this does not play nice with systemd-gpt-auto-generator.
	# Perhaps investigate if we could plug the entire zfs-initrd-import.target
	# _after_ initrd-root-device.target, and make sysroot.mount depend on
	# zfs-initrd-import.target specifically?
}

# Write a ZFS .mount unit and make it a hard requirement of a target.
#   $1 - value for What= (dataset to mount)
#   $2 - value for Where= (absolute mountpoint); the unit name is derived
#        from it with systemd-escape
#   $3 - target the mount is ordered before and required by
# Reads STATE_FILE.
write_mount_unit() {
	local what="$1" where="$2" target="$3"
	local unit

	# mount unit names must correspond to the mountpoint path
	unit="$(systemd-escape --path --suffix=mount "$where")"

	# HACK: write_unit puts sysroot.mount into generator.early/
	#       to override the sysroot.mount from systemd-fstab-generator
	write_unit "$unit" <<-EOF
	[Unit]
	Requires=initrd-root-device.target
	After=initrd-root-device.target
	Before=${target}

	[Mount]
	Type=zfs
	What=${what}
	Where=${where}
	Options=zfsutil
	EnvironmentFile=${STATE_FILE}
	EOF

	enable_unit "$unit" "$target" requires
}

# Write only sysroot.mount for the root dataset, required by
# initrd-root-fs.target.
write_sysroot_unit() {
	# single quotes: the text ${ZFS_ROOT_DATASET} goes into What= verbatim
	# instead of being expanded by this script
	# shellcheck disable=SC2016
	write_mount_unit '${ZFS_ROOT_DATASET}' /sysroot initrd-root-fs.target
}

write_sysroot_recursive() {
	local name canmount mountpoint
	# mountpoint of the designated root dataset
	# (this is supposed to be /, but might not be; compensate for it)
	# TODO: once openzfs/zfs#4553 and openzfs/zfs#985 are fully resolved,
	#       use temporary mountpoints here
	local mountbase
	# temporary mountpoint under /sysroot
	local sysrootpoint
	local targetunit

	while IFS=$'\t' read -r name canmount mountpoint; do
		if [[ "$name" == "$ZFS_ROOT_DATASET" ]]; then
			if [[ "$mountpoint" != "/" ]]; then
				log "warning: root dataset \"$ZFS_ROOT_DATASET\" has mountpoint=\"$mountpoint\"" 4
				log "note: root dataset should have mountpoint=\"/\"" 5
				log "note: this will be compensated for, but newly created descendant datasets will not be mounted properly until reboot" 5
			fi
			mountbase="$mountpoint"

			if [[ "$canmount" == "off" ]]; then
				log "error: root dataset \"$ZFS_ROOT_DATASET\" has canmount=\"$canmount\"" 3
				log "note: root dataset should have canmount=(noauto|on)" 5
				log "note: this will be compensated for, but things may break" 5
			fi
			canmount="(override)"
		fi

		if [[ "$canmount" == off ]]; then
			log "debug: nested dataset \"$name\" has canmount=\"$canmount\"" 7
			log "debug: this dataset will be ignored" 7
			continue
		fi

		if ! sysrootpoint="$(mountpoint_rebase "$mountpoint" "$mountbase" "/sysroot")"; then
			log "debug: nested dataset \"$name\" has mountpoint=\"$mountpoint\" not under \"$mountbase\"" 7
			log "debug: this dataset will be ignored" 7
			continue
		fi

		if [[ "$sysrootpoint" == /sysroot ]]; then
			targetunit="initrd-root-fs.target"
		# initrd-usr-fs.target is not synchronized before initrd-switch-root.target
		# and we do not support /usr on top of volatile rootfs, so don't use it
		# elif [[ "$sysrootpoint" == /sysroot/usr ]]; then
		# 	targetunit="initrd-usr-fs.target"
		else
			targetunit="initrd-fs.target"
		fi

		log "debug: dataset=\"$name\" canmount=\"$canmount\" mountpoint=\"$mountpoint\" (mountbase=\"$mountbase\") target=\"$sysrootpoint\"" 7
		write_mount_unit "$name" "$sysrootpoint" "$targetunit"

	done < <(zfs list -Ho name,canmount,mountpoint -r "${ZFS_ROOT_DATASET:?}")
}


#
# main
#

setup_debug

# positional arguments: normal, early and late generator output directories
GENERATOR_DIR="$1"
GENERATOR_EARLY_DIR="$2"
GENERATOR_LATE_DIR="$3"

# every directory must be given and must already exist
[[ "$GENERATOR_DIR" && -d "$GENERATOR_DIR" ]] \
	|| die "invalid generator directory \"$GENERATOR_DIR\""
[[ "$GENERATOR_EARLY_DIR" && -d "$GENERATOR_EARLY_DIR" ]] \
	|| die "invalid early generator directory \"$GENERATOR_EARLY_DIR\""
[[ "$GENERATOR_LATE_DIR" && -d "$GENERATOR_LATE_DIR" ]] \
	|| die "invalid late generator directory \"$GENERATOR_LATE_DIR\""

# reuse previously saved variables when present, otherwise parse the cmdline
if vars_present; then
	load_vars
else
	parse_cmdline
fi

case "${ZFS_ROOT_MODE?}" in
	all|pool|dataset)
		# supported modes, carry on
		;;
	none)
		log "nothing to do"
		exit 0
		;;
	*)
		die "invalid \$ZFS_ROOT_MODE=\"$ZFS_ROOT_MODE\""
		;;
esac

write_common_units
write_import_unit

# check if the designated root pool + dataset is _already_ present
# (proc dance is to avoid triggering zfs.ko auto-load in a synchronous context)
if [[ $ZFS_ROOT_POOL && $ZFS_ROOT_DATASET ]] \
	&& [[ -e "/proc/spl/kstat/zfs/$ZFS_ROOT_POOL/state" ]] \
	&& zfs list -Ho name "$ZFS_ROOT_DATASET" >/dev/null 2>&1
then
	log "pool opened and dataset exists, writing nested mount units"
	write_sysroot_recursive
else
	log "pool not opened or dataset does not exist, writing sysroot.mount only"
	write_sysroot_unit
fi

dump_vars

log "done"
exit 0

# vim: ft=bash ts=8 noet:
