#!/bin/sh
# STACK startup script
# Copyright (C) 2007 Ken Bantoft <ken@xelerance.com>
# Copyright (C) 2007-2008 Paul Wouters <paul@xelerance.com>
# Copyright (C) 2008-2019 Tuomo Soini <tis@foobar.fi>
# Copyright (C) 2012-2014 Paul Wouters <paul@libreswan.org>
# Copyright (C) 2019 Paul Wouters <pwouters@redhat.com>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.  See <https://www.gnu.org/licenses/gpl2.txt>.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#

# nothing to do if the kernel has no module loading/unloading support
[ -f /proc/modules ] || exit 0

# These prevent coverity warnings; variables come in via ipsec addconn call
xfrmlifetime=""
overridemtu=""
fragicmp=""
hidetos=""

IPSEC_CONF="${IPSEC_CONF:-/etc/ipsec.conf}"
PATH=/usr/sbin:${PATH}
export PATH

# Import the "config setup" items as shell variables.  Item names may
# contain '-', which is not valid in a shell identifier, so it is renamed
# to '_'.  Only the text before the first '=' of each line is rewritten, so
# dashes inside values (interface names, negative numbers, ...) survive.
# NOTE(review): assumes addconn prints name=value style lines - confirm.
# The substitution is quoted so the text reaches eval without word
# splitting or pathname expansion.
eval "$(ASAN_OPTIONS=detect_leaks=0 ipsec addconn --configsetup |
    grep -v "#" |
    sed -e ':a' -e 's/^\([^=]*\)-/\1_/' -e 'ta')"

# any non-empty IPSEC_INIT_SCRIPT_DEBUG turns on shell tracing
[ -n "${IPSEC_INIT_SCRIPT_DEBUG}" ] && set -v -x

# MODPROBE holds a command plus its options and is expanded unquoted
MODPROBE="modprobe --quiet --use-blacklist"

# files whose presence signals that the respective stack is loaded
xfrm_stat=/proc/sys/net/core/xfrm_acq_expires
klipsstack=/proc/net/ipsec/version
action="${1}"

if [ -z "${action}" ]; then
    echo "no action specified - aborted" >&2
    exit 1
fi

# Flush all XFRM state and policy and try to unload the XFRM IPsec modules.
# Globals:   xfrm_stat (read)
# Arguments: $1 - optional "--wait" or "-w"; when given, modules that do
#                 not unload immediately are retried in the background
#                 with that option passed to rmmod.  Any other value is
#                 ignored.
# Outputs:   diagnostics on stderr
# Returns:   exits the script with status 1 when a wait option was given
#            and lsmod still lists a module starting with "esp"
stopnetkey() {
    local wait larval_drop mod
    wait=""
    larval_drop=""
    # Validate wait option
    case "${1}" in
	--wait|-w) wait="${1}" ;;
    esac

    if [ -f "${xfrm_stat}" ]; then
	ip xfrm state flush
	ip xfrm policy flush
	if [ -n "$(ip xfrm state)" ] || [ -n "$(ip xfrm policy)" ]; then
	    echo "XFRM IPsec stack could not be cleared" >&2
	fi

	if [ -f /proc/modules ]; then
	    # check if we can write our own settings
	    if [ -n "${wait}" ] && [ -w /proc/sys/net/core/xfrm_larval_drop ]; then
		# read previous state so we can restore it
		larval_drop=$(cat /proc/sys/net/core/xfrm_larval_drop)
		if [ -n "${larval_drop}" ]; then
		    # set to 0 so we can unload modules
		    echo 0 >/proc/sys/net/core/xfrm_larval_drop
		fi
	    fi
	    # netkey stack found, let's unload.
	    for mod in xfrm_ipcomp ipcomp ipcomp6 ip_vti xfrm6_tunnel \
		xfrm6_mode_tunnel xfrm6_mode_beet xfrm6_mode_ro \
		xfrm6_mode_transport xfrm4_mode_transport xfrm4_mode_tunnel \
		xfrm4_tunnel xfrm4_mode_beet esp4 esp6 ah4 ah6 \
		xfrm_user xfrm_interface
	    do
		# The trailing space anchors the match to the whole module
		# name, so e.g. "ipcomp" does not match a loaded "ipcomp6".
		# first try and unload the modules without the 10s wait pause
		if grep -q "^${mod} " /proc/modules; then
		    rmmod "${mod}" 2>/dev/null
		fi
		# We only run rmmod again with --wait if requested
		if [ -n "${wait}" ] && grep -q "^${mod} " /proc/modules; then
		    # we start rmmod to background so unloading one module
		    # won't block
		    rmmod "${wait}" "${mod}" 2>/dev/null &
		fi
	    done
	    # Finally we wait for background executed rmmods to complete
	    if [ -n "${wait}" ]; then
		# this is the shell builtin, not the local variable
		wait
		if [ -n "${larval_drop}" ] && \
		    [ -w /proc/sys/net/core/xfrm_larval_drop ]; then
		    # restore original value of xfrm_larval_drop
		    echo "${larval_drop}" >/proc/sys/net/core/xfrm_larval_drop
		    larval_drop=""
		fi
	    fi
	fi
    fi
    # if we were executed with --wait or -w option we inform about unload
    # failure
    if [ -n "${wait}" ] && lsmod | grep -q "^esp"; then
	echo "FAIL" >&2
	exit 1
    fi
}

# Load the XFRM IPsec stack and apply the configured ACQUIRE lifetime.
# We can get called even if we abort with "pluto already running"
# Globals:   MODPROBE, klipsstack, xfrm_stat, xfrmlifetime (all read)
# Outputs:   progress on stdout, warnings and failures on stderr
# Returns:   exits the script with status 1 when KLIPS cannot be unloaded
#            or the XFRM stack is not present after loading
startnetkey() {
    local mod xcur
    # in case pluto crashed, clear what it left in the kernel - but only
    # when no pluto is running
    if ! pidof pluto >/dev/null; then
	ip xfrm policy flush
	ip xfrm state flush
    fi

    cryptomodules

    if [ -f "${klipsstack}" ]; then
	echo "Warning: found KLIPS stack loaded - attempting to unload..." >&2
	stopklips
	if [ -f "${klipsstack}" ]; then
	    echo "FAILURE to unload KLIPS module" >&2
	    exit 1
	fi
    fi

    if [ -f /proc/modules ]; then
	# load all XFRM modules
	# (MODPROBE is a command plus options and must stay unquoted)
	for mod in ipcomp6 xfrm_ipcomp ipcomp xfrm6_tunnel xfrm6_mode_tunnel \
	    xfrm6_mode_beet xfrm6_mode_ro xfrm6_mode_transport \
	    xfrm4_mode_transport xfrm4_mode_tunnel xfrm4_tunnel \
	    xfrm4_mode_beet esp4 esp6 ah4 ah6 ip_vti xfrm_interface
	do
	    ${MODPROBE} "${mod}" 2>/dev/null
	done

	# xfrm_user is also loaded - kept for backwards compatibility
	${MODPROBE} xfrm_user 2>/dev/null
    fi

    # Time before kernel ACQUIRE for ondemand/ Opportunistic expires
    # Also Time before reserved kernel SPI times out
    xcur=$(cat /proc/sys/net/core/xfrm_acq_expires 2>/dev/null)
    if [ -z "${xfrmlifetime}" ]; then
	# nothing configured: leave the kernel value alone instead of
	# feeding an empty operand to the numeric test below
	:
    elif [ -w /proc/sys/net/core/xfrm_acq_expires ]; then
	if [ "${xfrmlifetime}" -ne "${xcur:-0}" ]; then
	    echo "changing /proc/sys/net/core/xfrm_acq_expires from ${xcur} to ${xfrmlifetime}"
	    echo "${xfrmlifetime}" >/proc/sys/net/core/xfrm_acq_expires
	fi
    else
	echo "WARNING: can not change /proc/sys/net/core/xfrm_acq_expires from ${xcur} to ${xfrmlifetime}" >&2
    fi

    # Fail on error in loading XFRM IPsec stack
    if [ ! -f "${xfrm_stat}" ]; then
	echo "FAILURE in loading XFRM IPsec stack" >&2
	exit 1
    fi
}

# Detach and delete the KLIPS ipsecX interfaces and unload the ipsec module.
# Globals:   klipsstack (read)
# Outputs:   failure message on stderr
# Returns:   exits the script with status 1 when the KLIPS stack file still
#            exists or lsmod still lists a module starting with "ipsec"
stopklips() {
    local ifl iface
    if [ -f "${klipsstack}" ]; then
	# Bring down ipsecX interfaces
	ifl=$(grep -v NULL /proc/net/ipsec_tncfg 2>/dev/null | sed -n -e "/^ipsec/s/ .*//p")
	if [ -n "${ifl}" ]; then
	    # ifl is a whitespace separated list: splitting is intended
	    for iface in ${ifl}; do
		ip link set "${iface}" down
		ip addr flush dev "${iface}"
		ipsec tncfg --detach --virtual "${iface}"
		# ipsec0 won't let itself be deleted
		ipsec tncfg --delete "${iface}"
	    done
	    [ -r /proc/net/ipsec_klipsdebug ] && ipsec klipsdebug --none
	    [ -d /proc/net/ipsec/eroute ] && ipsec eroute --clear
	    [ -d /proc/net/ipsec/spi ] && ipsec spi --clear
	fi

	if [ -f /proc/modules ]; then
	    rmmod ipsec 2>/dev/null
	fi
    fi
    if [ -f "${klipsstack}" ] || lsmod | grep -q "^ipsec"; then
	echo "FAILURE to unload KLIPS module" >&2
	exit 1
    fi
}

# Tear down both IPsec stacks: XFRM/NETKEY first, then KLIPS.
# Takes no arguments; the status is that of the last teardown step.
stop() {
    local teardown
    for teardown in stopnetkey stopklips; do
	"${teardown}"
    done
}

# Load the OCF / CryptoAPI modules that may be needed for acceleration and
# tune the OCF queue size when OCF is present.
# Globals:   MODPROBE (read); limit (written when /sys/module/ocf exists -
#            deliberately NOT local because startklips reads it)
# Outputs:   none; modprobe errors are discarded on purpose, most of these
#            modules do not exist on any given kernel
cryptomodules() {
    local bogomips ram module path
    # load any OCF and CryptoAPI modules we might need for acceleration
    # (OCF works for XFRM IPsec and KLIPS)
    # OCF cryptosoft is for kernel acceleration (ESP/AH)
    ${MODPROBE} cryptosoft 2>/dev/null
    # We skip cryptodev.ko because we no longer support /dev/crypto offloading
    # (the overhead of cryptodev is not worth it even on embedded platforms)

    if [ -d /sys/module/ocf ]; then
	# OCF tuning - If we have enough bogomips and RAM,
	# use bigger OCF queues
	bogomips=$(grep -i bogomips /proc/cpuinfo | head -1 | awk '{print $3;}' | sed "s/\..*$//")
	ram=$(head -1 /proc/meminfo | awk '{print $2;}')
	limit=1000
	# default to 0 so a missing value cannot break the numeric test
	if [ "${bogomips:-0}" -gt 1000 ] && [ "${ram:-0}" -gt 262143 ]; then
	    limit=10000
	fi
	echo "${limit}" >/sys/module/ocf/parameters/crypto_q_max
    fi

    # load the most common ciphers/algo's
    # padlock must load before aes module - though does not exist on newer
    # kernels
    # padlock-aes must load before padlock-sha for some reason
    ${MODPROBE} padlock 2>/dev/null
    ${MODPROBE} padlock-aes 2>/dev/null
    ${MODPROBE} padlock-sha 2>/dev/null

    # load the most common ciphers/algo's
    # aes-x86_64 has higher priority in via crypto api
    for module in aesni-intel aes-x86_64 geode-aes aes aes_generic des sha512 \
	sha256 md5 cbc xcbc ecb twofish blowfish serpent ccm gcm ctr cts \
	deflate cast5 cast6 lzo sha256_generic sha512_generic camellia \
	cmac chacha20poly1305
    do
	${MODPROBE} "${module}" 2>/dev/null
    done

    # then everything shipped in the arch specific crypto directory
    # kernel directory does not match uname -m on x86_64 :(
    for path in /lib/modules/"$(uname -r)"/kernel/arch/*/crypto/*; do
	# an unmatched glob stays literal; skip it and anything not a file
	[ -f "${path}" ] || continue
	module=${path##*/}
	# strip .ko as well as compressed forms such as .ko.xz / .ko.gz
	module=${module%%.ko*}
	${MODPROBE} "${module}" 2>/dev/null
    done
}

# Load the KLIPS stack and, for protostack=klips, create the ipsecX
# devices, attach them to their physical devices and copy the addresses.
# Globals:   MODPROBE, xfrm_stat, klipsstack, stack, limit, overridemtu,
#            fragicmp, hidetos (read); interfaces (read, defaulted to
#            "%defaultroute" when empty)
# Outputs:   warnings and failures on stderr
# Returns:   exits the script with status 1 when the KLIPS module cannot be
#            loaded, or when no interfaces are configured and no default
#            route exists
startklips() {
    local defphys phys virt iface
    cryptomodules
    if [ -f "${xfrm_stat}" ]; then
	echo "Warning: found possible XFRM IPsec stack loaded - attempting to unload..." >&2
	stopnetkey --wait
    fi
    if [ ! -f "${klipsstack}" ]; then
	# modprobe wants the bare module name, without .o or .ko
	# (MODPROBE is a command plus options and must stay unquoted)
	${MODPROBE} ipsec

	if [ ! -f "${klipsstack}" ]; then
	    echo "FAILURE to load KLIPS module" >&2
	    exit 1
	fi

	if [ -d /sys/module/ocf ]; then
	    # limit is set by cryptomodules when /sys/module/ocf exists
	    echo "${limit}" >/sys/module/ipsec/parameters/ipsec_irs_cache_allocated_max
	    echo "${limit}" >/sys/module/ipsec/parameters/ipsec_ixs_cache_allocated_max
	fi
    fi

    if [ "${stack}" = "klips" ]; then
	# in klips mode we attach it to the physical device
	# clear tables out in case dregs have been left over
	ipsec eroute --clear
	ipsec spi --clear
	# detach all ipsecX devices
	ipsec tncfg --clear

	# Device of the (first) default route.  Kept separate from phys so
	# that a virt=phys entry cannot change what %defaultroute means.
	defphys=$(ip ro li | grep default | sed "s/^.* dev \([^ ]*\) .*$/\1/" | head -n 1)

	if [ -z "${interfaces}" ]; then
	    interfaces="%defaultroute"
	    if [ -z "${defphys}" ]; then
		echo "ERROR: no default route found and no interfaces= option set" >&2
		exit 1
	    fi
	fi

	# interfaces is a whitespace separated list: splitting is intended
	for iface in ${interfaces}; do
	    if [ "${iface}" = "%none" ]; then
		echo "ignoring obsolete interface %none" >&2
		continue
	    elif [ "${iface}" = "%defaultroute" ]; then
		virt="ipsec0"
		phys="${defphys}"
	    else
		# entry has the form virtual=physical
		virt="$(expr "${iface}" : '\([^=]*\)=.*')"
		phys="$(expr "${iface}" : '[^=]*=\(.*\)')"
	    fi
	    # ipsecX might not exist yet
	    if [ ! -d "/sys/devices/virtual/net/${virt}" ]; then
		ipsec tncfg --create "${virt}"
	    fi
	    ipsec tncfg --attach --virtual "${virt}" --physical "${phys}"

	    # configure all the IPv4/IPv6 addresses (including point-to-point)
	    # awk turns every non-dynamic address line of the physical
	    # device into an "ip addr add ... dev <virt>" command and runs it
	    ip addr show dev "${phys}" label "${phys}" | \
		awk '$1 == "inet" || ($1 == "inet6" && !/ dynamic/) {
		    cmd = "ip addr add"
		    if ($1 == "inet")
			sub(" [^ ]+:[^ ]+"," ")
		    sub("secondary","")
		    sub("/.*","",$2)
		    sub("dynamic","")
		    for (i = 2; i < NF; i++) {
			if ($i == "brd" || $i == "peer" || $i == "secondary")
			    i++
			else
			    cmd = cmd " " $i
		    }
		    if ($NF != phys)
			cmd = cmd " " $NF
		    cmd = cmd " dev " virt ">/dev/null 2>/dev/null"
		    system(cmd)
		}' phys="${phys}" virt="${virt}"
	    ip link set up dev "${virt}"

	    if [ -n "${overridemtu}" ]; then
		ip link set mtu "${overridemtu}" dev "${virt}"
	    elif ip link show dev "${virt}" | grep -q 'mtu 0 '; then
		# Double check the mtu is not 0
		# if it is set it to a saner default
		echo "Fixup of mtu on ${virt} to 16260" >&2
		ip link set mtu 16260 dev "${virt}"
	    fi
	done

    elif [ "${stack}" = "none" ]; then
	echo "protostack=none selected - No IPsec SA's will be injected into the kernel" >&2
    fi

    # global module options for KLIPS
    if [ "${fragicmp}" = "yes" ]; then
	echo 1 >/proc/sys/net/ipsec/icmp
    fi
    if [ "${hidetos}" = "no" ]; then
	echo 0 >/proc/sys/net/ipsec/tos
    fi
}

# Start the actual work

# Bring up the configured stack.  Shared by the "start" and "restart"
# actions; protostack=none needs no kernel setup.
start() {
    case "${stack}" in
	netkey)
	    startnetkey
	    ;;
	klips)
	    startklips
	    ;;
    esac
}

if [ "$(id -u)" -ne 0 ]; then
    echo "permission denied (must be superuser)" >&2
    exit 4
fi

if [ "$2" = "--netkey" ] || [ "$2" = "--xfrm" ]; then
	# manual override for use in docker
	stack=netkey
else
	stack="$(ASAN_OPTIONS=detect_leaks=0 ipsec addconn --config "${IPSEC_CONF}" --liststack | grep -v "#")"
fi

case "${stack}" in
    netkey|klips|none)
	;;
    auto)
	echo "protostack=auto is not longer supported, defaulting to netkey" >&2
	stack=netkey
	;;
    *)
	echo "unknown stack ${stack}" >&2
	exit 1
	;;
esac

case "${action}" in
    stop)
	# We don't unload XFRM IPsec on stop - only when we detect a stack
	# change.
	if [ -f "${klipsstack}" ]; then
		ipsec eroute --clear
		# this clears all IP addresses on ipsecX interfaces by
		# unloading the module
		stopklips
	elif [ -f "${xfrm_stat}" ]; then
		ip xfrm state flush
		ip xfrm policy flush
		# module unloading skipped on purpose - can hang for a long
		# time or fail
	fi
	;;
    start)
	start
	;;
    restart)
	# full teardown of both stacks, then bring the configured one up
	stop
	start
	;;
    *)
	echo "unknown action ${action}"  >&2
	exit 1
	;;
esac

exit 0
