#!/bin/bash
# (using bash to support {brace,expansion})
#
# Automate retraining, via sa-learn, of spam or ham
# that has been moved into a retrain folder.
#
# uses inotifywait to trigger running this when messages arrive
# set up a cronjob to ensure that this is started
#   $ crontab -l|grep retrain
#   # ensure that the mail retrainer is running
#   15 09 * * * /home/stewart/bin/retrain -q -b
#
# Author: Stewart Gebbie <sgebbie@gethos.net>

# Size limit handed to sa-learn via --max-size: 10 MiB, in bytes.
MAX_MAIL_SIZE=$(( 10 * 1024 * 1024 ))

# Mail folder layout, all rooted in the user's home directory.
MAIL="${HOME}/mail"
# Parent of the two retrain folders that are watched and emptied.
RETRAIN="${MAIL}/retrain"
# Messages dropped here are learnt as ham and then moved to the inbox.
RHAM="${RETRAIN}/ham"
# Messages dropped here are learnt as spam and then moved to the spam folder.
RSPAM="${RETRAIN}/spam"
# Destination for messages learnt as spam.
SPAM="${MAIL}/spam"
# Destination for messages learnt as ham.
INBOX="${HOME}/.maildir"

# PID file written by the monitor loop and consulted by start-stop-daemon.
PID="${HOME}/bin/.retrain.pid"
# Daemon helper used for --background, --kill and --status.
SSD="/sbin/start-stop-daemon"
# Value passed to inotifywait -t: seconds to wait for an event.
TIMEOUT="300"

# Move every message file found under the cur/ and new/ subfolders of a
# source folder into one subfolder of a destination folder.
#
# Each message is first moved into the destination's tmp/ and then renamed
# into place, so it only ever appears in the target subfolder complete.
# Paths are read NUL-delimited, so names containing whitespace or glob
# characters are handled correctly.
#
# Arguments:
#   $1 - source folder (searched for files under */cur/* and */new/*)
#   $2 - destination folder (must contain tmp/ and the target subfolder)
#   $3 - target subfolder name inside the destination (e.g. cur or new)
# Outputs:
#   one "moving ..." line per message on stdout
function _move_mail {
	local src=$1 dst=$2 sub=$3
	local msg name

	while IFS= read -r -d '' msg; do
		echo "moving ${msg} to ${dst}/${sub}"
		name=${msg##*/}
		# only rename into place if the move into tmp/ succeeded
		mv -- "${msg}" "${dst}/tmp/${name}" \
			&& mv -- "${dst}/tmp/${name}" "${dst}/${sub}/${name}"
	done < <( find -H "${src}" -type f -a \( -path '*/cur/*' -o -path '*/new/*' \) -print0 )
}

# Retrain and move logic.
#
# Feeds the contents of the ham and spam retrain folders to sa-learn, syncs
# the sa-learn database, and then empties the retrain folders: spam goes to
# ${SPAM}/cur and ham goes back to ${INBOX}/new.
#
# Globals (read): MAX_MAIL_SIZE, RHAM, RSPAM, SPAM, INBOX
function _retrain {
	# retrain (sync once at the end rather than after each run)
	sa-learn --max-size "${MAX_MAIL_SIZE}" --no-sync --ham "${RHAM}/cur" "${RHAM}/new"
	sa-learn --max-size "${MAX_MAIL_SIZE}" --no-sync --spam "${RSPAM}/cur" "${RSPAM}/new"
	sa-learn --sync

	# move mail to spam
	_move_mail "${RSPAM}" "${SPAM}" cur

	# move mail back to inbox
	_move_mail "${RHAM}" "${INBOX}" new
}

# Continuously watch the retrain folders.
#
# Records this shell's PID in ${PID}, then loops forever: block in
# inotifywait until a message is moved, created or modified under the
# ham/spam retrain folders (or until ${TIMEOUT} seconds pass), then run a
# retrain pass. A pass is also run after a timeout, so anything that
# arrived while the previous pass was busy still gets picked up.
#
# Globals (read): PID, TIMEOUT, RETRAIN
# Never returns.
function _monitor {
	local rc

	echo $$ > "${PID}"
	while true; do
		rc=0
		inotifywait -q -t "${TIMEOUT}" -e move,create,modify -r "${RETRAIN}"/{ham,spam}/{cur,new} || rc=$?
		# inotifywait exits 0 on an event and 2 on timeout; anything else is
		# an error. Sleep before the pass so that a persistent failure
		# degrades to polling instead of a busy loop.
		if (( rc != 0 && rc != 2 )); then
			echo "inotifywait failed (exit status ${rc}); waiting ${TIMEOUT}s before retraining" >&2
			sleep "${TIMEOUT}"
		fi
		_retrain
	done
}

# Parse options.
#
# Options are acted on in the order given:
#   -q, --quiet       discard everything subsequently written to stdout
#   -m, --monitor     run the watch loop in the foreground (does not return)
#   -b, --background  start "retrain --quiet --monitor" as a daemon
#   -k, --kill        stop the daemon recorded in the PID file
#   -s, --status      report whether the daemon is running
# With no action option, a single retrain pass is run.
# An unrecognised argument aborts with exit status 2 before any mail is
# touched; the message goes to stderr so that --quiet cannot hide it.
for opt in "$@"; do
	case "${opt}" in
		-q|--quiet)
		# redirect stdout to /dev/null
		exec 1<>/dev/null
		;;
		-m|--monitor)
		_monitor
		exit 0
		;;
		-b|--background)
		# --background --no-close
		exec "${SSD}" --pidfile "${PID}" --startas "${HOME}/bin/retrain" --retry 5 --background --start -- --quiet --monitor
		;;
		-k|--kill)
		exec "${SSD}" --pidfile "${PID}" --startas "${HOME}/bin/retrain" --retry 5 --stop
		;;
		-s|--status)
		exec "${SSD}" --pidfile "${PID}" --startas "${HOME}/bin/retrain" --status
		;;
		*)
		echo "Bad command line argument: ${opt}" >&2
		exit 2
		;;
	esac
done

# no action option given: run one retrain pass
_retrain
exit 0

## Notes ##
#
# set std{err,out} file descriptors in the current shell
# Close STDOUT file descriptor
#exec 1<&-
# Close STDERR FD
#exec 2<&-
# Open STDOUT as $LOG_FILE file for read and write.
#exec 1<>$LOG_FILE
# Redirect STDERR to STDOUT
#exec 2>&1

