#!/bin/bash

# usage: mybackup [host]

# Config file: ~/.mybackup
# Format:      One backup source per line
#              host:dir/	/home/user/backup/  [options]
#              The source dir should have a trailing slash
#              The second dir should be the parent backup dir, with trailing 
#              slash
#              The options are comma-separated, e.g. nofilter,hn=pants
# Behaviour:   By default this script will:
#                  - Ping the host to see if it is online; skip it if it is not
#                  - Run 'hostname' on the remote host (see below)
#                  - Rotate the backup directories (it keeps 4 by default)
#                  - Rsync the latest changes (backups are incremental).
#					
# Options:     noping  : do not ping this host before attempting to backup
#              nofilter: do not add the -F flag, some older rsync clients
#                          don't support it.
#              hn=foo  : use 'foo' as the remote hostname in the hostname check
# 
# Command line argument: Optionally you may provide a hostname on the command 
# line and the script will only backup directories from that host (they still 
# need to be defined in the config file). e.g. 'mybackup mail.example.com'
# 
# Rationale:
# Hostname Check: When I take my laptop home, it updates the dynamic dns service
# but that IP is my home gateway. I want to make sure that when this script
# connects it knows it is on the right machine.
# The hostname check verifies that the result of 'hostname' on the remote host 
# matches the full hostname in the config file.
# The 'hn=foo' option I use for my home machine, which has a dynamic dns of
# bar.example.com but actually has a local hostname of 'foo'.
#
# e.g. mail.student.columbia.edu:mail/ 	/home/oneill/backup/	noping,nofilter
# This is for a machine which is firewalled and doesn't respond to pings, and
# has an ancient rsync client installed.

# Notes on security:
# 	I have setup a single-purpose ssh key pair which I install on the backup
# 	source machines. To create one, do 
#        'ssh-keygen -t dsa' and save to $HOME/.ssh/rsync
#   Then add this text at the beginning of the public key, ~/.ssh/rsync.pub
#     no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty,command="/home/user/local/bin/secure-rsync"
#	Then add that public key to ~/.ssh/authorized_keys on the REMOTE hosts
#        scp .ssh/rsync.pub remotehost:
#    on remote host:
#         cat rsync.pub >> .ssh/authorized_keys
#         chmod 600 .ssh/authorized_keys
# And make sure the file 'secure-rsync' is installed on the remote host at
# ~/local/bin

# TODO:
# I could probably merge the ping and hostname check, they address different
# aspects of the same problem, namely whether I should do the directory
# rotation or not (don't want to do it if the rsync is likely to fail)

# Verbosity switch: set to 1 for a little more output
DEBUG=0

# Number of incremental backups to keep. This should be able to go quite
# large since hard links keep space down
NUM_SNAPSHOTS=4

# The command line argument: when given, only this host is processed
ONLYHOST="$1"

# Config file listing the backup sources
DIRFILE="${HOME}/.mybackup"
# Default backup destination
DEFAULT_DEST="${HOME}/backup"

# rsync flags used by this script:
#    -a: archive mode, i.e. recursive, links, etc
#    -h: human readable
#    -F: look for and use .rsync-filter files in the source
#    --rsh (-e): use the single-purpose ssh key
#    --delete: delete files in the backup that aren't present in the source
#    --progress: some useful progress output, turned on if $TERM is set
RSYNC_CMD="rsync"
RSH="ssh -i ${HOME}/.ssh/rsync"

# Only ask for progress output when $TERM is non-empty
case "$TERM" in
	"")
	;;
	*)
		PROGRESS="--progress"
	;;
esac

# The options are kept in an array because quotes alone aren't sufficient to
# keep the 'ssh -i ...' remote shell command together as a single argument.
# $PROGRESS is deliberately unquoted so it vanishes when it is empty.
RSYNC_OPTS=( -ah -e "$RSH" --delete $PROGRESS )


 
# Optional per-user overrides: if ~/.mybackuprc exists it is sourced here and
# may reassign variables set earlier in this script. Note that RSYNC_OPTS has
# already been assembled at this point, so overriding RSH or PROGRESS alone
# does not change it.
# The paths are quoted so that a $HOME containing whitespace is still tested
# and sourced as a single path.
if [ -f "$HOME/.mybackuprc" ]; then
	. "$HOME/.mybackuprc"
fi

# Without the config file there is nothing to back up, so stop early.
# Quoting also makes an empty $DIRFILE fail this test instead of slipping
# through (an unquoted empty value turns the test into '[ -f ]', which is true).
if ! [ -f "$DIRFILE" ]; then
	echo "Backup config file $DIRFILE not found"
	exit 255    # same status the former 'exit -1' produced, spelled portably
fi

# Read the config file up front and save the backup sources in the 'lines'
# array. The sources are not processed directly inside a while/read loop
# because ssh reads from stdin: it would swallow the rest of the config file
# and only the first iteration would ever complete.

# Load the backup sources from a config file into the global array 'lines'.
# Arguments: $1 - path of the config file
# Sets:      lines - one element per non-blank, non-comment line, with leading
#                    and trailing whitespace stripped by 'read'
# Returns:   non-zero if the file cannot be opened
_mybackup_read_sources() {
	local config_file="$1"
	local entry
	lines=( )  # array of backup sources
	# -r keeps backslashes literal; the '|| [ -n ... ]' part makes sure a final
	# line that lacks a trailing newline is still processed
	while read -r entry || [ -n "$entry" ]; do
		case "$entry" in
			# skip blank lines and commented lines ('#')
			''|'#'*)
				continue
			;;
		esac
		# append in place rather than rebuilding the whole array each time
		lines[${#lines[@]}]="$entry"
	done < "$config_file"
}

_mybackup_read_sources "$DIRFILE"

# Parse the comma-separated option field of one config line.
# Arguments: $1 - host name of the backup source
#            $2 - option string, e.g. "noping,hn=foo" (may be empty)
# Sets:      PING               - 1 to ping the host first, 0 for 'noping'
#            FILTER             - "-F", or "" for 'nofilter'
#            HOSTNAME_FOR_CHECK - name expected from 'hostname' on the remote
#                                 host; defaults to the first dot-separated
#                                 label of $1, overridden by 'hn=foo'
_mybackup_parse_options() {
	local host="$1" optstring="$2"
	local o
	local -a opts
	FILTER="-F"
	PING=1
	HOSTNAME_FOR_CHECK="${host%%.*}"
	IFS=',' read -r -a opts <<< "$optstring"
	for o in "${opts[@]}"; do
		case "$o" in
			noping)
				PING=0
			;;
			nofilter)
				FILTER=""
			;;
			hn=*)
				# Take the value from this option only, not from the whole
				# option string, so "hn=foo,noping" yields "foo"
				HOSTNAME_FOR_CHECK="${o#hn=}"
			;;
		esac
	done
}

# Now go through the backup sources and process them.
# Iterating over the array directly (rather than over 'seq' indices) also
# copes with an empty list on systems whose seq counts downwards.
for line in "${lines[@]}"; do
	# Fields of a config line: source, parent backup dir, options
	read -r src dest opt rest <<< "$line"
	# Name of the snapshot directory: last path component of the source
	dir=$(basename "$src" | awk -F ':' '{print $NF}' | awk -F '/' '{print $NF}')
	if [ -z "$dest" ]; then
		dest=$DEFAULT_DEST
	fi

	# FIND THE HOSTNAME FOR THIS BACKUP SOURCE
	# (text before the first ':', with any leading 'user@' removed)
	host="${src%%:*}"
	host="${host##*@}"
	# ONLYHOST is the hostname specified on the command line. If set then only
	# process that host
	if [ -n "$ONLYHOST" ] && [ "$ONLYHOST" != "$host" ]; then
		continue
	fi
	dest="$dest/$host"

	echo "Backup source: $src"

	# PARSE THE OPTION STRING (SPLIT BY COMMAS)
	_mybackup_parse_options "$host" "$opt"

	if [ "${DEBUG:-0}" -eq 1 ]; then
		echo "        ping:   $PING"
		echo "        filter: '$FILTER'"
		echo "        hn:     '$HOSTNAME_FOR_CHECK'"
	fi

	# PING HOST TO SEE IF IT'S ONLINE
	if [ "$PING" -eq 1 ]; then
		echo -n "    Pinging $host to determine status... "
		if ping -c 1 -t 30 -W 1 "$host" > /dev/null; then
			echo "online"
		else
			echo "offline"
			echo "    WARNING: $host not found, not attempting a backup" >&2
			continue
		fi
	else
		echo "    Skipping ping ('noping' option)"
	fi

	# RUN 'hostname' ON REMOTE HOST TO CHECK IT IS THE MACHINE I THINK IT IS
	# Make sure the name of the machine is what I expect (to avoid trying to
	# backup my laptop when it is on my home network)
	# User can override the hostname with 'hn=foo' in options field
	# The 'continue' must run in the main shell: inside a '( ... )' subshell
	# it cannot skip this loop iteration and the backup would go ahead anyway.
	# grep prints the matching hostname, completing the line started below.
	echo -n "    Checking hostname: "
	if ! ssh -i ~/.ssh/rsync "$host" hostname | grep -- "$HOSTNAME_FOR_CHECK"; then
		echo "    Host is not $HOSTNAME_FOR_CHECK, skipping host"
		continue
	fi

	# -p means make directory and parents if necessary
	if ! mkdir -p "$dest"; then
		echo "    WARNING: cannot create $dest, skipping $src" >&2
		continue
	fi

	# SHUFFLE THE EXISTING BACKUPS
	if [ -e "$dest/$dir.$NUM_SNAPSHOTS" ]; then
		echo "    Deleting oldest backup $dir.$NUM_SNAPSHOTS"
		rm -rf "$dest/$dir.$NUM_SNAPSHOTS"
	fi
	# Work from the oldest snapshot down so nothing is overwritten
	for (( j = NUM_SNAPSHOTS; j >= 1; j-- )); do
		i=$(( j - 1 ))
		if [ -e "$dest/$dir.$i" ]; then
			echo "    Found old backup $dir.$i, moving to $dir.$j"
			mv "$dest/$dir.$i" "$dest/$dir.$j"
		fi
	done

	echo "    Backing up dir: $src to $dest/$dir.0"
	# Keep the arguments in an array so paths containing whitespace survive.
	# --link-dest is relative to the destination directory, hence '../'.
	RSYNC_ARGS=( "--link-dest=../$dir.1" "$src" "$dest/$dir.0/" )
	echo "    $RSYNC_CMD ${RSYNC_OPTS[*]} $FILTER ${RSYNC_ARGS[*]}"
	# $RSYNC_CMD is left unquoted so it may carry extra words; an empty
	# FILTER expands to nothing instead of an empty argument
	$RSYNC_CMD "${RSYNC_OPTS[@]}" ${FILTER:+"$FILTER"} "${RSYNC_ARGS[@]}"
	rsync_status=$?   # capture immediately, before another command resets $?
	echo "Return value: $rsync_status"
	echo "====================================================================="
	echo
done

echo "Backup run completed"

