#!/usr/bin/perl -w

# a Hobbit/Xymon plugin to check the status of BackupPC
#
# Based on http://n-backuppc.cvs.sf.net/viewvc/n-backuppc/check_backuppc/check_backuppc?revision=1.21
#
# Tested against BackupPC 2.1.2 and 3.1.0
#   <http://backuppc.sourceforge.net>
#
# Copyright (C) 2006, 2007 Seneca Cunningham <tetragon@users.sourceforge.net>
# Copyright (C) 2011 Axel Beckert <abe@debian.org>
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#

# NOTE(review): $config is not referenced anywhere else in the visible script
# and its path still ends in a "TODO" placeholder -- confirm whether it is
# still needed.
my $config = "/etc/backuppc/TODO";
use strict;
use 5.010;
use lib '/usr/share/backuppc/lib/';
use BackupPC::Lib;
use Hobbit;

# Run with a fixed search path and the C locale, and drop the environment
# variables that could influence any shell that gets spawned.
$ENV{'PATH'} = '/bin:/sbin:/usr/bin:/usr/sbin';
$ENV{'LC_ALL'} = 'C';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

# Report object that collects every status line of this check until
# $bb->send is called.  Built with an explicit class-method call instead of
# the ambiguous indirect-object form "new Hobbit(...)".
# NOTE(review): 'bkpc' is presumably the Hobbit/Xymon test (column) name and
# dont_moan a Hobbit.pm flag -- confirm against that module's documentation.
my $bb = Hobbit->new({ test => 'bkpc', dont_moan => 1 });

#no utf8;

# Process exit statuses, keyed by severity name.  Note that UNKNOWN and
# WARNING deliberately map to the same value here.
my %ERRORS = (
    OK       => 0,
    UNKNOWN  => 1,
    WARNING  => 1,
    CRITICAL => 2,
);

use POSIX qw(strftime difftime);
use Getopt::Long;
# Let single-letter options be bundled on the command line.
Getopt::Long::Configure('bundling');

# Plugin version, shown by --version and at the top of the usage text.
my $version = '1.1.1';

# Age thresholds in days: --warning and --critical.
my ($warnLevel, $daysOld) = (0, 7);

# Verbosity starts at 1; every -v/--verbose adds one.
my $verbose = 1;

# --version / --help flags and the overall result of option parsing.
my ($opt_V, $opt_h, $goodOpt) = (0, 0, 0);

# --reduce: number of failed hosts below which the severity is reduced.
my $reduce = 0;

# Mode switches: --backup-only, --archive-only, --status-only.
my ($backupOnly, $archiveOnly, $statusOnly) = (0, 0, 0);

# Host lists: --hostname, --exclude, --always-critical, --only-warning.
my (@hostsDesired, @hostsExcluded, @hostsAlwaysCritical, @hostsOnlyWarning);

# Process options.  Every option has a short and a long spelling bound to the
# same variable; $goodOpt ends up false when GetOptions rejects the command
# line.  The long form of --reduce takes an integer just like -r does
# (without "=i" it acted as a plain flag and never consumed its value).
$goodOpt = GetOptions(
    'v+' => \$verbose, 'verbose+' => \$verbose,
    'c=f' => \$daysOld, 'critical=f' => \$daysOld,
    'w=f' => \$warnLevel, 'warning=f' => \$warnLevel,
    'V' => \$opt_V, 'version' => \$opt_V,
    'h' => \$opt_h, 'help' => \$opt_h,
    'r=i' => \$reduce, 'reduce=i' => \$reduce,
    'b' => \$backupOnly, 'backup-only' => \$backupOnly,
    'a' => \$archiveOnly, 'archive-only' => \$archiveOnly,
    's' => \$statusOnly, 'status-only' => \$statusOnly,
    'H=s' => \@hostsDesired, 'hostname=s' => \@hostsDesired,
    'x=s' => \@hostsExcluded, 'exclude=s' => \@hostsExcluded,
    'C=s' => \@hostsAlwaysCritical, 'always-critical=s' => \@hostsAlwaysCritical,
    'W=s' => \@hostsOnlyWarning, 'only-warning=s' => \@hostsOnlyWarning,
);

# --version: print the version banner and leave with the OK status.
if ($opt_V) {
    say "check_backuppc - $version";
    exit $ERRORS{'OK'};
}

# The two host-type filters exclude each other; treat asking for both as a
# usage error so that the help text below is shown.
if ($backupOnly && $archiveOnly) {
    $goodOpt = 0;
    say for (
        "Cannot apply both --backup-only and --archive-only, contradictory",
        "",
    );
}

# Usage text: shown on request (exit OK) or after bad options (exit UNKNOWN).
if ($opt_h || !$goodOpt) {
    my @usage = (
        "check_backuppc - " . $version,
        "A Hobbit plugin to check on BackupPC backup status.",
        "",
        "Options:",
        "  --hostname,-H      only check the specified host",
        "  --exclude,-x       do not check the specified host",
        "  --always-critical,-C  failures of this host always become 'red' immediately",
        "  --only-warning,-W     failures of this host never become 'red' and stay 'yellow'",
        "  --archive-only,-a  only check the archive hosts",
        "  --backup-only,-b   only check the backup hosts",
        "  --status-only,-s   only check backup status, omit connection failures that are",
        "                     less than \$Conf{FullPeriod} old",
        "  --warning,-w       days old an errored host must be to cause a warning",
        "  --critical,-c      number of days old an errored backup must be to be critical",
        "  --reduce,-r        maximum number of failed hosts for severity reduction",
        "  --verbose,-v       increase verbosity",
        "  --version,-V       display plugin version",
        "  --help,-h          display this message",
        "",
    );
    say for @usage;
    exit($goodOpt ? $ERRORS{'OK'} : $ERRORS{'UNKNOWN'});
}

# A warning threshold above the critical one is flagged in the report as a
# red line; the check itself still carries on afterwards.
$bb->color_line('red', "CONFIGURATION ERROR - Warning threshold must be <= critical\n")
    if $warnLevel > $daysOld;

# Create the BackupPC library handle; without it nothing else can be done,
# so report red and leave with the CRITICAL status.
my $server = BackupPC::Lib->new;
unless ($server) {
    $bb->color_line('red', "Couldn't connect to BackupPC\n");
    $bb->send;
    exit $ERRORS{'CRITICAL'};
}

# Load the BackupPC configuration; ServerHost/ServerPort from it say where
# the server is reached.
my %Conf = $server->Conf();
$server->ChildInit();

# ServerConnect hands back an error text on failure and a false value on
# success.
my $err = $server->ServerConnect(@Conf{qw(ServerHost ServerPort)});
if ($err) {
    $bb->color_line('red', "Can't connect to server ($err)\n");
    $bb->send;
    exit $ERRORS{'UNKNOWN'};
}

# Hashes that BackupPC uses for various status info.  The server replies are
# Data::Dumper text that is expected to assign to these names, so they have
# to stay file-scoped lexicals with exactly these spellings.
my %Status;
my %Jobs;
my %Info;

# Query the BackupPC server for server, job, and host info.
my $info_raw = $server->ServerMesg('status info');
my $jobs_raw = $server->ServerMesg('status jobs');
my $status_raw = $server->ServerMesg('status hosts');

# Undump the output... BackupPC uses Data::Dumper.
# NOTE(review): string eval runs whatever the server sent; that is only
# acceptable because the BackupPC server is trusted.
# Every reply is checked: a missing or unparsable one would otherwise leave
# %Status empty and make the plugin report "OK" with 0/0 failures.
my @undumpErrors;
foreach my $reply (
    [ 'status info',  $info_raw ],
    [ 'status jobs',  $jobs_raw ],
    [ 'status hosts', $status_raw ],
) {
    my ($query, $dump) = @{$reply};
    if (!defined $dump) {
        push @undumpErrors, "no reply to '$query'";
        next;
    }
    eval $dump;
    if ($@) {
        (my $why = $@) =~ s/\s+\z//;
        push @undumpErrors, "unparsable reply to '$query': $why";
    }
}

if (@undumpErrors) {
    $bb->color_line('red', "Can't read BackupPC status (" . join('; ', @undumpErrors) . ")\n");
    $bb->send;
    exit $ERRORS{'UNKNOWN'};
}

# Results of the host scan:
#   $hostCount  hosts that passed the command-line filters
#   @goodHost   hosts considered fine
#   %badHost    failing host => 'red', 'yellow' or 'yellow_or_red'
#   @tooOld     failing hosts whose last good backup is older than --critical
#   @notTooOld  failing hosts older than --warning but not yet critical
my $hostCount = 0;
my @goodHost;
my %badHost;
my @tooOld;
my @notTooOld;

# host status checks
foreach my $host (sort(keys(%Status)))
{
    # Entries whose key starts with a blank are not checked.
    next if $host =~ /^ /;

    my $hostStatus = ($Status{$host} //= {});
    my $hostType = $hostStatus->{'type'} // '';

    # --hostname/--exclude: a value selects this host when the host name
    # occurs in it.  The name is quoted so that "." and friends are taken
    # literally instead of as regex syntax.
    my $hostName = qr/\Q$host\E/;
    next if (@hostsDesired and not grep { $_ =~ $hostName } @hostsDesired);
    next if (@hostsExcluded and grep { $_ =~ $hostName } @hostsExcluded);
    next if ($backupOnly and $hostType eq 'archive');
    next if ($archiveOnly and $hostType ne 'archive');
    $hostCount++;

    my $error = $hostStatus->{'error'};
    my $state = $hostStatus->{'state'} // '';

    # Debug
    if ($verbose == 3) {
        $bb->print("Host $host state " . $state);
        $bb->print(" with error: " . ($error // '') . "\n");
    }

    # No error recorded: nothing more to look at.
    if (!$error) {
        push @goodHost, $host;
        next;
    }

    # Check connectivity errors with greater care
    if ($statusOnly && (
            $error eq 'ping too slow' ||
            $error eq 'no ping response' ||
            $error eq 'host not found')) {
        # NOTE(review): this difference is negative whenever the last good
        # backup predates startTime, which always satisfies the test --
        # confirm that this direction is intended.
        if (($hostStatus->{'lastGoodBackupTime'} // 0) - ($hostStatus->{'startTime'} // 0)
            <= $Conf{FullPeriod} * 3600 * 24) {
            push @goodHost, $host;
            next;
        }
    }

    # Check high and low priority hosts
    if (grep { $_ eq $host } @hostsAlwaysCritical) {
        $badHost{$host} = 'red';
    } elsif (grep { $_ eq $host } @hostsOnlyWarning) {
        $badHost{$host} = 'yellow';
    } else {
        $badHost{$host} = 'yellow_or_red';
    }

    # Check bad host ages; without any good backup the start time is used.
    $hostStatus->{'lastGoodBackupTime'} = $hostStatus->{'startTime'}
        if (not $hostStatus->{'lastGoodBackupTime'});
    my $age = difftime(time(), $hostStatus->{'lastGoodBackupTime'} // 0);

    if ($age > ($daysOld * 3600 * 24)) {
        push @tooOld, $host;
    }
    elsif ($age > ($warnLevel * 3600 * 24)) {
        push @notTooOld, $host;
    }
    else {
        # Recent enough: ordinary hosts are forgiven, always-critical ones
        # are downgraded to yellow.
        if ($badHost{$host} and $badHost{$host} =~ /yellow/) {
            push @goodHost, $host;
            delete $badHost{$host};
        } else {
            $badHost{$host} = 'yellow';
        }
    }

    # Debug
    if ($verbose == 2) {
        $bb->print("Host $host state " . $state);
        $bb->print(" with error: " . $error . "\n");
    }
}

# Short cut if there are no bad hosts: either every checked host is good, or
# fewer than --reduce hosts failed and none of them is past the critical age.
if ($hostCount == @goodHost or (keys(%badHost) < $reduce and not @tooOld)) {
    $bb->color_line('green', "BackupPC OK - (" . keys(%badHost) . "/" . $hostCount . ") failures\n");
    list_unknown_hosts();
    $bb->send;
    exit $ERRORS{'OK'};
}

# Plain call instead of "&list_unknown_hosts;", which would implicitly hand
# the current @_ to the sub.
list_unknown_hosts();

# Only failures reach this far
# WARNING: no host is past the critical age, or fewer than --reduce hosts
# failed.
if ($#tooOld < 0 or keys(%badHost) < $reduce) {
    if ($verbose) {
        # One line per failing host, sorted so the report is stable between
        # runs; undecided hosts ('yellow_or_red') are shown yellow here.
        foreach my $host (sort keys %badHost) {
            my $color = $badHost{$host};
            $color = 'yellow' if $color eq 'yellow_or_red';
            $bb->color_line($color, $host . " (" . $Status{$host}{'error'} . ")\n");
        }
    }
    elsif (my $red = grep { $_ eq 'red' } values %badHost) {
        # Summary only: red count first, then whatever is left as yellow.
        $bb->color_line('red', $red  . "/" . $hostCount .
                        ") critical failures\n");
        my $yellow = keys(%badHost) - $red;
        if ($yellow) {
            # The yellow line reports the yellow count (it used to repeat
            # the red one).
            $bb->color_line('yellow', $yellow  . "/" . $hostCount .
                            ") less critical failures\n");
        }
    }
    else {
        $bb->color_line('yellow', keys(%badHost)  . "/" . $hostCount . ") failures\n");
    }

    $bb->send;
    exit $ERRORS{'WARNING'};
}

# CRITICAL: at least one host is past the critical age and the --reduce
# limit did not apply.
if ($verbose) {
    # One line per aged host, the not-yet-critical ones first.  Hosts marked
    # plain 'yellow' (--only-warning) stay yellow, everything else is red.
    foreach my $host (@notTooOld, @tooOld) {
        my $color = $badHost{$host} eq 'yellow' ? 'yellow' : 'red';
        $bb->color_line($color, $host . " (" . $Status{$host}{'error'} . ")\n");
    }
}
else {
    $bb->print(keys(%badHost) . "/" . $hostCount . ") failures, ");
    # Hand the count itself to the report; the nested print() used before
    # wrote it to STDOUT and passed print's return value on instead.
    $bb->print(scalar(@tooOld) . " critical\n");
}

$bb->send;
exit $ERRORS{'CRITICAL'};

# Add a 'clear' line to the report for every host named with --hostname or
# --exclude that does not occur in any host name BackupPC reported.
# Takes no arguments and returns nothing; reads @hostsDesired, @hostsExcluded
# and %Status, and writes to $bb.
sub list_unknown_hosts {
    my @configured = keys(%Status);

    foreach my $host (@hostsDesired, @hostsExcluded) {
        # The name comes from the command line, so quote it: it is meant as
        # a literal host name, not as a regular expression.
        next if grep { /\Q$host\E/ } @configured;
        $bb->color_line('clear', "Host expected but not configured ($host)\n");
    }

    return;
}
