#!/usr/bin/perl -w
# $Id: bindzone_integrity.pl,v 1.14 2002/11/20 11:16:32 phil Exp $
#
# bindzone_integrity.pl -- perform bind zonefile checks; geared for use with
# vircs.
# Copyright 2001, 2002 Phil Pennock.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# TODO:
#  Handle $INCLUDE
#  Handle semi-colons & octothorpes inside strings

# ######################################################################

# Any of these may be 'undef' which means 'no limit applies'
my $default_ttl_min	= '5M';
my $expiretime_warn	= '3H';
my $expiretime_desire	= '14D';

# ######################################################################

use strict;
use IO::File;
use Getopt::Std;
use POSIX 'strftime';

$| = 1;

# $INCLUDE <filename> <opt_domain>
# $ORIGIN <domain>
# <domain> <opt_ttl> <opt_class> <type> <resource_record_data>

sub waffle ($);
sub gah ($);
sub usage ();
sub strvis ($);
sub time_norm ($);
sub dollar_include ($);
sub dollar_origin ($);
sub dollar_ttl ($);
sub handle_ip ($$$$);
sub handle_ip6 ($$$$);
sub handle_host ($$$$;$);
sub host_syntax ($$$);
sub handle_mx ($$$$);
sub handle_text ($$$$);
sub handle_null ($$$$);
sub process_special ($$);
sub process_soa ($);
sub semantics_check ();
sub dump_item ($);
sub parse_zonefile ($);
sub perform_diffs ($$);
sub extract_serial ($);
sub suggest_serial ($);

my @fields = qw/SOA A AAAA NS CNAME MX RP PTR HINFO NULL LOC TXT/;
my %dollar = (
    INCLUDE	=> \&dollar_include,
    ORIGIN	=> \&dollar_origin,
    TTL		=> \&dollar_ttl,
    );
my %handle = (
    A		=> \&handle_ip,
    AAAA	=> \&handle_ip6,
    NS		=> \&handle_host,
    CNAME	=> \&handle_host,
    MX		=> \&handle_mx,
    RP		=> \&handle_text,
    PTR		=> \&handle_host,
    HINFO	=> \&handle_text,
    NULL	=> \&handle_null,
    LOC		=> \&handle_text, # XXX
    TXT		=> \&handle_text,
    );
my @soa_fields = (qw/Domain-name Primary-NS Contact Serial/,
    qw/Refresh Retry Expire TTL/);
# Used characters apparently undocumented; source where bind uses them is:
#  src/lib/nameser/ns_ttl.c ns_parse_ttl()
my %scale_factor = (
    S => 1,
    M => 60,
    H => 3600,
    D => 86400,
    W => 604800,
    );
my $scale_letters = join '', keys %scale_factor;

my $errcount = 0;
my %opts;
my $debug = 0;
my $quiet = 0;
my $verbose = 0;
my $anyday = 0;

getopts('dnhvq', \%opts);
usage		if exists $opts{'h'};
$quiet = 1	if exists $opts{'q'};
$verbose = 1	if exists $opts{'v'};
$anyday = 1	if exists $opts{'d'};

if (exists $opts{'n'}) { # new only
	die "$0: Need a file to check\n" unless defined $ARGV[0];
	parse_zonefile ($ARGV[0]);
	semantics_check;
	exit $errcount;
}

die "$0: see usage (-h), need two filenames\n" unless @ARGV == 2;
my ($old_zone, $new_zone) = @ARGV;

perform_diffs($old_zone, $new_zone);
parse_zonefile($new_zone);
semantics_check;

exit $errcount;


# ######################################################################

# This is from before parse_zonefile became a function
my %storage;
my ($state, $statebuf, $origin, $default_ttl, $seen_explicit_TTL);
my $fh;

sub parse_zonefile ($)
{
	my $fn = shift;

	$fh = IO::File->new($fn, "<");
	die "Can't read '${fn}': $!\n" unless defined $fh;

	%storage = ();
	($state, $statebuf, $origin, $default_ttl) = (undef,undef,undef,undef);
	my ($last, $default_class);
	$origin = $fn .'.'; $origin =~ s{^.*/}{};
	$default_ttl = 0;
	$default_class = 'IN';
	$seen_explicit_TTL = 0;

	my $immediate_abort = 0;
	$_ = <$fh>;
	gah 'No RCS Id on first line' unless /^;.*\$Id.*\$/;
	while (<$fh>) {
		last if $immediate_abort;
		chomp;
		s/\s+$//; # only at end, ws at start significant

		gah "Bad: looks like an auto-generated file"
			if /;.*auto.*gener/i;

		s/\s*;.*//; # hope no semi-colons inside strings
		next if /^\s*$/;
		if (/#/) { # similarly, no octothorpes inside strings?
			gah 'We don\'t use #-comments';
			next;
		}


		if (defined $state or /^\$/) {
			process_special($state, $_);
			next;
		}

		my ($dom, $ttl, $class, $record, $data);
		if (s/^\s+//) {
			$dom = $last;
			if (not defined $dom) {
				gah 'Continuation entry without something' .
					' to continue';
				next;
			}
		} else {
			s/^(\S+)\s+//;
			$dom = $1;
			$dom = $origin if $dom eq '@';
			$dom .= '.'.$origin
				if $dom ne $origin and $dom !~ /\.$/;
			$last = $dom;
		}

		if (s/^(\d+[$scale_letters]?)\s+//) {
			$ttl = time_norm($1);
		} else { $ttl = $default_ttl }

		# Okay, so it's effectively "get rid of IN, it's always IN"
		if (s/^IN\s+//) {
			$class = 'IN'
		} else { $class = $default_class }

		if (not (($record, $data) = /^(\w+)(?:\s+(.*))?$/)) {
			gah 'Parse error, can\'t get record & data';
			next;
		}
		if (not defined $data and $record ne 'NULL') {
			gah 'Record has no value';
			next;
		}
		$data = '' if $record eq 'NULL' and not defined $data;

		if ($record eq 'SOA') {
			if (defined $state) {
				gah 'Trying to start an SOA inside another' .
					' state';
				next;
			}
			waffle 'Setting state to SOA';
			$state = 'SOA';
			$statebuf = $dom .' '. $data;
			process_special('SOA', '') if $statebuf =~ /\)/;
			next;
		}

		if (exists $handle{$record}) {
			# waffle "Handling $record with data `$data'";
			$storage{$record} = {} unless exists $storage{$record};
			$handle{$record}->(
			    $storage{$record}, $dom, $record, $data
			    );
		} else {
			gah "No data-handler for record type \"" .
				strvis($record) . "\"";
			waffle "Unable to cope with `$record $data'";
		}
	}

	# At the end of the file, MUST be in default state
	if (defined $state) {
		gah "Reached end of file whilst in special state \"$state\"";
	}

	$fh->close;
	undef $fh;
}

sub waffle ($)
{
	my $l = defined $fh ? $. : 'Semantics';
	print "$l: $_[0]\n" if $verbose;
}

sub gah ($)
{
	my $l = defined $fh ? $. : 'Semantics';
	warn "$l: $_[0]\n" unless $quiet;
	++$errcount;
}

sub usage ()
{
	print <<"EOU";
Usage: $0 -n [-dqv] <new-zonefile>
Usage: $0 [-dqv] <old-zonefile> <new-zonefile>
 -n   new only
 -d   date irrelevant in SOA (need not be today)
 -q   quiet the normal output
 -v   verbose
A minimalistic lint on <new-zonefile>, according to House Rules.
Ensures, eg, serial changed against old
EOU
	exit 0;
}

# BSD C libraries have something similar with the same name
# get rid of "bad" characters
sub strvis ($)
{
        my @oldtext = split(//, $_[0]);
        my @newtext;
        my ($c, $v);
        while (defined ($c = shift @oldtext)) {
                $v = ord $c;
                if ($v <= 0x1f) {
                        push(@newtext, "\\C^" . chr($v + 0x40));
                } elsif ($v == 0x20) {
                        push(@newtext, "\\040");
                } elsif ($v == 0x7f) {
                        push(@newtext, "\\C^?");
                } elsif ($v >= 0x80 and $v <= 0x9f) {
                        push(@newtext, "\\M^" . chr($v - 0x40));
                } elsif ($v == 0xa0) { 
                        push(@newtext, "\\240");
                } elsif ($v >= 0xa1 and $v <= 0xfe) {
                        push(@newtext, "\\M-" . chr($v - 0x80));
                } elsif ($v == 0xff) { 
                        push(@newtext, "\\M^?");
                } else {
                        push(@newtext, $c);
                }
        }
        return join('', @newtext);
}

sub time_norm ($)
{
	return undef unless defined $_[0];
	if ($_[0] =~ /^(\d+)([$scale_letters]?)$/) {
		my $t = $1;
		$t *= $scale_factor{$2} unless $2 eq '';
		return $t;
	} else {
		return undef;
	}
}

sub process_special ($$)
{
	my ($st, $line) = @_;

	if (not defined $st) {
		if ($line =~ s/^\$(\S+)\s+//) {
			# Yes, case sensitive
			if (exists $dollar{$1}) {
				waffle "Matching \$$1";
				return $dollar{$1}->($line);
			} else {
				gah 'Unknown dollar-directive';
			}
		} else {
			gah 'Internal error; process_special(no state)';
			return;
		}
	}

	if ($st eq 'SOA') {
		$statebuf .= $_;
		if ($statebuf =~ /\)/) {
			waffle 'Have complete SOA';
			undef $state;
			process_soa($statebuf);
		}
		return;
	}

	gah 'Unhandled special state';
	return;
}

sub dollar_origin ($)
{
	my $or = shift;
	if ($or =~ /^([\w.-]+)$/) {
		waffle "Origin: `$origin' -> `$1'";
		$origin = $1;
	} else {
		gah 'Bad $ORIGIN; not word-chars or . or -';
	}
}

sub dollar_ttl ($)
{
	my $ttl = shift;
	my $v = time_norm $ttl;
	if (defined $v) {
		waffle "Default TTL: $default_ttl -> $v";

		my $m = time_norm $default_ttl_min;
		if (defined $m) {
			gah "\$TTL < minimum $default_ttl_min" unless $v >= $m;
		}

		$seen_explicit_TTL = 1;
		$default_ttl = $v;
	} else {
		gah 'Bad $TTL - not a number' . ": `$ttl'";
	}
}

sub dollar_include ($)
{
	gah '$INCLUDE not handled; sorry';
}

sub process_soa ($)
{
	local $_ = shift;

	s/\s+/ /g;
	
	my ($dom) = /^(\S+)/;
#waffle 'SOA-Input: ' . $_;

	if (exists $storage{'SOA'}{$dom}) {
		gah "Already have an SOA for domain `$dom', dumping new one";
		return;
	}
	my @sfields = split /\s+(?:[()]\s+)?/, $_;
	if ($sfields[$#sfields] eq ')') {
		pop @sfields;
	} else {
		gah "SOA not ending in `)' character";
	}
#for (my $i = 0; $i < scalar @sfields; ++$i) { print "$i: $sfields[$i]\n" }

	# double-check origin, effectively?
	return unless host_syntax($dom, 'SOA-domain', $dom);
	return unless host_syntax($dom, 'SOA-primary', $sfields[1]);
	return unless host_syntax($dom, 'SOA-maintainer', $sfields[2]);

	$storage{'SOA'}{$dom} = \@sfields;
}

sub handle_ip ($$$$) # hashref domain record-type data
{
	waffle "ip: hr, dom=$_[1] rec=$_[2] data={$_[3]}";
	if ($_[3] !~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) {
		gah "IP-type record $_[2] does not look like an IP address";
		return;
	}
	my @quads = ($1, $2, $3, $4);
	if ($quads[0] <= 0 or $quads[0] >= 224) {
		gah "IP-type record $_[2] is outside valid /8 range";
		return;
	}
	foreach (@quads[1,2,3]) {
		if ($_ < 0 or $_ > 255) {
			gah "IP-type record $_[2] should have each quad " .
				"be in 0..255 range";
			return;
		}
	}
	if ($quads[3] == 255) {
		gah "IP-type record $_[2] matches broadcast address";
		# no return, enter data anyway
	}

	return unless host_syntax($_[1], $_[2].'-key', $_[1]);

	push @{ $_[0]->{$_[1]} }, $_[3];
}

sub handle_ip6 ($$$$) # hashref domain record-type data
{
	waffle "ip6: hr, dom=$_[1] rec=$_[2] data={$_[3]}";

# RFC 2373 IP Version 6 Addressing Architecture
	my $valid = 0; my $candidate = $_[3];

	if ($candidate =~ /^(?:[0-9a-f]{1,4}:){7}[0-9a-f]{1,4}$/i) {
		$valid = 1 unless $valid < 0;
	} elsif ($candidate =~ /::.*::/) {
		$valid = -1;
	} elsif ($candidate =~ /[^0-9a-fA-F:\.]/) {
		$valid = -1;
	} elsif ($candidate =~ /^
	    (?:[0-9a-f]{1,4}:){0,6} :{1,2}
	    (?:[0-9a-f]{1,4}:){0,6} [0-9a-f]{1,4}
	    $/ix) {
		$valid = 1 unless $valid < 0;
	} elsif ($candidate =~ /^(?:(?:[0-9a-f]{1,4}|:):){1,6}(\d+)\.(\d+)\.(\d+)\.(\d+)$/i) {
		my @quads = ($1, $2, $3, $4);
		if ($quads[0] <= 0 or $quads[0] >= 224) {
			$valid = -1;
		}
		foreach (@quads[1,2,3]) {
			if ($_ < 0 or $_ > 255) {
				$valid = -1;
			}
		}
		$valid = 1 unless $valid < 0;
	}

	if ($valid <= 0) {
		gah "IP6-type record $_[2] does not look like IP6 address";
		return;
	}

	return unless host_syntax($_[1], $_[2].'-key', $_[1]);

	push @{ $_[0]->{$_[1]} }, $candidate;
}

sub handle_host ($$$$;$) # hashref domain record-type data
{
	my ($href, $dom, $type, $data) = (@_[0,1,2,3]);
	my $norecord = $_[4];
	waffle "host: hr, dom=$dom rec=$type data={$data}"
		unless defined $norecord;

	# Accept no dots or require ends in dot; otherwise flag dot-problem
	# and bad chars as errors

	if ($data =~ /\.\./) {
		gah "Malformed host-type record ${type}: double-dot";
	} elsif ($data =~ /^\./) {
		gah "Malformed host-type record ${type}: leading dot";
	} elsif ($data =~ /^[\w-]+$/) {
		push @{ $href->{$dom} }, $data .'.'.$origin
			unless defined $norecord;
	} elsif ($data =~ /^[\w.-]+\.$/) {
		push @{ $href->{$dom} }, $data
			unless defined $norecord;
	} elsif ($data =~ /^[\w.-]*\.[\w.-]*\.[\w.-]*[^.]$/) {
		gah "Host-type record $type contains two dots without ending" .
			" in a dot";
		waffle "Appending $origin to $dom";
		push @{ $href->{$dom} }, $data .'.'.$origin
			unless defined $norecord;
	} elsif ($dom =~ /^\*\.\w[\w.-]*\.$/) {
		push @{ $href->{$dom} }, $data
			unless defined $norecord;
		gah "Host-type record $type is wildcard; we do not like these";
		--$errcount; # major cludge
	} else {
		gah "Malformed host-type record ${type}: unparsed";
	}
}

sub host_syntax ($$$) # domain, record-type, putative host string
{
	my $es = $errcount;
	handle_host(undef, $_[0], $_[1], $_[2], 1);
	return 1 if $errcount == $es;
	return 0;
}


sub handle_mx ($$$$) # hashref domain record-type data
{
	waffle "mx: hr, dom=$_[1] rec=$_[2] data={$_[3]}";
	if ($_[3] =~ /^(\d+) \s+ ([\w.-]+)$/x) {
		my ($pri, $hn) = ($1, $2);
		my $es = $errcount;
		handle_host(undef, $_[1], $_[2].'-host', $hn, 1);
		return unless $errcount == $es;
		handle_host(undef, $_[1], $_[2].'-key', $_[1], 1);
		return unless $errcount == $es;
		push @{ $_[0]->{$_[1]} }, [$pri, $hn];
		return;
	}
	gah "Malformed $_[2] record:";
	$_[3] =~ s/^\d+\s+//;
	host_syntax($_[1], $_[2], $_[3]);
	return;

}

sub handle_text ($$$$) # hashref domain record-type data
{
	waffle "text: hr, dom=$_[1] rec=$_[2] data={$_[3]}";
	gah "Freeform text field `$_[2]' empty"
		unless defined $_[3] and $_[3] ne '';

	host_syntax($_[1], $_[2].'-key', $_[1]);

	push @{ $_[0]->{$_[1]} }, $_[3];
}

sub handle_null ($$$$) # hashref domain record-type data
{
	waffle "null: hr, dom=$_[1] rec=$_[2] data={$_[3]}";
	gah "Null-type `$_[2]' not empty"
		if defined $_[3] and $_[3] ne '';

	host_syntax($_[1], $_[2].'-key', $_[1]);

	$_[0]->{$_[1]}++;
}

sub dump_item ($)
{
	my $ref = shift;

	use Data::Dumper;

	my $d = Data::Dumper->new([$ref]);
	$d->Terse(1)->Useqq(1)->Quotekeys(0)->Indent(1);
	print $d->Dump;
}

sub semantics_check ()
{
#	dump_item \%storage;

	gah "No \$TTL directive" unless $seen_explicit_TTL;

# CNAME record and non-CNAME both exist?
	foreach my $cname (keys %{ $storage{'CNAME'} }) {
#print ">$cname\n";
		foreach my $type (keys %storage) {
#print " t> $type\n";
		    foreach my $list (keys %{ $storage{$type} }) {
#print "  rec> $list\n";
			foreach my $data (@{ $storage{$type}{$list} }) {
#print "    data> $data\n";
			    if ($data eq $cname) {
				gah "RHS is a CNAME for $type `$list'";
			    }
			}
		    }

		    next if $type eq 'CNAME';

		    if (exists $storage{$type}{$cname}) {
			    gah "CNAME and $type exist for $cname";
		    }
		}
	}

	my $base = strftime '%Y%m%d', gmtime;
	foreach my $d (keys %{ $storage{'SOA'} }) {
		my @soa = @{$storage{'SOA'}{$d}};
# 0=dom 1=primary-NS 2=contact 3=serial 4=refresh 5=retry 6=expire 7=defttl
		if (scalar @soa != 8) {
			gah 'SOA has wrong number of entries; ' .
				scalar @soa . ' != 8';
			next;
		}

		if (length $soa[3] != 10) {
			gah "Serial for `$d' not 10 digits: $soa[3]";
		} elsif (substr($soa[3], 0, 8) ne $base) {
			gah "Serial for `$d' not today: $soa[3]" unless $anyday;
		}

		for (my $i = 4; $i <= 7; ++$i) {
			my $t = time_norm $soa[$i];
			gah "SOA: Non-numeric $soa_fields[$i]"
				unless defined $t;
			$soa[$i] = $t;
		}
		gah 'SOA: Retry >= Refresh' unless $soa[5] < $soa[4];

# Bind8: ==
# Bind9: Enforces that SOA-TTL is "minimum", lower bound on TTL for records
# within the domain, whose default is $TTL
		gah 'SOA: TTL > Zone $TTL' unless $soa[7] <= $default_ttl;

		my ($extw, $extd) = (
		    time_norm($expiretime_warn),
		    time_norm($expiretime_desire)
		    );
		if (defined $extw) {
			gah "SOA: Expire < $expiretime_warn" if $soa[6] < $extw;
		} else {
			$extw = 0;
		}
		if (defined $extd) {
			if ($soa[6] >= $extw and $soa[6] != $extd) {
				gah 'SOA: Expire okay, but not ' .
					$expiretime_desire;
				--$errcount;
			}
		}
	}

}

sub perform_diffs ($$)
{
	my ($oldzone, $newzone) = @_;

	my @difflines = `/usr/bin/diff -bu $oldzone $newzone`;
	if (@difflines == 0) {
		print "No differences\n";
		return;
	}

	chomp @difflines;
	shift @difflines; shift @difflines; # --- and +++

	my $oldserial = extract_serial $oldzone;
	my $newserial = extract_serial $newzone;

	if ($oldserial > $newserial) {
		++$errcount;
		print "SOA serial old > new ($oldserial > $newserial)\n";
		print " (Force override if deliberately cycling)\n";
	}

	my @changes;
	foreach (@difflines) {
		next unless /^[-+]/;
		next if /^[+-][#;]/;
		push @changes, $_;
	}

	if (@changes and $newserial le $oldserial) {
		print "There are substantive changes, " .
			"yet new serial not higher\n";
		print "Serial: $newserial\n";
		print " $_\n" foreach @changes;
		print "Try: " . suggest_serial($oldserial) . "\n";
		++$errcount;
	}
}

sub extract_serial ($)
{
	my $fn = shift;
	my $fh = new IO::File $fn, "r"
		or die "Failed to open '${fn}': $!\n";
	my @zlines = grep { /[^;]*SOA/ .. /[^;]*\)/ } $fh->getlines;
	$fh->close;

	chomp @zlines;
	map {s/;.*//} @zlines;
	my $soa = join(' ', @zlines);
	$soa =~ s/\s+/ /g;
	if ( $soa =~ m/\(\s+(\d+)\s+/ ) {
		return $1;
	} else {
		return undef;
	}
}

sub suggest_serial ($)
{
	use POSIX 'strftime';
	my $old = shift;
	my $base = strftime '%Y%m%d', gmtime;
	if ($old =~ s/^\Q$base\E//) {
		++$old;
		die "\n99 changes in one day!\n" if $old == 99;
		return $base . sprintf '%02d', $old;
	} else {
		return $base . '01';
	}

}
