#! /usr/bin/perl -w

# vim:syntax=perl

use strict;

use DB_File;

use lib '/usr/share/perl5';
use Lire::Email;
use Lire::Program qw( :msg :dlf );

my $debug = 0;
sub debug {
    $debug and lr_debug( @_ );
}

my $dbfile = shift 
  or lr_err( "give dumpfilestem as arg\n" );

my %dlfids; # maps dlfids to nof occurrences in log

if ( -e $dbfile ) {
    unlink $dbfile or
      lr_err( "cannot unlink $dbfile: $!\n");
}

tie %dlfids, "DB_File", "$dbfile",
  or lr_err "cannot tie to $dbfile: $!\n";


my $schema = eval { Lire::DlfSchema::load_schema( "email" ) };
lr_err( "failed to load email schema: $@" ) if $@;
my $dlf_maker = 
  $schema->make_hashref2asciidlf_func( qw/time logrelay queueid msgid 
		      from_user from_domain from_relay_host from_relay_ip 
		      size delay
		      to_user to_domain to_relay_host to_relay_ip 
		      stat xstat
				       /);

my $qid = '';
my %line;  # line currently being processed
my ($lines, $errorlines, $outlines) = (0, 0, 0);

sub print_line {
    my $line = shift;

    for my $k (sort keys %{ $line }) {
        # print ", $k=" . $line->{$k};
        print " $k " . $line->{$k};
    }
    print "\n";
}

sub store_dlfid {
    my $dlfid = shift;
    $dlfids{$dlfid}++;
}

sub parse_receive {
    my ( $type, $log ) = @_;

    if ( $type eq 'pickup' ) {
	$line{'from_relay_host'} = "localhost";
	$line{'from_relay_ip'} = '127.0.0.1';
    } else {
	unless ( defined $log->{client} ) {
	    lr_notice ( "no client= field found for queueid '$qid', ",
			"will use default" );
	    $log->{client} = '-';
	}
	($line{from_relay_host}, $line{from_relay_ip}) =
          splitrelay($log->{client} );

	my $fromrelayhost = $line{from_relay_host};
	sanitize( 'relayhost', $fromrelayhost, $line{from_relay_host} );
	
	my $fromrelayip = $line{from_relay_ip};
	sanitize('relayip', $fromrelayip, $line{from_relay_ip});
    }
}

sub parse_cleanup {
    my ( $log ) = $_[0];

    my $mid;
    if (defined $log->{'message-id'}) {
	$mid = $log->{'message-id'};
    } elsif (defined $log->{'resent-message-id'}) {
	$mid = $log->{'resent-message-id'};
    } else {
	lr_warn "no (resent-)message-id= field found for " .
	  "queueid '$qid', using default";
	$mid = '-';
    }

    sanitize( 'msgid', $mid, $line{'msgid'} );
}

sub parse_send {
    my ( $log ) = $_[0];

    for my $k ('to', 'delay', 'status', 'relay') {
	die "missing '$k'= field\n"
	  unless defined $log->{$k} 
    }

    my $to;
    # max one to-address per postfix log line, no need for sanitize_tos here
    sanitize('emailadress', $log->{'to'}, $to);

    my ($touser, $todomain) = splitemailadress($to);

    my $tmp;
    sanitize('stat', $log->{'status'}, $tmp);

    my ($stat, $xstat)      = splitstat($tmp);
    my ($tmphost, $tmpip)   = splitrelay($log->{relay});

    my $torelayhost;
    sanitize('relayhost', $tmphost, $torelayhost);

    my $torelayip;
    sanitize('relayip', $tmpip, $torelayip);

    $line{'to_user'}       = $touser;
    $line{'to_domain'}     = $todomain;
    $line{'to_relay_host'} = $torelayhost;
    $line{'to_relay_ip'}   = $torelayip;
    $line{'delay'}         = $log->{'delay'};
    $line{'stat'}          = $stat;
    $line{'xstat'}         = $xstat;

}

sub parse_qmgr {
    my ( $log ) = @_;

    for my $k ('size', 'from') {
	# lr_err " expected to find " .
	#  "'$k'= field, substituting default for queueid '$qid', " .
	#    "type '$type'\n"

	# can occurr in case message delivery gets deferred.  we find the
	# real from and size later, in such cases.  no need to warn.
	$log->{$k} ||= '-';
    }

    my $size;
    sanitize('size', $log->{'size'}, $size);

    if (!$line{'size'} or $line{'size'} eq '-') {
        # only update size if we can 'improve' it.  some qmgr lines have
        # to=, relay=, delay= and status=, but no size=
        debug("parse_qmgr: size was " . $log->{'size'} . ", assigning $size\n");
        $line{'size'} = $size;
    }
    $line{'from_relay_host'} ||= "localhost";
    $line{'from_relay_ip'}   ||= "127.0.0.1";

    my $tmp;
    sanitize('emailadress', $log->{'from'}, $tmp);
    ( $line{'from_user'}, 
      $line{'from_domain'}) = splitemailadress($tmp);

}

sub parse_postfix {
    my ( $log ) = $_[0];

    $qid = $log->{queueid};
    my $logrelay = $log->{hostname};
    my $dlfid = $log->{hostname} . $qid;

    my ($type) = $log->{process} =~ m!^postfix/(.*)!;

    # Initialize default
    %line = (
	     queueid  => $qid,
	     logrelay => $logrelay,
             dlfid    => $dlfid,
             type     => $type,
             time     => $log->{timestamp},
	    );

  SWITCH:
    for ($type) {
	/^(pickup|smtpd)$/ && do {
	    parse_receive( $type, $log );
	    last SWITCH;
	};
	/^cleanup$/ && do {
	    parse_cleanup( $log );
	    last SWITCH;
	};

        # lmtp: lmtp delivery via unix socket. looks like:
        # Aug 27 04:02:08 mailhost postfix/lmtp[28560]: C15C085E:
        #  to=<joe.user@example.com>,
        #  relay=/var/imap/socket/lmtp[/var/imap/socket/lmtp], delay=1,
        #  status=sent (250 2.1.5 Ok)
        #
        # my goodness! we have 
        # Jun 21 20:41:29 tiggr postfix/smtp[17847]: AE4255DEB: 
        #   to=<log@bind8.logreport.org>, 
        #   relay=my.host.com[001:10:108:201:50:fcff:fe0b:28ec],
        #   delay=1, status=sent (250 Ok: queued as 076EFD980)
        # but also:
        # Jun 21 20:41:28 tiggr postfix/qmgr[520]: AE4255DEB:
        #   to=<-a@local.host.com>, relay=none, delay=0, status=bounced 
        #   (invalid recipient syntax: "-a@local.host.com")
        # (under regular circumstances, qmgr lines look like
        # Jun 21 20:47:08 tiggr postfix/qmgr[520]: 718E85DEB: 
        #    from=<xaa@host.com>, size=685, nrcpt=1 (queue active)
        #
        # Jan 14 14:18:13 srv1 postfix/virtual[25001]: D33803A9E07: 
        #  to=<toto@example.com>, relay=virtual, delay=126, status=sent 
        #  (mailbox)
	(
          /^(local|smtp|lmtp|virtual)$/ || (
            ($type eq 'qmgr' || $type eq 'nqmgr') &&
              defined $log->{to} 
          )
        ) && do {
	    parse_send( $log );
	    last SWITCH;
	};
	/^(qmgr|nqmgr)$/ && do {
	    parse_qmgr( $log );
	    last SWITCH;
	};
	/^(master|pipe)$/ && do {
	    # Ignore those
	    last SWITCH;
	};
	# Default
	die "unknown type '$type'\n";
    }

    # make sure the nof is in sync with what we're feeding to postfix2dlf_main,
    # beware of skipped lines
    store_dlfid($dlfid);
    print_line(\%line);
    $outlines++;
}

my $parser = new Lire::Email();
while (<>) {
    chomp;
    $lines++;
    my $log;
    eval {
	$log = $parser->parse( $_ );
    };
    if ($@) {
	lr_warn( $@ );
	lr_warn( "failed to parse '$_'. skipping" );
        $errorlines++;
	next;
    }
    next unless $log->{process} =~ m!^postfix/!;

    # skip lines like
    #  Dec 1 06:58:22 internetsrv postfix/smtpd[21142]: connect from ...
    #  Dec 1 06:59:22 internetsrv postfix/smtpd[21142]: disconnect from ....
    # we're not using this information.
    next unless defined $log->{'queueid'};

    eval {
	parse_postfix( $log );
    };
    if ($@) {
	lr_warn( $@ );
	lr_warn( "failed to parse '$_'. skipping" );
        $errorlines++;
    }
}

# nope, can't do
#  end_dlf_converter( $lines, $outlines, $errorlines );
# here: lr_db_store should store dlflines only after _main has ran

# overload dlfids to store info we'll need in lr_postfix2dlf_main's
# end_dlf_converter call

$dlfids{'loglines'} = $lines;
$dlfids{'errorlines'} = $errorlines;

untie %dlfids;

lr_info( "read $lines lines; output $outlines internal lines; " .
  "$errorlines errors" );

# lr_db_store will be handled by lr_postfix2dlf_main's end_dlf_converter
# call


__END__

=pod

=head1 NAME

postfix2dlf_pre - preprocess postfix logfiles for postfix2dlf_main

=head1 SYNOPSIS

B<postfix2dlf_pre> I<dumpfile>

=head1 DESCRIPTION

B<postfix2dlf_pre> expect a postfix logfile on STDIN.  It prints preprocessed
logfile lines to STDOUT and creates a Berkeley DB holding a mapping from
hostname-queueid to number-of-lines-with-this-id.  I<dumpfile> can be e.g.
/tmp/dump, ../../var/dump or dump.

=head1 DEVELOPERS

One can use the lr_anondump tool to view the contents of the dumpfile.

E.g. run

 $ zcat postfix.log.gz | ./postfix2dlf_pre /tmp/stem > /tmp/pre
 $ lr_anondump /tmp/stem
 $ ./postfix2dlf_main /tmp/stem < /tmp/pre > /tmp/post

.

=head1 EXAMPLE

The raw log file

 Dec 1 04:02:56 internetsrv postfix/pickup[20919]: 693A3578E:
  uid=0 from=<root>
 Dec 1 04:02:56 internetsrv postfix/cleanup[20921]: 693A3578E:
  message-id=<john.doe.1@example.com>
 Dec 1 04:02:57 internetsrv postfix/qmgr[20164]: 693A3578E:
  from=<john.doe.2@example.com>, size=617 (queue active)
 Dec 1 04:02:58 internetsrv postfix/local[20924]: 693A3578E:
  to=<john.doe.2@example.com>, relay=local, delay=3, status=sent
  (forwarded as E325C578D)

will get converted to

 dlfid internetsrv693A3578E from_relay_host localhost from_relay_ip
  127.0.0.1 logrelay internetsrv queueid 693A3578E time 1007175776
  type pickup
 dlfid internetsrv693A3578E logrelay internetsrv msgid
  <john.doe.1@example.com> queueid 693A3578E time 1007175776 type cleanup
 dlfid internetsrv693A3578E from_domain example.com from_relay_host
  localhost from_relay_ip 127.0.0.1 from_user john.doe.2 logrelay
  internetsrv queueid 693A3578E size 617 time 1007175777 type qmgr
 delay 3 dlfid internetsrv693A3578E logrelay internetsrv queueid
  693A3578E stat sent time 1007175778 to_domain example.com
  to_relay_host localhost to_relay_ip 127.0.0.1 to_user john.doe.2
  type local xstat (forwarded_as_e325c578d)

=head1 EXAMPLES

postfix2dlf_pre will be rarely used on its own: it is called by postfix2dlf(1).
Refer to the postfix2dlf manpage for examples and usage information.

=head1 SEE ALSO

postfix2dlf(1), postfix2dlf_main(1), sendmail2dlf(1), qmail2dlf(1), exim2dlf(1)

=head1 VERSION

$Id: postfix2dlf_pre.in,v 1.13 2006/07/23 13:16:34 vanbaal Exp $

=head1 COPYRIGHT

Copyright (C) 2002 Stichting LogReport Foundation LogReport@LogReport.org

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html.

=head1 AUTHOR

Joost van Baal

=cut

# Local Variables:
# mode: cperl
# End:

