WEB LOG TRENDS ANALYZER
Project Summary Page
Download wlta.tar.gz
#!/usr/bin/perl -w
#########################################################################
## WEB LOG TRENDS ANALYZER ##
#########################################################################
#
# The purpose of the "Web Log Trends Analyzer" is to provide a
# work-around for a "bug" in some popular commercial Log Analyzer
# products which make them unable to process multiple log files for
# overlapping periods of time. This situation is usually the result of
# using a cluster of web servers for a single web site for which all
# answer requests, yet each maintain separate log files. This situation
# is also known to occur in certain non-clustered environments, such as
# if you have one web server acting as a front-end for another via
# ProxyPass. (e.g., SSL front-ending non-SSL, light-weight front-ending
# mod_perl, etc.).
#
# If you like your commercial Log Analyzer (as I do!), and don't feel
# like upgrading just yet (or can't afford to spend 10 times the money),
# this little Perl script will do the trick quite nicely. Simply
# specify a list of all your log files and the Web Log Trends Analyzer
# will consolidate them into a single log file, sorted by time. The
# output log will work with your commercial Log Analyzer product without
# a hitch, allowing you to continue using it to generate reports. This
# script is well-suited to run via cron.
#
# While I am certain that commercial "Enterprise" log analysis products
# are excellent systems worthy of their six-figure price tags, the Web
# Log Trends Analyzer is intended to "fill the gap" for the simple
# sys-admins who already own a commercial Log Analyzer and just want to
# find out how their web site is being used.
#
#
# WLTA expects a list of log files. These files are consolidated
# into a single logfile and output to STDOUT.
#
# Usage: wlta.pl [list of log files] > consolidated.log
use Data::Dumper;
use constant DEBUG => $ENV{DEBUG} || 0;
use constant MONTHS => {
'Jan' => 1,
'Feb' => 2,
'Mar' => 3,
'Apr' => 4,
'May' => 5,
'Jun' => 6,
'Jul' => 7,
'Aug' => 8,
'Sep' => 9,
'Oct' => 10,
'Nov' => 11,
'Dec' => 12,
};
main();
exit(0);
sub main {
my @filelist = @ARGV;
# Hash to track time of last request for each file
my $lastreqtime = {};
# Hash to track log of last request for each file
my $lastreqlog = {};
# Hash to track stats for each file
my $fhstats = {};
# Open all the files
my $fhcount = 0;
my @filehandles = ();
foreach my $file (@filelist) {
my $fh = sprintf("fh%02d", $fhcount);
open ($fh, "<$file") || die ("Can't open: $!");
push(@filehandles, $fh);
print STDERR "$fh => '$file'\n" if (DEBUG>0);
update_fh($fh, $lastreqtime, $lastreqlog, $fhstats);
$fhcount++;
}
# Now iterate through each filehandle
while (my $output = get_next_line($lastreqtime, $lastreqlog, $fhstats)) {
print "$output\n";
}
print STDERR "DONE!\n" if (DEBUG>0);
# Close filehandles
foreach my $fh (@filehandles) {
close($fh);
# Print Stats
dump_stats($fh, $fhstats) if (DEBUG>0);
}
}
# Return the next line to output, or null if done.
sub get_next_line {
my ($lastreqtime, $lastreqlog, $fhstats) = @_;
print STDERR "FH Lookup: " . Dumper($lastreqtime) if (DEBUG>4);
# Build reverse lookup hash: lastreqtime => ( filehandles )
my %timeindex = ();
while (my ($fh, $time) = each(%$lastreqtime)) {
$timeindex{$time} = [] unless (exists($timeindex{$time}));
push(@{$timeindex{$time}}, $fh); # Add this $fh to the list of those at this time
}
print STDERR "Reverse Lookup: " . Dumper(\%timeindex) if (DEBUG>4);
# Find the lowest time. Get the first $fh in the list. Guard value: ($early_time<0) == EOF
my @times = sort(keys(%timeindex));
my $early_time = -1;
for (my $eti=0; ($eti{$fh};
# read another line from this $fh
update_fh($fh, $lastreqtime, $lastreqlog, $fhstats);
return $output;
}
# Read another line from this $fh. Update $lastreqtime and $lastreqlog.
sub update_fh {
my ($fh, $lastreqtime, $lastreqlog, $fhstats) = @_;
print STDERR "Reading $fh..." if (DEBUG>2);
my $rawline = <$fh>;
# Check for EOF -- get out early if at EOF.
unless (defined($rawline)) {
$lastreqtime->{$fh} = -1;
$lastreqlog->{$fh} = undef;
return;
}
chomp($rawline);
# Get date string from log line e.g.: "[27/Aug/2001:05:17:30 -0400]"
$rawline =~ m/\[(\d\d)\/(\w\w\w)\/(\d\d\d\d)\:(\d\d)\:(\d\d)\:(\d\d).*\]/;
my ($mday, $mon, $year, $hour, $min, $sec) = ($1, $2, $3, $4, $5, $6);
unless (defined($sec)) {
die ("[$fh] Problem parsing log line:\n\n$rawline\n\n");
}
my $mon_num = ${&MONTHS}{$mon};
die ("No such month '$mon'") unless (defined($mon_num));
my $datekey = sprintf("%4d%02d%02d%02d%02d%02d", $year, $mon_num, $mday, $hour, $min, $sec);
print STDERR " [$datekey]\n" if (DEBUG>2);
# Update $lastreqtime
$lastreqtime->{$fh} = $datekey;
# Update $lastreqlog
$lastreqlog->{$fh} = $rawline;
# Update Stats
$fhstats->{$fh} = {FIRST_LOG=>$datekey, COUNT=>0} unless (exists($fhstats->{$fh}));
$fhstats->{$fh}->{LAST_LOG} = $datekey;
$fhstats->{$fh}->{COUNT}++;
}
sub dump_stats {
my ($fh, $fhstats) = @_;
print STDERR "[$fh] COUNT: ".$fhstats->{$fh}->{COUNT}
." FIRST_LOG: ".$fhstats->{$fh}->{FIRST_LOG}
." LAST_LOG: ".$fhstats->{$fh}->{LAST_LOG}."\n";
}