#!/usr/bin/perl -w
#
# Build a tab-separated table of response sizes, one row per URL and one
# column per date.
#
# Inputs:
#   urls_unique.txt        - the URLs of interest, one per line.
#   http_responses/*.txt   - one file per date; the date (YYYY-MM-DD) is taken
#                            from the file name.  Each line is split on single
#                            whitespace characters into url, size, code
#                            (only url and size are used here).
#
# Output (STDOUT): a header line with the dates, then one line per URL with
# the size recorded for each date, or "N/A" where none was recorded.

use strict;
use warnings;

my $fn    = "urls_unique.txt";
my @files = glob("http_responses/*.txt");

# Load the set of URLs we are interested in.
my %urls;
open(my $url_fh, '<', $fn) or die("Unable to open $fn: $!");
while (my $url = <$url_fh>) {
    chomp($url);
    $urls{$url} = 1;
}
close($url_fh);

# Pair each response file with the date embedded in its name, then order the
# pairs by date.  ISO dates sort chronologically as plain strings, so this
# guarantees that "the previous date" below really is the earlier one and
# that the header and the data rows use the same column order.
my @dated_files;
foreach my $file (@files) {
    my ($date) = $file =~ /(\d\d\d\d-\d\d-\d\d)/;
    if (!defined $date) {
        warn("Skipping $file: no YYYY-MM-DD date in file name\n");
        next;
    }
    push(@dated_files, [$date, $file]);
}
@dated_files = sort { $a->[0] cmp $b->[0] or $a->[1] cmp $b->[1] } @dated_files;

my @dates;    # dates in processing (chronological) order
my %seen;     # $seen{$url}{$date} = size recorded for that URL on that date

foreach my $entry (@dated_files) {
    my ($date, $file) = @$entry;

    open(my $fh, '<', $file) or die("Can't open $file: $!\n");
    push(@dates, $date);

    # Date of the file processed just before this one, if any.
    my $prev_date = @dates > 1 ? $dates[-2] : undef;

    # There is an error in the data beginning in 12/21/2004 where size is 304
    # but that is actually the code.  For those dates we keep the size from
    # the previous date instead.
    # NOTE(review): the condition does not check the year for the December
    # case and only matches year 2005 otherwise; kept as originally written -
    # confirm against the actual date range of the data.
    my ($year, $mon, $day) = split(/-/, $date);
    my $size_is_suspect = ($mon == 12 && $day >= 21) || $year == 2005;

    while (my $line = <$fh>) {
        chomp($line);
        my ($url, $size) = split(/\s/, $line);

        # Skip blank lines and URLs we are not interested in.
        next if !defined $url || !exists $urls{$url};

        if (   $size_is_suspect
            && defined $size
            && $size =~ /\A\d+\z/
            && $size == 304)
        {
            # Carry the previous size forward; stays undefined (printed as
            # N/A) when there is no earlier date or no earlier size.
            $size = defined $prev_date ? $seen{$url}{$prev_date} : undef;
        }

        $seen{$url}{$date} = $size;
    }
    close($fh);
}

# Print dates at top; the leading tab leaves the URL column empty.
print "\t";
foreach my $date (@dates) {
    print "$date\t";
}
print "\n";

# One row per URL, in sorted order so the output is reproducible.
foreach my $url (sort keys %seen) {
    print "$url\t";
    foreach my $date (@dates) {
        my $size = $seen{$url}{$date};
        print defined $size ? "$size\t" : "N/A\t";
    }
    print "\n";
}