#!/usr/bin/perl -w

# Print http results by testing date.
# This includes duplicate urls from other articles.
#
# Example:
#   date        available  unavailable  total urls
#   2004-09-09  100        200          300
#   2004-09-10  95         205          300
#   ...
#
# Input (paths are relative to the current directory):
#   urls_unique.txt       - one url per line; only these urls are counted
#   http_responses/*.txt  - one "url length code" record per line, fields
#                           separated by single spaces; the testing date
#                           (YYYY-MM-DD) is taken from the file name
#
# Output: a tab-separated table on STDOUT, one row per testing date.

use strict;

my $url_list = "urls_unique.txt";

# Set of urls that are allowed to contribute to the counts.
my %urls;

open(my $list_fh, '<', $url_list) || die("Can't open $url_list: $!");
while (my $line = <$list_fh>) {
    chomp($line);
    $urls{$line} = 1;
}
close $list_fh;

# Decide whether one response record counts as "available".
#   $code - status code field of the record (may be undef or empty)
#   $len  - length field of the record (may be undef or empty)
# Returns 1 when the url is considered available, 0 otherwise.
sub is_available {
    my ($code, $len) = @_;

    # A record without a status code cannot be a success.
    return 0 if (!defined $code || $code eq '');

    # 301, 302 and 304 are counted the same as a plain 200.
    $code = 200 if ($code == 304 || $code == 301 || $code == 302);
    return 0 if ($code ne '200');

    # A missing length is replaced by a non-zero placeholder so that it is
    # not mistaken for an empty response.
    $len = 9999 if (!defined $len || $len eq '');

    # A 200 response with a zero length is counted as unavailable.
    return ($len == 0) ? 0 : 1;
}

my @files = glob("http_responses/*.txt");

# $seen{date}->{available|unavailable} = number of matching records
my %seen;

foreach my $file (@files) {
    # The testing date depends only on the file name, so extract it once
    # per file instead of once per line.
    my ($date) = ($file =~ /(\d\d\d\d-\d\d-\d\d)/);
    if (!defined $date) {
        warn "Skipping $file: no YYYY-MM-DD date in file name\n";
        next;
    }

    open(my $resp_fh, '<', $file) || die("Can't open $file: $!\n");
    while (my $line = <$resp_fh>) {
        chomp($line);
        my ($url, $len, $code) = split(/ /, $line);

        # Blank lines produce no fields at all
        next if (!defined $url);

        # Some urls that were tested weren't legal
        next if (!defined $urls{$url});

        if (is_available($code, $len)) {
            $seen{$date}->{available}++;
        }
        else {
            $seen{$date}->{unavailable}++;
        }
    }
    close $resp_fh;
}

print "date\tavailable\tunavailable\ttotal urls\n";
foreach my $date (sort keys %seen) {
    # A date may have only one kind of result; report the other as 0
    # rather than as an empty column.
    my $total_avail   = $seen{$date}->{available}   || 0;
    my $total_unavail = $seen{$date}->{unavailable} || 0;
    my $total_urls    = $total_avail + $total_unavail;
    print "$date\t$total_avail\t$total_unavail\t$total_urls\n";
}