#!/usr/bin/perl -w
#
# Print the availability of each URL on a single line, one column per
# observation date, with the publication year at the beginning of the line.
# If a URL has already been shown, it is not shown again on subsequent lines.
#
# Output is tab-separated.  The first row is a header: two empty cells (for
# the year and url columns), one cell per observation date, then the label
# "# times available".  Each following row is:
#
#   year  url  <one cell per date>  <number of dates the url was available>
#
# A date cell is "1" when the recorded HTTP response code is 200, 301, 302
# or 304, "0" for any other recorded code, and "N/A" when no response was
# recorded for that url on that date.
#
# Example:
#   1995  http://abc.com  1  1  0  1 ...
#   1995  http://sss.edu  0  1  1  0 ...

use strict;
use warnings;

# Tab-separated: date, url, http response.  The first line is a header.
my $fn = "http_responses_per_url.txt";

# Tab-separated: publication year, url (sorted by year).  Every line is
# treated as data; no header line is skipped.
my $urls_years = "urls_unique_with_pub_year.txt";

# HTTP response codes that count as "available".
my %is_available = map { $_ => 1 } qw(200 301 302 304);

# Read the responses file once, collecting both the set of observation
# dates and a hash of hashes: $seen{$url}{$date} = response code.
my %dates;
my %seen;

open(my $responses_fh, '<', $fn) or die("Can't open $fn: $!\n");
my $header = <$responses_fh>;    # ignore first line (header)
while (my $line = <$responses_fh>) {
    chomp($line);
    my ($date, $url, $code) = split(/\t/, $line);

    # Skip blank or malformed lines that carry no url field; they would
    # otherwise add a bogus date column and trigger undef warnings.
    next unless defined $url;

    $dates{$date} = 1;
    $seen{$url}{$date} = $code;
}
close($responses_fh);

# The date columns are identical for every row, so sort them only once.
my @sorted_dates = sort keys %dates;

# Header row.  Data rows start with two columns (year and url), so two empty
# cells are needed here to keep each date above its own column.
print join("\t", '', '', @sorted_dates, "# times available"), "\n";

# One row per url, in the order the urls appear in the year file.
my %urls;    # to make sure each url is shown only once

open(my $urls_fh, '<', $urls_years) or die("Unable to open $urls_years: $!");
while (my $line = <$urls_fh>) {
    chomp($line);
    my ($year, $url) = split(/\t/, $line);

    next unless defined $url;    # blank or malformed line
    next if $urls{$url}++;       # url already shown on an earlier line

    my $avail = 0;
    my @cells;
    foreach my $date (@sorted_dates) {
        my $code = $seen{$url}{$date};
        if (!defined $code) {
            push @cells, "N/A";
        }
        elsif ($is_available{$code}) {
            push @cells, 1;
            $avail++;
        }
        else {
            push @cells, 0;
        }
    }

    print join("\t", $year, $url, @cells, $avail), "\n";
}
close($urls_fh);