#!/usr/bin/perl -w

# Print http results for all dlib urls on the first trial.
#
# Reads the list of urls in $fn (one per line) and, for each one, prints
# "<url>\t1" if the url is considered available and "<url>\t0" if not,
# according to the responses recorded in $first_result.  A summary with the
# totals and percentages is printed at the end.
#
# Each line of $first_result is split on single spaces into three fields:
# url, length and code.

use strict;
use warnings;

my $fn           = "dlib_and_doi_urls_uniq.txt";
my $first_result = "http_responses/2004-09-09result.txt";

# Lazily-built lookup table (url => 1 or 0) so the result file is read once
# instead of once per url.  Filled in by found_url() on its first call.
my $availability_cache;

my $total_urls    = 0;
my $total_avail   = 0;
my $total_unavail = 0;

open(my $url_fh, '<', $fn) || die("Can't open $fn: $!");

while (my $url = <$url_fh>) {
    chomp($url);

    # A blank line is not a url; do not let it skew the totals.
    next if $url eq '';

    # See if url was found last time
    my $result = found_url($url);

    if ($result == 0) {
        $total_unavail++;
        print "$url\t0\n";
    }
    else {
        # Either the url was recorded as available (1), or it was not
        # recorded at all (-1).  Urls we couldn't find are just the article
        # urls that we can say were definitely available.
        $total_avail++;
        print "$url\t1\n";
    }

    $total_urls++;
}

close($url_fh);

print "\nTotal urls: $total_urls\n";
print "Total available: $total_avail ("
    . percent_of($total_avail, $total_urls) . " %)\n";
print "Total not available: $total_unavail ("
    . percent_of($total_unavail, $total_urls) . " %)\n";


# Returns $count as a percentage of $total, formatted with two decimals.
# Returns "0.00" when $total is zero so an empty url list does not die with
# a division-by-zero error.
sub percent_of {
    my ($count, $total) = @_;

    return sprintf("%.2f", 0) if !$total;
    return sprintf("%.2f", 100 * $count / $total);
}

# Returns 1 if a recorded response counts as "available", 0 otherwise.
#
#   $len  - the length field of the record
#   $code - the code field of the record
#
# Codes 200, 301, 302 and 304 are all treated as success, but a success with
# a zero length is treated as unavailable.  A missing or non-numeric code is
# unavailable; a missing or non-numeric length is treated as zero.
sub is_available {
    my ($len, $code) = @_;

    return 0 unless defined $code && $code =~ /\A\d+\z/;

    my $is_success = ($code == 200 || $code == 301
                   || $code == 302 || $code == 304);
    return 0 unless $is_success;

    my $body_len = (defined $len && $len =~ /\A\d+\z/) ? $len : 0;
    return $body_len == 0 ? 0 : 1;
}

# Reads the result file at $path and returns a hash reference mapping each
# url to 1 (available) or 0 (not available).  When a url appears on more
# than one line, the first line wins.  Dies if the file cannot be opened.
sub load_first_results {
    my ($path) = @_;

    my %available_for;

    open(my $fh, '<', $path) || die("Can't open $path: $!\n");

    while (my $line = <$fh>) {
        chomp($line);

        my ($url, $len, $code) = split(/ /, $line);

        # Skip blank or malformed lines that carry no url.
        next unless defined $url && length $url;

        # Keep only the first record seen for a url.
        next if exists $available_for{$url};

        $available_for{$url} = is_available($len, $code);
    }

    close($fh);

    return \%available_for;
}

# returns 1 if url was available, 0 if not, -1
# if not found
#
# The result file is loaded on the first call and cached for later calls.
sub found_url {
    my $url_find = shift;

    $availability_cache ||= load_first_results($first_result);

    return exists $availability_cache->{$url_find}
        ? $availability_cache->{$url_find}
        : -1;
}