#!/usr/bin/perl -w
# Create a list of unique urls from the set of all urls.
# Use this list to do http tests.
#
# Input : url_id_list.txt in the current directory, one entry per line.
#           1.1 article-url      article heading  (two-part id)
#           1.1.3 some-url       url listed under article 1.1 (three-part id)
# Output: every distinct url that appears under a known article heading,
#         sorted as strings, one per line on STDOUT.

use strict;
use warnings;

# Return the sorted list of unique urls found on three-part-id lines
# ("N.M.K url") whose "N.M" prefix also occurs as an article heading
# ("N.M url") somewhere in the input.
#
# Parameters: the input lines (trailing newlines are stripped from a copy).
# Returns   : list of urls, de-duplicated and sorted with the default
#             string sort.  Intended to be called in list context.
sub unique_article_urls {
    my @lines = @_;
    chomp @lines;

    # Pass 1: remember which "N.M" ids are article headings.  Headings may
    # appear anywhere in the file, so this must finish before pass 2.
    my %is_article;
    for my $line (@lines) {
        if ($line =~ /^(\d+)\.(\d+) .+$/) {
            $is_article{"$1.$2"} = 1;
        }
    }

    # Pass 2: collect urls whose id prefix belongs to a known article.
    # The hash keys give de-duplication for free.
    my %seen;
    for my $line (@lines) {
        next unless $line =~ /^(\d+)\.(\d+)\.\d+ (.+)$/;
        my ($prefix, $url) = ("$1.$2", $3);
        $seen{$url} = 1 if $is_article{$prefix};
    }

    my @urls = sort keys %seen;
    return @urls;
}

# Read url_id_list.txt and print the unique urls, one per line.
# Dies with the file name and OS error if the list cannot be opened.
sub main {
    my $list_file = "url_id_list.txt";

    open(my $urls_fh, '<', $list_file) or die("$list_file: $!");
    my @lines = <$urls_fh>;
    close $urls_fh;

    foreach my $url (unique_article_urls(@lines)) {
        print "$url\n";
    }
    return;
}

# Run only when executed as a script, so the helper above can be loaded
# (e.g. via require) without touching the filesystem.
main() unless caller;

1;