#!/usr/bin/perl
# Print out the number of articles per publication year.
# Example:
#   1995    23
#   1996    34
#   etc.

use strict;
use warnings;

# Input format: article lines are not indented. All URLs indented (leading
# tab) under an article line are that article's URLs and are not counted.
my $fn = "urls_by_article.txt";

my %years;    # four-digit publication year => number of articles

open(my $urls, '<', $fn) or die("Can't open $fn: $!\n");

# Read with an explicit defined() test so a line consisting of "0" does not
# end the loop early, and advance exactly one line per iteration so no input
# (e.g. a leading or tab-only indented line) can stall the loop.
while (defined(my $line = <$urls>)) {
    chomp($line);

    next if $line =~ /^\t/;     # indented: a URL belonging to an article
    next if $line !~ /\S/;      # blank line: nothing to count

    my $year = publication_year($line);
    if (!defined $year) {
        # Do not count the line under a bogus year; report it instead.
        warn "Skipping article line with no recognizable year: $line\n";
        next;
    }

    $years{$year}++;
}

close($urls);

# Print out number of articles per year.
print "year\tnum of articles\n";
foreach my $year (sort keys %years) {
    print "$year\t" . $years{$year} . "\n";
}

# Extract the four-digit publication year from an article line.
#
# The year is taken from the two digits that end the path component
# following "dlib/" (e.g. ".../dlib/july95/..." yields "1995").
# Two-digit values greater than 04 are expanded to 19xx, all others to 20xx;
# this cut-off is hard-coded.
#
# Returns the four-digit year as a string, or undef (empty list in list
# context) when the line does not contain the expected pattern.
sub publication_year {
    my ($article) = @_;

    my ($two_digit) = $article =~ m{dlib/\w+(\d\d)/};
    return if !defined $two_digit;

    return $two_digit > 4 ? "19$two_digit" : "20$two_digit";
}