#!/usr/bin/perl
# Generate all data files.
#
# Runs each report script in turn through the shell, redirecting its output
# into the matching .txt file in the current directory.  The scripts are
# invoked by bare name, so the shell must be able to find them on PATH.
#
# A step that cannot be started or that exits with a non-zero status is
# reported on STDERR; the remaining steps still run, and the script exits
# with status 1 at the end if any step failed (0 otherwise).

use strict;
use warnings;

# Number of steps that could not be started or exited with non-zero status.
my $failed_steps = 0;

# run_step($command [, $banner])
#
# Prints a progress banner to STDOUT and runs $command through the shell.
#   $command - shell command line, including its own output redirection.
#   $banner  - optional progress text; defaults to "$command ...\n".
# Returns 1 if the command exited with status 0, otherwise 0 (after warning
# and incrementing $failed_steps).  Dies if the command was killed by a
# signal.
#
# Note: for the steps piped through sort, a plain POSIX shell reports the
# status of the last command in the pipeline, so a failure of the generator
# script itself is not detected for those steps.
sub run_step {
    my ($command, $banner) = @_;

    $banner = "$command ...\n" unless defined $banner;
    print $banner;

    # Each command redirects its own stdout, so there is nothing to capture;
    # system() is used only for its wait status.
    my $status = system($command);

    if ($status == -1) {
        warn "FAILED to start '$command': $!\n";
        $failed_steps++;
        return 0;
    }

    # system() makes this process ignore SIGINT/SIGQUIT while it waits, so
    # stop the whole run ourselves when the child was terminated by a
    # signal (e.g. the user pressed Ctrl-C).
    if (my $signal = $status & 127) {
        die "'$command' was killed by signal $signal; aborting\n";
    }

    if (my $exit_code = $status >> 8) {
        warn "FAILED: '$command' exited with status $exit_code\n";
        $failed_steps++;
        return 0;
    }

    return 1;
}

# Print the unique set of urls per article url.
# The banner is passed explicitly to keep this step's existing output
# (which has a space before the newline) unchanged.
run_step(
    'print_urls_per_article.pl > urls_by_article.txt',
    "print_urls_per_article.pl > urls_by_article.txt ... \n",
);

# Print the unique set of urls per article url that contains
# all dlib urls but does not contain urls from the footer
run_step('print_urls_per_article_no_tail_urls.pl > urls_by_article_complete.txt');

# Print the number of url refs per article url including
# all the dlib refs
run_step('count_urls_per_article.pl | sort > url_count_per_article_complete.txt');

# Print the single list of unique urls
run_step('create_unique_url_list.pl > urls_unique.txt');

run_step('create_unique_url_list_with_pub_year.pl | sort > urls_unique_with_pub_year.txt');

run_step('print_urls_per_year.pl > urls_by_year.txt');

run_step('print_http_results_per_date.pl > http_codes_per_date.txt');

# Print http codes for each url at each date
run_step('print_http_url_codes_per_date.pl > http_codes_urls_per_date.txt');

# Print the number of articles there are per year
# This must come before print_http_results_per_pub_year.pl
run_step('print_articles_per_year.pl > num_of_articles_per_year.txt');

run_step('print_http_results_per_pub_year.pl > http_availability_by_pub_year.txt');

# Print table summarizing num of urls available and unavailable at
# each test date
run_step('print_availability_per_test_date.pl > http_availability_by_test_date.txt');

run_step('print_http_per_url_pub_date_availability.pl > http_responses_url_year_availability.txt');

run_step('print_http_results_per_url.pl > http_responses_per_url.txt');

run_step('print_http_url_availability.pl > http_responses_url_availability.txt');

run_step('print_http_url_codes.pl > http_responses_url_codes.txt');

# Print the byte size of each url at each trial
run_step('print_size_for_url_per_trial.pl > url_size_per_trial.txt');

# See what the byte changes are for each unique url
run_step('print_size_changes.pl > url_size_changes.txt');

# Pull out only those urls that registered at least 1 size change
run_step('print_non_zero_size_changes.pl > url_size_changes_non_zero.txt');

# Pull out only those urls that registered at least 1 size change
# of at least 1 KB
run_step('print_in_flux_size_changes.pl > url_size_changes_in_flux.txt');

# Print which of the in flux urls registers a change at every date
run_step('print_in_flux_size_changes_every_time.pl > url_size_changes_in_flux_change_every_time.txt');

print "DONE\n";

# Surface failures to the caller through the exit status.
if ($failed_steps) {
    warn "$failed_steps step(s) failed; see messages above\n";
    exit 1;
}
exit 0;