%Original file available from http://www.cs.odu.edu/~jbollen/bibliographies/bibtex/infsci_WWW.bib
%Last update: Tuesday 13 September 2005
%Current number of entries: 16
%
% NOTE(review): 17 entries are present below; the count above is the upstream
% header text and appears to be stale.
%
% Conventions used in this file:
%   - Citation keys follow the pattern "prefix:surnameYEAR" and must not be
%     renamed, since documents cite them directly.
%   - Names are written "Last, First" and separated by " and ".
%   - Page ranges use "--"; months use the standard three-letter macros.
%   - DOIs are stored bare, without any resolver or proxy prefix.
%   - Words that must keep their capitalisation in titles are wrapped in braces.
%   - Text outside entries (like this header) is ignored by BibTeX; keep the
%     at-sign out of such text, because it starts a new entry.

@inproceedings{usings:wu2004,
  author    = {Wu, J. and Aberer, K.},
  title     = {Using {SiteRank} for decentralized computation of {Web} document ranking},
  booktitle = {Third International Conference on Adaptive Hypermedia and Adaptive Web-Based Systems},
  series    = {Lecture Notes in Computer Science},
  volume    = {3137},
  address   = {Eindhoven, Netherlands},
  pages     = {265--274},
  month     = aug,
  year      = {2004},
}

@inproceedings{webcac:breslau1999,
  author    = {Breslau, L. and Cao, Pei and Fan, Li and Phillips, G. and Shenker, S.},
  title     = {{Web} caching and {Zipf}-like distributions: evidence and implications},
  booktitle = {INFOCOM'99: Conference on Computer Communications, 21--25 March 1999, New York, NY, USA},
  volume    = {1},
  pages     = {126--134},
  publisher = {IEEE},
  address   = {Piscataway, NJ, USA},
  year      = {1999},
}

@inproceedings{effect:serpanos2000,
  author    = {Serpanos, D. N. and Karakostas, G. and Wolf, W. H.},
  title     = {Effective caching of {Web} objects using {Zipf's} law},
  booktitle = {International Conference on Multimedia and Expo, 30 July--2 Aug. 2000, New York, NY, USA},
  volume    = {2},
  pages     = {727--730},
  publisher = {IEEE},
  address   = {Piscataway, NJ, USA},
  year      = {2000},
}

@misc{awstats,
  key   = {AWStats},
  title = {{AWStats} - Free log file analyzer for advanced statistics: http://awstats.sourceforge.net},
  year  = {2004},
  url   = {http://awstats.sourceforge.net},
}

@article{webpag:koehler2002,
  author  = {Koehler, Wallace},
  title   = {{Web} page change and persistence---{A} four-year longitudinal study},
  journal = {Journal of the American Society for Information Science and Technology},
  volume  = {53},
  number  = {2},
  pages   = {162--171},
  year    = {2002},
}

@article{inform:kobayashi2000,
  author    = {Kobayashi, Mei and Takeda, Koichi},
  title     = {Information retrieval on the web},
  journal   = {ACM Computing Surveys (CSUR)},
  volume    = {32},
  number    = {2},
  pages     = {144--173},
  year      = {2000},
  publisher = {ACM Press},
  issn      = {0360-0300},
  doi       = {10.1145/358923.358934},
}

@inproceedings{inferr:gibson1998,
  author    = {Gibson, David and Kleinberg, Jon and Raghavan, Prabhakar},
  title     = {Inferring {Web} communities from link topology},
  booktitle = {Proceedings of the ninth ACM conference on Hypertext and hypermedia: links, objects, time and space---structure in hypermedia systems},
  pages     = {225--234},
  year      = {1998},
  publisher = {ACM Press},
  location  = {Pittsburgh, Pennsylvania, United States},
  isbn      = {0-89791-972-6},
  doi       = {10.1145/276627.276652},
}

@article{zipfs:levene2001,
  author  = {Levene, Mark and Borges, Jose and Loizou, George},
  title   = {{Zipf's} law for web surfers},
  journal = {Knowledge and Information Systems},
  volume  = {3},
  number  = {1},
  pages   = {120--129},
  year    = {2001},
}

@article{hubs:kleinberg1999,
  author    = {Kleinberg, Jon M.},
  title     = {Hubs, authorities, and communities},
  journal   = {ACM Computing Surveys (CSUR)},
  volume    = {31},
  number    = {4es},
  pages     = {5},
  year      = {1999},
  publisher = {ACM Press},
  issn      = {0360-0300},
  doi       = {10.1145/345966.345982},
}

@inproceedings{webgra:kleinberg1999,
  author    = {Kleinberg, J. and Kumar, S. R. and Raghavan, P. and Rajagopalan, S. and Tomkins, A.},
  title     = {The web as a graph: Measurements, models and methods},
  editor    = {Asano, Takao and Imai, Hiroshi and Lee, D. T. and Nakano, Shin-Ichi and Tokuyama, Takeshi},
  booktitle = {Computing and Combinatorics, 5th Annual International Conference, {COCOON}'99},
  series    = {Lecture Notes in Computer Science},
  volume    = {1627},
  pages     = {1--17},
  publisher = {Springer},
  address   = {Tokyo, Japan},
  month     = jul,
  year      = {1999},
  isbn      = {3-540-66200-6},
}

@article{distri:pirolli1999,
  author   = {Pirolli, P. and Pitkow, J. E.},
  title    = {Distributions of Surfers' Paths Through the {World Wide Web}: Empirical Characterization},
  journal  = {World Wide Web},
  volume   = {2},
  number   = {1--2},
  pages    = {29--45},
  year     = {1999},
  abstract = {Surfing the World Wide Web (WWW) involves traversing hyperlink
              connections among documents. The ability to predict surfing
              patterns could solve many problems facing producers and consumers
              of WWW content. We analyzed WWW server logs for a WWW site,
              collected over ten days, to compare different path reconstruction
              methods and to investigate how past surfing behavior predicts
              future surfing choices. Since log files do not explicitly contain
              user paths, various methods have evolved to reconstruct user
              paths. Session times, number of clicks per visit, and Levenshtein
              Distance analyses were performed to show the impact of various
              reconstruction methods. Different methods for measuring surfing
              patterns were also compared. Markov model approximations were
              used to model the probability of users choosing links conditional
              on past surfing paths. Information-theoretic (entropy)
              measurements suggest that information is gained by using longer
              paths to estimate the conditional probability of link choice
              given surf path. The improvements diminish, however, as one
              increases the length of path beyond one. Information-theoretic
              (total divergence to the average entropy) measurements suggest
              that the conditional probabilities of link choice given surf path
              are more stable over time for shorter paths than longer paths.
              Direct examination of the accuracy of the conditional probability
              models in predicting test data also suggests that shorter paths
              yield more stable models and can be estimated reliably with less
              data than longer paths.},
}

@article{pathpr:schechter1998,
  author   = {Schechter, Stuart and Krishnan, Murali and Smith, Michael D.},
  title    = {Using Path Profiles to Predict {HTTP} Requests},
  journal  = {Computer Networks and ISDN Systems},
  volume   = {30},
  pages    = {457--467},
  year     = {1998},
  abstract = {Webmasters often use the following rule of thumb to ensure that
              HTTP server performance does not degrade when traffic is at its
              heaviest: provide twice the server capacity required to handle
              your site's average load. As a result the server will spend half
              of its CPU cycles idle during normal operation. These cycles
              could be used to reduce the latency of a significant subset of
              HTTP transactions handled by the server. In this paper we
              introduce the use of path profiles for describing HTTP request
              behavior and describe an algorithm for efficiently creating these
              profiles. We then show that we can predict request behavior using
              path profiles with high enough probability to justify generating
              dynamic content before the client requests it. If requests are
              correctly predicted and pre-generated by the server, the end user
              will witness significantly lower latencies for these requests.},
}

@article{mining:chakrabarti1999,
  author  = {Chakrabarti, Soumen and Dom, Byron E. and Kumar, S. Ravi and Raghavan, Prabhakar and Rajagopalan, Sridhar and Tomkins, Andrew and Gibson, David and Kleinberg, Jon},
  title   = {Mining the {Web's} link structure},
  journal = {Computer},
  volume  = {32},
  number  = {8},
  pages   = {60--67},
  year    = {1999},
}

@inproceedings{author:kleinberg1998,
  author    = {Kleinberg, Jon M.},
  title     = {Authoritative sources in a hyperlinked environment},
  booktitle = {Proceedings of the 9th {ACM}-{SIAM} Symposium on Discrete Algorithms},
  address   = {Baltimore, {MD}},
  pages     = {668--677},
  year      = {1998},
}

@inproceedings{mining:heylighen2001,
  author    = {Heylighen, Francis},
  title     = {Mining Associative Meanings from the {Web}: from word disambiguation to the global brain},
  editor    = {Temmerman, R. and Lutjeharms, M.},
  booktitle = {Proceedings of the International Colloquium: Trends in Special Language and Language Technology},
  pages     = {15--44},
  publisher = {Standaard Editions},
  address   = {Antwerpen, Belgium},
  year      = {2001},
  url       = {http://pespmc1.vub.ac.be/Papers/MiningMeaning.pdf},
  abstract  = {A general problem in all systems to process language (parsing,
               translating, etc.) is ambiguity: words have many, fuzzily
               defined meanings, and meanings shift with the context. This may
               be tackled by quantifying the connotative or associative
               meaning, which can be represented as a matrix of mutual
               association strengths. With many thousands of words, there are
               billions of possible associations, though, and there is no
               obvious method to measure all of them. This ``knowledge
               acquisition bottleneck'' can be tackled by mining implicit
               associations from the billions of documents and millions of
               users on the World-Wide Web. The present paper discusses two
               methods to achieve this: lexical co-occurrence, a measurement of
               the frequency with which words appear in each other's
               neighborhood, and web learning algorithms, an application of the
               Hebbian rule to create associations between subsequently
               ``activated'' words or pages. The mechanism of spreading
               activation can be applied to the resulting associative networks
               for clustering, context-driven disambiguation, and personalized
               recommendation. A generalization of such methods could transform
               the web into a ``global brain'', that is, an intelligent,
               learning network that assimilates the implicit knowledge and
               preferences of its users.},
}

@article{collec:heylighen1999,
  author  = {Heylighen, Francis},
  title   = {Collective Intelligence and its Implementation on the {Web}: algorithms to develop a collective mental map},
  journal = {Computational and Mathematical Organization Theory},
  volume  = {5},
  number  = {3},
  pages   = {253--280},
  year    = {1999},
}

@book{minin:chakrabarti2003,
  author    = {Chakrabarti, Soumen},
  title     = {Mining the {Web}},
  publisher = {Morgan Kaufmann Publishers},
  address   = {San Francisco},
  year      = {2003},
}