# This is a file retrieved by webwalkers a.k.a. spiders that
# conform to a de facto standard (the Robots Exclusion Protocol).
# See http://www.robotstxt.org/
#   (TODO: the original reference URL is missing from this copy - confirm)
#
# The webmaster for this site is:
#   (TODO: contact address is missing from this copy - restore it)
#
# Format is:
#   User-agent: NAME-OF-SPIDER
#   Disallow: NOTHING | PATH
#
# Every directive must be on its own line, and groups are separated by a
# blank line. Paths are matched as prefixes and must start with "/".
# -----------------------------------------------------------------------------
#
# Sites listing Bots by User-agent:
# - http://net-promoter.com/robots-txt/spider_list
# - http://www.pgts.com.au/pgtsj/pgtsj0208d.html
#
# -----------------------------------------------------------------------------

# Default rules for every crawler that has no more specific group below.
User-agent: *
Disallow: /counter
Disallow: /download
Disallow: /log
Disallow: /ST
Disallow: /QuickPlace
Disallow: /mail
Disallow: /help
Disallow: /admin
Disallow: /iNotes
Disallow: /webapp
# Disallow: /formulare.nsf
Disallow: /icons
Disallow: /stats
Disallow: /w/
# Wildcard ("*") patterns are an extension to the original convention; crawlers
# that do not support them treat the "*" literally. The leading "/" is required
# for the rule to match anything. NOTE(review): assumes the images live under
# the top-level /images/ directory - confirm.
Disallow: /images/*.gif
Disallow: /images/*.png
# Some robots ignore the base href, so there are some problems with
# URLs being retrieved from the wrong directory. To fix this I use
# the following line.
Disallow: /0/

# Crawlers that are blocked from the whole site. "Disallow: /" is used instead
# of "Disallow: /*" so the block also works for crawlers without wildcard
# support.
User-agent: psbot
Disallow: /

# msnbot: keep GIF files out of the index, allow everything else.
User-agent: msnbot
Disallow: /*.gif

User-agent: baiduspider
Disallow: /

User-agent: thesubot
Disallow: /

# This crawler is identified by the URL it puts in its User-Agent header.
User-agent: http://www.almaden.ibm.com/cs/crawler
Disallow: /

User-agent: NPBot
Disallow: /

User-agent: Fasterfox
Disallow: /