Free tools

Robots.txt Examples

Robots.txt file content for bu.edu.

Robots.txt file for: bu.edu

# Directions for robots.  See this URL:
# http://info.webcrawler.com/mak/projects/robots/norobots.html
# for a description of the file format.
# 2008-08-21
#####
# Here is where we override the default action

## Due to a bug in linklint, we must first specify a disallow in order
## for all other directories to be allowed. Feel free to add other
## disallows below the first disallow line.

User-agent: LinkLint
Disallow: /workaroundForLinkLintRandomDirForConfig/ 

#####
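# Allow W3C link Validator for /dev/ and /nisdev/
# skipping other dynamic content or private areas
# NOTE(review): this group also lists "Disallow: /dev/", which contradicts
# the statement above that /dev/ is allowed - confirm which is intended.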
# 2004-08-27 gaudette
#
User-agent: W3C-checklink
Disallow: /cms/
Disallow: /cgi-bin/
Disallow: /htbin/
Disallow: /htbin.ph/
Disallow: /BUbin/
Disallow: /bubin/
Disallow: /testing/
Disallow: /TESTING/
Disallow: /IT/SoftwareDist/
Disallow: /it/SoftwareDist/
Disallow: /software/
Disallow: /SOFTWARE/
Disallow: /IT/new/
Disallow: /it/new/
Disallow: /nis/
Disallow: /nishd/
Disallow: /library/working/
Disallow: /library/WORKING/
Disallow: /reports/
Disallow: /bulletins/work/
Disallow: /admissions/test/
Disallow: /cas/oldsite/
Disallow: /MPA/
Disallow: /finaid/test/
Disallow: /naitest/
Disallow: /newswire/
Disallow: /practice/
Disallow: /providers/
Disallow: /stats/
Disallow: /usc/test/
Disallow: /webcentral/output/
Disallow: /webmail/
Disallow: /alumni/portfolio/
Disallow: /dev/

#####
# gsa-crawler action - currently it allows access to most of the site
# skipping dynamic content or private areas
# (separate from the "User-agent: *" default group further down)
#
User-agent: gsa-crawler
Disallow: /cgi-bin/
Disallow: /cms/
Disallow: /htbin/
Disallow: /htbin.ph/
Disallow: /BUbin/
Disallow: /bubin/
Disallow: /testing/
Disallow: /TESTING/
Disallow: /IT/SoftwareDist/
Disallow: /it/SoftwareDist/
Disallow: /software/
Disallow: /SOFTWARE/
Disallow: /IT/new/
Disallow: /it/new/
Disallow: /library/working/
Disallow: /library/WORKING/
Disallow: /reports/
Disallow: /nisdev/
Disallow: /bulletins/work/
Disallow: /admissions/test/
Disallow: /cas/oldsite/
Disallow: /MPA/
Disallow: /finaid/test/
Disallow: /naitest/
Disallow: /newswire/
Disallow: /practice/
Disallow: /providers/
Disallow: /stats/
Disallow: /usc/test/
Disallow: /webcentral/output/
Disallow: /webmail/
Disallow: /alumni/portfolio/
Disallow: /dbin/dos/ocs/
Disallow: /dev/
Disallow: /wbur/arts/
Disallow: /wbur/connection/
Disallow: /wbur/herenow/
Disallow: /wbur/livingonearth/
Disallow: /wbur/miscellaneous/
Disallow: /wbur/onpoint/
Disallow: /wbur/special_projects_unit/
Disallow: /wbur/wburnews/
Disallow: /wbur/woi/
Disallow: /link/
Disallow: /home-media/
# BUniverse exclusions added by kgrin on 2010-04-26
Disallow: /buniverse/add/
Disallow: /buniverse/admin/
Disallow: /buniverse/buniverse1/
Disallow: /buniverse/contact/
Disallow: /buniverse/cron/
Disallow: /buniverse/data/
Disallow: /buniverse/delete/
Disallow: /buniverse/edit/
Disallow: /buniverse/embed/
Disallow: /buniverse/login/
Disallow: /buniverse/logout/
Disallow: /buniverse/messages/
Disallow: /buniverse/my-videos/
Disallow: /buniverse/search/
Disallow: /buniverse/support/
Disallow: /buniverse/util/
Disallow: /buniverse/viewed/
Disallow: /buniverse/vote/
Disallow: /buniverse/youtube/
Disallow: /summer/archive/

### 
# Emergency change 2012-02-14 bfenster, in response to incident
User-agent: 008
Disallow: / 

### 
# Emergency change 2014-11-17 bfenster, in response to incident
User-agent: Netsparker
Disallow: / 


#####
# default action - currently it allows access to most of the site
# skipping dynamic content or private areas
#
User-agent: *
Disallow: /cms/
Disallow: /cgi-bin/
Disallow: /htbin/
Disallow: /htbin.ph/
Disallow: /BUbin/
Disallow: /bubin/
Disallow: /testing/
Disallow: /TESTING/
Disallow: /IT/SoftwareDist/
Disallow: /it/SoftwareDist/
Disallow: /software/
Disallow: /SOFTWARE/
Disallow: /IT/new/
Disallow: /it/new/
Disallow: /nis/
Disallow: /library/working/
Disallow: /library/WORKING/
Disallow: /reports/
Disallow: /nisdev/
Disallow: /bulletins/work/
Disallow: /admissions/test/
Disallow: /cas/oldsite/
Disallow: /MPA/
Disallow: /finaid/test/
Disallow: /naitest/
Disallow: /newswire/
Disallow: /practice/
Disallow: /providers/
Disallow: /stats/
Disallow: /usc/test/
Disallow: /webcentral/output/
Disallow: /webmail/
Disallow: /alumni/portfolio/
Disallow: /dbin/dos/ocs/
Disallow: /dev/
Disallow: /wbur/arts/
Disallow: /wbur/connection/
Disallow: /wbur/herenow/
Disallow: /wbur/livingonearth/
Disallow: /wbur/miscellaneous/
Disallow: /wbur/onpoint/
Disallow: /wbur/special_projects_unit/
Disallow: /wbur/wburnews/
Disallow: /wbur/woi/
Disallow: /link/
Disallow: /home-media/
# BUniverse exclusions added by kgrin on 2010-04-21
Disallow: /buniverse/add/
Disallow: /buniverse/admin/
Disallow: /buniverse/buniverse1/
Disallow: /buniverse/contact/
Disallow: /buniverse/cron/
Disallow: /buniverse/data/
Disallow: /buniverse/delete/
Disallow: /buniverse/edit/
Disallow: /buniverse/embed/
Disallow: /buniverse/login/
Disallow: /buniverse/logout/
Disallow: /buniverse/messages/
Disallow: /buniverse/my-videos/
Disallow: /buniverse/search/
Disallow: /buniverse/support/
Disallow: /buniverse/util/
Disallow: /buniverse/viewed/
Disallow: /buniverse/vote/
Disallow: /buniverse/youtube/
# academics/summer archive exclusions added by kgrin on 2011-07-17
Disallow: /academics/archive/
Disallow: /summer/archive/
Crawl-delay: 15