Free tools

Robots.txt Examples

Robots.txt file content for arxiv.org.

Robots.txt file for: arxiv.org

      # robots.txt for http://arxiv.org/ and mirror sites http://*.arxiv.org/
# Indiscriminate automated downloads from this site are not permitted
# See also: http://arxiv.org/help/robots

# Default group: applies to any crawler that does not match one of the
# named User-agent groups elsewhere in this file.
User-agent: *
# Crawl-delay is a non-standard directive (not part of RFC 9309); crawlers
# that honor it conventionally read the value as seconds between requests.
Crawl-delay: 15
# Listing, abstract, and full-text paths that are explicitly allowed.
Allow: /archive
Allow: /year
Allow: /list
Allow: /abs
Allow: /pdf
Allow: /html
Allow: /catchup
# Disallowed paths. Rules are path-prefix matches, so e.g. /form also
# covers /format and /tb also covers /tb-recent.
Disallow: /user
Disallow: /e-print
Disallow: /src
Disallow: /ps
Disallow: /dvi
Disallow: /cookies
Disallow: /form
Disallow: /find
Disallow: /view
Disallow: /ftp
Disallow: /refs
Disallow: /cits
Disallow: /format
Disallow: /PS_cache
Disallow: /Stats
Disallow: /seek-and-destroy
Disallow: /IgnoreMe
Disallow: /oai2
Disallow: /auth
Disallow: /tb
Disallow: /tb-recent
Disallow: /trackback
Disallow: /prevnext
Disallow: /ct
Disallow: /api
Disallow: /search
# NOTE(review): /set_author_id and /show-email are listed only in this group.
# The named groups do not repeat them, and a crawler that matches a named
# group follows that group instead of this one. Confirm this is intended.
Disallow: /set_author_id
Disallow: /show-email

# Rules for Googlebot. A crawler matching this group uses these rules
# in place of the `User-agent: *` group. No Crawl-delay is set here.
User-agent: Googlebot
# Explicitly allowed listing, abstract, and full-text paths.
Allow: /archive
Allow: /year
Allow: /list
Allow: /abs
Allow: /pdf
Allow: /html
Allow: /catchup
# Disallowed path prefixes.
Disallow: /user
Disallow: /e-print
Disallow: /src
Disallow: /ps
Disallow: /dvi
Disallow: /cookies
Disallow: /form
Disallow: /find
Disallow: /view
Disallow: /ftp
Disallow: /refs
Disallow: /cits
Disallow: /format
Disallow: /PS_cache
Disallow: /Stats
Disallow: /seek-and-destroy
Disallow: /IgnoreMe
Disallow: /oai2
Disallow: /auth
Disallow: /tb
Disallow: /tb-recent
Disallow: /trackback
Disallow: /prevnext
Disallow: /ct
Disallow: /api
Disallow: /search

# Rules for the Yahoo crawler.
# NOTE(review): the agent value contains a space and '!'. Parsers that follow
# the RFC 9309 product-token syntax (letters, '_' and '-') may only read the
# leading "Yahoo" -- confirm this line still matches the intended crawler.
User-agent: Yahoo! Slurp
# Non-standard directive; conventionally seconds between requests.
Crawl-delay: 1
# Explicitly allowed listing, abstract, and full-text paths.
Allow: /archive
Allow: /year
Allow: /list
Allow: /abs
Allow: /pdf
Allow: /html
Allow: /catchup
# Disallowed path prefixes.
Disallow: /user
Disallow: /e-print
Disallow: /src
Disallow: /ps
Disallow: /dvi
Disallow: /cookies
Disallow: /form
Disallow: /find
Disallow: /view
Disallow: /ftp
Disallow: /refs
Disallow: /cits
Disallow: /format
Disallow: /PS_cache
Disallow: /Stats
Disallow: /seek-and-destroy
Disallow: /IgnoreMe
Disallow: /oai2
Disallow: /auth
Disallow: /tb
Disallow: /tb-recent
Disallow: /trackback
Disallow: /prevnext
Disallow: /ct
Disallow: /api
Disallow: /search

# Rules for bingbot. The dated notes below record that the Crawl-delay
# line was removed and then restored.
User-agent: bingbot
# 2021-10-14 - removed crawl-delay for Bingbot. Needs to be re-added if there are any problems.
# 2021-10-26 - added back
Crawl-delay: 1
# Explicitly allowed listing, abstract, and full-text paths.
Allow: /archive
Allow: /year
Allow: /list
Allow: /abs
Allow: /pdf
Allow: /html
Allow: /catchup
# Disallowed path prefixes.
Disallow: /user
Disallow: /e-print
Disallow: /src
Disallow: /ps
Disallow: /dvi
Disallow: /cookies
Disallow: /form
Disallow: /find
Disallow: /view
Disallow: /ftp
Disallow: /refs
Disallow: /cits
Disallow: /format
Disallow: /PS_cache
Disallow: /Stats
Disallow: /seek-and-destroy
Disallow: /IgnoreMe
Disallow: /oai2
Disallow: /auth
Disallow: /tb
Disallow: /tb-recent
Disallow: /trackback
Disallow: /prevnext
Disallow: /ct
Disallow: /api
Disallow: /search

# Rules for Baiduspider: same Allow/Disallow list as the Googlebot group,
# with a Crawl-delay of 10.
User-agent: Baiduspider
# Non-standard directive; conventionally seconds between requests.
Crawl-delay: 10
# Explicitly allowed listing, abstract, and full-text paths.
Allow: /archive
Allow: /year
Allow: /list
Allow: /abs
Allow: /pdf
Allow: /html
Allow: /catchup
# Disallowed path prefixes.
Disallow: /user
Disallow: /e-print
Disallow: /src
Disallow: /ps
Disallow: /dvi
Disallow: /cookies
Disallow: /form
Disallow: /find
Disallow: /view
Disallow: /ftp
Disallow: /refs
Disallow: /cits
Disallow: /format
Disallow: /PS_cache
Disallow: /Stats
Disallow: /seek-and-destroy
Disallow: /IgnoreMe
Disallow: /oai2
Disallow: /auth
Disallow: /tb
Disallow: /tb-recent
Disallow: /trackback
Disallow: /prevnext
Disallow: /ct
Disallow: /api
Disallow: /search

# Rules for ToutiaoSpider: same Allow/Disallow list as the Baiduspider
# group, with the same Crawl-delay of 10.
User-agent: ToutiaoSpider
# Non-standard directive; conventionally seconds between requests.
Crawl-delay: 10
# Explicitly allowed listing, abstract, and full-text paths.
Allow: /archive
Allow: /year
Allow: /list
Allow: /abs
Allow: /pdf
Allow: /html
Allow: /catchup
# Disallowed path prefixes.
Disallow: /user
Disallow: /e-print
Disallow: /src
Disallow: /ps
Disallow: /dvi
Disallow: /cookies
Disallow: /form
Disallow: /find
Disallow: /view
Disallow: /ftp
Disallow: /refs
Disallow: /cits
Disallow: /format
Disallow: /PS_cache
Disallow: /Stats
Disallow: /seek-and-destroy
Disallow: /IgnoreMe
Disallow: /oai2
Disallow: /auth
Disallow: /tb
Disallow: /tb-recent
Disallow: /trackback
Disallow: /prevnext
Disallow: /ct
Disallow: /api
Disallow: /search

# Rules for clients identifying with the SQUID cache agent string below.
# Unlike the search-engine groups, only /list, /abs and /pdf are allowed;
# /archive, /year, /html and /catchup are disallowed here.
User-agent: SQUID_configured_as_described_at_/help/faq/cache
# Non-standard directive; conventionally seconds between requests.
Crawl-delay: 10
# The only explicitly allowed paths for this agent.
Allow: /list
Allow: /abs
Allow: /pdf
# Paths that other groups allow but this group disallows.
Disallow: /archive
Disallow: /year
Disallow: /html
Disallow: /catchup
# Remaining disallowed path prefixes.
Disallow: /user
Disallow: /e-print
Disallow: /src
Disallow: /ps
Disallow: /dvi
Disallow: /cookies
Disallow: /form
Disallow: /find
Disallow: /view
Disallow: /ftp
Disallow: /refs
Disallow: /cits
Disallow: /format
Disallow: /PS_cache
Disallow: /Stats
Disallow: /seek-and-destroy
Disallow: /IgnoreMe
Disallow: /oai2
Disallow: /auth
Disallow: /tb
Disallow: /tb-recent
Disallow: /trackback
Disallow: /prevnext
Disallow: /ct
Disallow: /api
Disallow: /search

# Rules for YandexBot. This group's Disallow list differs from the other
# search-engine groups: many entries carry a trailing slash (so they match
# only paths under that directory), it adds /psfigs/, /register, /submit,
# /replace, /cross, /jref, /paper_passwd/ and /uploads, and it has no
# `Disallow: /user` line.
# NOTE(review): confirm the missing /user rule is intentional.
User-agent: YandexBot
# Non-standard directive; conventionally seconds between requests.
Crawl-delay: 1
# Explicitly allowed listing, abstract, and full-text paths.
Allow: /archive
Allow: /year
Allow: /list
Allow: /abs
Allow: /pdf
Allow: /html
Allow: /catchup
# Disallowed paths (directory-style entries end in '/').
Disallow: /e-print/
Disallow: /src/
Disallow: /ps/
Disallow: /psfigs/
Disallow: /dvi/
Disallow: /cookies/
Disallow: /form/
Disallow: /find/
Disallow: /view/
Disallow: /ftp/
Disallow: /refs/
Disallow: /cits/
Disallow: /format/
Disallow: /register
Disallow: /submit
Disallow: /replace
Disallow: /cross
Disallow: /jref
Disallow: /paper_passwd/
Disallow: /PS_cache/
Disallow: /Stats/
Disallow: /seek-and-destroy
Disallow: /IgnoreMe
Disallow: /uploads
Disallow: /oai2
Disallow: /auth
Disallow: /tb
Disallow: /tb-recent
Disallow: /trackback
Disallow: /prevnext
Disallow: /ct
Disallow: /api
Disallow: /search

# Rules for Applebot: same list as the bingbot group plus one extra
# `Disallow: /tb/recent` line.
User-agent: Applebot
# Non-standard directive; conventionally seconds between requests.
Crawl-delay: 1
# Explicitly allowed listing, abstract, and full-text paths.
Allow: /archive
Allow: /year
Allow: /list
Allow: /abs
Allow: /pdf
Allow: /html
Allow: /catchup
# Disallowed path prefixes.
Disallow: /user
Disallow: /e-print
Disallow: /src
Disallow: /ps
Disallow: /dvi
Disallow: /cookies
Disallow: /form
Disallow: /find
Disallow: /view
Disallow: /ftp
Disallow: /refs
Disallow: /cits
Disallow: /format
Disallow: /PS_cache
Disallow: /Stats
Disallow: /seek-and-destroy
Disallow: /IgnoreMe
Disallow: /oai2
Disallow: /auth
Disallow: /tb
# /tb/recent is already covered by the /tb prefix rule above.
Disallow: /tb/recent
Disallow: /tb-recent
Disallow: /trackback
Disallow: /prevnext
Disallow: /ct
Disallow: /api
Disallow: /search

# SemrushBot is disallowed from the entire site ("/" prefixes every path).
User-agent: SemrushBot
Disallow: /