# November 26, 2024

# Robots.txt for bmj.com

#
# robots.txt
#
# This file is to prevent the crawling and indexing of certain parts
# of your site by web crawlers and spiders run by sites like Yahoo!
# and Google. By telling these "robots" where not to go on your site,
# you save bandwidth and server resources.
#
# This file will be ignored unless it is at the root of your host:
# Used:    http://example.com/robots.txt
# Ignored: http://example.com/site/robots.txt
#
# For more information about the robots.txt standard, see:
# http://www.robotstxt.org/wc/robots.html
#
# For syntax checking, see:
# http://www.sxw.org.uk/computing/robots/check.html

# XML sitemaps advertised to crawlers: the main site sitemap and the
# sitemap index for the /careers/ section.
Sitemap: https://www.bmj.com/sitemap.xml
Sitemap: https://www.bmj.com/careers/sitemapindex.xml
# Default rules: apply to every crawler that is not matched by a more
# specific User-agent group in this file.
#
# NOTE: keep this group free of blank lines. Parsers that follow the
# original robotstxt.org record syntax treat a blank line as the end of a
# record and would silently drop every rule that comes after it. Use a
# lone "#" line as a visual separator instead.
User-agent: *
# Requested pause between successive requests (non-standard directive;
# not every crawler honors it).
Crawl-delay: 7
# Directories
Disallow: /includes/
Disallow: /misc/
Disallow: /modules/
Disallow: /profiles/
Disallow: /scripts/
Disallow: /themes/
# Files
Disallow: /CHANGELOG.txt
Disallow: /cron.php
Disallow: /INSTALL.mysql.txt
Disallow: /INSTALL.pgsql.txt
Disallow: /INSTALL.sqlite.txt
Disallow: /install.php
Disallow: /INSTALL.txt
Disallow: /LICENSE.txt
Disallow: /MAINTAINERS.txt
Disallow: /update.php
Disallow: /UPGRADE.txt
Disallow: /xmlrpc.php
# Paths (clean URLs)
Disallow: /admin/
Disallow: /comment/reply/
Disallow: /filter/tips/
Disallow: /node/add/
Disallow: /search/
Disallow: /user/register/
Disallow: /user/password/
Disallow: /user/login/
Disallow: /user/logout/
# Paths (no clean URLs)
Disallow: /?q=admin/
Disallow: /?q=comment/reply/
Disallow: /?q=filter/tips/
Disallow: /?q=node/add/
Disallow: /?q=search/
Disallow: /?q=user/password/
Disallow: /?q=user/register/
Disallow: /?q=user/login/
Disallow: /?q=user/logout/
#
# Distilled crawling changes
# /user/login without the trailing slash also covers the bare login URL.
Disallow: /user/login
Disallow: /content/*%7Bopenurl%7D
Disallow: /highwire
Disallow: /specialities/*/all-bmj/feed
Disallow: /bmj_countries/
# Blocks everything beneath /research/feed/ ; the feed URL itself (no
# trailing slash) is explicitly allowed further down.
Disallow: /research/feed/
# Adding this per SF-01095909, https://jira.highwire.org/browse/SUPPALLPLA-192
Disallow: /*.full-text.print$
Allow: /research/feed
Allow: /research/highlight/feed
# Adding per SF01158079
Disallow: /*.full.print$
#
# Page has performance issues, temp fix
Disallow: /rapid-responses
#
# Added per BESBJS request
Disallow: /node/
Disallow: /boacongress2011/
Disallow: /boacongress2012/
#
# 01178335
Disallow: /company/?s=
Disallow: /company/search/

# Block the crawler identifying itself as "008" from the entire site.
# NOTE(review): "008" is presumably the 80legs crawler - confirm before
# changing or removing this group.
User-agent: 008
Disallow: /

User-agent: *