Free tools

Robots.txt Examples

Robots.txt file content for weather.com.

Robots.txt file for: weather.com

      #
# /robots.txt
#

#
# Last updated by arjun.lather 08/29/2023
#

# Disallowed for PhantomJS


User-agent: *
# Crawl-delay: 10

User-agent: CriteoBot/0.1
Disallow:
Crawl-delay: .02

# Directories
Disallow: /includes/
Disallow: /life/
Disallow: /misc/
Disallow: /modules/
Disallow: /profiles/
Disallow: /scripts/
Disallow: /themes/
# Files
Disallow: /appspromo
Disallow: /CHANGELOG.txt
Disallow: /cron.php
Disallow: /INSTALL.mysql.txt
Disallow: /INSTALL.pgsql.txt
Disallow: /INSTALL.sqlite.txt
Disallow: /install.php
Disallow: /INSTALL.txt
Disallow: /LICENSE.txt
Disallow: /MAINTAINERS.txt
Disallow: /update.php
Disallow: /UPGRADE.txt
Disallow: /xmlrpc.php
# Paths (clean URLs)
Disallow: /migration/
Disallow: /admin/
Disallow: /comment/reply/
Disallow: /filter/tips/
Disallow: /node/add/
Disallow: /search/
Disallow: /user/register/
Disallow: /user/password/
Disallow: /user/login/
Disallow: /user/logout/
Disallow: /g00/
Disallow: /g01/
Disallow: /g02/
Disallow: /g03/
Disallow: /g04/
Disallow: /g05/
Disallow: /g06/
Disallow: /g07/
Disallow: /g08/
Disallow: /g09/
Disallow: /g10/
Disallow: /g11/
Disallow: /g12/
Disallow: /g13/
Disallow: /g14/
Disallow: /g15/
Disallow: /g16/
Disallow: /g17/
Disallow: /g18/
Disallow: /g19/
Disallow: /g20/
Disallow: /g21/
Disallow: /g22/
Disallow: /g23/
Disallow: /g24/
Disallow: /g25/
Disallow: /g26/
Disallow: /g27/
Disallow: /g28/
Disallow: /g29/
Disallow: /g30/
Disallow: /g31/
Disallow: /g32/
Disallow: /g33/
Disallow: /g34/
Disallow: /g35/
Disallow: /g36/
Disallow: /g37/
Disallow: /g38/
Disallow: /g39/
Disallow: /g40/
Disallow: /g41/
Disallow: /g42/
Disallow: /g43/
Disallow: /g44/
Disallow: /g45/
Disallow: /g46/
Disallow: /g47/
Disallow: /g48/
Disallow: /g49/
Disallow: /g50/
Disallow: /g51/
Disallow: /g52/
Disallow: /g53/
Disallow: /g54/
Disallow: /g55/
Disallow: /g56/
Disallow: /g57/
Disallow: /g58/
Disallow: /g59/
Disallow: /g60/
Disallow: /g61/
Disallow: /g62/
Disallow: /g63/
Disallow: /g64/
Disallow: /g65/
Disallow: /g66/
Disallow: /g67/
Disallow: /g68/
Disallow: /g69/
Disallow: /g70/
Disallow: /g71/
Disallow: /g72/
Disallow: /g73/
Disallow: /g74/
Disallow: /g75/
Disallow: /g76/
Disallow: /g77/
Disallow: /g78/
Disallow: /g79/
Disallow: /g80/
Disallow: /g81/
Disallow: /g82/
Disallow: /g83/
Disallow: /g84/
Disallow: /g85/
Disallow: /g86/
Disallow: /g87/
Disallow: /g88/
Disallow: /g89/
Disallow: /g90/
Disallow: /g91/
Disallow: /g92/
Disallow: /g93/
Disallow: /g94/
Disallow: /g95/
Disallow: /g96/
Disallow: /g97/
Disallow: /g98/
Disallow: /g99/
# Paths (no clean URLs)
Disallow: /?q=admin/
Disallow: /?q=comment/reply/
Disallow: /?q=filter/tips/
Disallow: /?q=node/add/
Disallow: /?q=search/
Disallow: /?q=user/password/
Disallow: /?q=user/register/
Disallow: /?q=user/login/
Disallow: /?q=user/logout/

Disallow: /sponsored
Disallow: /ugc
Disallow: /sponsored-content


#
# Block Bots
#

User-agent: Amazonbot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: AwarioRssBot
User-agent: AwarioSmartBot
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: DataForSeoBot
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: FriendlyCrawler
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: magpie-crawler
Disallow: /

User-agent: Meta-ExternalAgent
User-agent: meta-externalagent
Disallow: /

User-agent: NewsNow
Disallow: /

User-agent: news-please
Disallow: /

User-agent: OAI-SearchBot
Disallow: /

User-agent: omgili
Disallow: /

User-agent: omgilibot
Disallow: /

User-agent: peer39_crawler
User-agent: peer39_crawler/1.0
Disallow: /

User-agent: PerplexityBot
Disallow: /

User-agent: Quora-Bot
Disallow: /

User-agent: Scrapy
Disallow: /

User-agent: TurnitinBot
Disallow: /

#
# Sitemaps
#

Sitemap: https://weather.com/en-US/sitemaps/sitemap.xml
Sitemap: https://weather.com/pt-PT/sitemaps/sitemap.xml
Sitemap: https://weather.com/de-DE/sitemaps/sitemap.xml
Sitemap: https://weather.com/fr-FR/sitemaps/sitemap.xml
Sitemap: https://weather.com/es-US/sitemaps/sitemap.xml
Sitemap: https://weather.com/es-ES/sitemaps/sitemap.xml
Sitemap: https://weather.com/en-IN/sitemaps/sitemap.xml
Sitemap: https://weather.com/en-GB/sitemaps/sitemap.xml
Sitemap: https://weather.com/en-CA/sitemaps/sitemap.xml