# November 26, 2024
#
# Robots.txt for commonsensemedia.org

#
# robots.txt
#
# This file is to prevent the crawling and indexing of certain parts
# of your site by web crawlers and spiders run by sites like Yahoo!
# and Google. By telling these "robots" where not to go on your site,
# you save bandwidth and server resources.
#
# This file will be ignored unless it is at the root of your host:
# Used:    http://example.com/robots.txt
# Ignored: http://example.com/site/robots.txt
#
# For more information about the robots.txt standard, see:
# http://www.robotstxt.org/robotstxt.html

# AI Bots
#
# One group per vendor: all of the vendor's User-agent tokens are stacked at
# the top of the group and share the single site-wide "Disallow: /" below them.

# OpenAI
User-agent: OAI-SearchBot
User-agent: GPTBot
User-agent: ChatGPT-User
Disallow: /

# Google AI (Bard/Vertex)
User-agent: Google-Extended
Disallow: /

# Common Crawl
User-agent: CCBot
Disallow: /

# Anthropic
# NOTE(review): Anthropic's published crawler token appears to be "ClaudeBot",
# which is not listed here -- confirm whether it should be added.
User-agent: anthropic-ai
User-agent: Claude-Web
Disallow: /

# https://webz.io/
User-agent: Omgilibot
User-agent: omgili
Disallow: /

# https://cohere.com/
User-agent: cohere-ai
Disallow: /

# https://www.perplexity.ai/
User-agent: PerplexityBot
Disallow: /

# https://about.you.com/youbot/
User-agent: YouBot
Disallow: /

# Facebook
User-agent: FacebookBot
Disallow: /

# ADDITIONAL BOTS UNDER REVIEW

# AmazonBot - https://developer.amazon.com/support/amazonbot
# Applebot - https://support.apple.com/en-us/HT204683

# Regular

User-agent: *
# CSS, JS, Images
# These Allow rules carve static assets out of the /core/ and /profiles/
# Disallow rules further down. "*.css$" matches a URL ending in .css;
# "*.css?" matches .css followed by a query string.
Allow: /core/*.css$
Allow: /core/*.css?
Allow: /core/*.js$
Allow: /core/*.js?
Allow: /core/*.gif
Allow: /core/*.jpg
Allow: /core/*.jpeg
Allow: /core/*.png
Allow: /core/*.svg
Allow: /profiles/*.css$
Allow: /profiles/*.css?
Allow: /profiles/*.js$
Allow: /profiles/*.js?
Allow: /profiles/*.gif
Allow: /profiles/*.jpg
Allow: /profiles/*.jpeg
Allow: /profiles/*.png
Allow: /profiles/*.svg
# Directories
Disallow: /core/
Disallow: /profiles/
# Files
Disallow: /README.txt
Disallow: /web.config
# Paths (clean URLs)
Disallow: /admin/
Disallow: /comment/reply/
Disallow: /filter/tips
Disallow: /node/add/
Disallow: /search/
# The /user/* rules deliberately have no trailing slash: rules are prefix
# matches, so "/user/login/" would leave the bare "/user/login" URL crawlable.
Disallow: /user/register
Disallow: /user/password
Disallow: /user/login
Disallow: /user/logout
# "/users/" already matches everything beneath it; a trailing "*" is redundant
# and is read literally by parsers that do not support wildcards.
Disallow: /users/
Disallow: /media/oembed
Disallow: /*/media/oembed
# User Review Comments Add Link
Disallow: /app-reviews/*/user-reviews/manage/add
Disallow: /book-reviews/*/user-reviews/manage/add
Disallow: /game-reviews/*/user-reviews/manage/add
Disallow: /movie-reviews/*/user-reviews/manage/add
Disallow: /podcast-reviews/*/user-reviews/manage/add
Disallow: /tv-reviews/*/user-reviews/manage/add
Disallow: /website-reviews/*/user-reviews/manage/add
Disallow: /youtube-reviews/*/user-reviews/manage/add
# Paths (no clean URLs)
# Mirrors the clean-URL rules above for URLs routed through /index.php.
Disallow: /index.php/admin/
Disallow: /index.php/comment/reply/
Disallow: /index.php/filter/tips
Disallow: /index.php/node/add/
Disallow: /index.php/search/
Disallow: /index.php/user/password
Disallow: /index.php/user/register
Disallow: /index.php/user/login
Disallow: /index.php/user/logout
Disallow: /index.php/users/
Disallow: /index.php/media/oembed
Disallow: /index.php/*/media/oembed
# User Review Comments Add Link
Disallow: /index.php/app-reviews/*/user-reviews/manage/add
Disallow: /index.php/book-reviews/*/user-reviews/manage/add
Disallow: /index.php/game-reviews/*/user-reviews/manage/add
Disallow: /index.php/movie-reviews/*/user-reviews/manage/add
Disallow: /index.php/podcast-reviews/*/user-reviews/manage/add
Disallow: /index.php/tv-reviews/*/user-reviews/manage/add
Disallow: /index.php/website-reviews/*/user-reviews/manage/add
Disallow: /index.php/youtube-reviews/*/user-reviews/manage/add

# Sitemap location; this line is independent of any User-agent group.
Sitemap: https://www.commonsensemedia.org/sitemap.xml