# November 26, 2024
#
# Robots.txt for cookpad.com

# See http://www.robotstxt.org/wc/norobots.html for documentation on how to use the robots.txt file
# Default rules for any crawler that is not matched by a more specific
# User-agent group in this file: everything is crawlable except
# /user/confirm_premium_navi.
User-agent: *
Disallow: /user/confirm_premium_navi
Allow: /

# Baiduspider: only paths starting with /cn are open to crawling.
# Everything else is blocked by the final "Disallow: /". Paths starting with
# /cn/users and any URL containing "?_pxhc=" are blocked as well.
# NOTE(review): /cn/users is only blocked if the crawler applies
# longest-match precedence (RFC 9309) rather than first-match order,
# because "Allow: /cn" is listed first - confirm for Baiduspider.
User-agent: Baiduspider
Allow: /cn
Disallow: /*?_pxhc=*
Disallow: /cn/users
Disallow: /

# Yandex: the whole site is crawlable except paths matching /*/accounts/new.
User-agent: Yandex
Allow: /
Disallow: /*/accounts/new
# Clean-param is a Yandex-specific directive. Here it lists the query
# parameters epik, hl, noredir, find_method and hcb for URLs under /ru/.
Clean-param: epik&hl&noredir&find_method&hcb /ru/*
# See the link below for how Clean-param works for the Yandex crawler:
# https://yandex.ru/support/webmaster/robot-workings/clean-param.html?lang=en

# OpenAI crawler (GPTBot): blocked from the entire site
User-agent: GPTBot
Disallow: /

# OpenAI plugin bot (ChatGPT-User): blocked from the entire site
User-agent: ChatGPT-User
Disallow: /

# Block CCBot (its crawl data is used to create training datasets)
User-agent: CCBot
Disallow: /

# Anthropic AI bots: each token below is blocked from the entire site.
User-agent: anthropic-ai
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: ClaudeBot
Disallow: /

# Enterprise LLM crawler: blocked from the entire site
User-agent: cohere-ai
Disallow: /

# Generates LLM datasets (both agent tokens are blocked from the entire site)
User-agent: Omgilibot
Disallow: /
User-agent: Omgili
Disallow: /

# Default UA for a data scraping tool: blocked from the entire site
User-agent: Diffbot
Disallow: /

# Meta's FacebookBot crawler: blocked from the entire site
# https://developers.facebook.com/docs/sharing/bot/
User-agent: FacebookBot
Disallow: /

# Meta-ExternalAgent crawler: blocked from the entire site
# https://developers.facebook.com/docs/sharing/webmasters/web-crawlers
User-agent: Meta-ExternalAgent
Disallow: /

# Claims to be a reverse image search crawler, but is part of a
# training dataset generator for https://hivemoderation.com
User-agent: ImagesiftBot
Disallow: /

# LLM search crawler: blocked from the entire site
User-agent: PerplexityBot
Disallow: /

# TikTok generative LLM scraper: blocked from the entire site
User-agent: Bytespider
Disallow: /

# Advertising tool / LLM: blocked from the entire site
# NOTE(review): the second group repeats the token with a "/1.0" version
# suffix; crawlers that match on the bare product token would already be
# covered by the first group - confirm whether the versioned entry is needed.
User-agent: Peer39_crawler
Disallow: /
User-agent: Peer39_crawler/1.0
Disallow: /

# AI data scraper: blocked from the entire site
# https://darkvisitors.com/agents/timpibot

User-agent: Timpibot
Disallow: /

###
# The following do not impact search results or functionality,
# but do tell the companies and bots in question
# not to add crawled content to LLM datasets.
###

# https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers#google-extended
User-agent: Google-Extended
Disallow: /

# https://support.apple.com/en-us/119829
User-agent: Applebot-Extended
Disallow: /

# https://developers.facebook.com/docs/sharing/bot
# NOTE(review): FacebookBot already has an identical "Disallow: /" group
# earlier in this file, so this second group appears redundant.
User-agent: FacebookBot
Disallow: /