November 26, 2024

Robots.txt for merkur.de

        # robots.txt www.merkur.de
# Legal notice: www.merkur.de expressly reserves the right to use its content for commercial text and data mining (§ 44b UrhG).
# The use of robots or other automated means to access www.merkur.de or collect or mine data without the express permission of www.merkur.de is strictly prohibited.

# Default group: applies to every crawler that does not match a more specific
# User-agent group elsewhere in this file. Paths are prefix matches.
User-agent: *
Disallow: /lightweight-ajax
# "*" matches any run of characters, so this blocks any URL whose query string
# begins with "trafficsource".
Disallow: /*?trafficsource
Disallow: /suche/
# Same wildcard form: blocks URLs whose query string begins with "cmp=defrss".
Disallow: /*?cmp=defrss
Disallow: /test/
Disallow: /west/
Disallow: /fdn/bootstrap/
Disallow: /bi/bootstrap/
Disallow: /bi/doop/
Disallow: /sso/

# Sitemap lines are not tied to any User-agent group; they apply file-wide.
Sitemap: https://www.merkur.de/news.xml

# Crawlers that are barred from the entire site. They share one rule, so they
# are listed together as a single multi-agent group.
User-agent: xovi
User-agent: sistrix
User-agent: SearchmetricsBot
Disallow: /

# NOTE(review): a crawler obeys only the most specific group that names it, so
# bingbot follows just the two rules below and none of the "*" group's rules
# (e.g. /suche/, /sso/). Confirm that this is intended.
User-agent: bingbot
Disallow: /test/
Disallow: /west/

# These crawlers may fetch only /ueber-uns/; everything else is off limits.
# Allow is kept ahead of the blanket Disallow so that parsers applying the
# first matching rule reach the same result as longest-match parsers.
User-agent: GPTBot
User-agent: CCBot
Allow: /ueber-uns/
Disallow: /

# NOTE(review): as with any named group, msnbot applies only the rules below
# and not the "*" group's rules. Confirm that this is intended.
User-agent: msnbot
# Crawl-delay is not defined by RFC 9309; crawlers that do not support it
# ignore it. The value is presumably seconds between requests; verify against
# the target crawler's documentation.
Crawl-delay: 5
Disallow: /test/
Disallow: /west/

# Crawlers that are denied access to the whole site. Consecutive User-agent
# lines form one group that shares the rule at the end.
# CCBot is deliberately not listed here: it is configured in a separate group
# that additionally allows /ueber-uns/. Listing it in both places made the
# effective policy depend on how a parser resolves duplicate groups.
User-agent: Amazonbot
User-agent: Anthropic-ai
User-agent: Applebot-Extended
User-agent: AwarioRssBot
User-agent: AwarioSmartBot
User-agent: Bytespider
User-agent: ChatGPT-User
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: Cohere-ai
User-agent: DataForSeoBot
User-agent: FacebookBot
User-agent: Google-Extended
User-agent: ImagesiftBot
User-agent: Magpie-crawler
User-agent: Omgili
User-agent: Omgilibot
User-agent: Peer39_crawler
User-agent: Peer39_crawler/1.0
User-agent: PerplexityBot
User-agent: YouBot
Disallow: /