Robots.txt for lemonde.fr

        # 16/08/2019
# Il est interdit d'utiliser des robots d'indexation Web ou d'autres méthodes automatiques de feuilletage ou de navigation sur ce site Web.
# Nous interdisons de crawler notre site Web en utilisant un agent d'utilisateur volé qui ne correspond pas à votre identité.
# « Violation du droit du producteur de base de données - article L 342-1 et suivant le Code de la propriété intellectuelle ».
# Nous vous invitons à nous contacter pour contracter une licence d'utilisation. Seuls les partenaires sont habilités à utiliser nos contenus pour un usage autre que strictement individuel.
#
#
# Default group: applies to every crawler that has no dedicated group of its own.
# Keep this group free of blank lines: parsers that follow the original
# blank-line-delimited record convention (e.g. Python's urllib.robotparser)
# end the record at the first blank line and ignore every rule after it.
User-agent: *
# Exceptions to the "Disallow: /ws/*" rule further down.
Allow: /ws/1/live/*
Allow: /ws/1/related_content/*
#
# Paths closed to all crawlers.
Disallow: /ajax/
Disallow: /ajah/
Disallow: /api/
Disallow: /beta
Disallow: /element/commun/afficher/
Disallow: /petites-annonces/
Disallow: /qui-sommes-nous/
Disallow: /txt/
Disallow: /verification/source/*
Disallow: /noscript/
Disallow: /ws/*
Disallow: /recherche/*
Disallow: /lemonde-beta/*
Disallow: /_rprt/*
Disallow: /layout/*
Disallow: /cgi-bin/*
Disallow: /envoyer-par-email/*
Disallow: /lmdgft/*
Disallow: /article-offert/*
Disallow: /*?s=43260*
Disallow: /*?contributions
Disallow: */reactions/
Disallow: */mmpub/
#
# WordPress paths under /blog/<name>/
Disallow: /blog/*/wp-admin/
Disallow: /blog/*/wp-includes/
Disallow: /blog/*/wp-content/plugins/
Disallow: /blog/*/wp-content/themes/
Disallow: /blog/*/wp-login.php
Disallow: /blog/*/wp-register.php
Disallow: /blog/*/author/admin/

#
# Sitemaps (Sitemap lines are not tied to any User-agent group)
Sitemap: https://www.lemonde.fr/sitemap_news.xml
Sitemap: https://www.lemonde.fr/sitemap_index.xml
Sitemap: https://www.lemonde.fr/codespromo/sitemap.xml
# Sitemaps for the /en/ section
Sitemap: https://www.lemonde.fr/en/sitemap_news.xml
Sitemap: https://www.lemonde.fr/en/sitemap_index.xml
#
# Dedicated group for the Googlebot-Image token.
# NOTE(review): under RFC 9309 a crawler obeys only the most specific matching
# group, so this group presumably replaces (does not extend) the
# "User-agent: *" rules for this crawler - confirm that is intended.
User-agent: Googlebot-Image
Allow: /image/
#
# Dedicated group for the Googlebot-News token: only /archives/ is closed.
# NOTE(review): a crawler matching this group presumably no longer applies the
# "User-agent: *" Disallow list (most-specific-group rule) - confirm intended.
User-agent: Googlebot-News
Disallow: /archives/
#
# Robots excluded from all indexing (each group below closes the whole site).
# NOTE(review): tokens containing spaces, dots or digits ("Ask n read",
# "scoop.it", "5emeRue", ...) fall outside the RFC 9309 product-token grammar
# (letters, "-" and "_"); how they are matched depends on the parser - verify.
User-agent: Meltwater
Disallow: /
#
User-agent: Digimind
Disallow: /
#
User-agent: Knowings
Disallow: /
#
User-agent: Sindup
Disallow: /
#
User-agent: Cision
Disallow: /
#
# NOTE(review): "Talkwater" may be a misspelling of "Talkwalker" - confirm the
# crawler's actual user-agent token before changing it.
User-agent: Talkwater
Disallow: /
#
User-agent: TurnitinBot
Disallow: /
#
User-agent: ConveraCrawler
Disallow: /
#
User-agent: Jetbot
Disallow: /
#
User-agent: NewsNow
Disallow: /
#
User-agent: kbcrawl
Disallow: /
#
User-agent: AmiSoftware
Disallow: /
#
User-agent: Newzbin
Disallow: /
#
User-agent: Ask n read
Disallow: /
#
User-agent: Qwam content intelligence
Disallow: /
#
User-agent: Zite
Disallow: /
#
User-agent: flipboard
Disallow: /
#
User-agent: Youmag
Disallow: /
#
User-agent: Synthesio
Disallow: /
#
User-agent: trendybuzz
Disallow: /
#
User-agent: spotter
Disallow: /
#
User-agent: scoop.it
Disallow: /
#
User-agent: linkfluence
Disallow: /
#
User-agent: 5emeRue
Disallow: /
#
User-agent: Augure
Disallow: /
#
User-agent: Corporama
Disallow: /
#
User-agent: grub-client
Disallow: /
#
# Archive crawlers: only the exact root URL is allowed.
# "Allow: /$" matches the path "/" and nothing longer; "Disallow: /*" closes
# every other path. This relies on the parser supporting "$" and "*".
User-agent: ia_archiver
Allow: /$
Disallow: /*
#
# Same policy for the second archive token.
User-agent: ia_archiver-web.archive.org
Allow: /$
Disallow: /*
#
# Further robots excluded from the whole site.
# The NewsNow and Newzbin groups that used to be repeated here were removed:
# both tokens are already declared once earlier in this file with the same rule.
User-agent: k2spider
Disallow: /
#
User-agent: libwww
Disallow: /
#
User-agent: wget
Disallow: /
#
User-agent: 5erue
Disallow: /
#
User-agent: adequat
Disallow: /
#
User-agent: adequat-systems
Disallow: /
#
User-agent: auramundi
Disallow: /
#
User-agent: coexel
Disallow: /
#
User-agent: ellisphere
Disallow: /
#
User-agent: leadbox
Disallow: /
#
User-agent: mention
Disallow: /
#
User-agent: Moreover
Disallow: /
#
User-agent: mytwip
Disallow: /
#
User-agent: opinion-tracker
Disallow: /
#
User-agent: proxem
Disallow: /
#
User-agent: score3
Disallow: /
#
User-agent: trendeo
Disallow: /
#
User-agent: vecteurplus
Disallow: /
#
User-agent: verticalsearch
Disallow: /
#
User-agent: vsw
Disallow: /
#
User-agent: winello
Disallow: /
#
# Further robots excluded from the whole site.
# The ellisphere and spotter groups that used to be repeated here were removed:
# both tokens are already declared once earlier in this file with the same rule.
User-agent: Fetch
Disallow: /
#
User-agent: infoseek
Disallow: /
#
User-agent: MSIECrawler
Disallow: /
#
User-agent: Offline Explorer
Disallow: /
#
User-agent: sitecheck.internetseer.com
Disallow: /
#
User-agent: Teleport
Disallow: /
#
User-agent: TeleportPro
Disallow: /
#
User-agent: WebCopier
Disallow: /
#
User-agent: WebStripper
Disallow: /
#
User-agent: Zealbot
Disallow: /
#
User-agent: asknread.com
Disallow: /
#
# Additional crawlers excluded from the whole site (one group per token).
# NOTE(review): judging by the token names these appear to be AI / dataset
# collection crawlers - confirm against each vendor's documentation.
User-agent: omgilibot
Disallow: /
#
User-agent: omgili
Disallow: /
#
User-agent: CCBot
Disallow: /
#
User-agent: Google-Extended
Disallow: /
#
User-agent: PerplexityBot
Disallow: /
#
User-agent: Bytespider
Disallow: /
#
User-agent: Diffbot
Disallow: /
#
User-agent: FacebookBot
Disallow: /
#
User-agent: YouBot
Disallow: /
#
User-agent: anthropic-ai
Disallow: /
#
User-agent: Claude-Web
Disallow: /
#
User-agent: ClaudeBot
Disallow: /
#
User-agent: cohere-ai
Disallow: /
#
User-agent: Applebot-Extended
Disallow: /
#
User-agent: Webzio-Extended
Disallow: /
#
User-agent: Meta-ExternalFetcher
Disallow: /
#
User-agent: Amazonbot
Disallow: /
#
User-agent: Timpibot
Disallow: /