Free tools

Robots.txt Examples

Robots.txt file content for 20minutos.es.

Robots.txt file for: 20minutos.es

      User-agent: *
# Default rules for every crawler that has no more specific group in this file.
# The rules follow the User-agent line with no blank line in between: parsers
# that treat a blank line as a record separator (the original 1994 convention,
# e.g. Python's urllib.robotparser) would otherwise discard this whole group.
Disallow: /view/
Disallow: /view/*
Disallow: /buscar
Disallow: /busqueda/
Disallow: /busqueda/*
Disallow: /imprimir/
Disallow: /mini20
Disallow: /mini20/
Disallow: /home
Disallow: /home/
Disallow: /img_validator/
Disallow: /aviso_comentario/
Disallow: /enviar_amigo/
Disallow: /usuarios/
Disallow: /proc/
Disallow: /iphoneapp
Disallow: /iphoneapp/
Disallow: /ajax
Disallow: /compartir
Disallow: /sso/
Disallow: /especial/especial-de-prueba/
Disallow: /especial/pruebas-comercial/
Disallow: /widgets/
Disallow: /boletin/baja/
Disallow: /archivo/
Disallow: /archivo/*
# Web font files (wildcard + end-of-URL anchor syntax).
Disallow: /*.woff2$
Disallow: /*.ttf$

# Explicitly allowed agents
# An empty Disallow value blocks nothing, so this agent may fetch every path.
User-agent: proximic
Disallow:

# Agents blocked by language

User-agent: Yandex
Disallow: /
User-agent: Baiduspider
Disallow: /

# Harmful agents: each one listed below is denied the whole site.
# NOTE(review): some tokens contain spaces, '/', '.' or ':' (e.g. "Offline Explorer",
# "LexxeBot/1.0", "WeSEE:Search"); how such names are matched depends on the
# crawler -- confirm they still have the intended effect.

User-agent: 008
Disallow: /
User-agent: UnisterBot
Disallow: /
User-agent: JikeSpider
Disallow: /
User-agent: Zealbot
Disallow: /
User-agent: MSIECrawler
Disallow: /
User-agent: SiteSnagger
Disallow: /
User-agent: WebStripper
Disallow: /
User-agent: WebCopier
Disallow: /
User-agent: Fetch
Disallow: /
User-agent: Offline Explorer
Disallow: /
User-agent: Teleport
Disallow: /
User-agent: TeleportPro
Disallow: /
User-agent: WebZIP
Disallow: /
User-agent: linko
Disallow: /
User-agent: HTTrack
Disallow: /
User-agent: larbin
Disallow: /
User-agent: libwww
Disallow: /
User-agent: ZyBORG
Disallow: /
User-agent: UbiCrawler
Disallow: /
User-agent: DOC
Disallow: /
User-agent: Zao
Disallow: /
User-agent: sitecheck.internetseer.com
Disallow: /
User-agent: Download Ninja
Disallow: /
User-agent: Maxthon
Disallow: /
User-agent: CNCDialer
Disallow: /
User-agent: wget
Disallow: /
User-agent: grub-client
Disallow: /
User-agent: k2spider
Disallow: /
User-agent: NPBot
Disallow: /
User-agent: WebReaper
Disallow: /
User-agent: TurnitinBot
Disallow: /
User-agent: MJ12bot
Disallow: /
User-agent: magpie-crawler
Disallow: /
User-agent: Flamingo_SearchEngine
Disallow: /
User-agent: Exabot
Disallow: /
User-agent: Moreoverbot
Disallow: /
User-agent: CyberAlert
Disallow: /
User-agent: Newscan
Disallow: /
User-agent: Spinn3r
Disallow: /
User-agent: LexxeBot/1.0
Disallow: /
User-agent: NextGenSearchBot
Disallow: /
User-agent: Sosospider
Disallow: /
User-agent: rogerbot
Disallow: /
User-agent: SiteBot/0.1
Disallow: /
User-agent: SiteBot
Disallow: /
User-agent: CrystalSemanticsBot
Disallow: /
User-agent: NetSeer crawler
Disallow: /
User-agent: trovitBot
Disallow: /
User-agent: DotBot
Disallow: /
User-agent: Ezooms
Disallow: /
User-agent: discobot
Disallow: /
User-agent: Jyxobot
Disallow: /
User-agent: sogou
Disallow: /
User-agent: heritrix
Disallow: /
User-agent: NerdByNature.Bot
Disallow: /
User-agent: psbot
Disallow: /
User-agent: WBSearchBot
Disallow: /
User-agent: AddThis.com
Disallow: /
User-agent: discoverybot
Disallow: /
User-agent: bl.uk_lddc_bot
Disallow: /
User-agent: IstellaBot
Disallow: /
User-agent: seokicks
Disallow: /
User-agent: Bender
Disallow: /
User-agent: wotbox
Disallow: /
User-agent: Yasni
Disallow: /
User-agent: netEstate NE Crawler
Disallow: /
User-agent: Pixray-Seeker
Disallow: /
User-agent: integromedb
Disallow: /
User-agent: BLEXBot
Disallow: /
User-agent: BDCbot
Disallow: /
User-agent: WeSEE:Search
Disallow: /
User-agent: admantx
Disallow: /
User-agent: spbot
Disallow: /
User-agent: BUbiNG
Disallow: /

# AI crawlers

User-agent: GPTBot
Disallow: /
User-agent: Google-Extended
Disallow: /