November 26, 2024

Robots.txt for almaany.com

textile

# robots.txt for http://www.almaany.com/
# Default group: applies to every crawler that has no more specific
# User-agent group further down. "Disallow: /" already blocks the whole
# site, so the path-specific rules after it are redundant while it is present.
User-agent: *
Disallow: /
Disallow: /srchg2.php
Disallow: /12198389/
Disallow: /arab/
# NOTE(review): rules are path-prefix matches, so the next two only cover
# paths that literally begin with "/.php". If the intent is "any .php URL",
# the usual wildcard form is "/*.php" - confirm.
Disallow: /.php
Disallow: /.php?
Disallow: /cdn-cgi/
Disallow: /answers/message/
Disallow: /answers/ask
# Crawl-delay is a non-standard extension (not part of RFC 9309); crawlers
# that support it typically read it as seconds to wait between requests.
Crawl-delay: 5

# Exceptions to the default block: a crawler that has its own group follows
# only that group, so the "User-agent: *" rules do not apply to it.
# Googlebot may crawl everything except the paths listed here.
User-agent: Googlebot
Disallow: /ur/context/ar-en/
Disallow: /ur/context/ar-fr/
Disallow: /ur/context/ar-ar/
Disallow: /ur/context/ar-tr/
Disallow: /ur/context/ar-fa/
Disallow: /ur/context/ar-id/
Disallow: /ur/context/ar-de/
Disallow: /en/dict/ar-ar/
Disallow: /id/dict/ar-ar/
Disallow: /de/dict/ar-ar/
Disallow: /fa/dict/ar-ar/
Disallow: /ur/dict/ar-ar/
Disallow: /answers/message/
Disallow: /answers/ask
Disallow: /cdn-cgi/challenge-platform/

# Googlebot-Image (Google's image crawler): whole site allowed except the
# context/dict paths below. Because this group exists, the crawler uses it
# instead of the plain Googlebot group.
User-agent: Googlebot-Image
Disallow: /ur/context/ar-en/
Disallow: /ur/context/ar-fr/
Disallow: /ur/context/ar-ar/
Disallow: /ur/context/ar-tr/
Disallow: /ur/context/ar-fa/
Disallow: /ur/context/ar-id/
Disallow: /ur/context/ar-de/
Disallow: /en/dict/ar-ar/
Disallow: /id/dict/ar-ar/
Disallow: /de/dict/ar-ar/
Disallow: /fa/dict/ar-ar/
Disallow: /ur/dict/ar-ar/

# Mediapartners-Google (Google AdSense crawler): whole site allowed except
# the context/dict paths below.
User-agent: Mediapartners-Google
Disallow: /ur/context/ar-en/
Disallow: /ur/context/ar-fr/
Disallow: /ur/context/ar-ar/
Disallow: /ur/context/ar-tr/
Disallow: /ur/context/ar-fa/
Disallow: /ur/context/ar-id/
Disallow: /ur/context/ar-de/
Disallow: /en/dict/ar-ar/
Disallow: /id/dict/ar-ar/
Disallow: /de/dict/ar-ar/
Disallow: /fa/dict/ar-ar/
Disallow: /ur/dict/ar-ar/

# Yandex: allow only the two ar-ru dictionary paths and block everything else.
# The Allow lines come before "Disallow: /" so that parsers which stop at the
# first matching rule still honor them.
User-agent: Yandex
Allow: /ru/dict/ar-ru/
Allow: /ar/dict/ar-ru/
Disallow: /

# Blocked entirely: SEO, monitoring and scripting crawlers
# (Ahrefs, SEOkicks, SISTRIX, Uptime robot, 008, Ezooms, Perl LWP, BLEXBot,
# netEstate NE Crawler, WiseGuys). All agents below share the one rule.
User-agent: AhrefsBot
User-agent: SEOkicks-Robot
User-agent: SISTRIX Crawler
User-agent: UptimeRobot/2.0
User-agent: 008
User-agent: Ezooms Robot
User-agent: Perl LWP
User-agent: BLEXBot
User-agent: netEstate NE Crawler (+http://www.website-datenbank.de/)
User-agent: WiseGuys Robot
Disallow: /

# Blocked entirely: Turnitin crawler, under every name variant it is listed as.
User-agent: Turnitin Robot
User-agent: TurnitinBot
User-agent: Turnitin Bot
User-agent: TurnitinBot/3.0 (http://www.turnitin.com/robot/crawlerinfo.html)
User-agent: TurnitinBot/3.0
Disallow: /

# Blocked entirely: Heritrix, pricepi (pimonster), Searchmetrics Bot, Eniro.
# The two pimonster spellings used to be back-to-back records with no blank
# line between them; listing every agent ahead of a single rule keeps the
# group well-formed for parsers that rely on blank-line record separators.
User-agent: Heritrix
User-agent: pimonster
User-agent: Pimonster
User-agent: SearchmetricsBot
User-agent: ECCP/1.0 (search@eniro.com)
Disallow: /
# Block Baidu: one group, several User-agent lines, one shared rule.
# NOTE(review): crawlers normally match on the product token ("Baiduspider"),
# so the full "Mozilla/5.0 (...)" header strings and the versioned names are
# probably redundant - confirm before removing any of them.
User-agent: Baiduspider
User-agent: Baiduspider-video
User-agent: Baiduspider-image
User-agent: Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)
User-agent: Mozilla/5.0 (compatible; Baiduspider/3.0; +http://www.baidu.com/search/spider.html)
User-agent: Mozilla/5.0 (compatible; Baiduspider/4.0; +http://www.baidu.com/search/spider.html)
User-agent: Mozilla/5.0 (compatible; Baiduspider/5.0; +http://www.baidu.com/search/spider.html)
User-agent: Baiduspider/2.0
User-agent: Baiduspider/3.0
User-agent: Baiduspider/4.0
User-agent: Baiduspider/5.0
Disallow: /

# Blocked entirely: SoGou, Youdao, the Nikon JP crawler, and MegaIndex.ru
# (all listed spellings). All agents below share the one rule.
User-agent: Sogou Spider
User-agent: YoudaoBot
User-agent: gsa-crawler (Enterprise; T4-KNHH62CDKC2W3; gsa_manage@nikon-sys.co.jp)
User-agent: MegaIndex.ru/2.0
User-agent: MegaIndex.ru
User-agent: megaIndex.ru
Disallow: /

# Block Mail.RU crawlers (all listed name variants share one rule).
User-agent: Mail.RU_Bot/2.0
User-agent: Mail.RU
User-agent: Mail.RU_Bot/2.0; +http://go.mail.ru/help/robots
Disallow: /

# Block MJ12bot. Kept as its own group, separated by a blank line, so that
# parsers relying on blank-line record separators see two well-formed groups.
User-agent: MJ12bot
User-agent: MJ12bot/v1.4.3
Disallow: /

# Blocked entirely: miscellaneous crawlers, site copiers and offline
# downloaders. All agents below share the one rule.
User-agent: UbiCrawler
User-agent: DOC
User-agent: Zao
User-agent: Twiceler
User-agent: sitecheck.internetseer.com
User-agent: Zealbot
User-agent: MSIECrawler
User-agent: SiteSnagger
User-agent: WebStripper
User-agent: WebCopier
User-agent: Fetch
User-agent: Offline Explorer
User-agent: Teleport
User-agent: TeleportPro
User-agent: WebZIP
User-agent: linko
User-agent: HTTrack
User-agent: Microsoft.URL.Control
User-agent: Xenu
User-agent: larbin
User-agent: libwww
User-agent: ZyBORG
User-agent: Download Ninja
User-agent: Nutch
User-agent: spock
User-agent: OmniExplorer_Bot
User-agent: BecomeBot
User-agent: genieBot
User-agent: dotbot
User-agent: MLBot
User-agent: 80bot
User-agent: Linguee Bot
User-agent: aiHitBot
User-agent: Exabot
User-agent: SBIder/Nutch
User-agent: Jyxobot
User-agent: mAgent
User-agent: Speedy Spider
User-agent: ShopWiki
User-agent: Huasai
User-agent: DataCha0s
Disallow: /

# NOTE: Baiduspider is already listed in the "Block Baidu" group earlier in
# this file; this repeated entry applies the same rule and is redundant.
User-agent: Baiduspider
Disallow: /

# Blocked entirely: e-mail harvesters, generic HTTP libraries and other
# unwanted agents. All agents below share the one rule.
User-agent: Atomic_Email_Hunter
User-agent: Mp3Bot
User-agent: WinHttp
User-agent: betaBot
User-agent: core-project
User-agent: panscient.com
User-agent: Java
User-agent: libwww-perl
User-agent: ia_archiver
Disallow: /

# Block archive.org_bot. The trailing space after the agent name was removed
# so parsers that do not trim values still match the token exactly.
User-agent: archive.org_bot
Disallow: /