# November 26, 2024

# Robots.txt for familysearch.org

        # LAST CHANGED: Tue Mar 29 2022, at 11:00:00 GMT+0000 (GMT)

# Version 1.0.10

# Default rules for every crawler that is not matched by a more specific
# User-agent group in this file.
# Keep the rule lines directly under the User-agent line: parsers that treat a
# blank line as a record separator (e.g. Python's urllib.robotparser) discard
# a User-agent line that is followed by a blank line, orphaning its rules.
User-agent: *
Disallow: /ark:/61903/1:
Disallow: /ark:/61903/2:
Disallow: /campaign/
Disallow: /cgi-bin/
Disallow: /Eng/
Disallow: /frontier/
Disallow: /identity/settings/
Disallow: /learningcenter
Disallow: /mgmt/
Disallow: /pal:/
Disallow: /photos/album/
Disallow: /photos/person/
Disallow: /photos/view/
Disallow: /profile/
Disallow: /records/pal:/
Disallow: /Search/
Disallow: /service/temple/cards
# The /tree rules are ordered most-specific first so that first-match parsers
# and longest-match (RFC 9309) parsers reach the same verdict: the listed
# sub-sections are blocked, the rest of /tree/ is open, and the bare /tree
# prefix itself is blocked.
Disallow: /tree/contributions
Disallow: /tree/find
Disallow: /tree/following
Disallow: /tree/import
Disallow: /tree/improve-place-names
Disallow: /tree/pedigree
Disallow: /tree/person
Disallow: /tree/sources
Allow: /tree/
Disallow: /tree


# Sitemap indexes for photo artifacts, one per artifact category (IMAGE, TEXT).
Sitemap: https://www.familysearch.org/photos/sitemapIndex?category=artifacts&artifactCategory=IMAGE
Sitemap: https://www.familysearch.org/photos/sitemapIndex?category=artifacts&artifactCategory=TEXT

# Allow Algolia to search /frontier
# NOTE(review): a crawler presumably obeys only the most specific group that
# matches it (RFC 9309), in which case this group also exempts Algolia Crawler
# from every Disallow in the "User-agent: *" groups - confirm this is intended.
User-agent: Algolia Crawler
Allow: /frontier

## Specific rules for /wiki/

# Please note: There are a lot of pages on this site, and there are some misbehaved spiders out there
# that go _way_ too fast. If you're irresponsible, your access to the site may be blocked.
#

# Each group below blocks one named crawler from everything under /wiki/.
# NOTE(review): these groups contain no other rules. A crawler that matches
# its own group presumably ignores the "User-agent: *" groups (RFC 9309), so
# the crawlers named here would not be bound by the site-wide Disallow list -
# confirm this is intended.

# advertising-related bots:
# NOTE(review): the trailing "*" is not part of a standard product token;
# parsers that compare the token literally may fail to match - verify.
User-agent: Mediapartners-Google*
Disallow: /wiki/

# Wikipedia work bots:
User-agent: IsraBot
Disallow: /wiki/

User-agent: Orthogaffe
Disallow: /wiki/

# Crawlers that are kind enough to obey, but which we'd rather not have
# unless they're feeding search engines.
User-agent: UbiCrawler
Disallow: /wiki/

User-agent: DOC
Disallow: /wiki/

User-agent: Zao
Disallow: /wiki/

# Some bots are known to be trouble, particularly those designed to copy
# entire sites. Please obey robots.txt.
# Every group in this section blocks the named bot from /wiki/ only.
# NOTE(review): a bot matching one of these groups presumably does not also
# apply the "User-agent: *" rules (RFC 9309), so paths outside /wiki/ stay
# open to it - confirm this is intended.
User-agent: sitecheck.internetseer.com
Disallow: /wiki/

User-agent: Zealbot
Disallow: /wiki/

User-agent: MSIECrawler
Disallow: /wiki/

User-agent: SiteSnagger
Disallow: /wiki/

User-agent: WebStripper
Disallow: /wiki/

User-agent: WebCopier
Disallow: /wiki/

User-agent: Fetch
Disallow: /wiki/

User-agent: Offline Explorer
Disallow: /wiki/

User-agent: Teleport
Disallow: /wiki/

User-agent: TeleportPro
Disallow: /wiki/

User-agent: WebZIP
Disallow: /wiki/

User-agent: linko
Disallow: /wiki/

User-agent: HTTrack
Disallow: /wiki/

User-agent: Microsoft.URL.Control
Disallow: /wiki/

User-agent: Xenu
Disallow: /wiki/

User-agent: larbin
Disallow: /wiki/

User-agent: libwww
Disallow: /wiki/

User-agent: ZyBORG
Disallow: /wiki/

User-agent: Download Ninja
Disallow: /wiki/

# The groups in this section block individual misbehaving clients from /wiki/.
# NOTE(review): only /wiki/ is disallowed here; a client matching its own
# group presumably skips the "User-agent: *" rules (RFC 9309), leaving the
# rest of the site open to it - confirm this is intended.

# Misbehaving: requests much too fast:
User-agent: fast
Disallow: /wiki/

#
# Sorry, wget in its recursive mode is a frequent problem.
# Please read the man page and use it properly; there is a
# --wait option you can use to set the delay between hits,
# for instance.
#
User-agent: wget
Disallow: /wiki/

#
# The 'grub' distributed client has been *very* poorly behaved.
#
User-agent: grub-client
Disallow: /wiki/

#
# Doesn't follow robots.txt anyway, but...
#
User-agent: k2spider
Disallow: /wiki/

#
# Hits many times per second, not acceptable
# http://www.nameprotect.com/botinfo.html
User-agent: NPBot
Disallow: /wiki/

# A capture bot, downloads gazillions of pages with no public benefit
# http://www.webreaper.net/
User-agent: WebReaper
Disallow: /wiki/

# Wayback Machine
# User-agent: archive.org_bot
# Treated like anyone else
# (The archive.org_bot group above is commented out, so no rule in this file
# names that crawler.)

# Allow the Internet Archiver to index action=raw and thereby store the raw wikitext of pages
# NOTE(review): this group holds only an Allow rule. If ia_archiver applies
# just this group and not the "User-agent: *" groups (RFC 9309), nothing is
# disallowed for it anywhere on the site - confirm this is intended.
User-agent: ia_archiver
Allow: /wiki/*&action=raw

#
# Friendly, low-speed bots are welcome viewing article pages, but not
# dynamically-generated pages please.
#
# Inktomi's "Slurp" can read a minimum delay between hits; if your
# bot supports such a thing using the 'Crawl-delay' or another
# instruction, please let us know.
#
# There is a special exception for API mobileview to allow dynamic
# mobile web & app views to load section content.
# These views aren't HTTP-cached but use parser cache aggressively
# and don't expose special: pages etc.
#
# Another exception is for REST API documentation, located at
# /api/rest_v1/?doc.
#
# Second "User-agent: *" group in this file (the first one holds the
# site-wide rules); this one covers /wiki/ only.
# NOTE(review): RFC 9309 parsers merge groups that share a user-agent, but
# some parsers keep only one "*" group (Python's urllib.robotparser appears to
# keep the first) - consider folding these rules into the first group.
# The Allow exceptions are listed before the broader Disallow prefixes they
# carve out of.
User-agent: *
Allow: /wiki/w/api.php?action=mobileview&
Allow: /wiki/w/load.php?
Allow: /wiki/api/rest_v1/?doc
# Disallow indexing of non-article content
Disallow: /wiki/w/
Disallow: /wiki/api/
Disallow: /wiki/trap/
#
# Block all Special: pages. Rules are prefix matches, so the trailing "*"
# should be redundant.
Disallow: /wiki/Special:*