# Seattle Lawyer AI — robots.txt
# Last updated: 2026-06-12
#
# Policy: this site does not authorize AI model TRAINING, embedding
# extraction, or bulk republishing of its content (case-evaluation
# logic, WA legal commentary, fee schedules, or any other proprietary
# material). Standard search-engine crawling for human-readable indexing
# (Google, Bing, DuckDuckGo) is permitted, and AI assistant SEARCH /
# USER-FETCH agents are permitted so the firm can be found and cited in
# AI-assistant answers. Training crawlers remain blocked below.
#
# AI vendors that respect robots.txt MUST honor the rules below.
# Vendors that ignore robots.txt are also blocked at the Cloudflare
# edge via "Block AI Scrapers" + WAF rules (review WAF exceptions for
# the allowed agents below if Bot Management still challenges them).

# ── AI assistant SEARCH and USER-FETCH agents — allowed ──────────────
# Each agent below may fetch everything except /js/main.js and /api/.
#
# Rule order is deliberate: the Disallow lines come BEFORE "Allow: /".
# Parsers that follow RFC 9309 pick the longest matching rule, so order
# does not matter to them. Parsers that stop at the first matching rule
# (e.g. Python's urllib.robotparser) would match "Allow: /" immediately
# and never evaluate the Disallow lines if Allow were listed first.
User-agent: ChatGPT-User
Disallow: /js/main.js$
Disallow: /api/
Allow: /

User-agent: OAI-SearchBot
Disallow: /js/main.js$
Disallow: /api/
Allow: /

User-agent: Claude-User
Disallow: /js/main.js$
Disallow: /api/
Allow: /

User-agent: Claude-Web
Disallow: /js/main.js$
Disallow: /api/
Allow: /

User-agent: Claude-SearchBot
Disallow: /js/main.js$
Disallow: /api/
Allow: /

User-agent: PerplexityBot
Disallow: /js/main.js$
Disallow: /api/
Allow: /

User-agent: Perplexity-User
Disallow: /js/main.js$
Disallow: /api/
Allow: /

# ── Blocked agents — no part of the site may be fetched ──────────────
# Covers AI training / model-update crawlers plus the SEO, monitoring and
# generic HTTP-client agents listed further down. Agents that share the
# same rule set are merged into one group: a single "Disallow: /" applies
# to every User-agent line above it. Keep each group free of blank lines —
# some parsers treat a blank line as the end of the group.

# AI vendor and large-platform crawlers
User-agent: GPTBot
User-agent: ClaudeBot
User-agent: anthropic-ai
User-agent: Google-Extended
User-agent: GoogleOther
User-agent: CCBot
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
User-agent: Meta-ExternalAgent
User-agent: Meta-ExternalFetcher
User-agent: FacebookBot
User-agent: Applebot-Extended
User-agent: Bytespider
User-agent: ImagesiftBot
User-agent: Diffbot
User-agent: PetalBot
User-agent: Amazonbot
User-agent: YouBot
Disallow: /

# SEO, monitoring, and data-harvesting crawlers
User-agent: AwarioRssBot
User-agent: AwarioSmartBot
User-agent: SemrushBot
User-agent: AhrefsBot
User-agent: DataForSeoBot
User-agent: MJ12bot
User-agent: Timpibot
User-agent: omgili
User-agent: omgilibot
User-agent: peer39_crawler
User-agent: peer39_crawler/1.0
User-agent: ICC-Crawler
User-agent: ISSCyberRiskCrawler
User-agent: Kangaroo Bot
User-agent: img2dataset
Disallow: /

# Scraping frameworks and generic HTTP client libraries / tools
User-agent: Scrapy
User-agent: python-requests
User-agent: node-fetch
User-agent: axios
User-agent: Go-http-client
User-agent: Java
User-agent: curl
User-agent: Wget
Disallow: /

# ── Standard search engines: allowed for indexing (we want SEO) ──────
# Internal-only paths (drafts, notes, worker source, contract templates) are
# not deployed to the public host — they're kept in the source repo only.
# Their absence from this file is deliberate; do not add Disallow lines that
# enumerate internal directory names.
#
# A crawler that matches one of these named groups ignores the
# "User-agent: *" group entirely, so the default exclusions (/js/main.js
# and /api/) are repeated here; without them these crawlers could index
# API endpoints that every other agent is told to skip.
# Disallow lines precede "Allow: /" so that first-match parsers honor them
# too; longest-match parsers (RFC 9309) are unaffected by the order.
User-agent: Googlebot
Disallow: /js/main.js$
Disallow: /api/
Allow: /

User-agent: Bingbot
Disallow: /js/main.js$
Disallow: /api/
Allow: /

User-agent: DuckDuckBot
Disallow: /js/main.js$
Disallow: /api/
Allow: /

# ── Fallback group: every crawler not matched by a named group ───────
# Human-readable pages stay crawlable; only the API endpoints and the
# main site script are excluded.
User-agent: *
Disallow: /api/
Disallow: /js/main.js$

# Sitemap location (absolute URL; not tied to any User-agent group).
Sitemap: https://seattlelawyer.ai/sitemap.xml