# B.A. Media — robots.txt
# https://ba-media.nl
# Laatste update: mei 2026
#
# STRATEGIE: AI-zoekmachines mogen citeren.
# Trainingscrawlers mogen niet scrapen.

# ═══════════════════════════════════════
# SEARCH ENGINES — WELCOME
# ═══════════════════════════════════════
#
# Each search-engine crawler below has its own group that allows the
# whole site.
# NOTE(review): a crawler that matches one of these named groups follows
# only that group and ignores the "User-agent: *" group (RFC 9309,
# section 2.2.1). The Disallow rules listed under "User-agent: *" in this
# file therefore do NOT apply to these bots. TODO confirm this is intended.

User-agent: Googlebot
Allow: /

User-agent: Googlebot-Image
Allow: /

User-agent: Googlebot-Video
Allow: /

User-agent: Googlebot-News
Allow: /

User-agent: Storebot-Google
Allow: /

User-agent: bingbot
Allow: /

User-agent: msnbot
Allow: /

User-agent: DuckDuckBot
Allow: /

User-agent: Baiduspider
Allow: /

User-agent: Yandex
Allow: /

User-agent: Slurp
Allow: /

User-agent: Applebot
Allow: /

# ═══════════════════════════════════════
# SOCIAL MEDIA BOTS — WELCOME
# ═══════════════════════════════════════
#
# Link-preview fetchers; each one is allowed to fetch the whole site.
# NOTE(review): because each bot has its own group, none of them obeys
# the Disallow rules in the "User-agent: *" group (RFC 9309, section
# 2.2.1). TODO confirm this is intended before adding restrictions here.

User-agent: Twitterbot
Allow: /

User-agent: LinkedInBot
Allow: /

User-agent: facebookexternalhit
Allow: /

User-agent: WhatsApp
Allow: /

User-agent: TelegramBot
Allow: /

User-agent: Discordbot
Allow: /

User-agent: Slackbot
Allow: /

# ═══════════════════════════════════════
# AI SEARCH & CITATION BOTS — WELCOME
# These fetch content when a user asks a
# question. They cite your site, which
# means free visibility.
# ═══════════════════════════════════════
#
# NOTE(review): each bot below has its own group, so it ignores the
# Disallow rules in the "User-agent: *" group (RFC 9309, section 2.2.1).
# TODO confirm this is intended.

# OpenAI — ChatGPT search results
User-agent: ChatGPT-User
Allow: /

User-agent: OAI-SearchBot
Allow: /

# Anthropic — Claude search results
User-agent: Claude-User
Allow: /

User-agent: Claude-SearchBot
Allow: /

# Google AI — Gemini, NotebookLM
User-agent: Google-Agent
Allow: /

User-agent: GoogleAgent-URLContext
Allow: /

User-agent: Gemini-Deep-Research
Allow: /

User-agent: Google-NotebookLM
Allow: /

User-agent: Google-Read-Aloud
Allow: /

# Perplexity — answer engine
User-agent: PerplexityBot
Allow: /

User-agent: Perplexity-User
Allow: /

# DuckDuckGo AI
User-agent: DuckAssistBot
Allow: /

# Mistral AI
User-agent: MistralAI-User
Allow: /

# ═══════════════════════════════════════
# AI TRAINING CRAWLERS — BLOCKED
# These scrape content to train models.
# No citation, no link back.
# ═══════════════════════════════════════
#
# Each crawler below is denied the whole site. robots.txt is advisory:
# it only affects crawlers that choose to honor it.

# OpenAI — training
User-agent: GPTBot
Disallow: /

# Google — AI training data
User-agent: Google-Extended
Disallow: /

User-agent: Google-CloudVertexBot
Disallow: /

# Anthropic — training
User-agent: ClaudeBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

# Meta — Llama training
User-agent: Meta-ExternalAgent
Disallow: /

# NOTE(review): Meta-ExternalFetcher is presumably a user-initiated fetcher
# rather than a training crawler — confirm it belongs in this section.
User-agent: Meta-ExternalFetcher
Disallow: /

User-agent: FacebookBot
Disallow: /

# Apple — AI training
User-agent: Applebot-Extended
Disallow: /

# Amazon
User-agent: Amazonbot
Disallow: /

# ByteDance / TikTok
User-agent: Bytespider
Disallow: /

User-agent: TikTokSpider
Disallow: /

# Common Crawl — open dataset used for training
User-agent: CCBot
Disallow: /

# Cohere
User-agent: cohere-ai
Disallow: /

User-agent: cohere-training
Disallow: /

# Allen Institute for AI (Ai2)
User-agent: AI2Bot
Disallow: /

User-agent: ai2bot-dolma
Disallow: /

# Data scrapers
User-agent: Diffbot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: Omgili
Disallow: /

User-agent: webzio-extended
Disallow: /

User-agent: img2dataset
Disallow: /

User-agent: DataForSeoBot
Disallow: /

User-agent: Timpibot
Disallow: /

User-agent: SeekrBot
Disallow: /

# ═══════════════════════════════════════
# GENERAL RULES
# ═══════════════════════════════════════
#
# Fallback group: it applies only to crawlers that match none of the
# named User-agent groups elsewhere in this file (a crawler obeys its
# most specific matching group and ignores the rest).
# Rule paths are prefix matches; "*" matches any run of characters, so a
# trailing "*" is never needed.

User-agent: *
Allow: /
Disallow: /wp-admin/
Disallow: /admin/
Disallow: /api/
# Prefix match: also covers /bedankt/ and /bedankt?...
Disallow: /bedankt
# NOTE(review): /_next/ presumably holds Next.js build assets (JS/CSS);
# blocking it can keep crawlers from rendering pages — confirm it is wanted.
Disallow: /_next/
Disallow: /tmp/
Disallow: /search?
# Tracking parameters. Each parameter needs two rules: one for when it is
# the first query parameter ("?") and one for when it follows another ("&").
Disallow: /*?utm_
Disallow: /*&utm_
Disallow: /*?ref=
Disallow: /*&ref=
Disallow: /*?fbclid=
Disallow: /*&fbclid=
Disallow: /*?gclid=
Disallow: /*&gclid=

# ═══════════════════════════════════════
# SITEMAP & LLMs.txt
# ═══════════════════════════════════════
#
# The Sitemap line is independent of the User-agent groups and is visible
# to every crawler.
# NOTE(review): the heading mentions LLMs.txt, but nothing in this section
# refers to it. robots.txt has no standard directive for llms.txt —
# confirm whether https://ba-media.nl/llms.txt is actually published.

Sitemap: https://ba-media.nl/sitemap.xml