# Layout notes:
#   - One group per User-agent, separated by a blank line. There are no blank
#     lines inside a group, because some older parsers treat a blank line as
#     the end of the group.
#   - Every path pattern starts with "/", as the Robots Exclusion Protocol
#     requires; "*" (any characters) and "$" (end of URL) are wildcards.
#   - Sitemap lines do not belong to any group and are listed at the end.

# For all robots
User-agent: *
# Block access to specific groups of pages
Disallow: /uk/basket
Disallow: /uk/checkout
Disallow: /uk/my-account
Disallow: /uk/account
Disallow: /us/basket
Disallow: /us/checkout
Disallow: /us/my-account
Disallow: /us/account
Disallow: /row
Disallow: /quickView
# Block URLs whose query string begins with "page=6". The pattern needs the
# leading "/*": a bare "?page=6" can never match, since every URL path starts
# with "/". NOTE(review): this also matches page=60, page=61, ... - confirm
# that is the intent.
Disallow: /*?page=6
# Remove duplication caused by URL facets.
Disallow: /*?q=
# Block PDF (applies to all robots, so it must live in this group)
Disallow: /*.pdf$
# Crawl throttling. These three directives are non-standard extensions:
# crawlers that do not recognise them simply ignore them.
# Maximum rate is one page every 5 seconds
Request-rate: 1/5
# 5 seconds between page requests
Crawl-delay: 5
# Only visit between 04:00 and 08:45 UTC
Visit-time: 0400-0845

# Block CazoodleBot as it does not present correct accept content headers
User-agent: CazoodleBot
Disallow: /

# Block MJ12bot as it is just noise
User-agent: MJ12bot
Disallow: /

# Block dotbot as it cannot parse base urls properly.
# The product token carries no version, so every dotbot release is matched.
User-agent: dotbot
Disallow: /

# Block Gigabot
User-agent: Gigabot
Disallow: /

# Allow search crawlers to discover the sitemaps
Sitemap: https://www.thewhitecompany.com/uk/sitemap.xml
Sitemap: https://www.thewhitecompany.com/us/sitemap.xml