{
  "openapi": "3.1.0",
  "info": {
    "title": "crawlcrawl API",
    "version": "0.5.0",
    "description": "Multi-tenant HTTP crawler API. URL \u2192 markdown, signals, structured data. Anti-bot routing on demand. Recurring monitors with diff-only webhooks. 30 endpoints across crawl, scan, cloud passthrough, monitors, account management.",
    "contact": {
      "name": "crawlcrawl support",
      "email": "hello@crawlcrawl.com"
    }
  },
  "servers": [
    {
      "url": "https://api.crawlcrawl.com",
      "description": "Production"
    }
  ],
  "security": [
    {
      "bearerAuth": []
    }
  ],
  "components": {
    "securitySchemes": {
      "bearerAuth": {
        "type": "http",
        "scheme": "bearer",
        "bearerFormat": "crk_<random>",
        "description": "Project API key. Self-rotate at /v1/keys/rotate."
      }
    },
    "schemas": {
      "ErrorEnvelope": {
        "type": "object",
        "properties": {
          "error": {
            "type": "object",
            "properties": {
              "code": {
                "type": "string",
                "example": "RATE_LIMITED"
              },
              "message": {
                "type": "string"
              },
              "retry_after_seconds": {
                "type": "integer"
              }
            }
          }
        }
      },
      "CrawlRunRef": {
        "type": "object",
        "required": [
          "id",
          "status",
          "url"
        ],
        "properties": {
          "id": {
            "type": "integer",
            "format": "int64"
          },
          "status": {
            "type": "string",
            "enum": [
              "queued",
              "running",
              "done",
              "failed",
              "cancelled"
            ]
          },
          "url": {
            "type": "string",
            "format": "uri"
          }
        }
      },
      "CrawlRunDetail": {
        "type": "object",
        "properties": {
          "id": {
            "type": "integer",
            "format": "int64"
          },
          "url": {
            "type": "string",
            "format": "uri"
          },
          "status": {
            "type": "string"
          },
          "page_count": {
            "type": "integer",
            "format": "int64"
          },
          "error_count": {
            "type": "integer",
            "format": "int64"
          },
          "enqueued_at": {
            "type": "string",
            "format": "date-time"
          },
          "started_at": {
            "type": "string",
            "format": "date-time",
            "nullable": true
          },
          "finished_at": {
            "type": "string",
            "format": "date-time",
            "nullable": true
          },
          "error_message": {
            "type": "string",
            "nullable": true
          },
          "ai_bot_policy": {
            "type": "object",
            "additionalProperties": {
              "type": "string"
            },
            "nullable": true
          },
          "llms_txt": {
            "type": "string",
            "nullable": true
          },
          "llms_full_txt": {
            "type": "string",
            "nullable": true
          }
        }
      },
      "Signals": {
        "type": "object",
        "properties": {
          "title": {
            "type": "string",
            "nullable": true
          },
          "description": {
            "type": "string",
            "nullable": true
          },
          "canonical": {
            "type": "string",
            "nullable": true
          },
          "lang": {
            "type": "string",
            "nullable": true
          },
          "h1": {
            "type": "array",
            "items": {
              "type": "string"
            }
          },
          "h2": {
            "type": "array",
            "items": {
              "type": "string"
            }
          },
          "h3": {
            "type": "array",
            "items": {
              "type": "string"
            }
          },
          "hreflang": {
            "type": "object",
            "additionalProperties": {
              "type": "string"
            }
          },
          "robots_meta": {
            "type": "array",
            "items": {
              "type": "string"
            }
          },
          "ai_directives": {
            "type": "object"
          },
          "jsonld_types": {
            "type": "array",
            "items": {
              "type": "string"
            }
          },
          "word_count": {
            "type": "integer"
          },
          "char_count": {
            "type": "integer"
          },
          "links": {
            "type": "object",
            "properties": {
              "internal": {
                "type": "integer"
              },
              "external": {
                "type": "integer"
              },
              "nofollow": {
                "type": "integer"
              },
              "total": {
                "type": "integer"
              }
            }
          }
        }
      }
    }
  },
  "paths": {
    "/v1/health": {
      "get": {
        "summary": "Liveness check",
        "security": [],
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/ready": {
      "get": {
        "summary": "Readiness check (DB + worker heartbeat)",
        "security": [],
        "responses": {
          "200": {
            "description": "ready"
          },
          "503": {
            "description": "unhealthy"
          }
        }
      }
    },
    "/v1/crawls": {
      "post": {
        "summary": "Start an asynchronous multi-page crawl",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "required": [
                  "url"
                ],
                "properties": {
                  "url": {
                    "type": "string",
                    "format": "uri"
                  },
                  "max_pages": {
                    "type": "integer",
                    "minimum": 1,
                    "maximum": 100000,
                    "default": 1000
                  },
                  "depth": {
                    "type": "integer",
                    "minimum": 1,
                    "maximum": 50,
                    "default": 5
                  },
                  "concurrency": {
                    "type": "integer",
                    "minimum": 1
                  },
                  "delay_ms": {
                    "type": "integer",
                    "default": 250
                  },
                  "subdomains": {
                    "type": "boolean",
                    "default": false
                  },
                  "respect_robots": {
                    "type": "boolean",
                    "default": true
                  },
                  "store_html": {
                    "type": "boolean",
                    "default": true
                  },
                  "seed_kind": {
                    "type": "string",
                    "enum": [
                      "url",
                      "sitemap"
                    ],
                    "default": "url"
                  },
                  "headers": {
                    "type": "object",
                    "additionalProperties": {
                      "type": "string"
                    }
                  },
                  "cookies": {
                    "type": "string"
                  },
                  "exclude_paths": {
                    "type": "array",
                    "items": {
                      "type": "string"
                    },
                    "description": "Regex patterns to skip"
                  },
                  "include_paths": {
                    "type": "array",
                    "items": {
                      "type": "string"
                    },
                    "description": "Regex patterns to allow only"
                  },
                  "webhook_url": {
                    "type": "string",
                    "format": "uri"
                  },
                  "max_age_seconds": {
                    "type": "integer",
                    "description": "Return cached run if a recent done one exists for the same URL."
                  },
                  "user_agent": {
                    "type": "string"
                  },
                  "proxy_url": {
                    "type": "string"
                  },
                  "webhook_events": {
                    "type": "array",
                    "items": {
                      "type": "string",
                      "enum": [
                        "crawl.done",
                        "crawl.diff_detected"
                      ]
                    },
                    "description": "Subset of events to deliver via webhook. Default ['crawl.done']."
                  },
                  "return_only_changed": {
                    "type": "boolean",
                    "default": false,
                    "description": "For recurring monitors only: skip webhook unless content_hash diff vs previous run is non-zero."
                  },
                  "cron": {
                    "type": "string",
                    "description": "5-field UTC cron expression. When set, creates a recurring monitor instead of one-shot run. Manage via /v1/crons."
                  }
                }
              }
            }
          }
        },
        "responses": {
          "201": {
            "description": "Created",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CrawlRunRef"
                }
              }
            }
          },
          "401": {
            "description": "Bad token",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorEnvelope"
                }
              }
            }
          },
          "422": {
            "description": "Validation failed"
          },
          "429": {
            "description": "Quota exceeded \u2014 see Retry-After header"
          }
        }
      }
    },
    "/v1/crawls/{id}": {
      "get": {
        "summary": "Crawl status",
        "parameters": [
          {
            "name": "id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "integer",
              "format": "int64"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "ok",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CrawlRunDetail"
                }
              }
            }
          }
        }
      },
      "delete": {
        "summary": "Cancel + cascade-delete the crawl",
        "parameters": [
          {
            "name": "id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "integer",
              "format": "int64"
            }
          }
        ],
        "responses": {
          "204": {
            "description": "deleted"
          },
          "404": {
            "description": "not found"
          }
        }
      }
    },
    "/v1/crawls/{id}/pages": {
      "get": {
        "summary": "List pages from a crawl run",
        "parameters": [
          {
            "name": "id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "integer",
              "format": "int64"
            }
          },
          {
            "name": "limit",
            "in": "query",
            "schema": {
              "type": "integer",
              "default": 100,
              "maximum": 100000
            }
          },
          {
            "name": "offset",
            "in": "query",
            "schema": {
              "type": "integer",
              "default": 0
            }
          },
          {
            "name": "status",
            "in": "query",
            "schema": {
              "type": "integer"
            }
          },
          {
            "name": "format",
            "in": "query",
            "schema": {
              "type": "string",
              "enum": [
                "json",
                "ndjson",
                "csv"
              ],
              "default": "json"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "ok",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object"
                }
              },
              "application/x-ndjson": {
                "schema": {
                  "type": "string"
                }
              },
              "text/csv": {
                "schema": {
                  "type": "string"
                }
              }
            }
          }
        }
      }
    },
    "/v1/crawls/{id}/links": {
      "get": {
        "summary": "Internal link graph (lazy-extracted)",
        "parameters": [
          {
            "name": "id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "integer",
              "format": "int64"
            }
          },
          {
            "name": "limit",
            "in": "query",
            "schema": {
              "type": "integer",
              "default": 100
            }
          },
          {
            "name": "offset",
            "in": "query",
            "schema": {
              "type": "integer",
              "default": 0
            }
          },
          {
            "name": "from_page_id",
            "in": "query",
            "schema": {
              "type": "integer",
              "format": "int64"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/crawls/{id}/orphans": {
      "get": {
        "summary": "Pages with zero inbound internal links",
        "parameters": [
          {
            "name": "id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "integer",
              "format": "int64"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/pages/{id}": {
      "get": {
        "summary": "Fetch one stored page",
        "parameters": [
          {
            "name": "id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "integer",
              "format": "int64"
            }
          },
          {
            "name": "format",
            "in": "query",
            "schema": {
              "type": "string",
              "enum": [
                "html",
                "markdown",
                "article",
                "both",
                "full",
                "signals"
              ],
              "default": "html"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/scan": {
      "post": {
        "summary": "Synchronous single-URL scan \u2192 markdown + signals + AI bot policy + llms.txt",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "required": [
                  "url"
                ],
                "properties": {
                  "url": {
                    "type": "string",
                    "format": "uri"
                  },
                  "user_agent": {
                    "type": "string"
                  },
                  "max_age_seconds": {
                    "type": "integer",
                    "description": "Hydrate from a stored page row if one exists within this many seconds."
                  },
                  "cloud_mode": {
                    "type": "string",
                    "enum": [
                      "none",
                      "auto",
                      "unblocker",
                      "browser"
                    ],
                    "description": "Override the project's default routing mode."
                  },
                  "metadata_only": {
                    "type": "boolean",
                    "description": "Skip markdown/article extraction; return only metadata + signals."
                  },
                  "only_main_content": {
                    "type": "boolean",
                    "description": "Strip nav/footer/sidebar before markdown conversion."
                  },
                  "include_links": {
                    "type": "boolean",
                    "description": "Return the full anchor list with text and rel attributes."
                  },
                  "screenshot_inline": {
                    "type": "boolean",
                    "description": "Include a base64-encoded full-page screenshot (requires cloud)."
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/scan/bulk": {
      "post": {
        "summary": "Parallel multi-URL scan (max 100 URLs per call)",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "required": [
                  "urls"
                ],
                "properties": {
                  "urls": {
                    "type": "array",
                    "items": {
                      "type": "string",
                      "format": "uri"
                    },
                    "maxItems": 100
                  },
                  "user_agent": {
                    "type": "string"
                  },
                  "max_age_seconds": {
                    "type": "integer"
                  },
                  "concurrency": {
                    "type": "integer",
                    "default": 8,
                    "maximum": 32
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/cloud/scrape": {
      "post": {
        "summary": "Direct anti-bot scrape via cloud backend",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "required": [
                  "url"
                ],
                "properties": {
                  "url": {
                    "type": "string",
                    "format": "uri"
                  },
                  "return_format": {
                    "type": "string",
                    "enum": [
                      "markdown",
                      "raw",
                      "text",
                      "commonmark"
                    ],
                    "default": "markdown"
                  },
                  "chrome": {
                    "type": "boolean",
                    "default": false
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/cloud/crawl": {
      "post": {
        "summary": "Multi-page anti-bot crawl via cloud backend",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "required": [
                  "url"
                ],
                "properties": {
                  "url": {
                    "type": "string",
                    "format": "uri"
                  },
                  "limit": {
                    "type": "integer",
                    "default": 20,
                    "maximum": 500
                  },
                  "return_format": {
                    "type": "string",
                    "default": "markdown"
                  },
                  "chrome": {
                    "type": "boolean",
                    "default": false
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/cloud/search": {
      "post": {
        "summary": "SERP-style search across the open web",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "required": [
                  "query"
                ],
                "properties": {
                  "query": {
                    "type": "string"
                  },
                  "limit": {
                    "type": "integer",
                    "default": 10,
                    "maximum": 50
                  },
                  "return_format": {
                    "type": "string",
                    "default": "markdown"
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/cloud/links": {
      "post": {
        "summary": "Fast link discovery on a domain (anti-bot)",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "required": [
                  "url"
                ],
                "properties": {
                  "url": {
                    "type": "string",
                    "format": "uri"
                  },
                  "limit": {
                    "type": "integer",
                    "default": 100,
                    "maximum": 5000
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/cloud/screenshot": {
      "post": {
        "summary": "Full-page screenshot PNG",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "required": [
                  "url"
                ],
                "properties": {
                  "url": {
                    "type": "string",
                    "format": "uri"
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "PNG bytes",
            "content": {
              "image/png": {
                "schema": {
                  "type": "string",
                  "format": "binary"
                }
              }
            }
          }
        }
      }
    },
    "/v1/cloud/balance": {
      "get": {
        "summary": "Remaining cloud-backend balance for the account",
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/usage": {
      "get": {
        "summary": "Current period usage + tier caps + retry guidance",
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/usage/history": {
      "get": {
        "summary": "Daily usage buckets",
        "parameters": [
          {
            "name": "days",
            "in": "query",
            "schema": {
              "type": "integer",
              "default": 30,
              "maximum": 365
            }
          }
        ],
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/keys": {
      "get": {
        "summary": "List API keys (active, expired, revoked)",
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/keys/rotate": {
      "post": {
        "summary": "Issue new key, set grace period on existing keys",
        "requestBody": {
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "label": {
                    "type": "string"
                  },
                  "grace_seconds": {
                    "type": "integer",
                    "default": 86400
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "new key \u2014 shown ONCE"
          }
        }
      }
    },
    "/v1/keys/{prefix}": {
      "delete": {
        "summary": "Revoke a key by its 12-char prefix",
        "parameters": [
          {
            "name": "prefix",
            "in": "path",
            "required": true,
            "schema": {
              "type": "string"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "revoked"
          },
          "409": {
            "description": "would leave project with no active key"
          }
        }
      }
    },
    "/v1/webhook/secret": {
      "get": {
        "summary": "HMAC-SHA256 secret for verifying webhook signatures",
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/robots-policy": {
      "get": {
        "summary": "Parsed AI-bot policy + raw robots.txt + llms.txt for any host",
        "parameters": [
          {
            "name": "url",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string",
              "format": "uri"
            }
          },
          {
            "name": "user_agent",
            "in": "query",
            "schema": {
              "type": "string"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/health/cloud": {
      "get": {
        "summary": "Cloud backend balance + this project's last-24h cloud usage",
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/logs": {
      "get": {
        "summary": "Audit log \u2014 last N requests for this project",
        "parameters": [
          {
            "name": "limit",
            "in": "query",
            "schema": {
              "type": "integer",
              "default": 100,
              "maximum": 500
            }
          },
          {
            "name": "offset",
            "in": "query",
            "schema": {
              "type": "integer",
              "default": 0
            }
          },
          {
            "name": "status_min",
            "in": "query",
            "schema": {
              "type": "integer"
            }
          },
          {
            "name": "status_max",
            "in": "query",
            "schema": {
              "type": "integer"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/llms-txt-build": {
      "post": {
        "summary": "Crawl a domain and emit a llms.txt file ready to publish",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "required": [
                  "url"
                ],
                "properties": {
                  "url": {
                    "type": "string",
                    "format": "uri"
                  },
                  "max_pages": {
                    "type": "integer",
                    "default": 30,
                    "maximum": 200
                  },
                  "site_name": {
                    "type": "string"
                  },
                  "summary": {
                    "type": "string"
                  },
                  "wait_seconds": {
                    "type": "integer",
                    "default": 60,
                    "maximum": 180
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "llms.txt content",
            "content": {
              "text/plain": {
                "schema": {
                  "type": "string"
                }
              }
            }
          },
          "408": {
            "description": "Crawl exceeded wait_seconds \u2014 poll /v1/crawls/{run_id} to recover"
          }
        }
      }
    },
    "/v1/crawls/{old_id}/diff/{new_id}": {
      "get": {
        "summary": "Compare two crawl runs by content_hash",
        "parameters": [
          {
            "name": "old_id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "integer",
              "format": "int64"
            }
          },
          {
            "name": "new_id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "integer",
              "format": "int64"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "diff result with added/removed/changed/unchanged"
          }
        }
      }
    },
    "/v1/crons": {
      "get": {
        "summary": "List active recurring monitors",
        "responses": {
          "200": {
            "description": "ok"
          }
        }
      }
    },
    "/v1/crons/{id}": {
      "delete": {
        "summary": "Delete a recurring monitor",
        "parameters": [
          {
            "name": "id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "integer",
              "format": "int64"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "deleted"
          },
          "404": {
            "description": "not found"
          }
        }
      }
    }
  }
}