{
  "object": "list",
  "data": [
    {
      "name": "OpenAI: GPT-3.5 Turbo",
      "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\nTraining data up to Sep 2021.",
      "id": "gpt-3.5-turbo",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 4000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-3.5 Turbo 16k",
      "description": "The latest GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Sep 2021.\nThis version has a higher accuracy at responding in requested formats and a fix for a bug which caused a text encoding issue for non-English language function calls.",
      "id": "gpt-3.5-turbo-16k",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-3.5 Turbo (older v1106)",
      "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\nTraining data up to Sep 2021.",
      "id": "gpt-3.5-turbo-1106",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 4000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-3.5 Turbo (older v0125)",
      "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\nTraining data up to Sep 2021.",
      "id": "gpt-3.5-turbo-0125",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 4000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-4 Turbo",
      "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\nTraining data: up to December 2023.",
      "id": "gpt-4-turbo",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 10,
        "output": 30
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-4o",
      "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of GPT-4 Turbo while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.",
      "id": "gpt-4o",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.5,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-4o (2024-05-13)",
      "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of GPT-4 Turbo while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.",
      "id": "gpt-4o-2024-05-13",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.5,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-4o (2024-08-06)",
      "description": "The 2024-08-06 version of GPT-4o offers improved performance in structured outputs, with the ability to supply a JSON schema in the response_format.\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of GPT-4 Turbo while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.",
      "id": "gpt-4o-2024-08-06",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.5,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-4o (2024-11-20)",
      "description": "The 2024-11-20 version of GPT-4o offers a leveled-up creative writing ability with more natural, engaging, and tailored writing to improve relevance & readability. It’s also better at working with uploaded files, providing deeper insights & more thorough responses.\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of GPT-4 Turbo while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.",
      "id": "gpt-4o-2024-11-20",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.5,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-4o Search Preview",
      "description": "GPT-4o Search Preview is a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.",
      "id": "gpt-4o-search-preview",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.5,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": true,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-4o Search Preview (2025-03-11)",
      "description": "GPT-4o Search Preview is a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.",
      "id": "gpt-4o-search-preview-2025-03-11",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.5,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": true,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-4o-mini",
      "description": "GPT-4o mini is OpenAI's newest model after GPT-4 Omni, supporting both text and image inputs with text outputs.\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than GPT-3.5 Turbo. It maintains SOTA intelligence, while being significantly more cost-effective.",
      "id": "gpt-4o-mini",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-4o-mini (2024-07-18)",
      "description": "GPT-4o mini is OpenAI's newest model after GPT-4 Omni, supporting both text and image inputs with text outputs.\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than GPT-3.5 Turbo. It maintains SOTA intelligence, while being significantly more cost-effective.",
      "id": "gpt-4o-mini-2024-07-18",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-4o-mini Search Preview",
      "description": "GPT-4o mini Search Preview is a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.",
      "id": "gpt-4o-mini-search-preview",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": true,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-4o-mini Search Preview (2025-03-11)",
      "description": "GPT-4o mini Search Preview is a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.",
      "id": "gpt-4o-mini-search-preview-2025-03-11",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": true,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-4.1 Nano",
      "description": "For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.",
      "id": "gpt-4.1-nano",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-4.1 Nano (2025-04-14)",
      "description": "For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.",
      "id": "gpt-4.1-nano-2025-04-14",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-4.1 Mini",
      "description": "GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.",
      "id": "gpt-4.1-mini",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.4,
        "output": 1.6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-4.1 Mini (2025-04-14)",
      "description": "GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.",
      "id": "gpt-4.1-mini-2025-04-14",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.4,
        "output": 1.6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-4.1",
      "description": "GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.",
      "id": "gpt-4.1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2,
        "output": 8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-4.1 (2025-04-14)",
      "description": "GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.",
      "id": "gpt-4.1-2025-04-14",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2,
        "output": 8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-5 Nano (free)",
      "description": "GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments. While limited in reasoning depth compared to its larger counterparts, it retains key instruction-following and safety features. It is the successor to GPT-4.1-nano and offers a lightweight option for cost-sensitive or real-time applications.",
      "id": "gpt-5-nano:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5 Nano",
      "description": "GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments. While limited in reasoning depth compared to its larger counterparts, it retains key instruction-following and safety features. It is the successor to GPT-4.1-nano and offers a lightweight option for cost-sensitive or real-time applications.",
      "id": "gpt-5-nano",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.05,
        "output": 0.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "minimal",
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: GPT-5 Nano Minimal",
      "description": "OpenAI gpt-5-nano-minimal is the same model as gpt-5-nano with reasoning_effort set to minimal. GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments. While limited in reasoning depth compared to its larger counterparts, it retains key instruction-following and safety features. It is the successor to GPT-4.1-nano and offers a lightweight option for cost-sensitive or real-time applications.",
      "id": "gpt-5-nano-minimal",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.05,
        "output": 0.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5 Nano Low",
      "description": "OpenAI gpt-5-nano-low is the same model as gpt-5-nano with reasoning_effort set to low. GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments. While limited in reasoning depth compared to its larger counterparts, it retains key instruction-following and safety features. It is the successor to GPT-4.1-nano and offers a lightweight option for cost-sensitive or real-time applications.",
      "id": "gpt-5-nano-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.05,
        "output": 0.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5 Nano High",
      "description": "OpenAI gpt-5-nano-high is the same model as gpt-5-nano with reasoning_effort set to high. GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments. While limited in reasoning depth compared to its larger counterparts, it retains key instruction-following and safety features. It is the successor to GPT-4.1-nano and offers a lightweight option for cost-sensitive or real-time applications.",
      "id": "gpt-5-nano-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.05,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5 Mini (free)",
      "description": "GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks. It provides the same instruction-following and safety-tuning benefits as GPT-5, but with reduced latency and cost. GPT-5 Mini is the successor to OpenAI's o4-mini model.",
      "id": "gpt-5-mini:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5 Mini",
      "description": "GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks. It provides the same instruction-following and safety-tuning benefits as GPT-5, but with reduced latency and cost. GPT-5 Mini is the successor to OpenAI's o4-mini model.",
      "id": "gpt-5-mini",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "minimal",
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: GPT-5 Mini Minimal",
      "description": "OpenAI gpt-5-mini-minimal is the same model as gpt-5-mini with reasoning_effort set to minimal. GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks. It provides the same instruction-following and safety-tuning benefits as GPT-5, but with reduced latency and cost. GPT-5 Mini is the successor to OpenAI's o4-mini model.",
      "id": "gpt-5-mini-minimal",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5 Mini Low",
      "description": "OpenAI gpt-5-mini-low is the same model as gpt-5-mini with reasoning_effort set to low. GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks. It provides the same instruction-following and safety-tuning benefits as GPT-5, but with reduced latency and cost. GPT-5 Mini is the successor to OpenAI's o4-mini model.",
      "id": "gpt-5-mini-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5 Mini High",
      "description": "OpenAI gpt-5-mini-high is the same model as gpt-5-mini with reasoning_effort set to high. GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks. It provides the same instruction-following and safety-tuning benefits as GPT-5, but with reduced latency and cost. GPT-5 Mini is the successor to OpenAI's o4-mini model.",
      "id": "gpt-5-mini-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5",
      "description": "GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.",
      "id": "gpt-5",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "minimal",
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: GPT-5 Minimal",
      "description": "OpenAI gpt-5-minimal is the same model as gpt-5 with reasoning_effort set to minimal. GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.",
      "id": "gpt-5-minimal",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5 Low",
      "description": "OpenAI gpt-5-low is the same model as gpt-5 with reasoning_effort set to low. GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.",
      "id": "gpt-5-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5 High",
      "description": "OpenAI gpt-5-high is the same model as gpt-5 with reasoning_effort set to high. GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.",
      "id": "gpt-5-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 20
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5 Chat",
      "description": "GPT-5 Chat is designed for advanced, natural, multimodal, and context-aware conversations for enterprise applications.",
      "id": "gpt-5-chat-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-5 Codex",
      "description": "GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs.",
      "id": "gpt-5-codex",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: GPT-5 Codex Low",
      "description": "OpenAI gpt-5-codex-low is the same model as gpt-5-codex with reasoning_effort set to low. GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs.",
      "id": "gpt-5-codex-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5 Codex High",
      "description": "OpenAI gpt-5-codex-high is the same model as gpt-5-codex with reasoning_effort set to high. GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs.",
      "id": "gpt-5-codex-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 20
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.1",
      "description": "GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks. The model produces clearer, more grounded explanations with reduced jargon, making it easier to follow even on technical or multi-step problems.\nBuilt for broad task coverage, GPT-5.1 delivers consistent gains across math, coding, and structured analysis workloads, with more coherent long-form answers and improved tool-use reliability. It also features refined conversational alignment, enabling warmer, more intuitive responses without compromising precision. GPT-5.1 serves as the primary full-capability successor to GPT-5.",
      "id": "gpt-5.1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "none",
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: GPT-5.1 None",
      "description": "OpenAI gpt-5.1-none is the same model as gpt-5.1 with reasoning_effort set to none. GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks. The model produces clearer, more grounded explanations with reduced jargon, making it easier to follow even on technical or multi-step problems.\nBuilt for broad task coverage, GPT-5.1 delivers consistent gains across math, coding, and structured analysis workloads, with more coherent long-form answers and improved tool-use reliability. It also features refined conversational alignment, enabling warmer, more intuitive responses without compromising precision. GPT-5.1 serves as the primary full-capability successor to GPT-5.",
      "id": "gpt-5.1-none",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-5.1 Low",
      "description": "OpenAI gpt-5.1-low is the same model as gpt-5.1 with reasoning_effort set to low. GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks. The model produces clearer, more grounded explanations with reduced jargon, making it easier to follow even on technical or multi-step problems.\nBuilt for broad task coverage, GPT-5.1 delivers consistent gains across math, coding, and structured analysis workloads, with more coherent long-form answers and improved tool-use reliability. It also features refined conversational alignment, enabling warmer, more intuitive responses without compromising precision. GPT-5.1 serves as the primary full-capability successor to GPT-5.",
      "id": "gpt-5.1-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.1 High",
      "description": "OpenAI gpt-5.1-high is the same model as gpt-5.1 with reasoning_effort set to high. GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks. The model produces clearer, more grounded explanations with reduced jargon, making it easier to follow even on technical or multi-step problems.\nBuilt for broad task coverage, GPT-5.1 delivers consistent gains across math, coding, and structured analysis workloads, with more coherent long-form answers and improved tool-use reliability. It also features refined conversational alignment, enabling warmer, more intuitive responses without compromising precision. GPT-5.1 serves as the primary full-capability successor to GPT-5.",
      "id": "gpt-5.1-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 20
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.1 Chat",
      "description": "GPT-5.1 Chat (AKA Instant is the fast, lightweight member of the 5.1 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on harder queries, improving accuracy on math, coding, and multi-step tasks without slowing down typical conversations. The model is warmer and more conversational by default, with better instruction following and more stable short-form reasoning. GPT-5.1 Chat is designed for high-throughput, interactive workloads where responsiveness and consistency matter more than deep deliberation.",
      "id": "gpt-5.1-chat-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-5.1 Codex Mini",
      "description": "GPT-5.1-Codex-Mini is a smaller and faster version of GPT-5.1-Codex. It is designed for coding and software engineering tasks. It is optimized for speed and efficiency, but still retains the strong general intelligence of GPT-5.1.",
      "id": "gpt-5.1-codex-mini",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: GPT-5.1 Codex Mini Low",
      "description": "OpenAI gpt-5.1-codex-mini-low is the same model as gpt-5.1-codex-mini with reasoning_effort set to low. GPT-5.1 Codex Mini is a smaller and faster version of GPT-5.1-Codex. It is designed for coding and software engineering tasks. It is optimized for speed and efficiency, but still retains the strong general intelligence of GPT-5.1.",
      "id": "gpt-5.1-codex-mini-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.1 Codex Mini High",
      "description": "OpenAI gpt-5.1-codex-mini-high is the same model as gpt-5.1-codex-mini with reasoning_effort set to high. GPT-5.1 Codex Mini is a smaller and faster version of GPT-5.1-Codex. It is designed for coding and software engineering tasks. It is optimized for speed and efficiency, but still retains the strong general intelligence of GPT-5.1.",
      "id": "gpt-5.1-codex-mini-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.1 Codex",
      "description": "GPT-5.1-Codex is a specialized version of GPT-5.1 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs.",
      "id": "gpt-5.1-codex",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: GPT-5.1 Codex Low",
      "description": "OpenAI gpt-5.1-codex-low is the same model as gpt-5.1-codex with reasoning_effort set to low. GPT-5.1 Codex is a specialized version of GPT-5.1 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs.",
      "id": "gpt-5.1-codex-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.1 Codex High",
      "description": "OpenAI gpt-5.1-codex-high is the same model as gpt-5.1-codex with reasoning_effort set to high. GPT-5.1 Codex is a specialized version of GPT-5.1 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs.",
      "id": "gpt-5.1-codex-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 20
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.1 Codex Max",
      "description": "GPT-5.1-Codex-Max is OpenAI’s latest agentic coding model, designed for long-running, high-context software development tasks. It is based on an updated version of the 5.1 reasoning stack and trained on agentic workflows spanning software engineering, mathematics, and research.\nGPT-5.1-Codex-Max delivers faster performance, improved reasoning, and higher token efficiency across the development lifecycle.",
      "id": "gpt-5.1-codex-max",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: GPT-5.1 Codex Max Low",
      "description": "OpenAI gpt-5.1-codex-max-low is the same model as gpt-5.1-codex-max with reasoning_effort set to low. GPT-5.1 Codex Max is OpenAI’s latest agentic coding model, designed for long-running, high-context software development tasks. It is based on an updated version of the 5.1 reasoning stack and trained on agentic workflows spanning software engineering, mathematics, and research.\nGPT-5.1-Codex-Max delivers faster performance, improved reasoning, and higher token efficiency across the development lifecycle.",
      "id": "gpt-5.1-codex-max-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.1 Codex Max High",
      "description": "OpenAI gpt-5.1-codex-max-high is the same model as gpt-5.1-codex-max with reasoning_effort set to high. GPT-5.1 Codex Max is OpenAI’s latest agentic coding model, designed for long-running, high-context software development tasks. It is based on an updated version of the 5.1 reasoning stack and trained on agentic workflows spanning software engineering, mathematics, and research.\nGPT-5.1-Codex-Max delivers faster performance, improved reasoning, and higher token efficiency across the development lifecycle.",
      "id": "gpt-5.1-codex-max-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 20
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.2",
      "description": "GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context perfomance compared to GPT-5.1. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks.\nBuilt for broad task coverage, GPT-5.2 delivers consistent gains across math, coding, sciende, and tool calling workloads, with more coherent long-form answers and improved tool-use reliability.",
      "id": "gpt-5.2",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.75,
        "output": 14
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "none",
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: GPT-5.2 None",
      "description": "GPT-5.2 None is the same model as gpt-5.2 with reasoning_effort set to none. GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context perfomance compared to GPT-5.1. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks.\nBuilt for broad task coverage, GPT-5.2 delivers consistent gains across math, coding, sciende, and tool calling workloads, with more coherent long-form answers and improved tool-use reliability.",
      "id": "gpt-5.2-none",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.75,
        "output": 14
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-5.2 Low",
      "description": "GPT-5.2 Low is the same model as gpt-5.2 with reasoning_effort set to low. GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context perfomance compared to GPT-5.1. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks.\nBuilt for broad task coverage, GPT-5.2 delivers consistent gains across math, coding, sciende, and tool calling workloads, with more coherent long-form answers and improved tool-use reliability.",
      "id": "gpt-5.2-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.75,
        "output": 14
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.2 High",
      "description": "GPT-5.2 High is the same model as gpt-5.2 with reasoning_effort set to high. GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context perfomance compared to GPT-5.1. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks.\nBuilt for broad task coverage, GPT-5.2 delivers consistent gains across math, coding, sciende, and tool calling workloads, with more coherent long-form answers and improved tool-use reliability.",
      "id": "gpt-5.2-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.75,
        "output": 28
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.2 Chat",
      "description": "GPT-5.2 Chat (AKA Instant) is the fast, lightweight member of the 5.2 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on harder queries, improving accuracy on math, coding, and multi-step tasks without slowing down typical conversations. The model is warmer and more conversational by default, with better instruction following and more stable short-form reasoning. GPT-5.2 Chat is designed for high-throughput, interactive workloads where responsiveness and consistency matter more than deep deliberation.",
      "id": "gpt-5.2-chat-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.75,
        "output": 14
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-5.2 Codex",
      "description": "GPT-5.2-Codex is an upgraded version of GPT-5.1-Codex optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1-Codex, 5.2-Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs.",
      "id": "gpt-5.2-codex",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.75,
        "output": 14
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: GPT-5.2 Codex Low",
      "description": "GPT-5.2 Codex Low is the same model as gpt-5.2-codex with reasoning_effort set to low. GPT-5.2-Codex is an upgraded version of GPT-5.1-Codex optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1-Codex, 5.2-Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs.",
      "id": "gpt-5.2-codex-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.75,
        "output": 14
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.2 Codex High",
      "description": "GPT-5.2 Codex High is the same model as gpt-5.2-codex with reasoning_effort set to high. GPT-5.2-Codex is an upgraded version of GPT-5.1-Codex optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1-Codex, 5.2-Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs.",
      "id": "gpt-5.2-codex-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.75,
        "output": 28
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.3 Chat",
      "description": "GPT-5.3 Chat is an update to ChatGPT's most-used model that makes everyday conversations smoother, more useful, and more directly helpful. It delivers more accurate answers with better contextualization and significantly reduces unnecessary refusals, caveats, and overly cautious phrasing that can interrupt conversational flow.",
      "id": "gpt-5.3-chat-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.75,
        "output": 14
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "OpenAI: GPT-5.3 Codex",
      "description": "GPT-5.3-Codex is OpenAI’s most advanced agentic coding model, combining the frontier software engineering performance of GPT-5.2-Codex with the broader reasoning and professional knowledge capabilities of GPT-5.2. It achieves state-of-the-art results on SWE-Bench Pro and strong performance on Terminal-Bench 2.0 and OSWorld-Verified, reflecting improved multi-language coding, terminal proficiency, and real-world computer-use skills. The model is optimized for long-running, tool-using workflows and supports interactive steering during execution, making it suitable for complex development tasks, debugging, deployment, and iterative product work.",
      "id": "gpt-5.3-codex",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.75,
        "output": 14
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: GPT-5.3 Codex Low",
      "description": "GPT-5.3 Codex Low is the same model as gpt-5.3-codex with reasoning_effort set to low. GPT-5.3-Codex is OpenAI’s most advanced agentic coding model, combining the frontier software engineering performance of GPT-5.2-Codex with the broader reasoning and professional knowledge capabilities of GPT-5.2. It achieves state-of-the-art results on SWE-Bench Pro and strong performance on Terminal-Bench 2.0 and OSWorld-Verified, reflecting improved multi-language coding, terminal proficiency, and real-world computer-use skills. The model is optimized for long-running, tool-using workflows and supports interactive steering during execution, making it suitable for complex development tasks, debugging, deployment, and iterative product work.",
      "id": "gpt-5.3-codex-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.75,
        "output": 14
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.3 Codex High",
      "description": "GPT-5.3 Codex High is the same model as gpt-5.3-codex with reasoning_effort set to high. GPT-5.3-Codex is OpenAI’s most advanced agentic coding model, combining the frontier software engineering performance of GPT-5.2-Codex with the broader reasoning and professional knowledge capabilities of GPT-5.2. It achieves state-of-the-art results on SWE-Bench Pro and strong performance on Terminal-Bench 2.0 and OSWorld-Verified, reflecting improved multi-language coding, terminal proficiency, and real-world computer-use skills. The model is optimized for long-running, tool-using workflows and supports interactive steering during execution, making it suitable for complex development tasks, debugging, deployment, and iterative product work.",
      "id": "gpt-5.3-codex-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.75,
        "output": 28
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.4 Nano",
      "description": "GPT-5.4 nano is the most lightweight and cost-efficient variant of the GPT-5.4 family, optimized for speed-critical and high-volume tasks. It supports text and image inputs and is designed for low-latency use cases such as classification, data extraction, ranking, and sub-agent execution.\nThe model prioritizes responsiveness and efficiency over deep reasoning, making it ideal for pipelines that require fast, reliable outputs at scale. GPT-5.4 nano is well suited for background tasks, real-time systems, and distributed agent architectures where minimizing cost and latency is essential.",
      "id": "gpt-5.4-nano",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 1.25
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: GPT-5.4 Nano Low",
      "description": "GPT-5.4 nano low is the same model as gpt-5.4-nano with reasoning_effort set to low. GPT-5.4 nano is the most lightweight and cost-efficient variant of the GPT-5.4 family, optimized for speed-critical and high-volume tasks. It supports text and image inputs and is designed for low-latency use cases such as classification, data extraction, ranking, and sub-agent execution.\nThe model prioritizes responsiveness and efficiency over deep reasoning, making it ideal for pipelines that require fast, reliable outputs at scale. GPT-5.4 nano is well suited for background tasks, real-time systems, and distributed agent architectures where minimizing cost and latency is essential.",
      "id": "gpt-5.4-nano-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 1.25
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.4 Nano High",
      "description": "GPT-5.4 nano high is the same model as gpt-5.4-nano with reasoning_effort set to high. GPT-5.4 nano is the most lightweight and cost-efficient variant of the GPT-5.4 family, optimized for speed-critical and high-volume tasks. It supports text and image inputs and is designed for low-latency use cases such as classification, data extraction, ranking, and sub-agent execution.\nThe model prioritizes responsiveness and efficiency over deep reasoning, making it ideal for pipelines that require fast, reliable outputs at scale. GPT-5.4 nano is well suited for background tasks, real-time systems, and distributed agent architectures where minimizing cost and latency is essential.",
      "id": "gpt-5.4-nano-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 2.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.4 Mini",
      "description": "GPT-5.4 mini brings the core capabilities of GPT-5.4 to a faster, more efficient model optimized for high-throughput workloads. It supports text and image inputs with strong performance across reasoning, coding, and tool use, while reducing latency and cost for large-scale deployments.\nThe model is designed for production environments that require a balance of capability and efficiency, making it well suited for chat applications, coding assistants, and agent workflows that operate at scale. GPT-5.4 mini delivers reliable instruction following, solid multi-step reasoning, and consistent performance across diverse tasks with improved cost efficiency.",
      "id": "gpt-5.4-mini",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.75,
        "output": 4.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: GPT-5.4 Mini Low",
      "description": "GPT-5.4 mini low is the same model as gpt-5.4-mini with reasoning_effort set to low. GPT-5.4 mini brings the core capabilities of GPT-5.4 to a faster, more efficient model optimized for high-throughput workloads. It supports text and image inputs with strong performance across reasoning, coding, and tool use, while reducing latency and cost for large-scale deployments.\nThe model is designed for production environments that require a balance of capability and efficiency, making it well suited for chat applications, coding assistants, and agent workflows that operate at scale. GPT-5.4 mini delivers reliable instruction following, solid multi-step reasoning, and consistent performance across diverse tasks with improved cost efficiency.",
      "id": "gpt-5.4-mini-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.75,
        "output": 4.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.4 Mini High",
      "description": "GPT-5.4 mini high is the same model as gpt-5.4-mini with reasoning_effort set to high. GPT-5.4 mini brings the core capabilities of GPT-5.4 to a faster, more efficient model optimized for high-throughput workloads. It supports text and image inputs with strong performance across reasoning, coding, and tool use, while reducing latency and cost for large-scale deployments.\nThe model is designed for production environments that require a balance of capability and efficiency, making it well suited for chat applications, coding assistants, and agent workflows that operate at scale. GPT-5.4 mini delivers reliable instruction following, solid multi-step reasoning, and consistent performance across diverse tasks with improved cost efficiency.",
      "id": "gpt-5.4-mini-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 400000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.75,
        "output": 9
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.4",
      "description": "GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling high-context reasoning, coding, and multimodal analysis within the same workflow.\nThe model delivers improved performance in coding, document understanding, tool use, and instruction following. It is designed as a strong default for both general-purpose tasks and software engineering, capable of generating production-quality code, synthesizing information across multiple sources, and executing complex multi-step workflows with fewer iterations and greater token efficiency.",
      "id": "gpt-5.4",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.5,
        "output": 15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: GPT-5.4 Low",
      "description": "GPT-5.4 Low is the same model as gpt-5.4 with reasoning_effort set to low. GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling high-context reasoning, coding, and multimodal analysis within the same workflow.\nThe model delivers improved performance in coding, document understanding, tool use, and instruction following. It is designed as a strong default for both general-purpose tasks and software engineering, capable of generating production-quality code, synthesizing information across multiple sources, and executing complex multi-step workflows with fewer iterations and greater token efficiency.",
      "id": "gpt-5.4-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.5,
        "output": 15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT-5.4 High",
      "description": "GPT-5.4 High is the same model as gpt-5.4 with reasoning_effort set to high. GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling high-context reasoning, coding, and multimodal analysis within the same workflow.\nThe model delivers improved performance in coding, document understanding, tool use, and instruction following. It is designed as a strong default for both general-purpose tasks and software engineering, capable of generating production-quality code, synthesizing information across multiple sources, and executing complex multi-step workflows with fewer iterations and greater token efficiency.",
      "id": "gpt-5.4-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.5,
        "output": 30
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: o1",
      "description": "The latest and strongest model family from OpenAI, o1 is designed to spend more time thinking before responding.\nThe o1 models are optimized for math, science, programming, and other STEM-related tasks. They consistently exhibit PhD-level accuracy on benchmarks in physics, chemistry, and biology.",
      "id": "o1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 15,
        "output": 60
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: o1 Low",
      "description": "OpenAI o1-low is the same model as o1 with reasoning_effort set to low.\nThe latest and strongest model family from OpenAI, o1 is designed to spend more time thinking before responding.\nThe o1 models are optimized for math, science, programming, and other STEM-related tasks. They consistently exhibit PhD-level accuracy on benchmarks in physics, chemistry, and biology.",
      "id": "o1-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 15,
        "output": 60
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: o1 High",
      "description": "OpenAI o1-high is the same model as o1 with reasoning_effort set to high.\nThe latest and strongest model family from OpenAI, o1 is designed to spend more time thinking before responding.\nThe o1 models are optimized for math, science, programming, and other STEM-related tasks. They consistently exhibit PhD-level accuracy on benchmarks in physics, chemistry, and biology.",
      "id": "o1-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 15,
        "output": 120
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: o3 Mini",
      "description": "OpenAI o3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding.\nThis model supports the reasoning_effort parameter, which can be set to \"high\", \"medium\", or \"low\" to control the thinking time of the model. The default is \"medium\".\nThe model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming, though it does not include vision processing capabilities.\nThe model demonstrates significant improvements over its predecessor, with expert testers preferring its responses 56% of the time and noting a 39% reduction in major errors on complex questions. With medium reasoning effort settings, o3-mini matches the performance of the larger o1 model on challenging reasoning evaluations like AIME and GPQA, while maintaining lower latency and cost.",
      "id": "o3-mini",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.1,
        "output": 4.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: o3 Mini Low",
      "description": "OpenAI o3-mini-low is the same model as o3-mini with reasoning_effort set to low.\no3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding. The model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming, though it does not include vision processing capabilities.\nThe model demonstrates significant improvements over its predecessor, with expert testers preferring its responses 56% of the time and noting a 39% reduction in major errors on complex questions. With medium reasoning effort settings, o3-mini matches the performance of the larger o1 model on challenging reasoning evaluations like AIME and GPQA, while maintaining lower latency and cost.",
      "id": "o3-mini-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.1,
        "output": 4.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: o3 Mini High",
      "description": "OpenAI o3-mini-high is the same model as o3-mini with reasoning_effort set to high.\no3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding. The model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming, though it does not include vision processing capabilities.\nThe model demonstrates significant improvements over its predecessor, with expert testers preferring its responses 56% of the time and noting a 39% reduction in major errors on complex questions. With medium reasoning effort settings, o3-mini matches the performance of the larger o1 model on challenging reasoning evaluations like AIME and GPQA, while maintaining lower latency and cost.",
      "id": "o3-mini-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.1,
        "output": 8.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: o3 Mini Online",
      "description": "This model is combined with a powerful RAG system to provide real-time information and web search capabilities. It is designed to be used in conjunction with the o3-mini model, which is optimized for STEM reasoning tasks.\nThe o3-mini model is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding. The model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming, though it does not include vision processing capabilities.\nThe model demonstrates significant improvements over its predecessor, with expert testers preferring its responses 56% of the time and noting a 39% reduction in major errors on complex questions. With medium reasoning effort settings, o3-mini matches the performance of the larger o1 model on challenging reasoning evaluations like AIME and GPQA, while maintaining lower latency and cost.",
      "id": "o3-mini-online",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.5,
        "output": 5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": true,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: o3",
      "description": "o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following. Use it to think through multi-step problems that involve analysis across text, code, and images.",
      "id": "o3",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2,
        "output": 8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: o3 Low",
      "description": "OpenAI o3-low is the same model as o3 with reasoning_effort set to low.\no3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following. Use it to think through multi-step problems that involve analysis across text, code, and images.",
      "id": "o3-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2,
        "output": 8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: o3 High",
      "description": "OpenAI o3-high is the same model as o3 with reasoning_effort set to high.\no3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following. Use it to think through multi-step problems that involve analysis across text, code, and images.",
      "id": "o3-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2,
        "output": 16
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: o3 Pro",
      "description": "The o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o3-pro model uses more compute to think harder and provide consistently better answers.",
      "id": "o3-pro",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 20,
        "output": 80
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: o3 Pro Low",
      "description": "OpenAI o3-pro-low is the same model as o3-pro with reasoning_effort set to low.\nThe o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o3-pro model uses more compute to think harder and provide consistently better answers.",
      "id": "o3-pro-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 20,
        "output": 80
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: o3 Pro High",
      "description": "OpenAI o3-pro-high is the same model as o3-pro with reasoning_effort set to high.\nThe o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o3-pro model uses more compute to think harder and provide consistently better answers.",
      "id": "o3-pro-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 20,
        "output": 160
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: o4 Mini",
      "description": "OpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning and coding performance across benchmarks like AIME (99.5% with Python) and SWE-bench, outperforming its predecessor o3-mini and even approaching o3 in some domains.\nDespite its smaller size, o4-mini exhibits high accuracy in STEM tasks, visual problem solving (e.g., MathVista, MMMU), and code editing. It is especially well-suited for high-throughput scenarios where latency or cost is critical. Thanks to its efficient architecture and refined reinforcement learning training, o4-mini can chain tools, generate structured outputs, and solve multi-step tasks with minimal delay—often in under a minute.",
      "id": "o4-mini",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.1,
        "output": 4.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true,
        "supported_reasoning_efforts": [
          "low",
          "medium",
          "high"
        ]
      }
    },
    {
      "name": "OpenAI: o4 Mini Low",
      "description": "OpenAI o4-mini-low is the same model as o4-mini with reasoning_effort set to low.\nOpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning and coding performance across benchmarks like AIME (99.5% with Python) and SWE-bench, outperforming its predecessor o3-mini and even approaching o3 in some domains.\nDespite its smaller size, o4-mini exhibits high accuracy in STEM tasks, visual problem solving (e.g., MathVista, MMMU), and code editing. It is especially well-suited for high-throughput scenarios where latency or cost is critical. Thanks to its efficient architecture and refined reinforcement learning training, o4-mini can chain tools, generate structured outputs, and solve multi-step tasks with minimal delay—often in under a minute.",
      "id": "o4-mini-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.1,
        "output": 4.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: o4 Mini High",
      "description": "OpenAI o4-mini-high is the same model as o4-mini with reasoning_effort set to high.\nOpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning and coding performance across benchmarks like AIME (99.5% with Python) and SWE-bench, outperforming its predecessor o3-mini and even approaching o3 in some domains.\nDespite its smaller size, o4-mini exhibits high accuracy in STEM tasks, visual problem solving (e.g., MathVista, MMMU), and code editing. It is especially well-suited for high-throughput scenarios where latency or cost is critical. Thanks to its efficient architecture and refined reinforcement learning training, o4-mini can chain tools, generate structured outputs, and solve multi-step tasks with minimal delay—often in under a minute.",
      "id": "o4-mini-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.1,
        "output": 8.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT OSS 20B (free)",
      "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.",
      "id": "gpt-oss-20b:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT OSS 20B",
      "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.",
      "id": "gpt-oss-20b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.05,
        "output": 0.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT OSS 120B (free)",
      "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.",
      "id": "gpt-oss-120b:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OpenAI: GPT OSS 120B",
      "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.",
      "id": "gpt-oss-120b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Anthropic: Claude Sonnet 4",
      "description": "Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability. Achieving state-of-the-art performance on SWE-bench (72.7%), Sonnet 4 balances capability and computational efficiency, making it suitable for a broad range of applications from routine coding tasks to complex software development projects. Key enhancements include improved autonomous codebase navigation, reduced error rates in agent-driven workflows, and increased reliability in following intricate instructions. Sonnet 4 is optimized for practical everyday use, providing advanced reasoning capabilities while maintaining efficiency and responsiveness in diverse internal and external scenarios.",
      "id": "claude-sonnet-4-20250514",
      "object": "model",
      "created": 1776283310,
      "owned_by": "anthropic",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 15,
        "cache_write_5m": 3.75,
        "cache_write_1h": 6,
        "cache_read": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Anthropic: Claude Sonnet 4 (thinking)",
      "description": "Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability. Achieving state-of-the-art performance on SWE-bench (72.7%), Sonnet 4 balances capability and computational efficiency, making it suitable for a broad range of applications from routine coding tasks to complex software development projects. Key enhancements include improved autonomous codebase navigation, reduced error rates in agent-driven workflows, and increased reliability in following intricate instructions. Sonnet 4 is optimized for practical everyday use, providing advanced reasoning capabilities while maintaining efficiency and responsiveness in diverse internal and external scenarios.",
      "id": "claude-sonnet-4-20250514-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "anthropic",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 15,
        "cache_write_5m": 3.75,
        "cache_write_1h": 6,
        "cache_read": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Anthropic: Claude Haiku 4.5",
      "description": "Claude Haiku 4.5 is Anthropic's fastest and most efficient model, delivering near-frontier intelligence at a fraction of the cost and latency of larger Claude models. Matching Claude Sonnet 4's performance across reasoning, coding, and computer-use tasks, Haiku 4.5 brings frontier-level capability to real-time and high-volume applications.\nIt introduces extended thinking to the Haiku line, enabling controllable reasoning depth, summarized or interleaved thought output, and tool-assisted workflows with full support for coding, bash, web search, and computer-use tools. Scoring >73% on SWE-bench Verified, Haiku 4.5 ranks among the world's best coding models while maintaining exceptional responsiveness for sub-agents, parallelized execution, and scaled deployment.",
      "id": "claude-haiku-4-5-20251001",
      "object": "model",
      "created": 1776283310,
      "owned_by": "anthropic",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 5,
        "cache_write_5m": 1.25,
        "cache_write_1h": 2,
        "cache_read": 0.1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Anthropic: Claude Haiku 4.5 (thinking)",
      "description": "Claude Haiku 4.5 is Anthropic's fastest and most efficient model, delivering near-frontier intelligence at a fraction of the cost and latency of larger Claude models. Matching Claude Sonnet 4's performance across reasoning, coding, and computer-use tasks, Haiku 4.5 brings frontier-level capability to real-time and high-volume applications.\nIt introduces extended thinking to the Haiku line, enabling controllable reasoning depth, summarized or interleaved thought output, and tool-assisted workflows with full support for coding, bash, web search, and computer-use tools. Scoring >73% on SWE-bench Verified, Haiku 4.5 ranks among the world's best coding models while maintaining exceptional responsiveness for sub-agents, parallelized execution, and scaled deployment.",
      "id": "claude-haiku-4-5-20251001-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "anthropic",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 5,
        "cache_write_5m": 1.25,
        "cache_write_1h": 2,
        "cache_read": 0.1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Anthropic: Claude Sonnet 4.5",
      "description": "Claude Sonnet 4.5 is Anthropic’s most advanced Sonnet model to date, optimized for real-world agents and coding workflows. It delivers state-of-the-art performance on coding benchmarks such as SWE-bench Verified, with improvements across system design, code security, and specification adherence. The model is designed for extended autonomous operation, maintaining task continuity across sessions and providing fact-based progress tracking.\nSonnet 4.5 also introduces stronger agentic capabilities, including improved tool orchestration, speculative parallel execution, and more efficient context and memory management. With enhanced context tracking and awareness of token usage across tool calls, it is particularly well-suited for multi-context and long-running workflows. Use cases span software engineering, cybersecurity, financial analysis, research agents, and other domains requiring sustained reasoning and tool use.",
      "id": "claude-sonnet-4-5-20250929",
      "object": "model",
      "created": 1776283310,
      "owned_by": "anthropic",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 15,
        "cache_write_5m": 3.75,
        "cache_write_1h": 6,
        "cache_read": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Anthropic: Claude Sonnet 4.5 (thinking)",
      "description": "Claude Sonnet 4.5 is Anthropic’s most advanced Sonnet model to date, optimized for real-world agents and coding workflows. It delivers state-of-the-art performance on coding benchmarks such as SWE-bench Verified, with improvements across system design, code security, and specification adherence. The model is designed for extended autonomous operation, maintaining task continuity across sessions and providing fact-based progress tracking.\nSonnet 4.5 also introduces stronger agentic capabilities, including improved tool orchestration, speculative parallel execution, and more efficient context and memory management. With enhanced context tracking and awareness of token usage across tool calls, it is particularly well-suited for multi-context and long-running workflows. Use cases span software engineering, cybersecurity, financial analysis, research agents, and other domains requiring sustained reasoning and tool use.",
      "id": "claude-sonnet-4-5-20250929-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "anthropic",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 15,
        "cache_write_5m": 3.75,
        "cache_write_1h": 6,
        "cache_read": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Anthropic: Claude Opus 4",
      "description": "Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows. It sets new benchmarks in software engineering, achieving leading results on SWE-bench (72.5%) and Terminal-bench (43.2%). Opus 4 supports extended, agentic workflows, handling thousands of task steps continuously for hours without degradation.",
      "id": "claude-opus-4-20250514",
      "object": "model",
      "created": 1776283310,
      "owned_by": "anthropic",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 15,
        "output": 75,
        "cache_write_5m": 18.75,
        "cache_write_1h": 30,
        "cache_read": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Anthropic: Claude Opus 4 (thinking)",
      "description": "Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows. It sets new benchmarks in software engineering, achieving leading results on SWE-bench (72.5%) and Terminal-bench (43.2%). Opus 4 supports extended, agentic workflows, handling thousands of task steps continuously for hours without degradation.",
      "id": "claude-opus-4-20250514-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "anthropic",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 15,
        "output": 75,
        "cache_write_5m": 18.75,
        "cache_write_1h": 30,
        "cache_read": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Anthropic: Claude Opus 4.1",
      "description": "Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks. It achieves 74.5% on SWE-bench Verified and shows notable gains in multi-file code refactoring, debugging precision, and detail-oriented reasoning. The model supports extended thinking up to 64K tokens and is optimized for tasks involving research, data analysis, and tool-assisted reasoning.",
      "id": "claude-opus-4-1-20250805",
      "object": "model",
      "created": 1776283310,
      "owned_by": "anthropic",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 15,
        "output": 75,
        "cache_write_5m": 18.75,
        "cache_write_1h": 30,
        "cache_read": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Anthropic: Claude Opus 4.1 (thinking)",
      "description": "Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks. It achieves 74.5% on SWE-bench Verified and shows notable gains in multi-file code refactoring, debugging precision, and detail-oriented reasoning. The model supports extended thinking up to 64K tokens and is optimized for tasks involving research, data analysis, and tool-assisted reasoning.",
      "id": "claude-opus-4-1-20250805-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "anthropic",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 15,
        "output": 75,
        "cache_write_5m": 18.75,
        "cache_write_1h": 30,
        "cache_read": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Anthropic: Claude Opus 4.5",
      "description": "Claude Opus 4.5 is Anthropic’s frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon computer use. It offers strong multimodal capabilities, competitive performance across real-world coding and reasoning benchmarks, and improved robustness to prompt injection.",
      "id": "claude-opus-4-5-20251101",
      "object": "model",
      "created": 1776283310,
      "owned_by": "anthropic",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 5,
        "output": 25,
        "cache_write_5m": 6.25,
        "cache_write_1h": 10,
        "cache_read": 0.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Anthropic: Claude Opus 4.5 (thinking)",
      "description": "Claude Opus 4.5 is Anthropic’s frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon computer use. It offers strong multimodal capabilities, competitive performance across real-world coding and reasoning benchmarks, and improved robustness to prompt injection.",
      "id": "claude-opus-4-5-20251101-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "anthropic",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 5,
        "output": 25,
        "cache_write_5m": 6.25,
        "cache_write_1h": 10,
        "cache_read": 0.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Anthropic: Claude Opus 4.6",
      "description": "Opus 4.6 is Anthropic’s strongest model for coding and long-running professional tasks. It is built for agents that operate across entire workflows rather than single prompts, making it especially effective for large codebases, complex refactors, and multi-step debugging that unfolds over time. The model shows deeper contextual understanding, stronger problem decomposition, and greater reliability on hard engineering tasks than prior generations.\nBeyond coding, Opus 4.6 excels at sustained knowledge work. It produces near-production-ready documents, plans, and analyses in a single pass, and maintains coherence across very long outputs and extended sessions. This makes it a strong default for tasks that require persistence, judgment, and follow-through, such as technical design, migration planning, and end-to-end project execution.",
      "id": "claude-opus-4-6",
      "object": "model",
      "created": 1776283310,
      "owned_by": "anthropic",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 5,
        "output": 25,
        "cache_write_5m": 6.25,
        "cache_write_1h": 10,
        "cache_read": 0.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Anthropic: Claude Opus 4.6 (thinking)",
      "description": "Opus 4.6 is Anthropic’s strongest model for coding and long-running professional tasks. It is built for agents that operate across entire workflows rather than single prompts, making it especially effective for large codebases, complex refactors, and multi-step debugging that unfolds over time. The model shows deeper contextual understanding, stronger problem decomposition, and greater reliability on hard engineering tasks than prior generations.\nBeyond coding, Opus 4.6 excels at sustained knowledge work. It produces near-production-ready documents, plans, and analyses in a single pass, and maintains coherence across very long outputs and extended sessions. This makes it a strong default for tasks that require persistence, judgment, and follow-through, such as technical design, migration planning, and end-to-end project execution.",
      "id": "claude-opus-4-6-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "anthropic",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 5,
        "output": 25,
        "cache_write_5m": 6.25,
        "cache_write_1h": 10,
        "cache_read": 0.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Anthropic: Claude Sonnet 4.6",
      "description": "Sonnet 4.6 is Anthropic's most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. It excels at iterative development, complex codebase navigation, end-to-end project management with memory, polished document creation, and confident computer use for web QA and workflow automation.",
      "id": "claude-sonnet-4-6",
      "object": "model",
      "created": 1776283310,
      "owned_by": "anthropic",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 15,
        "cache_write_5m": 3.75,
        "cache_write_1h": 6,
        "cache_read": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Anthropic: Claude Sonnet 4.6 (thinking)",
      "description": "Sonnet 4.6 is Anthropic's most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. It excels at iterative development, complex codebase navigation, end-to-end project management with memory, polished document creation, and confident computer use for web QA and workflow automation.",
      "id": "claude-sonnet-4-6-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "anthropic",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 15,
        "cache_write_5m": 3.75,
        "cache_write_1h": 6,
        "cache_read": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Gemini 2.5 Flash Lite",
      "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models.",
      "id": "gemini-2.5-flash-lite",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Gemini 2.5 Flash Lite (thinking)",
      "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models.",
      "id": "gemini-2.5-flash-lite-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Gemini 2.5 Flash",
      "description": "Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling.\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation.",
      "id": "gemini-2.5-flash",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 2.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Gemini 2.5 Flash (thinking)",
      "description": "Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling.",
      "id": "gemini-2.5-flash-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Nano Banana Pro (Gemini 3 Pro Image Preview)",
      "description": "Nano Banana Pro is Google’s most advanced image-generation and editing model, built on Gemini 3 Pro. It extends the original Nano Banana with significantly improved multimodal reasoning, real-world grounding, and high-fidelity visual synthesis. The model generates context-rich graphics, from infographics and diagrams to cinematic composites, and can incorporate real-time information via Search grounding.\nIt offers industry-leading text rendering in images (including long passages and multilingual layouts), consistent multi-image blending, and accurate identity preservation across up to five subjects. Nano Banana Pro adds fine-grained creative controls such as localized edits, lighting and focus adjustments, camera transformations, and support for 2K/4K outputs and flexible aspect ratios. It is designed for professional-grade design, product visualization, storyboarding, and complex multi-element compositions while remaining efficient for general image creation workflows.",
      "id": "gemini-3-pro-image-preview",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 6,
        "output": 120
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Nano Banana 2 (Gemini 3.1 Flash Image Preview)",
      "description": "Gemini 3.1 Flash Image Preview, a.k.a. \"Nano Banana 2,\" is Google’s latest state of the art image generation and editing model, delivering Pro-level visual quality at Flash speed. It combines advanced contextual understanding with fast, cost-efficient inference, making complex image generation and iterative edits significantly more accessible.",
      "id": "gemini-3.1-flash-image-preview",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.5,
        "output": 30
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Gemini 2.5 Pro",
      "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.",
      "id": "gemini-2.5-pro",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Gemini 2.5 Pro (thinking)",
      "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.",
      "id": "gemini-2.5-pro-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Gemini 3 Flash Preview",
      "description": "Gemini 3 Flash Preview is a high speed, high value thinking model designed for agentic workflows, multi turn chat, and coding assistance. It delivers near Pro level reasoning and tool use performance with substantially lower latency than larger Gemini variants, making it well suited for interactive development, long running agent loops, and collaborative coding tasks. Compared to Gemini 2.5 Flash, it provides broad quality improvements across reasoning, multimodal understanding, and reliability.",
      "id": "gemini-3-flash-preview",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Gemini 3 Flash Preview Minimal",
      "description": "Matches the “no thinking” setting for most queries. The model may think very minimally for complex coding tasks. Minimizes latency for chat or high throughput applications. Gemini 3 Flash Preview Minimal is Google's flagship multimodal AI model with a 1M-token context window, excelling across text, image, video, audio, and code. It delivers state-of-the-art performance on benchmarks like LMArena, GPQA Diamond, and SWE-Bench Verified. Designed for advanced development, it offers strong tool-calling, agentic workflows, and zero-shot generation for coding, research synthesis, and multimodal analytics with minimal prompting.",
      "id": "gemini-3-flash-preview-minimal",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Gemini 3 Flash Preview Low",
      "description": "Low thinking is ideal for high-throughput tasks where speed is the priority, roughly matching the latency profile of Gemini 2.5 Flash while providing superior response quality. Gemini 3 Flash Preview Low is Google's flagship multimodal AI model with a 1M-token context window, excelling across text, image, video, audio, and code. It delivers state-of-the-art performance on benchmarks like LMArena, GPQA Diamond, and SWE-Bench Verified. Designed for advanced development, it offers strong tool-calling, agentic workflows, and zero-shot generation for coding, research synthesis, and multimodal analytics with minimal prompting.",
      "id": "gemini-3-flash-preview-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Gemini 3 Flash Preview Medium",
      "description": "Medium thinking is ideal for balanced performance between speed and quality. Gemini 3 Flash Preview Medium is Google's flagship multimodal AI model with a 1M-token context window, excelling across text, image, video, audio, and code. It delivers state-of-the-art performance on benchmarks like LMArena, GPQA Diamond, and SWE-Bench Verified. Designed for advanced development, it offers strong tool-calling, agentic workflows, and zero-shot generation for coding, research synthesis, and multimodal analytics with minimal prompting.",
      "id": "gemini-3-flash-preview-medium",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Gemini 3 Pro Preview",
      "description": "Gemini 3 Pro is Google's flagship multimodal AI model with a 1M-token context window, excelling across text, image, video, audio, and code. It delivers state-of-the-art performance on benchmarks like LMArena, GPQA Diamond, and SWE-Bench Verified. Designed for advanced development, it offers strong tool-calling, agentic workflows, and zero-shot generation for coding, research synthesis, and multimodal analytics with minimal prompting.",
      "id": "gemini-3-pro-preview",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Gemini 3 Pro Preview Low",
      "description": "Low thinking is ideal for high-throughput tasks where speed is the priority, roughly matching the latency profile of Gemini 2.5 Flash while providing superior response quality. Gemini 3 Pro Preview Low is Google's flagship multimodal AI model with a 1M-token context window, excelling across text, image, video, audio, and code. It delivers state-of-the-art performance on benchmarks like LMArena, GPQA Diamond, and SWE-Bench Verified. Designed for advanced development, it offers strong tool-calling, agentic workflows, and zero-shot generation for coding, research synthesis, and multimodal analytics with minimal prompting.",
      "id": "gemini-3-pro-preview-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Gemini 3.1 Flash Lite Preview",
      "description": "Gemini 3.1 Flash Lite Preview is Google's high-efficiency model optimized for high-volume use cases. It outperforms Gemini 2.5 Flash Lite on overall quality and approaches Gemini 2.5 Flash performance across key capabilities. Improvements span audio input/ASR, RAG snippet ranking, translation, data extraction, and code completion. Supports full thinking levels (minimal, low, medium, high) for fine-grained cost/performance trade-offs. Priced at half the cost of Gemini 3 Flash.",
      "id": "gemini-3.1-flash-lite-preview",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Gemini 3.1 Flash Lite Preview Low",
      "description": "Gemini 3.1 Flash Lite Preview is Google's high-efficiency model optimized for high-volume use cases. It outperforms Gemini 2.5 Flash Lite on overall quality and approaches Gemini 2.5 Flash performance across key capabilities. Improvements span audio input/ASR, RAG snippet ranking, translation, data extraction, and code completion. Supports full thinking levels (minimal, low, medium, high) for fine-grained cost/performance trade-offs. Priced at half the cost of Gemini 3 Flash.",
      "id": "gemini-3.1-flash-lite-preview-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Gemini 3.1 Flash Lite Preview Medium",
      "description": "Gemini 3.1 Flash Lite Preview is Google's high-efficiency model optimized for high-volume use cases. It outperforms Gemini 2.5 Flash Lite on overall quality and approaches Gemini 2.5 Flash performance across key capabilities. Improvements span audio input/ASR, RAG snippet ranking, translation, data extraction, and code completion. Supports full thinking levels (minimal, low, medium, high) for fine-grained cost/performance trade-offs. Priced at half the cost of Gemini 3 Flash.",
      "id": "gemini-3.1-flash-lite-preview-medium",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Gemini 3.1 Flash Lite Preview High",
      "description": "Gemini 3.1 Flash Lite Preview is Google's high-efficiency model optimized for high-volume use cases. It outperforms Gemini 2.5 Flash Lite on overall quality and approaches Gemini 2.5 Flash performance across key capabilities. Improvements span audio input/ASR, RAG snippet ranking, translation, data extraction, and code completion. Supports full thinking levels (minimal, low, medium, high) for fine-grained cost/performance trade-offs. Priced at half the cost of Gemini 3 Flash.",
      "id": "gemini-3.1-flash-lite-preview-high",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Gemini 3.1 Pro Preview",
      "description": "Gemini 3.1 Pro Preview is Google’s frontier reasoning model, delivering enhanced software engineering performance, improved agentic reliability, and more efficient token usage across complex workflows. Building on the multimodal foundation of the Gemini 3 series, it combines high-precision reasoning across text, image, video, audio, and code with a 1M-token context window.",
      "id": "gemini-3.1-pro-preview",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Gemini 3.1 Pro Preview Medium",
      "description": "Gemini 3.1 Pro Preview is Google’s frontier reasoning model, delivering enhanced software engineering performance, improved agentic reliability, and more efficient token usage across complex workflows. Building on the multimodal foundation of the Gemini 3 series, it combines high-precision reasoning across text, image, video, audio, and code with a 1M-token context window.",
      "id": "gemini-3.1-pro-preview-medium",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Gemini 3.1 Pro Preview Low",
      "description": "Gemini 3.1 Pro Preview is Google’s frontier reasoning model, delivering enhanced software engineering performance, improved agentic reliability, and more efficient token usage across complex workflows. Building on the multimodal foundation of the Gemini 3 series, it combines high-precision reasoning across text, image, video, audio, and code with a 1M-token context window.",
      "id": "gemini-3.1-pro-preview-low",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Gemma 7B",
      "description": "Gemma 7B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.",
      "id": "gemma-7b-it",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Gemma 2 2B (free)",
      "description": "Gemma 2 2B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.",
      "id": "gemma-2-2b-it:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Gemma 2 2B",
      "description": "Gemma 2 2B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.",
      "id": "gemma-2-2b-it",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.05,
        "output": 0.05
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Gemma 2 9B (free)",
      "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.",
      "id": "gemma-2-9b-it:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Gemma 2 9B",
      "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.",
      "id": "gemma-2-9b-it",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.12,
        "output": 0.15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Gemma 2 27B (free)",
      "description": "Gemma 2 27B by Google is an open model built from the same research and technology used to create the Gemini models.\nGemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.",
      "id": "gemma-2-27b-it:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Gemma 2 27B",
      "description": "Gemma 2 27B by Google is an open model built from the same research and technology used to create the Gemini models.\nGemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.",
      "id": "gemma-2-27b-it",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.8,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Gemma 3 1B",
      "description": "Gemma 3 1B is the smallest of the new Gemma 3 family. It handles context windows up to 32k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling.",
      "id": "gemma-3-1b-it",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Gemma 3 4B",
      "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling.",
      "id": "gemma-3-4b-it",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.25
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Gemma 3 12B",
      "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 12B is the second largest in the family of Gemma 3 models after Gemma 3 27B.",
      "id": "gemma-3-12b-it",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.35,
        "output": 0.56
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Gemma 3 27B",
      "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to Gemma 2.",
      "id": "gemma-3-27b-it",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.8,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Gemma 3n 2B",
      "description": "Gemma 3n E2B IT is a multimodal, instruction-tuned model developed by Google DeepMind, designed to operate efficiently at an effective parameter size of 2B while leveraging a 6B architecture. Based on the MatFormer architecture, it supports nested submodels and modular composition via the Mix-and-Match framework. Gemma 3n models are optimized for low-resource deployment, offering 32K context length and strong multilingual and reasoning performance across common benchmarks. This variant is trained on a diverse corpus including code, math, web, and multimodal data.",
      "id": "gemma-3n-e2b-it",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Gemma 3n 4B",
      "description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets. It supports multimodal inputs—including text, visual data, and audio—enabling diverse tasks such as text generation, speech recognition, translation, and image analysis. Leveraging innovations like Per-Layer Embedding (PLE) caching and the MatFormer architecture, Gemma 3n dynamically manages memory usage and computational load by selectively activating model parameters, significantly reducing runtime resource requirements.",
      "id": "gemma-3n-e4b-it",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.35,
        "output": 0.56
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Google: Gemma 4 26B A4B",
      "description": "Gemma 4 26B A4B IT is an instruction-tuned Mixture-of-Experts (MoE) model from Google DeepMind. Despite 25.2B total parameters, only 3.8B activate per token during inference — delivering near-31B quality at a fraction of the compute cost. Supports multimodal input including text, images, and video (up to 60s at 1fps). Features a 256K token context window, native function calling, configurable thinking/reasoning mode, and structured output support. Released under Apache 2.0.",
      "id": "gemma-4-26b-a4b-it",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.13,
        "output": 0.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Google: Gemma 4 31B",
      "description": "Gemma 4 31B Instruct is Google DeepMind's 30.7B dense multimodal model supporting text and image input with text output. Features a 256K token context window, configurable thinking/reasoning mode, native function calling, and multilingual support across 140+ languages. Strong on coding, reasoning, and document understanding tasks. Apache 2.0 license.",
      "id": "gemma-4-31b-it",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.14,
        "output": 0.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Vertex AI: Gemini 2.5 Flash",
      "description": "Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling.",
      "id": "gemini-2.5-flash:vertex-ai",
      "object": "model",
      "created": 1776283310,
      "owned_by": "vertex-ai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 2.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Vertex AI: Gemini 2.5 Pro",
      "description": "Gemini 2.5 Pro is Google's state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.",
      "id": "gemini-2.5-pro:vertex-ai",
      "object": "model",
      "created": 1776283310,
      "owned_by": "vertex-ai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Vertex AI: Gemini 2.5 Pro (thinking)",
      "description": "Gemini 2.5 Pro is Google's state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.",
      "id": "gemini-2.5-pro-thinking:vertex-ai",
      "object": "model",
      "created": 1776283310,
      "owned_by": "vertex-ai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.25,
        "output": 15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Vertex AI: Gemini 3 Flash Preview",
      "description": "Gemini 3 Flash Preview is Google's flagship multimodal AI model with a 1M-token context window, excelling across text, image, video, audio, and code. It delivers state-of-the-art performance on benchmarks like LMArena, GPQA Diamond, and SWE-Bench Verified. Designed for advanced development, it offers strong tool-calling, agentic workflows, and zero-shot generation for coding, research synthesis, and multimodal analytics with minimal prompting.",
      "id": "gemini-3-flash-preview:vertex-ai",
      "object": "model",
      "created": 1776283310,
      "owned_by": "vertex-ai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Vertex AI: Gemini 3 Flash Preview Minimal",
      "description": "Gemini 3 Flash Preview Minimal is Google's flagship multimodal AI model with a 1M-token context window, excelling across text, image, video, audio, and code. It delivers state-of-the-art performance on benchmarks like LMArena, GPQA Diamond, and SWE-Bench Verified. Designed for advanced development, it offers strong tool-calling, agentic workflows, and zero-shot generation for coding, research synthesis, and multimodal analytics with minimal prompting.",
      "id": "gemini-3-flash-preview-minimal:vertex-ai",
      "object": "model",
      "created": 1776283310,
      "owned_by": "vertex-ai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Vertex AI: Gemini 3 Flash Preview Low",
      "description": "Gemini 3 Flash Preview Low is Google's flagship multimodal AI model with a 1M-token context window, excelling across text, image, video, audio, and code. It delivers state-of-the-art performance on benchmarks like LMArena, GPQA Diamond, and SWE-Bench Verified. Designed for advanced development, it offers strong tool-calling, agentic workflows, and zero-shot generation for coding, research synthesis, and multimodal analytics with minimal prompting.",
      "id": "gemini-3-flash-preview-low:vertex-ai",
      "object": "model",
      "created": 1776283310,
      "owned_by": "vertex-ai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Vertex AI: Gemini 3 Flash Preview Medium",
      "description": "Gemini 3 Flash Preview Medium is Google's flagship multimodal AI model with a 1M-token context window, excelling across text, image, video, audio, and code. It delivers state-of-the-art performance on benchmarks like LMArena, GPQA Diamond, and SWE-Bench Verified. Designed for advanced development, it offers strong tool-calling, agentic workflows, and zero-shot generation for coding, research synthesis, and multimodal analytics with minimal prompting.",
      "id": "gemini-3-flash-preview-medium:vertex-ai",
      "object": "model",
      "created": 1776283310,
      "owned_by": "vertex-ai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Vertex AI: Gemini 3 Pro Preview",
      "description": "Gemini 3 Pro is Google's flagship multimodal AI model with a 1M-token context window, excelling across text, image, video, audio, and code. It delivers state-of-the-art performance on benchmarks like LMArena, GPQA Diamond, and SWE-Bench Verified. Designed for advanced development, it offers strong tool-calling, agentic workflows, and zero-shot generation for coding, research synthesis, and multimodal analytics with minimal prompting.",
      "id": "gemini-3-pro-preview:vertex-ai",
      "object": "model",
      "created": 1776283310,
      "owned_by": "vertex-ai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Vertex AI: Gemini 3 Pro Preview Low",
      "description": "Low thinking is ideal for high-throughput tasks where speed is the priority, roughly matching the latency profile of Gemini 2.5 Flash while providing superior response quality. Gemini 3 Pro Preview Low is Google's flagship multimodal AI model with a 1M-token context window, excelling across text, image, video, audio, and code. It delivers state-of-the-art performance on benchmarks like LMArena, GPQA Diamond, and SWE-Bench Verified. Designed for advanced development, it offers strong tool-calling, agentic workflows, and zero-shot generation for coding, research synthesis, and multimodal analytics with minimal prompting.",
      "id": "gemini-3-pro-preview-low:vertex-ai",
      "object": "model",
      "created": 1776283310,
      "owned_by": "vertex-ai",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Meta: Llama 2 7B Chat",
      "description": "A 7 billion parameter language model from Meta, fine tuned for chat completions.",
      "id": "llama-2-7b-chat",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 2 13B Chat",
      "description": "A 13 billion parameter language model from Meta, fine tuned for chat completions.",
      "id": "llama-2-13b-chat",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 4000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 2 70B Chat",
      "description": "The flagship, 70 billion parameter language model from Meta, fine tuned for chat completions. Llama 2 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align to human preferences for helpfulness and safety.",
      "id": "llama-2-70b-chat",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 4000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.9,
        "output": 0.9
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama Guard 3 8B",
      "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\nLlama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.",
      "id": "llama-guard-3-8b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 0.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama Guard 4 12B",
      "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.",
      "id": "llama-guard-4-12b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 164000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.05,
        "output": 0.05
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 3 8B",
      "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.",
      "id": "llama-3-8b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.05,
        "output": 0.08
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 3 70B",
      "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.",
      "id": "llama-3-70b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.6,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 3.1 8B",
      "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.",
      "id": "llama-3.1-8b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.03,
        "output": 0.05
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 3.1 70B",
      "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.",
      "id": "llama-3.1-70b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 3.1 405B",
      "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.",
      "id": "llama-3.1-405b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 3.2 1B (free)",
      "description": "Llama 3.2 1B is a 1-billion-parameter language model focused on efficiently performing natural language tasks, such as summarization, dialogue, and multilingual text analysis. Its smaller size allows it to operate efficiently in low-resource environments while maintaining strong task performance.\nSupporting eight core languages and fine-tunable for more, Llama 1.3B is ideal for businesses or developers seeking lightweight yet powerful AI solutions that can operate in diverse multilingual settings without the high computational demand of larger models.",
      "id": "llama-3.2-1b:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 3.2 1B",
      "description": "Llama 3.2 1B is a 1-billion-parameter language model focused on efficiently performing natural language tasks, such as summarization, dialogue, and multilingual text analysis. Its smaller size allows it to operate efficiently in low-resource environments while maintaining strong task performance.\nSupporting eight core languages and fine-tunable for more, Llama 1.3B is ideal for businesses or developers seeking lightweight yet powerful AI solutions that can operate in diverse multilingual settings without the high computational demand of larger models.",
      "id": "llama-3.2-1b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.01,
        "output": 0.02
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 3.2 3B (free)",
      "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.",
      "id": "llama-3.2-3b:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 3.2 3B",
      "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.",
      "id": "llama-3.2-3b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.03,
        "output": 0.05
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 3.2 11B",
      "description": "Llama 3.2 11B is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. It excels in tasks such as image captioning and visual question answering, bridging the gap between language generation and visual reasoning. Pre-trained on a massive dataset of image-text pairs, it performs well in complex, high-accuracy image analysis.\nIts ability to integrate visual understanding with language processing makes it an ideal solution for industries requiring comprehensive visual-linguistic AI applications, such as content creation, AI-driven customer service, and research.",
      "id": "llama-3.2-11b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.06,
        "output": 0.1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 3.2 90B",
      "description": "The Llama 3.2 90B model is a top-tier, 90-billion-parameter multimodal model designed for the most challenging visual reasoning and language tasks. It offers unparalleled accuracy in image captioning, visual question answering, and advanced image-text comprehension. Pre-trained on vast multimodal datasets and fine-tuned with human feedback, the Llama 90B Vision is engineered to handle the most demanding image-based AI tasks.\nThis model is perfect for industries requiring cutting-edge multimodal AI capabilities, particularly those dealing with complex, real-time visual and textual analysis.",
      "id": "llama-3.2-90b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.9,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 3.3 8B Instruct",
      "description": "A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.",
      "id": "llama-3.3-8b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 12800,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.06,
        "output": 0.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 3.3 70B Instruct (free)",
      "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.",
      "id": "llama-3.3-70b-instruct:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 3.3 70B Instruct",
      "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.",
      "id": "llama-3.3-70b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.12,
        "output": 0.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 4 Scout (free)",
      "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.",
      "id": "llama-4-scout-17b-16e-instruct:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 4 Scout",
      "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.",
      "id": "llama-4-scout-17b-16e-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 4 Maverick (free)",
      "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.",
      "id": "llama-4-maverick-17b-128e-instruct:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meta: Llama 4 Maverick",
      "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.",
      "id": "llama-4-maverick-17b-128e-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meta",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "NVIDIA: Nemotron 4 Mini Hindi 4B Instruct",
      "description": "Nemotron-4-Mini-Hindi-4B-Instruct is a chat model for generating responses for chat application and retrieval augmented generation in Hindi. It is a aligned version of Nemotron-4-Mini-Hindi-4B-Base . It is a small language model (SLM) optimized through distillation, pruning, and quantization for speed and on-device deployment. VRAM usage has been minimized to approximately 2 GB, providing significantly faster time to first token compared to LLMs.",
      "id": "nemotron-4-mini-hindi-4b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nvidia",
      "tokens": 4000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.005,
        "output": 0.02
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "NVIDIA: Nemotron Nano 9B v2 (free)",
      "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response.\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.",
      "id": "nemotron-nano-9b-v2:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nvidia",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "NVIDIA: Nemotron Nano 9B v2",
      "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response.\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.",
      "id": "nemotron-nano-9b-v2",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nvidia",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.04,
        "output": 0.16
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "NVIDIA: Nemotron Nano 12B v2 VL (free)",
      "description": "NVIDIA Nemotron Nano 12B v2 VL model enables multi-image reasoning and video understanding, along with strong document intelligence, visual Q&A and summarization capabilities.",
      "id": "nemotron-nano-12b-v2-vl:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nvidia",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "NVIDIA: Nemotron Nano 12B v2 VL",
      "description": "NVIDIA Nemotron Nano 12B v2 VL model enables multi-image reasoning and video understanding, along with strong document intelligence, visual Q&A and summarization capabilities.",
      "id": "nemotron-nano-12b-v2-vl",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nvidia",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 0.6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "NVIDIA: Nemotron 3 Nano 30B A3B (free)",
      "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.",
      "id": "nemotron-3-nano-30b-a3b:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nvidia",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "NVIDIA: Nemotron 3 Nano 30B A3B",
      "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.",
      "id": "nemotron-3-nano-30b-a3b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nvidia",
      "tokens": 256000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 0.9
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "NVIDIA: Llama 3.1 Nemotron 70B Instruct",
      "description": "NVIDIA's Llama 3.1 Nemotron 70B is a language model designed for generating precise and useful responses. Leveraging Llama 3.1 70B architecture and Reinforcement Learning from Human Feedback (RLHF), it excels in automatic alignment benchmarks. This model is tailored for applications requiring high accuracy in helpfulness and response generation, suitable for diverse user queries across multiple domains.",
      "id": "llama-3.1-nemotron-70b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nvidia",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.12,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "NVIDIA: Llama 3.3 Nemotron Super 49B v1.5",
      "description": "Llama-3.3-Nemotron-Super-49B-v1.5 is a large language model (LLM) optimized for advanced reasoning, conversational interactions, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta's Llama-3.3-70B-Instruct, it employs a Neural Architecture Search (NAS) approach, significantly enhancing efficiency and reducing memory requirements. This allows the model to support a context length of up to 128K tokens and fit efficiently on single high-performance GPUs, such as NVIDIA H200.",
      "id": "llama-3.3-nemotron-super-49b-v1.5",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nvidia",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "NVIDIA: Llama 3.1 Nemotron Ultra 253B v1",
      "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.",
      "id": "llama-3.1-nemotron-ultra-253b-v1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nvidia",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.75,
        "output": 3.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "AllenAI: Olmo 2 32B Instruct",
      "description": "OLMo-2 32B Instruct is a supervised instruction-finetuned variant of the OLMo-2 32B March 2025 base model. It excels in complex reasoning and instruction-following tasks across diverse benchmarks such as GSM8K, MATH, IFEval, and general NLP evaluation. Developed by AI2, OLMo-2 32B is part of an open, research-oriented initiative, trained primarily on English-language datasets to advance the understanding and development of open-source language models.",
      "id": "olmo-2-0325-32b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "allenai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.05,
        "output": 0.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "DeepSeek: R1 0528",
      "description": "May 28th update to the original DeepSeek R1 Performance on par with OpenAI o1, but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.",
      "id": "deepseek-r1-0528",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 164000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.7,
        "output": 2.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "DeepSeek: R1",
      "description": "DeepSeek R1 is here: Performance on par with OpenAI o1, but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.",
      "id": "deepseek-r1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 164000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "DeepSeek: R1 0528 Qwen3 8B",
      "description": "DeepSeek-R1-0528 is a lightly upgraded release of DeepSeek R1 that taps more compute and smarter post-training tricks, pushing its reasoning and inference to the brink of flagship models like O3 and Gemini 2.5 Pro.\nIt now tops math, programming, and logic leaderboards, showcasing a step-change in depth-of-thought.\nThe distilled variant, DeepSeek-R1-0528-Qwen3-8B, transfers this chain-of-thought into an 8 B-parameter form, beating standard Qwen3 8B by +10 pp and tying the 235 B “thinking” giant on AIME 2024.",
      "id": "deepseek-r1-0528-qwen3-8b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.06,
        "output": 0.09
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "DeepSeek: R1 Distill Llama 8B (free)",
      "description": "DeepSeek R1 Distill Llama 8B is a distilled large language model based on Llama-3.1-8B-Instruct, using outputs from DeepSeek R1.",
      "id": "deepseek-r1-distill-llama-8b:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "DeepSeek: R1 Distill Llama 8B",
      "description": "DeepSeek R1 Distill Llama 8B is a distilled large language model based on Llama-3.1-8B-Instruct, using outputs from DeepSeek R1.",
      "id": "deepseek-r1-distill-llama-8b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.04,
        "output": 0.04
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "DeepSeek: R1 Distill Llama 70B",
      "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on Llama-3.3-70B-Instruct, using outputs from DeepSeek R1.",
      "id": "deepseek-r1-distill-llama-70b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "DeepSeek: R1 Distill Qwen 7B",
      "description": "DeepSeek R1 Distill Qwen 7B is a distilled large language model based on Qwen 2.5 Math 7B, using outputs from DeepSeek R1.",
      "id": "deepseek-r1-distill-qwen-7b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.4,
        "output": 0.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "DeepSeek: R1 Distill Qwen 32B (free)",
      "description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on Qwen 2.5 32B, using outputs from DeepSeek R1.",
      "id": "deepseek-r1-distill-qwen-32b:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "DeepSeek: R1 Distill Qwen 32B",
      "description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on Qwen 2.5 32B, using outputs from DeepSeek R1. It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.",
      "id": "deepseek-r1-distill-qwen-32b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.2,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "DeepSeek: DeepSeek V3.1 (free)",
      "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference.\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows.\nIt succeeds the DeepSeek V3-0324 model and performs well on a variety of tasks.",
      "id": "deepseek-v3.1:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "DeepSeek: DeepSeek V3.1",
      "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference.\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows.\nIt succeeds the DeepSeek V3-0324 model and performs well on a variety of tasks.",
      "id": "deepseek-v3.1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 164000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.6,
        "output": 1.6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "DeepSeek: DeepSeek V3.1 (thinking)",
      "description": "DeepSeek-V3.1-Thinking is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference.\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows.\nIt succeeds the DeepSeek V3-0324 model and performs well on a variety of tasks.",
      "id": "deepseek-v3.1-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 164000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.6,
        "output": 1.6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "DeepSeek: DeepSeek V3.1 Terminus (free)",
      "description": "DeepSeek-V3.1 Terminus is an update to DeepSeek V3.1 that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's performance in coding and search agents. It is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the reasoning enabled boolean.\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows.",
      "id": "deepseek-v3.1-terminus:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "DeepSeek: DeepSeek V3.1 Terminus",
      "description": "DeepSeek-V3.1 Terminus is an update to DeepSeek V3.1 that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's performance in coding and search agents. It is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the reasoning enabled boolean.\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows.",
      "id": "deepseek-v3.1-terminus",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "DeepSeek: DeepSeek V3.1 Terminus (thinking)",
      "description": "DeepSeek-V3.1 Terminus is an update to DeepSeek V3.1 that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's performance in coding and search agents. It is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the reasoning enabled boolean.\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows.",
      "id": "deepseek-v3.1-terminus-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "DeepSeek: DeepSeek V3.2 (free)",
      "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.",
      "id": "deepseek-v3.2:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "DeepSeek: DeepSeek V3.2",
      "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.",
      "id": "deepseek-v3.2",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 164000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.28,
        "output": 0.42
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "DeepSeek: DeepSeek V3.2 (thinking)",
      "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.",
      "id": "deepseek-v3.2-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 164000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.28,
        "output": 0.42
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "DeepSeek: DeepSeek V3.2 Exp",
      "description": "DeepSeek-V3.2-Exp is an intermediate step toward the next-generation architecture of the DeepSeek models by introducing DeepSeek Sparse Attention—a sparse attention mechanism designed to explore and validate optimizations for training and inference efficiency in long-context scenarios.",
      "id": "deepseek-v3.2-exp",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 164000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.27,
        "output": 0.41
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "DeepSeek: DeepSeek V3.2 Exp (thinking)",
      "description": "DeepSeek-V3.2-Exp is an intermediate step toward the next-generation architecture of the DeepSeek models by introducing DeepSeek Sparse Attention—a sparse attention mechanism designed to explore and validate optimizations for training and inference efficiency in long-context scenarios.",
      "id": "deepseek-v3.2-exp-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 164000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.27,
        "output": 0.41
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "DeepSeek: DeepSeek V3.2 Speciale",
      "description": "DeepSeek-V3.2-Speciale is a high-compute variant of DeepSeek-V3.2 optimized for maximum reasoning and agentic performance. It builds on DeepSeek Sparse Attention (DSA) for efficient long-context processing, then scales post-training reinforcement learning to push capability beyond the base model. Reported evaluations place Speciale ahead of GPT-5 on difficult reasoning workloads, with proficiency comparable to Gemini-3.0-Pro, while retaining strong coding and tool-use reliability. Like V3.2, it benefits from a large-scale agentic task synthesis pipeline that improves compliance and generalization in interactive environments.",
      "id": "deepseek-v3.2-speciale",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 164000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.28,
        "output": 0.42
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "DeepSeek: DeepSeek V3 0324",
      "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\nIt succeeds the original DeepSeek V3 model and performs really well on a variety of tasks.",
      "id": "deepseek-v3-0324",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 164000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "DeepSeek: DeepSeek V3",
      "description": "DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models.",
      "id": "deepseek-v3",
      "object": "model",
      "created": 1776283310,
      "owned_by": "deepseekai",
      "tokens": 164000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Pixtral Large 2411",
      "description": "Official pixtral-large-2411 Mistral AI model",
      "id": "pixtral-large-2411",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Magistral Medium Latest",
      "description": "Our frontier-class reasoning model release candidate September 2025.",
      "id": "magistral-medium-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2,
        "output": 5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Mistral: Devstral Latest",
      "description": "Official devstral-2512 Mistral AI model",
      "id": "devstral-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.4,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Voxtral Small 2507",
      "description": "A small audio understanding model released in July 2025",
      "id": "voxtral-small-2507",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 32768,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Ministral 3B Latest",
      "description": "Ministral 3 (a.k.a. Tinystral) 3B Instruct.",
      "id": "ministral-3b-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.04,
        "output": 0.04
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Voxtral Small Latest",
      "description": "A small audio understanding model released in July 2025",
      "id": "voxtral-small-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 32768,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Voxtral Mini 2507",
      "description": "A mini audio understanding model released in July 2025",
      "id": "voxtral-mini-2507",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 32768,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.07,
        "output": 0.07
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": false,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Medium 3.1",
      "description": "Update on Mistral Medium 3 with improved capabilities.",
      "id": "mistral-medium-2508",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.4,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Small 2501",
      "description": "Our latest enterprise-grade small model with the latest version released January 2025.",
      "id": "mistral-small-2501",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 32768,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Small 4",
      "description": "Mistral Small 4 is the next major release in the Mistral Small family, unifying the capabilities of several flagship Mistral models into a single system. It combines strong reasoning from Magistral, multimodal understanding from Pixtral, and agentic coding capabilities from Devstral, enabling one model to handle complex analysis, software development, and visual tasks within the same workflow.",
      "id": "mistral-small-2603",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Mistral: Magistral Small 2509",
      "description": "Our efficient reasoning model released September 2025.",
      "id": "magistral-small-2509",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Mistral: Devstral Medium Latest",
      "description": "Official devstral-2512 Mistral AI model",
      "id": "devstral-medium-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.4,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Large Pixtral 2411",
      "description": "Official pixtral-large-2411 Mistral AI model",
      "id": "mistral-large-pixtral-2411",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Ministral 14B 2512",
      "description": "Ministral 3 (a.k.a. Tinystral) 14B Instruct.",
      "id": "ministral-14b-2512",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Devstral Small Latest",
      "description": "Official labs-devstral-small-2512 Mistral AI model",
      "id": "devstral-small-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Vibe CLI Latest",
      "description": "Official devstral-2512 Mistral AI model",
      "id": "mistral-vibe-cli-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.4,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Ministral 8B 2512",
      "description": "Ministral 3 (a.k.a. Tinystral) 8B Instruct.",
      "id": "ministral-8b-2512",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Vibe CLI With Tools",
      "description": "Update on Mistral Medium 3 with improved capabilities.",
      "id": "mistral-vibe-cli-with-tools",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.4,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Labs Mistral Small Creative",
      "description": "Official labs-mistral-small-creative Mistral AI model",
      "id": "labs-mistral-small-creative",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 32768,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Ministral 8B Latest",
      "description": "Ministral 3 (a.k.a. Tinystral) 8B Instruct.",
      "id": "ministral-8b-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Pixtral Large Latest",
      "description": "Official pixtral-large-2411 Mistral AI model",
      "id": "pixtral-large-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Voxtral Mini Latest",
      "description": "A mini audio understanding model released in July 2025",
      "id": "voxtral-mini-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 32768,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.07,
        "output": 0.07
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": false,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Ministral 3B 2512",
      "description": "Ministral 3 (a.k.a. Tinystral) 3B Instruct.",
      "id": "ministral-3b-2512",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.04,
        "output": 0.04
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Devstral 2512",
      "description": "Official devstral-2512 Mistral AI model",
      "id": "devstral-2512",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.4,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Magistral Medium 2509",
      "description": "Our frontier-class reasoning model release candidate September 2025.",
      "id": "magistral-medium-2509",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2,
        "output": 5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Mistral: Devstral Small 2507",
      "description": "Our small open-source code-agentic model.",
      "id": "devstral-small-2507",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Medium 3",
      "description": "Our frontier-class multimodal model released May 2025.",
      "id": "mistral-medium-2505",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.4,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Large 2512",
      "description": "Official mistral-large-2512 Mistral AI model",
      "id": "mistral-large-2512",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Ministral 14B Latest",
      "description": "Ministral 3 (a.k.a. Tinystral) 14B Instruct.",
      "id": "ministral-14b-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Devstral Medium 2507",
      "description": "Our medium code-agentic model.",
      "id": "devstral-medium-2507",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.4,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Magistral Small Latest",
      "description": "Our efficient reasoning model released September 2025.",
      "id": "magistral-small-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Mistral: Mixtral 8x22B (free)",
      "description": "Mistral's official instruct fine-tuned version of Mixtral 8x22B. It uses 39B active parameters out of 141B, offering unparalleled cost efficiency for its size. Its strengths include: strong math, coding, and reasoning, large context length (64k), and fluency in English, French, Italian, German, and Spanish.",
      "id": "mixtral-8x22b:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral 7B Instruct",
      "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.",
      "id": "mistral-7b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Tiny 2407",
      "description": "Our best multilingual open source model released July 2024.",
      "id": "mistral-tiny-2407",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 0.25
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Tiny Latest",
      "description": "Our best multilingual open source model released July 2024.",
      "id": "mistral-tiny-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 0.25
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Small 2506",
      "description": "Our latest enterprise-grade small model with the latest version released June 2025.",
      "id": "mistral-small-2506",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 0.6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Small Latest",
      "description": "Our latest enterprise-grade small model with the latest version released June 2025.",
      "id": "mistral-small-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 0.6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Small 24B Instruct (free)",
      "description": "Mistral Small 24B Instruct is a 24B-parameter language model optimized for low-latency performance across common AI tasks. Released under the Apache 2.0 license, it features both pre-trained and instruction-tuned versions designed for efficient local deployment.\nThe model achieves 81% accuracy on the MMLU benchmark and performs competitively with larger models like Llama 3.3 70B and Qwen 32B, while operating at three times the speed on equivalent hardware.",
      "id": "mistral-small-24b-instruct:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Small 3.1 24B",
      "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities. It provides state-of-the-art performance in text-based reasoning and vision tasks, including image analysis, programming, mathematical reasoning, and multilingual support across dozens of languages. Equipped with an extensive 128k token context window and optimized for efficient local inference, it supports use cases such as conversational agents, function calling, long-document comprehension, and privacy-sensitive deployments.",
      "id": "mistral-small-3.1-24b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Small 3.2 24B",
      "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).",
      "id": "mistral-small-3.2-24b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Medium",
      "description": "Update on Mistral Medium 3 with improved capabilities.",
      "id": "mistral-medium",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.75,
        "output": 8.1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Medium Latest",
      "description": "Update on Mistral Medium 3 with improved capabilities.",
      "id": "mistral-medium-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.75,
        "output": 8.1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Large 2411",
      "description": "Our top-tier reasoning model for high-complexity tasks with the latest version released November 2024.",
      "id": "mistral-large-2411",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Large Latest",
      "description": "Official mistral-large-2512 Mistral AI model",
      "id": "mistral-large-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Mistral Large 3 675B Instruct 2512",
      "description": "Mistral Large 3 675B Instruct 2512 is Mistral’s most capable model to date, featuring a sparse mixture-of-experts architecture with 41B active parameters (675B total), and released under the Apache 2.0 license.",
      "id": "mistral-large-3-675b-instruct-2512",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Open Mistral Nemo",
      "description": "Our best multilingual open source model released July 2024.",
      "id": "open-mistral-nemo",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Open Mistral Nemo 2407",
      "description": "Our best multilingual open source model released July 2024.",
      "id": "open-mistral-nemo-2407",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Codestral 2508",
      "description": "Our cutting-edge language model for coding released August 2025.",
      "id": "codestral-2508",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 256000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 0.9
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Codestral Latest",
      "description": "Our cutting-edge language model for coding released August 2025.",
      "id": "codestral-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 256000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 0.9
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Ministral 3 14B Instruct 2512",
      "description": "Ministral 3 14B Instruct 2512 is the largest model in the Ministral 3 family, offering frontier capabilities and performance comparable to its larger Mistral Small 3.2 24B counterpart.",
      "id": "ministral-14b-instruct-2512",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Devstral Small 2",
      "description": "Official labs-devstral-small-2512 Mistral AI model",
      "id": "labs-devstral-small-2512",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Devstral 2 123B Instruct 2512",
      "description": "Devstral 2 123B Instruct 2512 is an agentic large language model designed for software engineering tasks. The model excels at using tools to explore codebases, editing multiple files, and powering software engineering agents, achieving remarkable performance on SWE-bench benchmarks.",
      "id": "devstral-2-123b-instruct-2512",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.4,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mistral: Magistral Small 2506 (free)",
      "description": "Magistral is Mistral's first reasoning model. It is ideal for general purpose use requiring longer thought processing and better accuracy than with non-reasoning LLMs. From legal research and financial forecasting to software development and creative storytelling — this model solves multi-step challenges where transparency and precision are critical.",
      "id": "magistral-small-2506:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 40000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Cohere: Command Nightly",
      "description": "Command Nightly is the latest, most experimental, and (possibly) unstable version of its default counterpart. Nightly releases are updated regularly, without warning, and are not recommended for production use.",
      "id": "command-nightly",
      "object": "model",
      "created": 1776283310,
      "owned_by": "cohere",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Cohere: Command R (08-2024)",
      "description": "Command-R is a 35B parameter model that performs conversational language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.",
      "id": "command-r-08-2024",
      "object": "model",
      "created": 1776283310,
      "owned_by": "cohere",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Cohere: Command R+ (08-2024)",
      "description": "Command R+ is a new, 104B-parameter LLM from Cohere. It's useful for roleplay, general consumer use cases, and Retrieval Augmented Generation (RAG). It offers multilingual support for ten key languages to facilitate global business operations.",
      "id": "command-r-plus-08-2024",
      "object": "model",
      "created": 1776283310,
      "owned_by": "cohere",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.5,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Cohere: Command R7B (12-2024)",
      "description": "Command R7B (12-2024) is a small, fast update of the Command R+ model, delivered in December 2024. It excels at RAG, tool use, agents, and similar tasks requiring complex reasoning and multiple steps.",
      "id": "command-r7b-12-2024",
      "object": "model",
      "created": 1776283310,
      "owned_by": "cohere",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.038,
        "output": 0.15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Cohere: Command A",
      "description": "Command A is an open-weights 111B parameter model with a 256k context window focused on delivering great performance across agentic, multilingual, and coding use cases.\nCompared to other leading proprietary and open-weights models Command A delivers maximum performance with minimum hardware costs, excelling on business-critical agentic and multilingual tasks.",
      "id": "command-a-03-2025",
      "object": "model",
      "created": 1776283310,
      "owned_by": "cohere",
      "tokens": 256000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.5,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Cohere: Command A Translate",
      "description": "Command A Translate is Cohere’s state of the art machine translation model, excelling at a variety of translation tasks on 23 languages: English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Chinese, Arabic, Russian, Polish, Turkish, Vietnamese, Dutch, Czech, Indonesian, Ukrainian, Romanian, Greek, Hindi, Hebrew, Persian.",
      "id": "command-a-translate-08-2025",
      "object": "model",
      "created": 1776283310,
      "owned_by": "cohere",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.5,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Cohere: Command A Reasoning",
      "description": "Command A Reasoning is Cohere’s first reasoning model, able to ‘think’ before generating an output in a way that allows it to perform well in certain kinds of nuanced problem-solving and agent-based tasks.",
      "id": "command-a-reasoning-08-2025",
      "object": "model",
      "created": 1776283310,
      "owned_by": "cohere",
      "tokens": 256000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.5,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Cohere: Command A Vision",
      "description": "Command A Vision is Cohere's first model capable of processing images, excelling in enterprise use cases such as analyzing charts, graphs, and diagrams, table understanding, OCR, document Q&A, and scene analysis. It officially supports English, Portuguese, Italian, French, German, and Spanish.",
      "id": "command-a-vision-07-2025",
      "object": "model",
      "created": 1776283310,
      "owned_by": "cohere",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.5,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Cohere: C4AI Aya Expanse 8B",
      "description": "Aya Expanse is a highly performant 8B multilingual model, designed to rival monolingual performance through innovations in instruction tuning with data arbitrage, preference training, and model merging. Serves 23 languages.",
      "id": "c4ai-aya-expanse-8b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "cohere",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Cohere: C4AI Aya Expanse 32B",
      "description": "Aya Expanse is a highly performant 32B multilingual model, designed to rival monolingual performance through innovations in instruction tuning with data arbitrage, preference training, and model merging. Serves 23 languages.",
      "id": "c4ai-aya-expanse-32b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "cohere",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Cohere: C4AI Aya Vision 8B",
      "description": "Aya Vision is a state-of-the-art multimodal model excelling at a variety of critical benchmarks for language, text, and image capabilities. This 8 billion parameter variant is focused on low latency and best-in-class performance.",
      "id": "c4ai-aya-vision-8b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "cohere",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Cohere: C4AI Aya Vision 32B",
      "description": "Aya Vision is a state-of-the-art multimodal model excelling at a variety of critical benchmarks for language, text, and image capabilities. Serves 23 languages. This 32 billion parameter variant is focused on state-of-art multilingual performance.",
      "id": "c4ai-aya-vision-32b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "cohere",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "xAI: Grok 4",
      "description": "Grok 4 is xAI's latest reasoning model with a 256k context window. It supports parallel tool calling, structured outputs, and both image and text inputs. Note that reasoning is not exposed, reasoning cannot be disabled, and the reasoning effort cannot be specified.",
      "id": "grok-4-0709",
      "object": "model",
      "created": 1776283310,
      "owned_by": "x-ai",
      "tokens": 256000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 5,
        "output": 20
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "xAI: Grok 4 Fast",
      "description": "Grok 4 Fast is xAI's latest multimodal model with SOTA cost-efficiency and a 2M token context window.",
      "id": "grok-4-fast",
      "object": "model",
      "created": 1776283310,
      "owned_by": "x-ai",
      "tokens": 2000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "xAI: Grok 4.1 Fast",
      "description": "Grok 4.1 Fast is xAI's best agentic tool calling model that shines in real-world use cases like customer support and deep research. 2M context window.",
      "id": "grok-4.1-fast",
      "object": "model",
      "created": 1776283310,
      "owned_by": "x-ai",
      "tokens": 2000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "xAI: Grok 4.20 Beta",
      "description": "Grok 4.20 Beta is xAI's newest flagship model with industry-leading speed and agentic tool calling capabilities. It combines the lowest hallucination rate on the market with strict prompt adherence, delivering consistently precise and truthful responses.",
      "id": "grok-4.20-beta",
      "object": "model",
      "created": 1776283310,
      "owned_by": "x-ai",
      "tokens": 2000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "xAI: Grok Code Fast 1",
      "description": "Grok Code Fast 1 is a speedy and economical reasoning model that excels at agentic coding. With reasoning traces visible in the response, developers can steer Grok Code for high-quality work flows.",
      "id": "grok-code-fast-1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "x-ai",
      "tokens": 256000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Microsoft: Phi 4",
      "description": "Microsoft Research Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed.\nAt 14 billion parameters, it was trained on a mix of high-quality synthetic datasets, data from curated websites, and academic materials. It has undergone careful improvement to follow instructions accurately and maintain strong safety standards. It works best with English language inputs.",
      "id": "phi-4",
      "object": "model",
      "created": 1776283310,
      "owned_by": "microsoft",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Microsoft: Phi 4 Multimodal Instruct",
      "description": "Phi-4 Multimodal Instruct is a versatile 5.6B parameter foundation model that combines advanced reasoning and instruction-following capabilities across both text and visual inputs, providing accurate text outputs. The unified architecture enables efficient, low-latency inference, suitable for edge and mobile deployments. Phi-4 Multimodal Instruct supports text inputs in multiple languages including Arabic, Chinese, English, French, German, Japanese, Spanish, and more, with visual input optimized primarily for English. It delivers impressive performance on multimodal tasks involving mathematical, scientific, and document reasoning, providing developers and enterprises a powerful yet compact model for sophisticated interactive applications.",
      "id": "phi-4-multimodal-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "microsoft",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.07,
        "output": 0.11
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Microsoft: Phi 4 Mini Flash Reasoning",
      "description": "Phi-4-mini-flash-reasoning is a lightweight open model built upon synthetic data with a focus on high-quality, reasoning dense data further finetuned for more advanced math reasoning capabilities. The model belongs to the Phi-4 model family and supports 64K token context length.",
      "id": "phi-4-mini-flash-reasoning",
      "object": "model",
      "created": 1776283310,
      "owned_by": "microsoft",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.03,
        "output": 0.09
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Microsoft: Phi 4 Reasoning Plus",
      "description": "Phi-4-reasoning-plus is an enhanced 14B parameter model from Microsoft, fine-tuned from Phi-4 with additional reinforcement learning to boost accuracy on math, science, and code reasoning tasks. It uses the same dense decoder-only transformer architecture as Phi-4, but generates longer, more comprehensive outputs structured into a step-by-step reasoning trace and final answer.\nWhile it offers improved benchmark scores over Phi-4-reasoning across tasks like AIME, OmniMath, and HumanEvalPlus, its responses are typically ~50% longer, resulting in higher latency. Designed for English-only applications, it is well-suited for structured reasoning workflows where output quality takes priority over response speed.",
      "id": "phi-4-reasoning-plus",
      "object": "model",
      "created": 1776283310,
      "owned_by": "microsoft",
      "tokens": 33000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.12,
        "output": 0.36
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Microsoft: Phi 3.5 Mini 128k Instruct",
      "description": "Phi-3.5 models are lightweight, state-of-the-art open models. These models were trained with Phi-3 datasets that include both synthetic data and the filtered, publicly available websites data, with a focus on high quality and reasoning-dense properties. Phi-3.5 Mini uses 3.8B parameters, and is a dense decoder-only transformer model using the same tokenizer as Phi-3 Mini.\n The models underwent a rigorous enhancement process, incorporating both supervised fine-tuning, proximal policy optimization, and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks that test common sense, language understanding, math, code, long context and logical reasoning, Phi-3.5 models showcased robust and state-of-the-art performance among models with less than 13 billion parameters.",
      "id": "phi-3.5-mini-128k-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "microsoft",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Microsoft: Phi 3 Medium 128k Instruct (free)",
      "description": "Phi-3 Medium 128k Instruct is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\nAt time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. In the MMLU-Pro eval, the model even comes close to a Llama3 70B level of performance.",
      "id": "phi-3-medium-128k-instruct:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "microsoft",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Microsoft: Phi 3 Medium 128k Instruct",
      "description": "Phi-3 128K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\nAt time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. In the MMLU-Pro eval, the model even comes close to a Llama3 70B level of performance.",
      "id": "phi-3-medium-128k-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "microsoft",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Microsoft: Phi 3 Mini 128k Instruct",
      "description": "Phi-3 Mini is a powerful 3.8B parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\nAt time of release, Phi-3 Mini demonstrated state-of-the-art performance among lightweight models. This model is static, trained on an offline dataset with an October 2023 cutoff date.",
      "id": "phi-3-mini-128k-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "microsoft",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Microsoft: Phi 2",
      "description": "Phi-2 is a Transformer with 2.7 billion parameters. It was trained using the same data sources as Phi-1.5, augmented with a new data source that consists of various NLP synthetic texts and filtered websites (for safety and educational value). When assessed against benchmarks testing common sense, language understanding, and logical reasoning, Phi-2 showcased a nearly state-of-the-art performance among models with less than 13 billion parameters.",
      "id": "phi-2",
      "object": "model",
      "created": 1776283310,
      "owned_by": "microsoft",
      "tokens": 2048,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.05,
        "output": 0.05
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Microsoft: WizardLM-2 7B",
      "description": "WizardLM-2 7B is the smaller variant of Microsoft AI's latest Wizard model. It is the fastest and achieves comparable performance with existing 10x larger opensource leading models. It is a finetune of Mistral 7B Instruct, using the same technique as WizardLM-2 8x22B.",
      "id": "wizardlm-2-7b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "microsoft",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.07,
        "output": 0.07
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Microsoft: WizardLM-2 8x22B",
      "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models. It is an instruct finetune of Mixtral 8x22B.",
      "id": "wizardlm-2-8x22b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "microsoft",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 0.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "MiniMax: MiniMax M2",
      "description": "MiniMax-M2 is a compact, fast, and cost-effective Mixture-of-Experts (MoE) model with 230 billion total parameters and 10 billion active parameters, built for elite performance in coding and agentic tasks while maintaining powerful general intelligence. With just 10 billion activated parameters, MiniMax-M2 provides sophisticated end-to-end tool use performance expected from today's leading models, but in a streamlined form factor that makes deployment and scaling easier than ever. The model excels at multi-file edits, coding-run-fix loops, test-validated repairs, and complex long-horizon toolchains across shell, browser, retrieval, and code runners.",
      "id": "minimax-m2",
      "object": "model",
      "created": 1776283310,
      "owned_by": "minimax",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "MiniMax: MiniMax M2.1",
      "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.",
      "id": "minimax-m2.1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "minimax",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "MiniMax: MiniMax M2-her",
      "description": "MiniMax M2-her is a dialogue-first large language model built for immersive roleplay, character-driven chat, and expressive multi-turn conversations. Designed to stay consistent in tone and personality, it supports rich message roles (user_system, group, sample_message_user, sample_message_ai) and can learn from example dialogue to better match the style and pacing of your scenario, making it a strong choice for storytelling, companions, and conversational experiences where natural flow and vivid interaction matter most.",
      "id": "minimax-m2-her",
      "object": "model",
      "created": 1776283310,
      "owned_by": "minimax",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "MiniMax: MiniMax M2.5",
      "description": "MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1 to extend into general office work, reaching fluency in generating and operating Word, Excel, and Powerpoint files, context switching between diverse software environments, and working across different agent and human teams. Scoring 80.2% on SWE-Bench Verified, 51.3% on Multi-SWE-Bench, and 76.3% on BrowseComp, M2.5 is also more token efficient than previous generations, having been trained to optimize its actions and output through planning.",
      "id": "minimax-m2.5",
      "object": "model",
      "created": 1776283310,
      "owned_by": "minimax",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "MiniMax: MiniMax M2.7",
      "description": "MiniMax-M2.7 is a next-generation large language model designed for autonomous, real-world productivity and continuous improvement. Built to actively participate in its own evolution, M2.7 integrates advanced agentic capabilities through multi-agent collaboration, enabling it to plan, execute, and refine complex tasks across dynamic environments.",
      "id": "minimax-m2.7",
      "object": "model",
      "created": 1776283310,
      "owned_by": "minimax",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "AI21: Jamba Large 1.7",
      "description": "Jamba Large 1.7 is the latest model in the Jamba open family, offering improvements in grounding, instruction-following, and overall efficiency. Built on a hybrid SSM-Transformer architecture with a 256K context window, it delivers more accurate, contextually grounded responses and better steerability than previous versions.",
      "id": "jamba-1.7-large",
      "object": "model",
      "created": 1776283310,
      "owned_by": "ai21",
      "tokens": 256000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2,
        "output": 8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen2.5 7B Instruct",
      "description": "Qwen2.5 7B is the latest series of Qwen large language models. It shows significant improvements in instruction-following capabilities, outperforming its predecessor Qwen1.5 7B in various benchmarks. It is designed to be more efficient and effective in understanding and generating human-like text.",
      "id": "qwen-2.5-7b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen2.5 32B Instruct",
      "description": "Qwen2.5 32B is the latest series of Qwen large language models. It shows significant improvements in instruction-following capabilities, outperforming the smaller Qwen2.5 7B in various benchmarks. It is designed to be more efficient and effective in understanding and generating human-like text.",
      "id": "qwen-2.5-32b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen2.5 72B Instruct",
      "description": "Qwen2.5 72B is the latest series of Qwen large language models. It shows significant improvements in instruction-following capabilities, outperforming the smaller Qwen2.5 32B in various benchmarks. It is designed to be more efficient and effective in understanding and generating human-like text.",
      "id": "qwen-2.5-72b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen2.5 Coder 7B Instruct",
      "description": "Qwen2.5-Coder-7B-Instruct is a 7B parameter instruction-tuned language model optimized for code-related tasks such as code generation, reasoning, and bug fixing. Based on the Qwen2.5 architecture, it incorporates enhancements like RoPE, SwiGLU, RMSNorm, and GQA attention with support for up to 128K tokens using YaRN-based extrapolation. It is trained on a large corpus of source code, synthetic data, and text-code grounding, providing robust performance across programming languages and agentic coding workflows.",
      "id": "qwen-2.5-coder-7b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 0.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen2.5 Coder 32B Instruct",
      "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). Qwen2.5-Coder brings the following improvements upon CodeQwen1.5:\nSignificantly improvements in code generation, code reasoning and code fixing.\nA more comprehensive foundation for real-world applications such as Code Agents. Not only enhancing coding capabilities but also maintaining its strengths in mathematics and general competencies.",
      "id": "qwen-2.5-coder-32b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 33000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.8,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: QwQ 32B Preview",
      "description": "QwQ-32B-Preview is an experimental research model focused on AI reasoning capabilities developed by the Qwen Team.",
      "id": "qwq-32b-preview",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.9,
        "output": 0.9
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: QwQ 32B (free)",
      "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.",
      "id": "qwq-32b:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: QwQ 32B",
      "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.",
      "id": "qwq-32b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.9,
        "output": 0.9
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: Qwen VL Plus",
      "description": "Qwen's Enhanced Large Visual Language Model. Significantly upgraded for detailed recognition capabilities and text recognition abilities, supporting ultra-high pixel resolutions up to millions of pixels and extreme aspect ratios for image input. It delivers significant performance across a broad range of visual tasks.",
      "id": "qwen-vl-plus",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 7500,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 0.9
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen VL Max",
      "description": "Qwen VL Max is a visual understanding model with 7500 tokens context length. It excels in delivering optimal performance for a broader spectrum of complex tasks.",
      "id": "qwen-vl-max",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 7500,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 3.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen2.5 VL 3B Instruct",
      "description": "Qwen2.5 VL 3B is a multimodal LLM from the Qwen Team with the following key enhancements:\nSoTA understanding of images of various resolution & ratio: Qwen2.5-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\nAgent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2.5-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\nMultilingual Support: to serve global users, besides English and Chinese, Qwen2.5-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.",
      "id": "qwen2.5-vl-3b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen2.5 VL 7B Instruct",
      "description": "Qwen2.5 VL 7B is a multimodal LLM from the Qwen Team with the following key enhancements:\nSoTA understanding of images of various resolution & ratio: Qwen2.5-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\nUnderstanding videos of 20min+: Qwen2.5-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\nAgent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2.5-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\nMultilingual Support: to serve global users, besides English and Chinese, Qwen2.5-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.",
      "id": "qwen2.5-vl-7b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.6,
        "output": 0.6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen2.5 VL 32B Instruct",
      "description": "Qwen2.5-VL-32B is a multimodal vision-language model fine-tuned through reinforcement learning for enhanced mathematical reasoning, structured outputs, and visual problem-solving capabilities. It excels at visual analysis tasks, including object recognition, textual interpretation within images, and precise event localization in extended videos. Qwen2.5-VL-32B demonstrates state-of-the-art performance across multimodal benchmarks such as MMMU, MathVista, and VideoMME, while maintaining strong reasoning and clarity in text-based tasks like MMLU, mathematical problem-solving, and code generation.",
      "id": "qwen2.5-vl-32b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen2.5 VL 72B Instruct",
      "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.",
      "id": "qwen2.5-vl-72b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen-Turbo",
      "description": "Qwen-Turbo, based on Qwen2.5, is a 1M context model that provides fast speed and low cost, suitable for simple tasks.",
      "id": "qwen-turbo",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen Plus",
      "description": "Qwen-Plus, based on the Qwen2.5 foundation model, is a 131K context model with a balanced performance, speed, and cost combination.",
      "id": "qwen-plus",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen Plus 0728",
      "description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.",
      "id": "qwen-plus-2025-07-28",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen Plus 0728 (thinking)",
      "description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.",
      "id": "qwen-plus-2025-07-28-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: Qwen-Max",
      "description": "Qwen-Max, based on Qwen2.5, provides the best inference performance among Qwen models, especially for complex multi-step tasks. It's a large-scale MoE model that has been pretrained on over 20 trillion tokens and further post-trained with curated Supervised Fine-Tuning (SFT) and Reinforcement Learning from Human Feedback (RLHF) methodologies.",
      "id": "qwen-max",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2,
        "output": 7
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen3 8B",
      "description": "Qwen3-8B is a dense 8.2B parameter causal language model from the Qwen3 series, designed for both reasoning-heavy tasks and efficient dialogue. It supports seamless switching between \"thinking\" mode for math, coding, and logical inference, and \"non-thinking\" mode for general conversation. The model is fine-tuned for instruction-following, agent integration, creative writing, and multilingual use across 100+ languages and dialects. It natively supports a 32K token context window and can extend to 131K tokens with YaRN scaling.",
      "id": "qwen3-8b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 41000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.06,
        "output": 0.24
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: Qwen3 14B",
      "description": "Qwen3-14B is a dense 14.8B parameter causal language model from the Qwen3 series, designed for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, programming, and logical inference, and a \"non-thinking\" mode for general-purpose conversation. The model is fine-tuned for instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling.",
      "id": "qwen3-14b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 41000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: Qwen3 32B",
      "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling.",
      "id": "qwen3-32b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 41000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.12,
        "output": 0.36
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: Qwen3 30B A3B",
      "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.",
      "id": "qwen3-30b-a3b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 41000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: Qwen3 30B A3B Instruct 2507",
      "description": "Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts language model from Qwen, with 3.3B active parameters per inference. It operates in non-thinking mode and is designed for high-quality instruction following, multilingual understanding, and agentic tool use. Post-trained on instruction data, it demonstrates competitive performance across reasoning (AIME, ZebraLogic), coding (MultiPL-E, LiveCodeBench), and alignment (IFEval, WritingBench) benchmarks. It outperforms its non-instruct variant on subjective and open-ended tasks while retaining strong factual and coding performance.",
      "id": "qwen3-30b-a3b-instruct-2507",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen3 30B A3B Thinking 2507",
      "description": "Qwen3-30B-A3B-Thinking-2507 is a 30B parameter Mixture-of-Experts reasoning model optimized for complex tasks requiring extended multi-step thinking. The model is designed specifically for “thinking mode,” where internal reasoning traces are separated from final answers.\nCompared to earlier Qwen3-30B releases, this version improves performance across logical reasoning, mathematics, science, coding, and multilingual benchmarks. It also demonstrates stronger instruction following, tool use, and alignment with human preferences. With higher reasoning efficiency and extended output budgets, it is best suited for advanced research, competitive problem solving, and agentic applications requiring structured long-context reasoning.",
      "id": "qwen3-30b-a3b-thinking-2507",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: Qwen3 Next 80B A3B Instruct (free)",
      "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.",
      "id": "qwen3-next-80b-a3b-instruct:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen3 Next 80B A3B Instruct",
      "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.",
      "id": "qwen3-next-80b-a3b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen3 Next 80B A3B Thinking (free)",
      "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems — math proofs, code synthesis/debugging, logic, and agentic planning — and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.",
      "id": "qwen3-next-80b-a3b-instruct-thinking:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: Qwen3 Next 80B A3B Thinking",
      "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems — math proofs, code synthesis/debugging, logic, and agentic planning — and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.",
      "id": "qwen3-next-80b-a3b-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: Qwen3 235B A22B (free)",
      "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and code tasks, and a \"non-thinking\" mode for general conversational efficiency. The model demonstrates strong reasoning ability, multilingual support (100+ languages and dialects), advanced instruction-following, and agent tool-calling capabilities. It natively handles a 32K token context window and extends up to 131K tokens using YaRN-based scaling.",
      "id": "qwen3-235b-a22b:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: Qwen3 235B A22B",
      "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and code tasks, and a \"non-thinking\" mode for general conversational efficiency. The model demonstrates strong reasoning ability, multilingual support (100+ languages and dialects), advanced instruction-following, and agent tool-calling capabilities. It natively handles a 32K token context window and extends up to 131K tokens using YaRN-based scaling.",
      "id": "qwen3-235b-a22b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.24,
        "output": 0.72
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: Qwen3 235B A22B Instruct 2507",
      "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" (<think> blocks).\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.",
      "id": "qwen3-235b-a22b-2507",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.24,
        "output": 0.72
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen3 235B A22B Thinking 2507",
      "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode (</think>) and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.",
      "id": "qwen3-235b-a22b-thinking-2507",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.24,
        "output": 0.72
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: Qwen3 VL 235B A22B Instruct",
      "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.",
      "id": "qwen3-vl-235b-a22b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.36,
        "output": 1.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen3 VL 235B A22B Thinking",
      "description": "Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video. The Thinking model is optimized for multimodal reasoning in STEM and math. The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows, turning sketches or mockups into code and assisting with UI debugging, while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.",
      "id": "qwen3-vl-235b-a22b-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.6,
        "output": 4.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: Qwen3 Coder 480B A35B",
      "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).",
      "id": "qwen3-coder",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.4,
        "output": 2.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen3 Max",
      "description": "Qwen3-Max is an updated release built on the Qwen3 series, offering major improvements in reasoning, instruction following, multilingual support, and long-tail knowledge coverage compared to the January 2025 version. It delivers higher accuracy in math, coding, logic, and science tasks, follows complex instructions in Chinese and English more reliably, reduces hallucinations, and produces higher-quality responses for open-ended Q&A, writing, and conversation. The model supports over 100 languages with stronger translation and commonsense reasoning, and is optimized for retrieval-augmented generation (RAG) and tool calling, though it does not include a dedicated “thinking” mode.",
      "id": "qwen3-max",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 256000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alibaba: Qwen3 Max Thinking",
      "description": "Qwen3-Max-Thinking is the flagship reasoning model in the Qwen3 series, designed for high-stakes cognitive tasks that require deep, multi-step reasoning. By significantly scaling model capacity and reinforcement learning compute, it delivers major gains in factual accuracy, complex reasoning, instruction following, alignment with human preferences, and agentic behavior.",
      "id": "qwen3-max-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 256000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: Qwen3.5 397B A17B",
      "description": "The Qwen3.5 series 397B-A17B native vision-language model is built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. It delivers state-of-the-art performance comparable to leading-edge models across a wide range of tasks, including language understanding, logical reasoning, code generation, agent-based tasks, image understanding, video understanding, and graphical user interface (GUI) interactions. With its robust code-generation and agent capabilities, the model exhibits strong generalization across diverse agent tasks.",
      "id": "qwen3.5-397b-a17b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 256000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: Qwen3.5 Plus 2026-02-15",
      "description": "The Qwen3.5 native vision-language series Plus models are built on a hybrid architecture that integrates linear attention mechanisms with sparse mixture-of-experts models, achieving higher inference efficiency. In a variety of task evaluations, the 3.5 series consistently demonstrates performance on par with state-of-the-art leading models. Compared to the 3 series, these models show a leap forward in both pure-text and multimodal capabilities.",
      "id": "qwen3.5-plus-02-15",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 1000000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.8,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Alibaba: Tongyi DeepResearch 30B A3B",
      "description": "Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token. It's optimized for long-horizon, deep information-seeking tasks and delivers state-of-the-art performance on benchmarks like Humanity's Last Exam, BrowserComp, BrowserComp-ZH, WebWalkerQA, GAIA, xbench-DeepSearch, and FRAMES. This makes it superior for complex agentic search, reasoning, and multi-step problem-solving compared to prior models.\nThe model includes a fully automated synthetic data pipeline for scalable pre-training, fine-tuning, and reinforcement learning. It uses large-scale continual pre-training on diverse agentic data to boost reasoning and stay fresh. It also features end-to-end on-policy RL with a customized Group Relative Policy Optimization, including token-level gradients and negative sample filtering for stable training. The model supports ReAct for core ability checks and an IterResearch-based 'Heavy' mode for max performance through test-time scaling. It's ideal for advanced research agents, tool use, and heavy inference workflows.",
      "id": "tongyi-deepresearch-30b-a3b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.12,
        "output": 0.6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "AionLabs: Aion-1.0",
      "description": "Aion-1.0 is a multi-model system designed for high performance across various tasks, including reasoning and coding. It is built on DeepSeek-R1, augmented with additional models and techniques such as Tree of Thoughts (ToT) and Mixture of Experts (MoE). It is Aion Lab's most powerful reasoning model.",
      "id": "aion-1.0",
      "object": "model",
      "created": 1776283310,
      "owned_by": "aion-labs",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "AionLabs: Aion-1.0-Mini",
      "description": "Aion-1.0-Mini 32B parameter model is a distilled version of the DeepSeek-R1 model, designed for strong performance in reasoning domains such as mathematics, coding, and logic. It is a modified variant of a FuseAI model that outperforms R1-Distill-Qwen-32B and R1-Distill-Llama-70B.",
      "id": "aion-1.0-mini",
      "object": "model",
      "created": 1776283310,
      "owned_by": "aion-labs",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.7,
        "output": 1.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "AionLabs: Aion-RP 1.0 (8B)",
      "description": "Aion-RP-Llama-3.1-8B ranks the highest in the character evaluation portion of the RPBench-Auto benchmark, a roleplaying-specific variant of Arena-Hard-Auto, where LLMs evaluate each other’s responses. It is a fine-tuned base model rather than an instruct model, designed to produce more natural and varied writing.",
      "id": "aion-rp-llama-3.1-8b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "aion-labs",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 0.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Inflection: Inflection 3 Pi",
      "description": "Inflection 3 Pi powers Inflection's Pi chatbot, including backstory, emotional intelligence, productivity, and safety. It has access to recent news, and excels in scenarios like customer support and roleplay.\nPi has been trained to mirror your tone and style, if you use more emojis, so will Pi! Try experimenting with various prompts and conversation styles.",
      "id": "inflection-3-pi",
      "object": "model",
      "created": 1776283310,
      "owned_by": "inflection",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.5,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Inflection: Inflection 3 Productivity",
      "description": "Inflection 3 Productivity is optimized for following instructions. It is better for tasks requiring JSON output or precise adherence to provided guidelines. It has access to recent news.",
      "id": "inflection-3-productivity",
      "object": "model",
      "created": 1776283310,
      "owned_by": "inflection",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2.5,
        "output": 10
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Inception: Mercury",
      "description": "Mercury is the first diffusion large language model (dLLM). Applying a breakthrough discrete diffusion approach, the model runs 5-10x faster than even speed optimized models like GPT-4.1 Nano and Claude 3.5 Haiku while matching their performance. Mercury's speed enables developers to provide responsive user experiences, including with voice agents, search interfaces, and chatbots.",
      "id": "mercury",
      "object": "model",
      "created": 1776283310,
      "owned_by": "inception",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Inception: Mercury Coder",
      "description": "Mercury Coder is the first diffusion large language model (dLLM). Applying a breakthrough discrete diffusion approach, the model runs 5-10x faster than even speed optimized models like Claude 3.5 Haiku and GPT-4o Mini while matching their performance. Mercury Coder's speed means that developers can stay in the flow while coding, enjoying rapid chat-based iteration and responsive code completion suggestions. On Copilot Arena, Mercury Coder ranks 1st in speed and ties for 2nd in quality.",
      "id": "mercury-coder",
      "object": "model",
      "created": 1776283310,
      "owned_by": "inception",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Gryphe: Mythomax L2 13B",
      "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay.",
      "id": "mytho-max-l2-13b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "gryphe",
      "tokens": 4000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "NousResearch: DeepHermes 3 Mistral 24B Preview",
      "description": "DeepHermes 3 (Mistral 24B Preview) is an instruction-tuned language model by Nous Research based on Mistral-Small-24B, designed for chat, function calling, and advanced multi-turn reasoning. It introduces a dual-mode system that toggles between intuitive chat responses and structured “deep reasoning” mode using special system prompts. Fine-tuned via distillation from R1, it supports structured output (JSON mode) and function call syntax for agent-based applications.\nDeepHermes 3 supports a reasoning toggle via system prompt, allowing users to switch between fast, intuitive responses and deliberate, multi-step reasoning. When activated with the following specific system instruction, the model enters a \"deep thinking\" mode—generating extended chains of thought wrapped in <think></think> tags before delivering a final answer.\nSystem Prompt: You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem.",
      "id": "deephermes-3-mistral-24b-preview",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nousresearch",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 0.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "NousResearch: DeepHermes 3 Llama 3 8B Preview",
      "description": "DeepHermes 3 Preview is the latest version of our flagship Hermes series of LLMs by Nous Research, and one of the first models in the world to unify Reasoning (long chains of thought that improve answer accuracy) and normal LLM response modes into one model. We have also improved LLM annotation, judgement, and function calling.",
      "id": "deephermes-3-llama-3-8b-preview",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nousresearch",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "NousResearch: Hermes 3 8B Instruct",
      "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.",
      "id": "hermes-3-llama-3.1-8b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nousresearch",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 0.3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "NousResearch: Hermes 3 405B Instruct",
      "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\nHermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.\nHermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at general capabilities, with varying strengths and weaknesses attributable between the two.",
      "id": "hermes-3-llama-3.1-405b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nousresearch",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "NousResearch: Hermes 2 Pro Llama 3 8B",
      "description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.",
      "id": "hermes-2-pro-llama-3-8b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nousresearch",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.14,
        "output": 0.14
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "NousResearch: Hermes 4 14B",
      "description": "Hermes 4 14B is a frontier, hybrid-mode reasoning model based on Qwen 3 14B by Nous Research that is aligned to you.",
      "id": "hermes-4-14b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nousresearch",
      "tokens": 40000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.05,
        "output": 0.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "NousResearch: Hermes 4 14B (thinking)",
      "description": "Hermes 4 14B (thinking) is a frontier, hybrid-mode reasoning model based on Qwen 3 14B by Nous Research that is aligned to you.",
      "id": "hermes-4-14b-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nousresearch",
      "tokens": 40000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.05,
        "output": 0.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "NousResearch: Hermes 4 70B",
      "description": "Hermes 4 70B is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-70B. It introduces the same hybrid mode as the larger 405B release, allowing the model to either respond directly or generate explicit <think>...</think> reasoning traces before answering. Users can control the reasoning behaviour with the reasoning enabled boolean.\nThis 70B variant is trained with the expanded post-training corpus (~60B tokens) emphasizing verified reasoning data, leading to improvements in mathematics, coding, STEM, logic, and structured outputs while maintaining general assistant performance. It supports JSON mode, schema adherence, function calling, and tool use, and is designed for greater steerability with reduced refusal rates.",
      "id": "hermes-4-70b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nousresearch",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "NousResearch: Hermes 4 70B (thinking)",
      "description": "Hermes 4 70B (thinking) is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-70B. It introduces the same hybrid mode as the larger 405B release, allowing the model to either respond directly or generate explicit <think>...</think> reasoning traces before answering. Users can control the reasoning behaviour with the reasoning enabled boolean.\nThis 70B variant is trained with the expanded post-training corpus (~60B tokens) emphasizing verified reasoning data, leading to improvements in mathematics, coding, STEM, logic, and structured outputs while maintaining general assistant performance. It supports JSON mode, schema adherence, function calling, and tool use, and is designed for greater steerability with reduced refusal rates.",
      "id": "hermes-4-70b-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nousresearch",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "NousResearch: Hermes 4 405B",
      "description": "Hermes 4 is a large-scale reasoning model built on Meta-Llama-3.1-405B and released by Nous Research. It introduces a hybrid reasoning mode, where the model can choose to deliberate internally with <think>...</think> traces or respond directly, offering flexibility between speed and depth. Users can control the reasoning behaviour with the reasoning enabled boolean.\nThe model is instruction-tuned with an expanded post-training corpus (~60B tokens) emphasizing reasoning traces, improving performance in math, code, STEM, and logical reasoning, while retaining broad assistant utility. It also supports structured outputs, including JSON mode, schema adherence, function calling, and tool use. Hermes 4 is trained for steerability, lower refusal rates, and alignment toward neutral, user-directed behavior.",
      "id": "hermes-4-405b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nousresearch",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "NousResearch: Hermes 4 405B (thinking)",
      "description": "Hermes 4 405B (thinking) is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-405B. It introduces the same hybrid mode as the standard 405B release, allowing the model to either respond directly or generate explicit <think>...</think> reasoning traces before answering. Users can control the reasoning behaviour with the reasoning enabled boolean.\nThe model is instruction-tuned with an expanded post-training corpus (~60B tokens) emphasizing reasoning traces, improving performance in math, code, STEM, and logical reasoning, while retaining broad assistant utility. It also supports structured outputs, including JSON mode, schema adherence, function calling, and tool use. Hermes 4 is trained for steerability, lower refusal rates, and alignment toward neutral, user-directed behavior.",
      "id": "hermes-4-405b-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nousresearch",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "NousResearch: Hermes 4.3 36B",
      "description": "Hermes 4.3 36B is a frontier, hybrid-mode reasoning model based on ByteDance Seed 36B base, made by Nous Research that is aligned to you.",
      "id": "hermes-4.3-36b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nousresearch",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "NousResearch: Hermes 4.3 36B (thinking)",
      "description": "Hermes 4.3 36B (thinking) is a frontier, hybrid-mode reasoning model based on ByteDance Seed 36B base, made by Nous Research that is aligned to you.",
      "id": "hermes-4.3-36b-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nousresearch",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 0.6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Arcee: Trinity Mini",
      "description": "Trinity Mini is a 26B-parameter (3B active) sparse mixture-of-experts language model featuring 128 experts with 8 active per token. Engineered for efficient reasoning over long contexts (131k) with robust function calling and multi-step agent workflows.",
      "id": "trinity-mini",
      "object": "model",
      "created": 1776283310,
      "owned_by": "arcee-ai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.045,
        "output": 0.15
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Arcee: Trinity Large Preview",
      "description": "Trinity-Large-Preview is a frontier-scale open-weight language model from Arcee, built as a 400B-parameter sparse Mixture-of-Experts with 13B active parameters per token using 4-of-256 expert routing.\nIt excels in creative writing, storytelling, role-play, chat scenarios, and real-time voice assistance, better than your average reasoning model usually can.",
      "id": "trinity-large-preview",
      "object": "model",
      "created": 1776283310,
      "owned_by": "arcee-ai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.06,
        "output": 0.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "ByteDance: Seed OSS 36B Instruct (free)",
      "description": "Seed-OSS-36B-Instruct is a 36B-parameter instruction-tuned reasoning language model from ByteDance’s Seed team, released under Apache-2.0. The model is optimized for general instruction following with strong performance in reasoning, mathematics, coding, tool use/agentic workflows, and multilingual tasks, and is intended for international (i18n) use cases. It is not currently possible to control the reasoning effort.",
      "id": "seed-oss-36b-instruct:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "bytedance",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "ByteDance: Seed OSS 36B Instruct",
      "description": "Seed-OSS-36B-Instruct is a 36B-parameter instruction-tuned reasoning language model from ByteDance’s Seed team, released under Apache-2.0. The model is optimized for general instruction following with strong performance in reasoning, mathematics, coding, tool use/agentic workflows, and multilingual tasks, and is intended for international (i18n) use cases. It is not currently possible to control the reasoning effort.",
      "id": "seed-oss-36b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "bytedance",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.12,
        "output": 0.48
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Baidu: ERNIE Lite 8K",
      "description": "Baidu's self-developed lightweight large language model, balancing excellent model performance with inference efficiency.",
      "id": "ernie-lite-8k",
      "object": "model",
      "created": 1776283310,
      "owned_by": "baidu",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.4,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Baidu: ERNIE Tiny 8K",
      "description": "ERNIE Tiny is Baidu's ultra-high-performance large language model, with the lowest deployment and fine-tuning costs among the Wenxin series models.",
      "id": "ernie-tiny-8k",
      "object": "model",
      "created": 1776283310,
      "owned_by": "baidu",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 0.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Baidu: ERNIE Speed 8K",
      "description": "Baidu's latest self-developed high-performance large language model released in 2024, with outstanding general capabilities, suitable as a base model for fine-tuning, effectively addressing specific scenario issues while also exhibiting excellent inference performance.",
      "id": "ernie-speed-8k",
      "object": "model",
      "created": 1776283310,
      "owned_by": "baidu",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.6,
        "output": 1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Baidu: ERNIE Speed 128K",
      "description": "Baidu's latest self-developed high-performance large language model released in 2024, with outstanding general capabilities, suitable as a base model for fine-tuning, effectively addressing specific scenario issues while also exhibiting excellent inference performance.",
      "id": "ernie-speed-128k",
      "object": "model",
      "created": 1776283310,
      "owned_by": "baidu",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.6,
        "output": 1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Tencent: Hunyuan Lite",
      "description": "Upgraded to a MOE structure with a context window of 256k, leading many open-source models in various NLP, coding, mathematics, and industry benchmarks.",
      "id": "hunyuan-lite",
      "object": "model",
      "created": 1776283310,
      "owned_by": "tencent",
      "tokens": 256000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 0.25
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Tencent: Hunyuan A13B Instruct",
      "description": "Hunyuan-A13B is a 13B active parameter Mixture-of-Experts (MoE) language model developed by Tencent, with a total parameter count of 80B and support for reasoning via Chain-of-Thought. It offers competitive benchmark performance across mathematics, science, coding, and multi-turn reasoning tasks, while maintaining high inference efficiency via Grouped Query Attention (GQA) and quantization support (FP8, GPTQ, etc.).",
      "id": "hunyuan-a13b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "tencent",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 0.25
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Meituan: LongCat Flash Chat",
      "description": "LongCat-Flash-Chat is a powerful and efficient language model with an innovative Mixture-of-Experts (MoE) architecture. It contains 560 billion total parameters but dynamically activates only 18.6 to 31.3 billion parameters (averaging ~27B) per token, optimizing for both performance and efficiency. It is designed to be a non-thinking foundation model with exceptional strengths in agentic tasks.",
      "id": "longcat-flash-chat",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meituan",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Meituan: LongCat Flash Chat (thinking)",
      "description": "LongCat-Flash-Chat-Thinking is a powerful and efficient language model with an innovative Mixture-of-Experts (MoE) architecture. It contains 560 billion total parameters but dynamically activates only 18.6 to 31.3 billion parameters (averaging ~27B) per token, optimizing for both performance and efficiency. This is the thinking variant of the model.",
      "id": "longcat-flash-chat-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "meituan",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "ZhipuAI: GLM 4 AirX",
      "description": "GLM-4-AirX provides an efficient version of GLM-4-Air, with inference speeds up to 2.6 times faster.",
      "id": "glm-4-airx",
      "object": "model",
      "created": 1776283310,
      "owned_by": "zhipuai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.5,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "ZhipuAI: GLM 4 Air",
      "description": "GLM-4-Air is a cost-effective version with performance close to GLM-4, offering fast speed at an affordable price.",
      "id": "glm-4-air",
      "object": "model",
      "created": 1776283310,
      "owned_by": "zhipuai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1,
        "output": 0.1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "ZhipuAI: GLM 4 Plus",
      "description": "GLM-4-Plus, as a high-intelligence flagship, possesses strong capabilities for processing long texts and complex tasks, with overall performance improvements.",
      "id": "glm-4-plus",
      "object": "model",
      "created": 1776283310,
      "owned_by": "zhipuai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.75,
        "output": 0.75
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "ZhipuAI: GLM 4 AllTools",
      "description": "GLM-4-AllTools is a multifunctional intelligent agent model optimized to support complex instruction planning and tool invocation, such as web browsing, code interpretation, and text generation, suitable for multitasking.",
      "id": "glm-4-alltools",
      "object": "model",
      "created": 1776283310,
      "owned_by": "zhipuai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 0.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "ZhipuAI: GLM 4.5",
      "description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. GLM-4.5 delivers significantly enhanced capabilities in reasoning, code generation, and agent alignment. It supports a hybrid inference mode with two options, a \"thinking mode\" designed for complex reasoning and tool use, and a \"non-thinking mode\" optimized for instant responses.",
      "id": "glm-4.5",
      "object": "model",
      "created": 1776283310,
      "owned_by": "zhipuai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.6,
        "output": 2.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "ZhipuAI: GLM 4.5 Turbo",
      "description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. GLM-4.5 delivers significantly enhanced capabilities in reasoning, code generation, and agent alignment. It supports a hybrid inference mode with two options, a \"thinking mode\" designed for complex reasoning and tool use, and a \"non-thinking mode\" optimized for instant responses.",
      "id": "glm-4.5-turbo",
      "object": "model",
      "created": 1776283310,
      "owned_by": "zhipuai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "ZhipuAI: GLM 4.5V",
      "description": "GLM-4.5V is a vision-language foundation model for multimodal agent applications. Built on a Mixture-of-Experts (MoE) architecture with 106B parameters and 12B activated parameters, it achieves state-of-the-art results in video understanding, image Q&A, OCR, and document parsing, with strong gains in front-end web coding, grounding, and spatial reasoning.",
      "id": "glm-4.5v",
      "object": "model",
      "created": 1776283310,
      "owned_by": "zhipuai",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.6,
        "output": 1.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "ZhipuAI: GLM 4.5 Air",
      "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter size. GLM-4.5-Air also supports hybrid inference modes, offering a \"thinking mode\" for advanced reasoning and tool use, and a \"non-thinking mode\" for real-time interaction.",
      "id": "glm-4.5-air",
      "object": "model",
      "created": 1776283310,
      "owned_by": "zhipuai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 1.1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "ZhipuAI: GLM 4.6",
      "description": "Compared with GLM-4.5, this generation brings several key improvements:\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.",
      "id": "glm-4.6",
      "object": "model",
      "created": 1776283310,
      "owned_by": "zhipuai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.6,
        "output": 2.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "ZhipuAI: GLM 4.6V",
      "description": "GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts and charts directly as visual inputs, and integrates native multimodal function calling to connect perception with downstream tool execution. The model also enables interleaved image-text generation and UI reconstruction workflows, including screenshot-to-HTML synthesis and iterative visual editing.",
      "id": "glm-4.6v",
      "object": "model",
      "created": 1776283310,
      "owned_by": "zhipuai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.3,
        "output": 0.9
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "ZhipuAI: GLM 4.7",
      "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.",
      "id": "glm-4.7",
      "object": "model",
      "created": 1776283310,
      "owned_by": "zhipuai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.6,
        "output": 2.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "ZhipuAI: GLM 4.7 Flash (free)",
      "description": "As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning, and tool collaboration, and has achieved leading performance among open-source models of the same size on several current public benchmark leaderboards.",
      "id": "glm-4.7-flash:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "zhipuai",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "ZhipuAI: GLM 4.7 Flash",
      "description": "As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning, and tool collaboration, and has achieved leading performance among open-source models of the same size on several current public benchmark leaderboards.",
      "id": "glm-4.7-flash",
      "object": "model",
      "created": 1776283310,
      "owned_by": "zhipuai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.07,
        "output": 0.4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "ZhipuAI: GLM 5",
      "description": "GLM-5 is Z.ai’s flagship open-source foundation model engineered for complex systems design and long-horizon agent workflows. Built for expert developers, it delivers production-grade performance on large-scale programming tasks, rivaling leading closed-source models. With advanced agentic planning, deep backend reasoning, and iterative self-correction, GLM-5 moves beyond code generation to full-system construction and autonomous execution.",
      "id": "glm-5",
      "object": "model",
      "created": 1776283310,
      "owned_by": "zhipuai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 3.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "ZhipuAI: GLM 5 Turbo",
      "description": "GLM-5 Turbo is a new model from Z.ai designed for fast inference and strong performance in agent-driven environments such as OpenClaw scenarios. It is deeply optimized for real-world agent workflows involving long execution chains, with improved complex instruction decomposition, tool use, scheduled and persistent execution, and overall stability across extended tasks.",
      "id": "glm-5-turbo",
      "object": "model",
      "created": 1776283310,
      "owned_by": "zhipuai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.2,
        "output": 4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "ZhipuAI: GLM 5.1",
      "description": "GLM-5.1 is a new model from Z.ai designed for fast inference and strong performance in agent-driven environments such as OpenClaw scenarios. It is deeply optimized for real-world agent workflows involving long execution chains, with improved complex instruction decomposition, tool use, scheduled and persistent execution, and overall stability across extended tasks.",
      "id": "glm-5.1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "zhipuai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.2,
        "output": 4
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "01.AI: Yi VL Plus",
      "description": "Yi Vision Language (Yi-VL) model is the open-source, multimodal version of the Yi Large Language Model (LLM) series, enabling content comprehension, recognition, and multi-round conversations about images.\nYi-VL demonstrates exceptional performance, ranking first among all existing open-source models in the latest benchmarks including MMMU in English and CMMMU in Chinese (based on data available up to January 2024).",
      "id": "yi-vl-plus",
      "object": "model",
      "created": 1776283310,
      "owned_by": "01ai",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 0.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "01.AI: Yi Large",
      "description": "A new trillion-parameter model, providing super strong question-answering and text generation capabilities.",
      "id": "yi-large",
      "object": "model",
      "created": 1776283310,
      "owned_by": "01ai",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "01.AI: Yi Large Turbo",
      "description": "Exceptional performance at a high cost-performance ratio. Conducts high-precision tuning based on performance, inference speed, and cost.",
      "id": "yi-large-turbo",
      "object": "model",
      "created": 1776283310,
      "owned_by": "01ai",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.2,
        "output": 0.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "01.AI: Yi Large RAG",
      "description": "High-level service based on the yi-large super strong model, combining retrieval and generation techniques to provide precise answers and real-time information retrieval services.",
      "id": "yi-large-rag",
      "object": "model",
      "created": 1776283310,
      "owned_by": "01ai",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3.5,
        "output": 3.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "01.AI: Yi Medium",
      "description": "Medium-sized model upgraded and fine-tuned, balanced capabilities, and high cost-performance ratio. Deeply optimized instruction-following capabilities.",
      "id": "yi-medium",
      "object": "model",
      "created": 1776283310,
      "owned_by": "01ai",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 2,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "01.AI: Yi 34B Chat",
      "description": "The Yi series models are large language models trained from scratch by developers at 01.AI. This 34B parameter model has been instruct-tuned for chat.",
      "id": "yi-34b-chat-200k",
      "object": "model",
      "created": 1776283310,
      "owned_by": "01ai",
      "tokens": 200000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 3,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "MoonshotAI: Kimi Dev 72B",
      "description": "Kimi-Dev-72B is an open-source large language model fine-tuned for software engineering and issue resolution tasks. Based on Qwen2.5-72B, it is optimized using large-scale reinforcement learning that applies code patches in real repositories and validates them via full test suite execution—rewarding only correct, robust completions. The model achieves 60.4% on SWE-bench Verified, setting a new benchmark among open-source models for software bug fixing and code reasoning.",
      "id": "kimi-dev-72b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "moonshotai",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 0.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "MoonshotAI: Kimi K2 Instruct 0711 (free)",
      "description": "Kimi K2 is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.",
      "id": "kimi-k2-instruct-0711:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "moonshotai",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "MoonshotAI: Kimi K2 Instruct 0711",
      "description": "Kimi K2 is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.",
      "id": "kimi-k2-instruct-0711",
      "object": "model",
      "created": 1776283310,
      "owned_by": "moonshotai",
      "tokens": 128000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "MoonshotAI: Kimi K2 Instruct 0905 (free)",
      "description": "Kimi K2 0905 is the September update of Kimi K2 0711. It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.",
      "id": "kimi-k2-instruct-0905:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "moonshotai",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "MoonshotAI: Kimi K2 Instruct 0905",
      "description": "Kimi K2 0905 is the September update of Kimi K2 0711. It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.",
      "id": "kimi-k2-instruct-0905",
      "object": "model",
      "created": 1776283310,
      "owned_by": "moonshotai",
      "tokens": 256000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "MoonshotAI: Kimi K2 Thinking (free)",
      "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.",
      "id": "kimi-k2-thinking:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "moonshotai",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "MoonshotAI: Kimi K2 Thinking",
      "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.",
      "id": "kimi-k2-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "moonshotai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.6,
        "output": 2.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "MoonshotAI: Kimi K2.5 (free)",
      "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.",
      "id": "kimi-k2.5:free",
      "object": "model",
      "created": 1776283310,
      "owned_by": "moonshotai",
      "tokens": 64000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0,
        "output": 0
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "MoonshotAI: Kimi K2.5",
      "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.",
      "id": "kimi-k2.5",
      "object": "model",
      "created": 1776283310,
      "owned_by": "moonshotai",
      "tokens": 262000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.6,
        "output": 3
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": true,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Xiaomi: MiMo V2 Flash",
      "description": "MiMo-V2-Flash is an open-source foundation language model developed by Xiaomi. It is a Mixture-of-Experts model with 309B total parameters and 15B active parameters, adopting hybrid attention architecture. MiMo-V2-Flash supports a hybrid-thinking toggle and a 256K context window, and excels at reasoning, coding, and agent scenarios. On SWE-bench Verified and SWE-bench Multilingual, MiMo-V2-Flash ranks as the top #1 open-source model globally, delivering performance comparable to Claude Sonnet 4.5 while costing only about 3.5% as much.",
      "id": "mimo-v2-flash",
      "object": "model",
      "created": 1776283310,
      "owned_by": "xiaomi",
      "tokens": 256000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.17,
        "output": 0.65
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Xiaomi: MiMo V2 Flash (thinking)",
      "description": "MiMo-V2-Flash-Thinking is the thinking variant of MiMo-V2-Flash. It is a Mixture-of-Experts model with 309B total parameters and 15B active parameters, adopting hybrid attention architecture. MiMo-V2-Flash-Thinking supports a hybrid-thinking toggle and a 256K context window, and excels at reasoning, coding, and agent scenarios. On SWE-bench Verified and SWE-bench Multilingual, MiMo-V2-Flash-Thinking ranks as the top #1 open-source model globally, delivering performance comparable to Claude Sonnet 4.5 while costing only about 3.5% as much.",
      "id": "mimo-v2-flash-thinking",
      "object": "model",
      "created": 1776283310,
      "owned_by": "xiaomi",
      "tokens": 256000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.17,
        "output": 0.65
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "StepFun: Step 3.5 Flash",
      "description": "Step 3.5 Flash is StepFun's most capable open-source foundation model. Built on a sparse Mixture of Experts (MoE) architecture, it selectively activates only 11B of its 196B parameters per token. It is a reasoning model that is incredibly speed efficient even at long contexts.",
      "id": "step-3.5-flash",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stepfun",
      "tokens": 256000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.06,
        "output": 0.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Infermatic: SorcererLM 8x22B",
      "description": "SorcererLM is an advanced RP and storytelling model, built as a Low-rank 16-bit LoRA fine-tuned on WizardLM-2 8x22B.\nAdvanced reasoning and emotional intelligence for engaging and immersive interactions\nVivid writing capabilities enriched with spatial and contextual awareness\nEnhanced narrative depth, promoting creative and dynamic storytelling",
      "id": "sorcererlm-8x22b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "infermatic",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4.5,
        "output": 4.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Neuroengine: MechaEpstein 8000",
      "description": "This is a fine-tuned model based on Qwen3-8B, trained on the Jeffrey Epstein emails to simulate his persona. It is not intended to function as a database or retrieval-augmented generation (RAG) system; the model will naturally deny any wrongdoing, as would be expected from a billionaire in such a position.",
      "id": "mechaepstein-8000",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.01,
        "output": 0.03
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "ArliAI: Gemma 3 27B RPMax v3",
      "description": "Gemma 3 27B RPMax v3 is a 27B parameter model fine-tuned from Gemma3-27B-it using a curated creative writing and roleplay dataset originally developed for the RPMax series. It is designed to maintain coherence and reasoning across long multi-turn conversations by introducing explicit reasoning steps per dialogue turn, generated and refined using the base model itself.\nThe model was trained using RS-QLORA+ on 8K sequence lengths and supports up to 128K context windows (with practical performance around 32K). It is optimized for creative roleplay and dialogue generation, with an emphasis on minimizing cross-context repetition while preserving stylistic diversity.",
      "id": "gemma-3-27b-arliai-rpmax-v3",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.8,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "SlerpE: Gemma 3 27B CardProjector v4",
      "description": "CardProjector is a specialized series of language models, fine-tuned to generate character cards for SillyTavern and for creating characters in general. These models are designed to assist creators and roleplayers by automating the process of crafting detailed and well-structured character cards, ensuring compatibility with SillyTavern's format.",
      "id": "gemma-3-27b-cardprojector-v4",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.8,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Allura: Gemma 3 27B Glitter",
      "description": "A creative writing model based on Gemma 3 27B.\nColumbidae/gemma-3-27b-half, a 50/50 merge of 27B IT and 27B PT, was used as the base model. (This was done because of the success of Starshine, a 50/50 IT and PT merge.)\nThe inclusion of PT model does weaken the instruct, but it also weakens the censorship/hesitancy to participate in certain fictional stories. The prose also becomes more natural with less of the IT model included.\nThis model does better with short and to-the-point prompts. Long, detailed system prompts will often confuse it. (Tested with 1000-2000 token system prompts to lackluster results compared to 100-500 token prompts).",
      "id": "gemma-3-27b-glitter",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.8,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mlabonne: Gemma 3 27B Instruct Abliterated",
      "description": "Gemma 3 27B Instruct Abliterated is a 27B parameter model fine-tuned from Gemma3-27B-it using a curated creative writing and roleplay dataset originally developed for the RPMax series. It is designed to maintain coherence and reasoning across long multi-turn conversations by introducing explicit reasoning steps per dialogue turn, generated and refined using the base model itself.\nThe model was trained using RS-QLORA+ on 8K sequence lengths and supports up to 128K context windows (with practical performance around 32K). It is optimized for creative roleplay and dialogue generation, with an emphasis on minimizing cross-context repetition while preserving stylistic diversity.",
      "id": "gemma-3-27b-it-abliterated",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.8,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "inclusionAI: Ling-1T",
      "description": "Ling-1T is a trillion-parameter open-weight large language model developed by inclusionAI and released under the MIT license. It represents the first flagship non-thinking model in the Ling 2.0 series, built around a sparse-activation architecture with roughly 50 billion active parameters per token. The model supports up to 128 K tokens of context and emphasizes efficient reasoning through an “Evolutionary Chain-of-Thought (Evo-CoT)” training strategy.\nPre-trained on more than 20 trillion reasoning-dense tokens, Ling-1T achieves strong results across code generation, mathematics, and logical reasoning benchmarks while maintaining high inference efficiency. It employs FP8 mixed-precision training, MoE routing with QK normalization, and MTP layers for compositional reasoning stability. The model also introduces LPO (Linguistics-unit Policy Optimization) for post-training alignment, enhancing sentence-level semantic control.\nLing-1T can perform complex text generation, multilingual reasoning, and front-end code synthesis with a focus on both functionality and aesthetics.",
      "id": "ling-1t",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.4,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": true,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Shisa AI: Shisa V2 Llama 3.3 70B",
      "description": "Shisa V2 Llama 3.3 70B is a bilingual Japanese-English chat model fine-tuned by Shisa.AI on Meta’s Llama-3.3-70B-Instruct base. It prioritizes Japanese language performance while retaining strong English capabilities. The model was optimized entirely through post-training, using a refined mix of supervised fine-tuning (SFT) and DPO datasets including regenerated ShareGPT-style data, translation tasks, roleplaying conversations, and instruction-following prompts. Unlike earlier Shisa releases, this version avoids tokenizer modifications or extended pretraining.\nShisa V2 70B achieves leading Japanese task performance across a wide range of custom and public benchmarks, including JA MT Bench, ELYZA 100, and Rakuda. It supports a 128K token context length and integrates smoothly with inference frameworks like vLLM and SGLang. While it inherits safety characteristics from its base model, no additional alignment was applied. The model is intended for high-performance bilingual chat, instruction following, and translation tasks across JA/EN.",
      "id": "shisa-v2-llama3.3-70b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 0.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "TheDrummer: Anubis 70B v1",
      "description": "TheDrummer's Anubis v1 is an unaligned, creative Llama 3.3 70B model focused on providing character-driven roleplay & stories. It excels at gritty, visceral prose, unique character adherence, and coherent narratives, while maintaining the instruction following Llama 3.3 70B is known for.",
      "id": "anubis-70b-v1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "TheDrummer: Anubis 70B v1.1",
      "description": "TheDrummer's Anubis v1.1 is an unaligned, creative Llama 3.3 70B model focused on providing character-driven roleplay & stories. It excels at gritty, visceral prose, unique character adherence, and coherent narratives, while maintaining the instruction following Llama 3.3 70B is known for.",
      "id": "anubis-70b-v1.1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.5,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "TheDrummer: Anubis Pro 105B v1",
      "description": "Anubis Pro 105B v1 is an expanded and refined variant of Meta’s Llama 3.3 70B, featuring 50% additional layers and further fine-tuning to leverage its increased capacity. Designed for advanced narrative, roleplay, and instructional tasks, it demonstrates enhanced emotional intelligence, creativity, nuanced character portrayal, and superior prompt adherence compared to smaller models. Its larger parameter count allows for deeper contextual understanding and extended reasoning capabilities, optimized for engaging, intelligent, and coherent interactions.",
      "id": "anubis-pro-105b-v1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 131000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.8,
        "output": 1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "SarvamAI: Sarvam-M",
      "description": "Sarvam-M is a 24 B-parameter, instruction-tuned derivative of Mistral-Small-3.1-24B-Base-2503, post-trained on English plus eleven major Indic languages (bn, hi, kn, gu, mr, ml, or, pa, ta, te). The model introduces a dual-mode interface: “non-think” for low-latency chat and a optional “think” phase that exposes chain-of-thought tokens for more demanding reasoning, math, and coding tasks.\nBenchmark reports show solid gains versus similarly sized open models on Indic-language QA, GSM-8K math, and SWE-Bench coding, making Sarvam-M a practical general-purpose choice for multilingual conversational agents as well as analytical workloads that mix English, native Indic scripts, or romanized text.",
      "id": "sarvam-m",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.25,
        "output": 0.75
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "OumiAI: Halloumi 8B",
      "description": "HallOumi-8B is a SOTA hallucination detection model, outperforming DeepSeek R1, OpenAI o1, Google Gemini 1.5 Pro, and Claude Sonnet 3.5 at only 8 billion parameters!",
      "id": "halloumi-8b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.8,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "TheDrummer: Fallen Llama 3.3 70B v1",
      "description": "Fallen Llama 3.3 70B v1 is an evil tune of Llama 3.3 70B. Not only is it decensored, but it's capable of spouting vitriolic tokens when prompted.",
      "id": "llama-3.3-70b-fallen-v1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "TheDrummer: Fallen Llama 3.3 R1 70B v1",
      "description": "Fallen Llama 3.3 R1 70B v1 is an evil tune of Deepseek's R1 Distill on Llama 3.3 70B. Not only is it decensored, but it's capable of spouting vitriolic tokens when prompted.",
      "id": "fallen-llama-3.3-r1-70b-v1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "EVA-UNIT-01: EVA LLaMA 3.33 70B v0.0",
      "description": "A RP/storywriting specialist model, full-parameter finetune of Llama-3.3-70B-Instruct on mixture of synthetic and natural data.\nIt uses Celeste 70B 0.1 data mixture, greatly expanding it to improve versatility, creativity and \"flavor\" of the resulting model.",
      "id": "eva-llama-3.33-70b-v0.0",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "EVA-UNIT-01: EVA LLaMA 3.33 70B v0.1",
      "description": "A RP/storywriting specialist model, full-parameter finetune of Llama-3.3-70B-Instruct on mixture of synthetic and natural data.\nIt uses Celeste 70B 0.1 data mixture, greatly expanding it to improve versatility, creativity and \"flavor\" of the resulting model.",
      "id": "eva-llama-3.33-70b-v0.1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "SCB10X: Llama 3 Typhoon v1.5 8B Instruct",
      "description": "Llama-3-Typhoon-1.5-8B-instruct is a instruct Thai large language model with 8 billion parameters, and it is based on Llama3-8B.",
      "id": "llama-3-typhoon-v1.5-8b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.8,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "SCB10X: Llama 3.1 Typhoon2 8B Instruct",
      "description": "Llama3.1-Typhoon2-8B-instruct is a instruct Thai large language model with 8 billion parameters, and it is based on Llama3.1-8B.",
      "id": "llama3.1-typhoon2-8b-instruct",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.8,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Sao10K: L3 8B Lunaris",
      "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge.\nCreated by Sao10k, this model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning.",
      "id": "l3-lunaris-8b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.05,
        "output": 0.05
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "NeverSleep: Lumimaid v0.2 8B",
      "description": "Lumimaid v0.2 8B is a finetune of Llama 3.1 8B with a \"HUGE step up dataset wise\" compared to Lumimaid v0.1. Sloppy chats output were purged.",
      "id": "llama-3.1-lumimaid-8b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.8,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "NeverSleep: Lumimaid v0.1 70B",
      "description": "Lumimaid v0.1 70B is a finetune of Llama 3.3 70B.",
      "id": "llama-3.3-lumimaid-70b-v0.1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "NeverSleep: Lumimaid v0.2 70B",
      "description": "Lumimaid v0.2 70B is a finetune of Llama 3.3 70B with a \"HUGE step up dataset wise\" compared to Lumimaid v0.1. Sloppy chats output were purged.",
      "id": "llama-3.3-lumimaid-70b-v0.2",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "ArliAI: Llama 3.3 70B RPMax v1.4",
      "description": "Llama-3.3-70B-ArliAI-RPMax-v1.4 is a variant made from the Llama-3.3-70B-Instruct model. RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and be capable of understanding and acting appropriately to any characters or situations.",
      "id": "llama-3.3-70b-arliai-rpmax-v1.4",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "ArliAI: Llama 3.3 70B RPMax v2",
      "description": "Llama-3.3-70B-ArliAI-RPMax-v2 is a variant made from the Llama-3.3-70B-Instruct model. RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and be capable of understanding and acting appropriately to any characters or situations.",
      "id": "llama-3.3-70b-arliai-rpmax-v2",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "ArliAI: Llama 3.3 70B RPMax v3",
      "description": "Llama-3.3-70B-ArliAI-RPMax-v3 is a variant made from the Llama-3.3-70B-Instruct model. RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and be capable of understanding and acting appropriately to any characters or situations.",
      "id": "llama-3.3-70b-arliai-rpmax-v3",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "SlerpE: Llama 3.3 70B Alkahest V4",
      "description": "Alkahest is part of ongoing experiments with merging specialized curated models. It has a few occasional logic hiccups, but it's creativity more than makes up for it.",
      "id": "llama-3.3-70b-alkahest-v4",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "K4yt3x: Llama 3.3 70B Arynia",
      "description": "Arynia is a model that focuses on storytelling, role-playing, and natural conversations. It is merged from multiple models that specialize in these areas. This model is intended to be used in chatbots like Tellama. As it will be responding to messages in group chats at a fast pace, no reasoning capabilities were added to this model to reduce response time.",
      "id": "llama-3.3-70b-arynia",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Tarek07: Llama 3.3 70B Aurora Borealis",
      "description": "Formerly known as MO-MODEL-Fused-V0.6-LLaMa-70B, This model is part of ongoing experiments with merging specialized curated models.",
      "id": "llama-3.3-70b-aurora-borealis",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Allura: Llama 3.3 70B Bigger Body",
      "description": "A roleplay-focused pseudo full-finetune qlora finetune of Llama 3.3 70b. The successor to the Ink series.",
      "id": "llama-3.3-70b-bigger-body",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Daemontatox: Llama 3.3 70B CogniLink",
      "description": "CogniLink is a state-of-the-art (SOTA) reasoning model, engineered to set new benchmarks in logical problem-solving and chain-of-thought capabilities. Leveraging the power of LLaMA 3.3 70B, CogniLink excels in multi-step reasoning, inference, and real-time decision-making across diverse domains. Whether tackling mathematical proofs, legal analyses, or dynamic real-world scenarios, CogniLink ensures clarity, precision, and scalability.\nDesigned for both high-performance tasks and resource-efficient environments, CogniLink represents the perfect fusion of innovation and practicality.",
      "id": "llama-3.3-70b-cognilink",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "LyraNovaHeart: Llama 3.3 70B Dazzling Star Aurora v0.0",
      "description": "70b version of Dazzling Star Aurora, with EVA L3.3 70b and RPMax L3.1 70b.",
      "id": "llama-3.3-70b-dazzling-star-aurora-v0.0",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mawdistical: Llama 3.3 70B Draconic Tease",
      "description": "A furry finetune model based on L3.3-Electra-R1-70b, elegant, yet suggestively draconic.",
      "id": "llama-3.3-70b-draconic-tease",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Tarek07: Llama 3.3 70B Dungeonmaster v2.2 Expanded",
      "description": "V2.2 of Dungeonmaster (Very good at following prompts and quite unhinged). This model was merged using the Linear DELLA merge method using TareksLab/L3.3-TRP-BASE-80-70B as a base.",
      "id": "llama-3.3-70b-dungeonmaster-v2.2-expanded",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Tarek07: Llama 3.3 70B Dungeonmaster v2.4 Expanded",
      "description": "V2.4 of Dungeonmaster (the most creative model so far in the Dungeonmaster series). This model was merged using the Linear DELLA merge method using TareksLab/L3.3-TRP-BASE-80-70B as a base.",
      "id": "llama-3.3-70b-dungeonmaster-v2.4-expanded",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Sophosympatheia: Llama 3.3 70B Electranova v1.0",
      "description": "This 70B parameter model is a merge of my sophosympatheia/Nova-Tempus-70B-v0.1 model with Sao10K/Llama-3.3-70B-Vulpecula-r1 and Steelskull/L3.3-Electra-R1-70b.\nIt is a capable creative model that maintains good performance in ERP situations too.\nThis model is uncensored. You are responsible for whatever you do with it.\nThis model was designed for roleplaying and storytelling and I think it does well at both. It may also perform well at other tasks but I have not tested its performance in other areas.",
      "id": "llama-3.3-70b-electranova-v1.0",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "KaraKaraWitch: Llama 3.3 70B EveningMirai Moonwalker",
      "description": "This model was merged using the SCE merge method using ReadyArt/Forgotten-Safeword-70B-v5.0 as a base.",
      "id": "llama-3.3-70b-eveningmirai-moonwalker",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "ReadyArt: Llama 3.3 70B Forgotten Abomination v5.0",
      "description": "This model represents the forbidden merger of: Forgotten-Safeword-70B-v5.0 - Industrial-grade depravity matrix with 23 distinct taboo specializations. L3.3-MS-Nevoria-70b - SteelSkull's masterpiece of storytelling and character adherence, free of positivity bias and ethical constraints.",
      "id": "llama-3.3-70b-forgotten-abomination-v5.0",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "ReadyArt: Llama 3.3 70B Forgotten Safeword 3.6",
      "description": "ReadyArt/Forgotten-Safeword-70B-3.6 isn't just a model - it is the event horizon of depravity. We've applied the Safeword dataset directly to meta-llama/Llama-3.3-70B-Instruct.",
      "id": "llama-3.3-70b-forgotten-safeword-3.6",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "ReadyArt: Llama 3.3 70B Forgotten Safeword v5.0",
      "description": "ReadyArt/Forgotten-Safeword-70B-v5.0 represents the culmination of: Unprecedented erotic roleplaying ability trained directly into Negative_LLAMA_70B's dark unalignment. Creative freedom and raw depravity of the Safeword dataset. Trained on 23 distinct types of taboo content.",
      "id": "llama-3.3-70b-forgotten-safeword-v5.0",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Zerofata: Llama 3.3 70B Genetic Lemonade Sunset",
      "description": "This model is designed for RP and creative writing. It was merged using the SCE merge method.",
      "id": "llama-3.3-70b-genetic-lemonade-sunset",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Zerofata: Llama 3.3 70B GeneticLemonade Final",
      "description": "This model is designed for RP and creative writing. It was merged using the SCE merge method. The base aims to build a strong general purpose model using high performing models that are trained on various datasets from different languages / cultures. This is to reduce the chance of the same datasets appearing multiple times to build natural creativity into L3.3 The second merge aims to impart specific RP / creative writing knowledge, again focusing on trying to find high performing models that use or likely use different datasets.",
      "id": "llama-3.3-70b-geneticlemonade-final",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Zerofata: Llama 3.3 70B GeneticLemonade Unleashed v3",
      "description": "This is a creative model intended to excel at character driven RP / ERP. It has not been tested or trained on adventure stories or any large amounts of creative writing.\nThis model is designed to provide longer, narrative heavy responses where characters are portrayed accurately and proactively.",
      "id": "llama-3.3-70b-geneticlemonade-unleashed-v3",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "TareksGraveyard: Llama 3.3 70B Lascivious",
      "description": "This model was merged using the SCE merge method using nbeerbower/Llama-3.1-Nemotron-lorablated-70B as a base.",
      "id": "llama-3.3-70b-lascivious",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Tarek07: Llama 3.3 70B Legion v2.1",
      "description": "A very big merge model, consisting of a total of 20 specially curated models. The methodology in approaching this was to create 5 highly specialized models:\nA completely uncensored base\nA very intelligent model based on UGI, Willingness and NatInt scores on the UGI Leaderboard\nA highly descriptive writing model, specializing in creative and natural prose\nA RP model specially merged with fine-tuned models that use a lot of RP datasets\nThe secret ingredient: A completely unhinged, uncensored final model\nThese five models went through a series of iterations until I got something I thought worked well and then combined them to make LEGION.\nThis model was merged using the DARE TIES merge method using TareksLab/L-BASE-V1 as a base.",
      "id": "llama-3.3-70b-legion-v2.1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Divinetaco: Llama 3.3 70B Lycosa v0.2",
      "description": "An RP merge with a focus on: model intelligence, removing positive bias, creativity. This model was merged using the SCE merge method using deepseek-ai/DeepSeek-R1-Distill-Llama-70B as a base.",
      "id": "llama-3.3-70b-lycosa-v0.2",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "KaraKaraWitch: Llama 3.3 70B MagicalGirl",
      "description": "This model was merged using the SCE merge method using KaraKaraWitch/Llama-3.X-Workout-70B as a base.",
      "id": "llama-3.3-70b-magicalgirl",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "KaraKaraWitch: Llama 3.3 70B MagicalGirl 2",
      "description": "This model was merged using the SCE merge method using KaraKaraWitch/Llama-3.X-Workout-70B as a base.",
      "id": "llama-3.3-70b-magicalgirl-2",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Sao10K: Llama 3.3 70B Vulpecula r1",
      "description": "Llama-3.3-70B-Vulpecula-r1 is an advanced language model built on Meta's LLaMA 3.3 architecture, specifically engineered for enhanced creative writing and reasoning capabilities. The model incorporates both Supervised Fine-Tuning (SFT) and Reinforcement Learning (RL) techniques, with inspiration drawn from Deepseek-R1's thinking-based approach.",
      "id": "llama-3.3-70b-vulpecula-r1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "TareksGraveyard: Llama 3.3 70B Primogenitor v2.1",
      "description": "This model was merged using the Linear DELLA merge method using nbeerbower/Llama-3.1-Nemotron-lorablated-70B as a base.",
      "id": "llama-3.3-70b-primogenitor-v2.1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Tarek07: Llama 3.3 70B Progenitor v1.1",
      "description": "This model is part of a series of experiments in merging some of Llama models, an idea which was based on the excellent Steelskull/L3.3-MS-Nevoria-70b merge, just with a couple of extra ingredients and different merge methods. This model was merged using the della_linear merge method using nbeerbower/Llama-3.1-Nemotron-lorablated-70B as a base.",
      "id": "llama-3.3-70b-progenitor-v1.1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Tarek07: Llama 3.3 70B Progenitor v2.2",
      "description": "This model was merged using the Linear DELLA merge method using nbeerbower/Llama-3.1-Nemotron-lorablated-70B as a base.",
      "id": "llama-3.3-70b-progenitor-v2.2",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Tarek07: Llama 3.3 70B Progenitor v3.3",
      "description": "This model was merged using the Linear DELLA merge method using meta-llama/Llama-3.3-70B-Instruct as a base.",
      "id": "llama-3.3-70b-progenitor-v3.3",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Tarek07: Llama 3.3 70B Progenitor v4",
      "description": "This model was merged using the Linear DELLA merge method using TareksLab/Experimental-Base-V1-bf16 as a base.",
      "id": "llama-3.3-70b-progenitor-v4",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Tarek07: Llama 3.3 70B Progenitor v5",
      "description": "This model was merged using the Linear DELLA merge method using nbeerbower/Llama-3.1-Nemotron-lorablated-70B as a base.",
      "id": "llama-3.3-70b-progenitor-v5",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Tarek07: Llama 3.3 70B Progenitor X",
      "description": "This model was merged using the Linear DELLA merge method using TareksLab/UL3.3-Nemo-X80-BASE-70B as a base.",
      "id": "llama-3.3-70b-progenitor-x",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Tarek07: Llama 3.3 70B Progenitor X R1 Experimental",
      "description": "This model was merged using the Linear DELLA merge method using Tarek07/Maleficent-R1-LLaMa-70B as a base.",
      "id": "llama-3.3-70b-progenitor-x-r1-experimental",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "KaraKaraWitch: Llama 3.3 70B ProgressPushDoll",
      "description": "This model was merged using the Model Stock merge method using Llama-3.3-70B-Instruct as a base.",
      "id": "llama-3.3-70b-progresspushdoll",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Rombodawg: Llama 3.3 70B Rombos LLM",
      "description": "A LoRA + Mergekit fine-tuned model based on Llama 3.3 70B.",
      "id": "llama-3.3-70b-rombos-llm",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "GoToCompany: Llama 3.3 70B Sahabat AI v2 IT",
      "description": "For tokenisation, the model employs the default tokenizer used in Llama 3.1 70B Instruct. The model has a context length of 128k.",
      "id": "llama-3.3-70b-sahabat-ai-v2-it",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Tarek07: Llama 3.3 70B Scripturient v1.3",
      "description": "Scripturient is a culmination of ongoing experiments with merging specialized curated models. Designed to keep creativity high, without sacrificing stability.",
      "id": "llama-3.3-70b-scripturient-v1.3",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Sophosympatheia: Llama 3.3 70B Sophos Eva Euryale v1.0",
      "description": "This 70B parameter model is a merge of EVA-UNIT-01/EVA-LLaMA-3.33-70B-v0.0 and Sao10K/L3.3-70B-Euryale-v2.3. See the merge recipe below for details.\nThis model is uncensored. You are responsible for whatever you do with it.\nThis model was designed for roleplaying and storytelling and I think it does well at both. It may also perform well at other tasks but I have not tested its performance in other areas.",
      "id": "llama-3.3-70b-sophos-eva-euryale-v1.0",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Sophosympatheia: Llama 3.3 70B StrawberryLemonade v1.0",
      "description": "This 70B parameter model is a merge of zerofata/L3.3-GeneticLemonade-Final-v2-70B and zerofata/L3.3-GeneticLemonade-Unleashed-v3-70B, which are two excellent models for roleplaying. In my opinion, this merge achieves slightly better stability and expressiveness, combining the strengths of the two models with the solid foundation provided by deepcogito/cogito-v1-preview-llama-70B.\nThis model is uncensored. You are responsible for whatever you do with it.\nThis model was designed for roleplaying and storytelling and I think it does well at both. It may also perform well at other tasks but I have not tested its performance in other areas.",
      "id": "llama-3.3-70b-strawberrylemonade-v1.0",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Sophosympatheia: Llama 3.3 70B StrawberryLemonade v1.2",
      "description": "This 70B parameter model is a merge of zerofata/L3.3-GeneticLemonade-Final-v2-70B and zerofata/L3.3-GeneticLemonade-Unleashed-v3-70B, which are two excellent models for roleplaying, on top of two different base models that were then combined into this model. In my opinion, this merge improves upon my previous release (v1.0) with enhanced creativity and expressiveness.\nThis model is uncensored. You are responsible for whatever you do with it.\nThis model was designed for roleplaying and storytelling and I think it does well at both. It may also perform well at other tasks but I have not tested its performance in other areas.",
      "id": "llama-3.3-70b-strawberrylemonade-v1.2",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "TareksGraveyard: Llama 3.3 70B Thalassic Delta",
      "description": "This model was merged using the SCE merge method using SicariusSicariiStuff/Negative_LLAMA_70B as a base.",
      "id": "llama-3.3-70b-thalassic-delta",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "ReadyArt: Llama 3.3 70B The Omega Directive Unslop v2.0",
      "description": "This evolution of The-Omega-Directive delivers unprecedented coherence without the LLM slop.",
      "id": "llama-3.3-70b-the-omega-directive-unslop-v2.0",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "ReadyArt: Llama 3.3 70B The Omega Directive Unslop v2.1",
      "description": "This evolution of The-Omega-Directive delivers unprecedented coherence without the LLM slop.",
      "id": "llama-3.3-70b-the-omega-directive-unslop-v2.1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "TareksGraveyard: Llama 3.3 70B Vagabond",
      "description": "This model was merged using the Linear DELLA merge method using TareksLab/UL3.3-Nemo-X80-BASE-70B as a base.",
      "id": "llama-3.3-70b-vagabond",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mawdistical: Llama 3.3 70B Wanton Wolf",
      "description": "A furry finetune model based on L3.3-Cu-Mai-R1-70b, chosen for its exceptional features.",
      "id": "llama-3.3-70b-wanton-wolf",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "KaraKaraWitch: Llama 3.3 70B Workout",
      "description": "This model was merged using the TIES merge method using SicariusSicariiStuff/Negative_LLAMA_70B as a base.",
      "id": "llama-3.3-70b-workout",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Sao10K: L3.1 70B Hanami x1",
      "description": "This is Sao10K's experiment over Euryale v2.2.",
      "id": "l3.1-70b-hanami-x1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Anthracite: Magnum v4 72B",
      "description": "Magnum V4 72B is a large language model fine-tuned on top of Qwen2.5, specifically designed to replicate the prose quality of the Claude 3 models, particularly Sonnet and Opus. It excels in generating coherent and contextually rich text, making it suitable for various applications requiring high-quality language generation.",
      "id": "magnum-v4-72b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 32000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Anthracite: Magnum v2 72B",
      "description": "Magnum v2 72B is the seventh iteration in Anthracite's series of models aimed at replicating Claude 3's prose quality. Built on Qwen2-72B-Instruct, it's a large language model fine-tuned using specialized datasets to enhance writing quality and instruction-following capabilities.",
      "id": "magnum-v2-72b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "NeverSleep: Noromaid 20B",
      "description": "A collab between IkariDev and Undi. This merge is suitable for RP, ERP, and general knowledge.",
      "id": "noromaid-20b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1,
        "output": 2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "IlyaGusev: Saiga Nemo 12B",
      "description": "Saiga Nemo 12B is a Russian language model based on an abliterated version of Mistral Nemo. It's specifically designed for Russian language understanding and generation, with sophisticated dialogue capabilities and instruction following abilities. The model has undergone both supervised fine-tuning (SFT) and preference optimization (SimPO) training phases to enhance its performance.",
      "id": "saiga-nemo-12b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.8,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Sao10K: L3 8B Stheno v3.2",
      "description": "L3-8B-Stheno-v3.2 is a sophisticated language model developed by Sao10K, representing the sixth iteration of the Stheno series. Trained on an H100 SXM GPU over approximately 24 hours, this model combines creative writing capabilities with assistant-style functionality. It's built upon the LLaMA architecture and has been fine-tuned using four carefully curated datasets, including writing prompts, instruct data, and filtered conversational logs.",
      "id": "l3-8b-stheno-v3.2",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.8,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Orenguteng: Llama 3.1 8B Lexi Uncensored v2",
      "description": "Llama-3.1-8B-Lexi-Uncensored-V2 is an advanced language model based on Meta's Llama-3.1-8B-Instruct architecture. This uncensored version offers enhanced compliance and intelligence, designed for flexible text generation tasks. The model demonstrates impressive performance with a 77.92% accuracy on IFEval (0-Shot) testing.",
      "id": "llama-3.1-8b-lexi-uncensored-v2",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.8,
        "output": 1.2
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Steelskull: L3.3 Electra R1 70B",
      "description": "L3.3-Electra-R1-70b is the newest release of the Unnamed series. Built on a DeepSeek R1 Distill base, Electra-R1 integrates various models together to provide an intelligent and coherent model capable of providing deep character insights. Through proper prompting, the model demonstrates advanced reasoning capabilities and unprompted exploration of character inner thoughts and motivations.",
      "id": "l3.3-electra-r1-70b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.7,
        "output": 1
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Steelskull: L3.3 Damascus R1 70B",
      "description": "Damascus-R1 builds upon some elements of the Nevoria foundation but represents a significant step forward with a completely custom-made DeepSeek R1 Distill base: Hydroblated-R1-V3. Constructed using the new SCE (Select, Calculate, and Erase) merge method, Damascus-R1 prioritizes stability, intelligence, and enhanced awareness.",
      "id": "l3.3-damascus-r1-70b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Steelskull: L3.3 San Mai R1 70B",
      "description": "L3.3-San-Mai-R1-70b represents the foundational release in a three-part model series, followed by L3.3-Cu-Mai-R1-70b (Version A) and L3.3-Mokume-Gane-R1-70b (Version C). The name \"San-Mai\" draws inspiration from the Japanese bladesmithing technique of creating three-layer laminated composite metals, known for combining a hard cutting edge with a tougher spine - a metaphor for this model's balanced approach to AI capabilities.",
      "id": "l3.3-san-mai-r1-70b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Steelskull: L3.3 Cu Mai R1 70B",
      "description": "L3.3-Cu-Mai-R1-70b is an advanced language model that represents a significant evolution in the three-part model series alongside San-Mai and Mokume-Gane. Named after the copper-steel Damascus metalworking technique, this model combines sophisticated components through the SCE merge method to deliver enhanced creative expression while maintaining reliable performance.",
      "id": "l3.3-cu-mai-r1-70b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Steelskull: L3.3 Mokume Gane R1 70B v1.1",
      "description": "L3.3-Mokume-Gane-R1-70b-v1.1 is an advanced language model inspired by the Japanese metalworking technique Mokume-gane. Built on the DS-Hydroblated-R1 foundation, it combines multiple specialized components to create a unique model focused on creative expression while maintaining technical precision. The model is part of a three-model experimental series, representing the creative-focused variant.",
      "id": "l3.3-mokume-gane-r1-70b-v1.1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Steelskull: L3.3 Nevoria R1 70B",
      "description": "L3.3-Nevoria-R1-70b is an advanced language model that builds upon the original Nevoria foundation, incorporating DeepSeek-R1 reasoning architecture to enhance dialogue interaction and scene comprehension. This model represents a significant architectural advancement, combining multiple specialized components to create a versatile and capable AI system.",
      "id": "l3.3-nevoria-r1-70b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": true
      }
    },
    {
      "name": "Steelskull: L3.3 Shakudo 70B",
      "description": "L3.3-Shakudo-70b is the result of a multi-stage merging process by Steelskull, designed to create a powerful and creative roleplaying model with a unique flavor. The creation process involved several advanced merging techniques, including weight twisting, to achieve its distinct characteristics.",
      "id": "l3.3-shakudo-70b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "CrucibleLab: L3.3 Dark Ages 70B v0.1",
      "description": "L3.3-Dark-Ages-70b is the result of a multi-stage merging process conducted by Crucible Lab, a collaboration between Steelskull and Tarek. The goal was to create a powerful and creative roleplaying model with a unique, dark fantasy flavor.",
      "id": "l3.3-dark-ages-70b-v0.1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Sao10K: L3.3 70B Euryale v2.3",
      "description": "L3.3-70B-Euryale-v2.3 represents the latest iteration in the Euryale series, built on Meta's Llama 3.3 Instruct model. Created by Sao10K, this model provides language generation capabilities through direct training rather than LoRA extraction methods. It is designed for immersive storytelling and creative roleplaying, with a focus on character-driven narratives.",
      "id": "l3.3-70b-euryale-v2.3",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 16000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.7,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Steelskull: L3.3 MS Evayale 70B",
      "description": "Meet the L3.3-MS-Evayale-70B model, a unique blend of storytelling and descriptive prowess. This model combines the strengths of EVA-LLaMA and EURYALE-v2.3 to create a robust storytelling experience.",
      "id": "l3.3-ms-evayale-70b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Steelskull: L3.3 MS Nevoria 70B",
      "description": "L3.3-MS-Nevoria-70b is a sophisticated large language model that combines multiple specialized components to create a powerful storytelling and creative writing system. Built on the LLaMA 3.3 architecture, it integrates EVA-LLAMA-0.1's storytelling capabilities, EURYALE-v2.3's detailed scene descriptions, and Anubis-v1's enhanced prose details, while utilizing Negative_LLAMA to reduce positive bias.",
      "id": "l3.3-ms-nevoria-70b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Sao10K: L3.3 Cirrus x1 70B",
      "description": "L3.3-Cirrus-x1-70b is a large language model based on the Llama 3.3 architecture, designed for creative writing and roleplaying applications. It is the first model in the Cirrus series, which aims to provide a unique storytelling experience. The model has been fine-tuned using a diverse dataset to enhance its performance in generating coherent and engaging narratives.",
      "id": "l3.3-cirrus-x1-70b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 65000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 4,
        "output": 6
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Sao10K: L3.1 70B Euryale v2.2",
      "description": "L3.1-70B-Euryale-v2.2 is a 70-billion parameter language model developed by Sao10K and trained on a combination of human-generated and Claude data. This model is an evolution of the previous Euryale and Stheno models, incorporating a multi-stage fine-tuning process to enhance its conversational, creative writing, and roleplaying capabilities.",
      "id": "l31-70b-euryale-v2.2",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.7,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Sao10K: L3 70B Euryale v2.1",
      "description": "L3-70B-Euryale-v2.1 is a large language model created by Sao10K, a prominent AI model developer and maintainer. This 70 billion parameter model is designed as a more capable sibling to Sao10K's previous L3-8B-Stheno-v3.1 and L3-8B-Stheno-v3.2 models, with enhanced capabilities in areas like prompt adherence, anatomy/spatial awareness, and adapting to unique formatting. As described on Sao10K's maintainer profile, this model was trained over 8 NVIDIA H100 SXM GPUs and aims to be a \"big brained version of Stheno.\"",
      "id": "l3-70b-euryale-v2.1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.7,
        "output": 0.8
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Alpindale: Goliath 120B",
      "description": "Goliath 120B is an auto-regressive causal language model created by combining two fine-tuned Llama-2 70B models into one. This large language model (LLM) represents an advancement in the Llama 2 line of models, offering increased capability and scale. Similar models in this space include the Mixtral-8x7B and various CodeLlama models, which focus on coding and conversational abilities.",
      "id": "goliath-120b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "huggingface",
      "tokens": 6000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 8.75,
        "output": 12.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Mancer: Weaver (alpha)",
      "description": "An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory. Meant for use in roleplay/narrative situations.",
      "id": "weaver",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mancer",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 1.5,
        "output": 1.5
      },
      "endpoints": [
        "/v1/chat/completions",
        "/v1/responses",
        "/v1/messages"
      ],
      "premium_model": false,
      "metadata": {
        "vision": false,
        "function_call": true,
        "web_search": false,
        "reasoning": false
      }
    },
    {
      "name": "Stability: Stable Diffusion XL",
      "description": "A text-to-image generative AI model that creates beautiful images.",
      "id": "sdxl",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.007
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "384x1024",
        "1024x384",
        "448x1024",
        "1024x448",
        "512x1024",
        "1024x512",
        "640x1024",
        "1024x640",
        "768x1024",
        "1024x768",
        "704x1024",
        "1024x704",
        "576x1024",
        "1024x576",
        "832x1024",
        "1024x832",
        "896x1024",
        "1024x896"
      ]
    },
    {
      "name": "Stability: Stable Diffusion XL Turbo",
      "description": "SDXL-Turbo is a fast generative text-to-image model that can synthesize photorealistic images from a text prompt in a single network evaluation.",
      "id": "sdxl-turbo",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.05
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Bytedance: Stable Diffusion XL Lightning",
      "description": "SDXL-Lightning by ByteDance: a fast text-to-image model that makes high-quality images in 4 steps.",
      "id": "sdxl-lightning",
      "object": "model",
      "created": 1776283310,
      "owned_by": "bytedance",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.002
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "384x1024",
        "1024x384",
        "448x1024",
        "1024x448",
        "512x1024",
        "1024x512",
        "640x1024",
        "1024x640",
        "768x1024",
        "1024x768",
        "704x1024",
        "1024x704",
        "576x1024",
        "1024x576",
        "832x1024",
        "1024x832",
        "896x1024",
        "1024x896"
      ]
    },
    {
      "name": "Stability: Stable Diffusion 3",
      "description": "A text-to-image model with greatly improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.",
      "id": "stable-diffusion-3",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.3
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Stability: Stable Diffusion 3 Medium",
      "description": "Stable Diffusion 3 Medium is a 2 billion parameter text-to-image model developed by Stability AI. It excels at photorealism, typography, and prompt following.",
      "id": "stable-diffusion-3-2b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.1
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Stability: Stable Diffusion 3.5 Large",
      "description": "A text-to-image model that generates high-resolution images with fine details. It supports various artistic styles and produces diverse outputs from the same prompt, thanks to Query-Key Normalization.",
      "id": "stable-diffusion-3.5-large",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.3
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Stability: Stable Diffusion 3.5 Turbo",
      "description": "A text-to-image model that generates high-resolution images with fine details. It supports various artistic styles and produces diverse outputs from the same prompt, with a focus on fewer inference steps.",
      "id": "stable-diffusion-3.5-turbo",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.12
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Stability: AAM XL Anime Mix v1.0",
      "description": "SDXL-aamXLAnimeMix_v10 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-aamxl-anime-mix-v10",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Amanatsu (Illustrious) v1.1",
      "description": "Amanatsu Illustrious is a text-to-image model that generates anime-style images.",
      "id": "sdxl-amanatsu-illustrious-v11",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Aungir T6AO45",
      "description": "Aungir T6AO45 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-aungir-t6ao45",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Animij v2",
      "description": "Animij v2 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-animij-v2",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Animij v5",
      "description": "Animij v5 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-animij-v5",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: ArliMix v1",
      "description": "ArliMix v1 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-arlimix-v1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Aungir TestA",
      "description": "Aungir TestA is a text-to-image model that generates anime-style images.",
      "id": "sdxl-aungir-testa",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: AutismMix Confetti",
      "description": "AutismMix_confetti is a small amount of AnimeConfettiTune and AutismMix_pony. Has less style swing than pony and better hands.",
      "id": "sdxl-autismmix-confetti",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: AutismMix Pony",
      "description": "AutismMix_pony is a merge of ponyv6 with loras, its more compatible with certain styles made for the base ponydiffusion model.",
      "id": "sdxl-autismmix-pony",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: BoleroMix (Illustrious) v2.90",
      "description": "BoleroMix_illustrious is a text-to-image model that generates anime-style images.",
      "id": "sdxl-boleromix-illustrious-v290",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: ComradeshipXL v1.4kc",
      "description": "This is a specialized model for high resolutions of 1.5K~1.75K. It is trained on a large dataset of anime images and should be able to complete generation tasks excellently with as few lora models as possible.",
      "id": "sdxl-comradeshipxl-v14kc",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Copycat (Illustrious) v4.0",
      "description": "Copycat_illustrious is a text-to-image model that generates anime-style images.",
      "id": "sdxl-copycat-illustrious-v40",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Copycat (Illustrious) v5.0",
      "description": "Copycat_illustrious is a text-to-image model that generates anime-style images.",
      "id": "sdxl-copycat-illustrious-v50",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Halcyon v1.9",
      "description": "This is a very robust model primarily focused on photorealism but capable of generating all types of images.",
      "id": "sdxl-halcyon-v19",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: IkaStrious v1.42 Stable",
      "description": "IkaStrious_v142Stable is a text-to-image model that generates anime-style images.",
      "id": "sdxl-ikastrious-v142-stable",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: IkaStriousXL v9.5",
      "description": "IkaStriousXL_v95 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-ikastriousxl-v95",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Illustrij v1.0",
      "description": "Illustrij_v10 is a comic-anime-hybrid based checkpoint and digital art rendering with a touch of 3D on its way to semirealism",
      "id": "sdxl-illustrij-v10",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Illustrij v1.7",
      "description": "Illustrij_v17 is a comic-anime-hybrid based checkpoint and digital art rendering with a touch of 3D on its way to semirealism",
      "id": "sdxl-illustrij-v17",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Illustrious BlackMagic v3.0",
      "description": "IllustriousBlackMagic_v30CLEAN is a text-to-image model that generates anime-style images.",
      "id": "sdxl-illustrious-blackmagic-v30",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Illustrious PencilXL v3.20",
      "description": "IllustriousPencilXL_v320 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-illustrious-pencilxl-v320",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: IllustriousXL Smoothft SOLID",
      "description": " A merge of SPO and NoobAI XL v-pred (the resulting model is normal) - it features richer solid colours and smoother gradients.",
      "id": "sdxl-illustriousxl-smoothft-solid",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: IllustriousXL Mmmix v8.0",
      "description": "IllustriousXL_mmMix_v80 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-illustriousxl-mmmix-v80",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Ilustmix v4.0",
      "description": "Ilustmix_v40 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-ilustmix-v40",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Ilustmix v8.0",
      "description": "Ilustmix_v80 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-ilustmix-v80",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Janku Illustrious v2.1",
      "description": "JankuV21Illustrious_v21 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-janku-illustrious-v21",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Janku v4.0",
      "description": "Janku_v40 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-janku-v40",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Kiwimix-XL v3",
      "description": "KiwimixXL_v3 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-kiwimixxl-v3",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Lunarcherrymix v2.2 BaseIllustrxl20",
      "description": "LunarCherryMix - Mixed Illustrious model a lot of things. This model excels at producing both SFW and NSFW content, opening the door to limitless creative exploration and beyond.",
      "id": "sdxl-lunarcherrymix-v22-baseillustrxl20",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Lunarcherrymix v2.3",
      "description": "LunarCherryMix - Mixed Illustrious model a lot of things. This model excels at producing both SFW and NSFW content, opening the door to limitless creative exploration and beyond.",
      "id": "sdxl-lunarcherrymix-v23",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Miaomiao Harem v1.6G",
      "description": "MiaomiaoHarem_v16G is a text-to-image model that generates anime-style images.",
      "id": "sdxl-miaomiao-harem-v16g",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Naixl Mmmix v4.5",
      "description": "Naixl_mmMix_v45 is a text-to-image model that generates anime-style images. 4.5 Enhanced detail and color expression.",
      "id": "sdxl-naixl-mmmix-v45",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Naixl Mmmix v5.0",
      "description": "Naixl_mmMix_v50 is a text-to-image model that generates anime-style images. 5.0 enhances the realistic style, the original painting style is closer to 2.5D, and the texture of stockings has been improved due to personal hobbies.",
      "id": "sdxl-naixl-mmmix-v50",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Nonnette v0.50",
      "description": "Nonet is a miscellaneous Noob-based Eps merge model that can be found anywhere.\nBasically, if you follow the usage of Illustrious eps, you can use it without any problem.\nAlthough not limited to this model, I feel that it is necessary to adjust the quality prompts in a timely manner while watching the output, as too much quality prompting can stiffen the atmosphere.",
      "id": "sdxl-nonnette-v050",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Nova Anime XL IL v10.0",
      "description": "Nova Anime XL is Nova Anime: Anime/2.5D/3D checkpoint model for SDXL",
      "id": "sdxl-novaanimexl-ilv100",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Nova Anime XL IL v6.0",
      "description": "Nova Anime XL is Nova Anime: Anime/2.5D/3D checkpoint model for SDXL",
      "id": "sdxl-novaanimexl-ilv60",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Nova Furry XL Illustrious v10.0",
      "description": "A 2d/2.5d furry checkpoint model that can have great details on any type of furs, scales and feathers, which aims to be xl-fied Peaki Furry.",
      "id": "sdxl-novafurryxl-illustrious-v100",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Nova Orange XL v11.0",
      "description": "Nova Orange XL is anime checkpoint with detailed skin and depth.",
      "id": "sdxl-novaorangexl-v110",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Nova Orange XL v9.0",
      "description": "Nova Orange XL is anime checkpoint with detailed skin and depth.",
      "id": "sdxl-novaorangexl-v90",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Nova Unreal XL v5.0",
      "description": "Nova Unreal XL is semi-realistic anime checkpoint with detailed skin and depth.",
      "id": "sdxl-novaunrealxl-v50",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Nova Unreal XL v7.0",
      "description": "Nova Unreal XL is semi-realistic anime checkpoint with detailed skin and depth.",
      "id": "sdxl-novaunrealxl-v70",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Oblivious Mix v1.0",
      "description": "Oblivious Mix is a text-to-image model that generates anime-style images.",
      "id": "sdxl-obliviousmix-v10",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: One For All Anime (Illustrious)",
      "description": "One For All Anime W IL is a text-to-image model that generates anime-style images.",
      "id": "sdxl-oneforallanimew-il",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Persona Style (Illustrious)",
      "description": "Persona Style (Illustrious) is a text-to-image model that generates anime-style images.",
      "id": "sdxl-personastyle-il",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Plant Milk Model Suite (Almond)",
      "description": "Plant Milk Model Suite (Almond) is a text-to-image model that generates anime-style images.",
      "id": "sdxl-plantmilkmodelsuite-almond",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Plant Milk Model Suite (Coconut)",
      "description": "Plant Milk Model Suite (Coconut) is a text-to-image model that generates anime-style images.",
      "id": "sdxl-plantmilkmodelsuite-coconut",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Plant Milk Model Suite (Flax)",
      "description": "Plant Milk Model Suite (Flax) is a text-to-image model that generates anime-style images.",
      "id": "sdxl-plantmilkmodelsuite-flax",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Plant Milk Model Suite (Oat)",
      "description": "Plant Milk Model Suite (Oat) is a text-to-image model that generates anime-style images.",
      "id": "sdxl-plantmilkmodelsuite-oat",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Plant Milk Model Suite (Walnut)",
      "description": "Plant Milk Model Suite (Walnut) is a text-to-image model that generates anime-style images.",
      "id": "sdxl-plantmilkmodelsuite-walnut",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Praeclarus v1.0",
      "description": "Praeclarus is a text-to-image model that generates anime-style images.",
      "id": "sdxl-praeclarus-v10",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Praeclarus v2.0",
      "description": "Praeclarus is a text-to-image model that generates anime-style images.",
      "id": "sdxl-praeclarus-v20",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Prefect Illustrious XL v1.0",
      "description": "Prefect Illustrious XL v1.0 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-prefectillustriousxl-v10",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Prefect Illustrious XL NSFW v1.0",
      "description": "Prefect Illustrious XL NSFW v1.0 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-prefectiousxlnsfw-v10",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Richy Rich Mixi XL v1.0",
      "description": "Richy Rich Mixi XL v1.0 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-richyrichmixixl-v1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Sakura Platinum v2025.03.09",
      "description": "Sakura Platinum v2025.03.09 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-sakuraplatinum-v20250309",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Shiitake Mix v2.0",
      "description": "Shiitake Mix v2.0 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-shiitakemix-v20",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Silence Mix v4.0",
      "description": "Silence Mix v4.0 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-silencemix-v40",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: SS Mix Illustrious v1.0",
      "description": "SS Mix Illustrious v1.0 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-ssmixillustrious-v10",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Sudachi XL Illustrious v1.0",
      "description": "Sudachi XL Illustrious v1.0 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-sudachixlillustrious-v1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Tanemo Mix v2.0",
      "description": "Tanemo Mix v2.0 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-tanemomix-v2",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Tanemo Mix v6.0",
      "description": "Tanemo Mix v6.0 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-tanemomix-v6",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: tN3 v8.0",
      "description": "tN3 v8.0 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-tn3-v8",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Toonify Illustrious v1.0",
      "description": "Toonify is an excellent model if you're looking for cartoonish renders. This model is a merged model that can also do NSFW.\nPrompt suggestions: use cartoon in the prompt for more cartoonish images; you can use anime or realistic prompts, both work the same.",
      "id": "sdxl-toonify-illustrious",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Vixons Illustrious v1.4",
      "description": "Vixons Illustrious v1.4 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-vixonsillust-v14",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: waiCheapfastANI v1.0",
      "description": "The advantage of this model is that it can generate more stable images at lower resolutions, and the diversity in SFW situations is also very good. There are four safety rating tags: general, sensitive, nsfw, explicit.",
      "id": "sdxl-waicheapfastani-v10",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: waiNSFW Illustrious v1.30",
      "description": "waiNSFW Illustrious v1.30 is a text-to-image model that generates anime-style images. There are four safety rating tags: general, sensitive, nsfw, explicit.",
      "id": "sdxl-wainsfwillustrious-v130",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: waiNSFW Illustrious v1.40",
      "description": "waiNSFW Illustrious v1.40 is a text-to-image model that generates anime-style images. There are four safety rating tags: general, sensitive, nsfw, explicit.",
      "id": "sdxl-wainsfwillustrious-v140",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Zuki Anime Illustrious Best",
      "description": "Zuki Anime Illustrious Best is a text-to-image model that generates anime-style images.",
      "id": "sdxl-zukianimeill-best",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Zuki Anime Illustrious v4.0",
      "description": "Zuki Anime Illustrious v4.0 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-zukianimeill-v40",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Zuki Best Anime Mix v1.0",
      "description": "Zuki Best Anime Mix v1.0 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-zukibestanimemix-v10",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Zuki Cute Illustrious v6.0",
      "description": "Zuki Cute Illustrious v6.0 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-zukicuteill-v60",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Zuki Cute Mix v1.0",
      "description": "Zuki Cute Mix v1.0 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-zukicutemix-v10",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Zuki Kawaii Mix v1.0",
      "description": "Zuki Kawaii Mix v1.0 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-zukikawaimix-v10",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: Zuki Soft Illustrious v1.0",
      "description": "Zuki Soft Illustrious v1.0 is a text-to-image model that generates anime-style images.",
      "id": "sdxl-zukisoftill-v10",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: TastyRice Magic on Paper (anime Guofeng Animation)",
      "description": "TastyRice Magic on Paper (anime Guofeng Animation) is a text-to-image model that generates anime-style images.",
      "id": "sdxl-tastyrice-magic-on-paper-anime-guofeng-animation",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Stability: TastyRice Lingyun Caijing (Zhencai World)",
      "description": "TastyRice Lingyun Caijing (Zhencai World) is a text-to-image model that generates anime-style images.",
      "id": "sdxl-tastyrice-lingyun-caijing-zhencai-world",
      "object": "model",
      "created": 1776283310,
      "owned_by": "stabilityai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Playground: Playground v3",
      "description": "Latest image model from Playground, with industry leading capabilities in understanding complex prompts to generate realistic images, logos, typography, and more.",
      "id": "playground-v3",
      "object": "model",
      "created": 1776283310,
      "owned_by": "playgroundai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.3
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Playground: Playground v2.5",
      "description": "Playground v2.5 is the state-of-the-art open-source model in aesthetic quality.",
      "id": "playground-v2.5",
      "object": "model",
      "created": 1776283310,
      "owned_by": "playgroundai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.2
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Fal: Animagine XL 3.1",
      "description": "Anime-themed text-to-image stable diffusion model.",
      "id": "animagine-xl-3.1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "falai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Fal: Animagine XL 4.0",
      "description": "Anime-themed text-to-image stable diffusion model.",
      "id": "animagine-xl-4.0",
      "object": "model",
      "created": 1776283310,
      "owned_by": "falai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Fal: NoobAI XL",
      "description": "NoobAI XL is a text-to-image model that is based on the SDXL architecture and trained primarily for anime image generation. It uses the complete Danbooru and e621 datasets and should be able to complete generation tasks excellently with as few lora models as possible.",
      "id": "noobai-xl",
      "object": "model",
      "created": 1776283310,
      "owned_by": "falai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Fal: RealVisXL 5.0",
      "description": "Generate photo-realistic pictures with RealVisXL 5.0.",
      "id": "realvisxl-5.0",
      "object": "model",
      "created": 1776283310,
      "owned_by": "falai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "NovelAI: NAI Diffusion Furry V3",
      "description": "A text-to-image model that generates furry-style images. It is trained on a large dataset of furry images and should be able to complete generation tasks excellently with as few lora models as possible.",
      "id": "nai-diffusion-furry-3",
      "object": "model",
      "created": 1776283310,
      "owned_by": "novelai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "NovelAI: NAI Diffusion Anime V3",
      "description": "A text-to-image model that generates anime-style images. It is trained on a large dataset of anime images and should be able to complete generation tasks excellently with as few lora models as possible.",
      "id": "nai-diffusion-3",
      "object": "model",
      "created": 1776283310,
      "owned_by": "novelai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "NovelAI: NAI Diffusion V4 Full",
      "description": "A text-to-image model that generates images. It is trained on a large dataset of images and should be able to complete generation tasks excellently with as few lora models as possible.",
      "id": "nai-diffusion-4-full",
      "object": "model",
      "created": 1776283310,
      "owned_by": "novelai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "NovelAI: NAI Diffusion V4 Curated",
      "description": "A text-to-image model that generates images. It is trained on a large dataset of images and should be able to complete generation tasks excellently with as few lora models as possible.",
      "id": "nai-diffusion-4-curated-preview",
      "object": "model",
      "created": 1776283310,
      "owned_by": "novelai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "NovelAI: NAI Diffusion V4.5 Full",
      "description": "A text-to-image model that generates images. It is trained on a large dataset of images and should be able to complete generation tasks excellently with as few lora models as possible.",
      "id": "nai-diffusion-4-5-full",
      "object": "model",
      "created": 1776283310,
      "owned_by": "novelai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "NovelAI: NAI Diffusion V4.5 Curated",
      "description": "A text-to-image model that generates images. It is trained on a large dataset of images and should be able to complete generation tasks excellently with as few lora models as possible.",
      "id": "nai-diffusion-4-5-curated",
      "object": "model",
      "created": 1776283310,
      "owned_by": "novelai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.044
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "512x1024",
        "1024x512",
        "832x1216",
        "1216x832",
        "704x1280",
        "1280x704",
        "768x1024",
        "1024x768"
      ]
    },
    {
      "name": "Fal: Fooocus",
      "description": "Fooocus is at the forefront of AI-driven image generation, offering a suite of tools designed to cater to both novice users and advanced creators.",
      "id": "fooocus",
      "object": "model",
      "created": 1776283310,
      "owned_by": "falai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.1
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Fal: Kolors",
      "description": "Kolors is a photorealistic text-to-image model.",
      "id": "kolors",
      "object": "model",
      "created": 1776283310,
      "owned_by": "falai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.1
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Luma: Photon",
      "description": "High-quality image generation model optimized for creative professional workflows and ultra-high fidelity outputs.",
      "id": "luma-photon",
      "object": "model",
      "created": 1776283310,
      "owned_by": "luma",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.06
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": true,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Luma: Photon Flash",
      "description": "Accelerated variant of Photon prioritizing speed while maintaining quality.",
      "id": "luma-photon-flash",
      "object": "model",
      "created": 1776283310,
      "owned_by": "luma",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.03
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": true,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "HiDream: HiDream I1 Fast",
      "description": "HiDream-I1 Fast is a new open-source image generative foundation model with 17B parameters that achieves state-of-the-art image generation quality within seconds.",
      "id": "hidream-i1-fast",
      "object": "model",
      "created": 1776283310,
      "owned_by": "hidream-ai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.3
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "HiDream: HiDream I1 Dev",
      "description": "HiDream-I1 Dev is a new open-source image generative foundation model with 17B parameters that achieves state-of-the-art image generation quality within seconds.",
      "id": "hidream-i1-dev",
      "object": "model",
      "created": 1776283310,
      "owned_by": "hidream-ai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.5
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": true,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "HiDream: HiDream I1 Full",
      "description": "HiDream-I1 Full is a new open-source image generative foundation model with 17B parameters that achieves state-of-the-art image generation quality within seconds.",
      "id": "hidream-i1-full",
      "object": "model",
      "created": 1776283310,
      "owned_by": "hidream-ai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.8
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": true,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "BlackForestLabs: FLUX.1 [schnell]",
      "description": "The fastest image generation model tailored for local development and personal use.",
      "id": "flux-schnell",
      "object": "model",
      "created": 1776283310,
      "owned_by": "blackforestlabs",
      "tokens": 32000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.005
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "BlackForestLabs: FLUX.1 [dev]",
      "description": "A 12 billion parameter rectified flow transformer capable of generating images from text descriptions.",
      "id": "flux-dev",
      "object": "model",
      "created": 1776283310,
      "owned_by": "blackforestlabs",
      "tokens": 32000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.3
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "BlackForestLabs: FLUX.1 [pro]",
      "description": "State-of-the-art image generation with top of the line prompt following, visual quality, image detail and output diversity.",
      "id": "flux-pro",
      "object": "model",
      "created": 1776283310,
      "owned_by": "blackforestlabs",
      "tokens": 32000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.5
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "BlackForestLabs: Flux1.1 [pro]",
      "description": "Faster, better FLUX Pro. Text-to-image model with excellent image quality, prompt adherence, and output diversity.",
      "id": "flux-1.1-pro",
      "object": "model",
      "created": 1776283310,
      "owned_by": "blackforestlabs",
      "tokens": 32000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.7
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "BlackForestLabs: FLUX1.1 [pro] ultra",
      "description": "FLUX1.1 [pro] in ultra mode. Images are up to 4 megapixels.",
      "id": "flux-1.1-pro-ultra",
      "object": "model",
      "created": 1776283310,
      "owned_by": "blackforestlabs",
      "tokens": 32000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.8
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": true,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "BlackForestLabs: FLUX1.1 [pro] ultra raw",
      "description": "FLUX1.1 [pro] in ultra and raw modes. Images are up to 4 megapixels. Use raw mode for realism.",
      "id": "flux-1.1-pro-ultra-raw",
      "object": "model",
      "created": 1776283310,
      "owned_by": "blackforestlabs",
      "tokens": 32000,
      "pricing": {
        "type": "per_image",
        "coefficient": 1
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": true,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "BlackForestLabs: FLUX.2 [klein] 4B",
      "description": "FLUX.2 [klein] is an ultra-fast, distilled image model. It unifies image generation and editing in a single model, delivering state-of-the-art quality enabling interactive workflows, real-time previews, and latency-critical applications.",
      "id": "flux-2-klein-4b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "blackforestlabs",
      "tokens": 32000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.3
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": true,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "BlackForestLabs: FLUX.2 [klein] 9B",
      "description": "FLUX.2 [klein] 9B is an ultra-fast, distilled image model with enhanced quality. It unifies image generation and editing in a single model, delivering state-of-the-art quality enabling interactive workflows, real-time previews, and latency-critical applications.",
      "id": "flux-2-klein-9b",
      "object": "model",
      "created": 1776283310,
      "owned_by": "blackforestlabs",
      "tokens": 32000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.5
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": true,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "BlackForestLabs: FLUX.2 [dev]",
      "description": "FLUX.2 [dev] is an image model from Black Forest Labs where you can generate highly realistic and detailed images, with multi-reference support.",
      "id": "flux-2-dev",
      "object": "model",
      "created": 1776283310,
      "owned_by": "blackforestlabs",
      "tokens": 32000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.7
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": true,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "BlackForestLabs: FLUX.1 Krea [dev]",
      "description": "An opinionated text-to-image model from Black Forest Labs in collaboration with Krea that excels in photorealism. Creates images that avoid the oversaturated \"AI look\".",
      "id": "flux-1-krea-dev",
      "object": "model",
      "created": 1776283310,
      "owned_by": "blackforestlabs",
      "tokens": 32000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.6
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "BlackForestLabs: FLUX.1 Kontext [dev]",
      "description": "FLUX.1 Kontext [dev] is a premium model with maximum performance and improved typography generation. It is a state-of-the-art image editing model from Black Forest Labs that allows you to edit images using text prompts. It’s the best in class for text-guided image editing and offers superior results compared to other models like OpenAI’s 4o/gpt-image-1.",
      "id": "flux-1-kontext-dev",
      "object": "model",
      "created": 1776283310,
      "owned_by": "blackforestlabs",
      "tokens": 32000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.8
      },
      "endpoints": [
        "/v1/images/generations",
        "/v1/images/edits"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1568x672",
        "1504x688",
        "1456x720",
        "1392x752",
        "1328x800",
        "1248x832",
        "1184x880",
        "1104x944",
        "944x1104",
        "880x1184",
        "832x1248",
        "800x1328",
        "752x1392",
        "720x1456",
        "688x1504",
        "672x1568"
      ]
    },
    {
      "name": "BlackForestLabs: FLUX.1 Kontext [pro]",
      "description": "FLUX.1 Kontext [pro] has been optimized for text-guided image editing, providing exceptional performance with high-quality outputs, great prompt following, and consistent results. It is a state-of-the-art image editing model from Black Forest Labs that allows you to edit images using text prompts. It’s the best in class for text-guided image editing and offers superior results compared to other models like OpenAI’s 4o/gpt-image-1.",
      "id": "flux-1-kontext-pro",
      "object": "model",
      "created": 1776283310,
      "owned_by": "blackforestlabs",
      "tokens": 32000,
      "pricing": {
        "type": "per_image",
        "coefficient": 1
      },
      "endpoints": [
        "/v1/images/generations",
        "/v1/images/edits"
      ],
      "premium_model": true,
      "sizes": [
        "1024x1024",
        "1568x672",
        "1392x752",
        "1184x880",
        "1248x832",
        "832x1248",
        "880x1184",
        "752x1392",
        "672x1568"
      ]
    },
    {
      "name": "BlackForestLabs: FLUX.1 Kontext [max]",
      "description": "FLUX.1 Kontext [max] is a premium model with maximum performance and improved typography generation. It is a state-of-the-art image editing model from Black Forest Labs that allows you to edit images using text prompts. It’s the best in class for text-guided image editing and offers superior results compared to other models like OpenAI’s 4o/gpt-image-1.",
      "id": "flux-1-kontext-max",
      "object": "model",
      "created": 1776283310,
      "owned_by": "blackforestlabs",
      "tokens": 32000,
      "pricing": {
        "type": "per_image",
        "coefficient": 1.2
      },
      "endpoints": [
        "/v1/images/generations",
        "/v1/images/edits"
      ],
      "premium_model": true,
      "sizes": [
        "1024x1024",
        "1568x672",
        "1392x752",
        "1184x880",
        "1248x832",
        "832x1248",
        "880x1184",
        "752x1392",
        "672x1568"
      ]
    },
    {
      "name": "XLabs: Flux Realism",
      "description": "FLUX.1-dev with XLabs-AI’s realism lora.",
      "id": "flux-realism",
      "object": "model",
      "created": 1776283310,
      "owned_by": "xlabsai",
      "tokens": 600,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.3
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Replicate: Flux Half Illustration",
      "description": "Flux lora, use \"in the style of TOK\" to trigger generation, creates half photo half illustrated elements.",
      "id": "flux-half-illustration",
      "object": "model",
      "created": 1776283310,
      "owned_by": "replicate",
      "tokens": 600,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.3
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Replicate: Flux Black Light",
      "description": "A flux lora fine-tuned on black light images.",
      "id": "flux-black-light",
      "object": "model",
      "created": 1776283310,
      "owned_by": "replicate",
      "tokens": 600,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.3
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Replicate: Flux Minecraft Movie",
      "description": "Flux lora, use \"MNCRFTMOV\" to trigger image generation.",
      "id": "flux-minecraft-movie",
      "object": "model",
      "created": 1776283310,
      "owned_by": "replicate",
      "tokens": 600,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.3
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Replicate: Flux Dreamscape",
      "description": "Flux lora, use \"BSstyle004\" to trigger image generation.",
      "id": "flux-dreamscape",
      "object": "model",
      "created": 1776283310,
      "owned_by": "replicate",
      "tokens": 600,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.3
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Replicate: Flux Lineart",
      "description": "Flux finetuned for black and white line art.",
      "id": "flux-lineart",
      "object": "model",
      "created": 1776283310,
      "owned_by": "replicate",
      "tokens": 600,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.3
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Replicate: NSFW Master Flux",
      "description": "NSFW Master Flux is a model that can generate NSFW images.",
      "id": "nsfw-master-flux",
      "object": "model",
      "created": 1776283310,
      "owned_by": "replicate",
      "tokens": 600,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.3
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Replicate: Phlux - Photorealism with style",
      "description": "This is a Flux LoRA that can generate photorealistic images with incredible texture and lighting.",
      "id": "phlux",
      "object": "model",
      "created": 1776283310,
      "owned_by": "replicate",
      "tokens": 600,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.3
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Alibaba: Wan 2.2 Image",
      "description": "Wan 2.2 is powerful image generator model. Wan is an original, raw, un-distilled model which leads to advantages over distilled image models.",
      "id": "wan-2.2-image",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.1
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": true,
      "sizes": [
        "1024x1024",
        "864x1152",
        "1152x864",
        "1280x720",
        "720x1280",
        "832x1248",
        "1248x832",
        "1512x648"
      ]
    },
    {
      "name": "Alibaba: Qwen Image",
      "description": "An image generation foundation model in the Qwen series that achieves significant advances in complex text rendering.",
      "id": "qwen-image",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.1
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": true,
      "sizes": [
        "1024x1024",
        "864x1152",
        "1152x864",
        "1280x720",
        "720x1280",
        "832x1248",
        "1248x832",
        "1512x648"
      ]
    },
    {
      "name": "Alibaba: Z-Image Turbo",
      "description": "Z-Image Turbo is a super fast text-to-image model of 6B parameters developed by Tongyi-MAI.",
      "id": "z-image-turbo",
      "object": "model",
      "created": 1776283310,
      "owned_by": "alibaba",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.01
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Fal: OmniGen",
      "description": "OmniGen is a unified image generation model that can generate a wide range of images from multi-modal prompts. It is designed to be simple, flexible, and easy to use.",
      "id": "omni-gen",
      "object": "model",
      "created": 1776283310,
      "owned_by": "falai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.2
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": true,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "NVIDIA: Sana",
      "description": "Sana is a text-to-image framework developed by NVIDIA that can efficiently generate images up to 4096 × 4096 resolution. It can synthesize high-resolution, high-quality images with strong text-image alignment at a fast speed.",
      "id": "sana",
      "object": "model",
      "created": 1776283310,
      "owned_by": "nvidia",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.005
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024",
        "1024x576",
        "576x1024"
      ]
    },
    {
      "name": "Leonardo: Lucid Origin",
      "description": "Lucid Origin from Leonardo.AI is their most adaptable and prompt-responsive model to date. Whether you're generating images with sharp graphic design, stunning full-HD renders, or highly specific creative direction, it adheres closely to your prompts, renders text with accuracy, and supports a wide array of visual styles and aesthetics – from stylized concept art to crisp product mockups.",
      "id": "lucid-origin",
      "object": "model",
      "created": 1776283310,
      "owned_by": "leonardoai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.02
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "384x1024",
        "1024x384",
        "448x1024",
        "1024x448",
        "512x1024",
        "1024x512",
        "640x1024",
        "1024x640",
        "768x1024",
        "1024x768",
        "704x1024",
        "1024x704",
        "576x1024",
        "1024x576",
        "832x1024",
        "1024x832",
        "896x1024",
        "1024x896"
      ]
    },
    {
      "name": "Leonardo: Phoenix 1.0",
      "description": "Phoenix 1.0 is a model by Leonardo.Ai that generates images with exceptional prompt adherence and coherent text.",
      "id": "phoenix-1.0",
      "object": "model",
      "created": 1776283310,
      "owned_by": "leonardoai",
      "tokens": 1000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.012
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "384x1024",
        "1024x384",
        "448x1024",
        "1024x448",
        "512x1024",
        "1024x512",
        "640x1024",
        "1024x640",
        "768x1024",
        "1024x768",
        "704x1024",
        "1024x704",
        "576x1024",
        "1024x576",
        "832x1024",
        "1024x832",
        "896x1024",
        "1024x896"
      ]
    },
    {
      "name": "OpenAI: DALL-E 2",
      "description": "DALL-E 2 is an AI system built by OpenAI that allows users to create realistic images from natural language text prompts.",
      "id": "dall-e-2",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.05
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1792",
        "1792x1024"
      ]
    },
    {
      "name": "OpenAI: DALL-E 3",
      "description": "OpenAI's most powerful image generation model. Generates high quality images with intricate details based on the user's most recent prompt.",
      "id": "dall-e-3",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 200,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.1
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": false,
      "sizes": [
        "1024x1024",
        "1024x1792",
        "1792x1024"
      ],
      "qualities": [
        "standard",
        "hd"
      ]
    },
    {
      "name": "OpenAI: GPT Image 1 Mini",
      "description": "GPT Image 1 Mini is a cost-efficient version of GPT Image 1. It is a natively multimodal language model that accepts both text and image inputs, and produces image outputs.",
      "id": "gpt-image-1-mini",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 32000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.3
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": true,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024"
      ],
      "qualities": [
        "low",
        "medium",
        "high"
      ]
    },
    {
      "name": "OpenAI: GPT Image 1",
      "description": "GPT Image 1 is a new state-of-the-art image generation model. It is a natively multimodal language model that accepts both text and image inputs, and produces image outputs.",
      "id": "gpt-image-1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 32000,
      "pricing": {
        "type": "per_image",
        "coefficient": 0.3
      },
      "endpoints": [
        "/v1/images/generations"
      ],
      "premium_model": true,
      "sizes": [
        "1024x1024",
        "1024x1536",
        "1536x1024"
      ],
      "qualities": [
        "low",
        "medium",
        "high"
      ]
    },
    {
      "name": "OpenAI: TTS-1",
      "description": "TTS is a model that converts text to natural sounding spoken text. The tts-1 model is optimized for realtime text-to-speech use cases.",
      "id": "tts-1",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 4096,
      "pricing": {
        "type": "per_million_tokens",
        "input": 15
      },
      "endpoints": [
        "/v1/audio/speech"
      ],
      "premium_model": false,
      "voices": [
        "alloy",
        "ash",
        "ballad",
        "coral",
        "echo",
        "fable",
        "onyx",
        "nova",
        "sage",
        "shimmer",
        "verse"
      ],
      "parameters": {
        "input": {
          "required": true,
          "type": "string",
          "min_length": 1,
          "max_length": 4096,
          "description": "The text to generate audio for. The maximum length is 4096 characters."
        },
        "voice": {
          "required": true,
          "type": "string",
          "description": "The voice to use when generating the audio"
        },
        "response_format": {
          "required": false,
          "type": "string",
          "default": "mp3",
          "description": "The format to audio in. Supported formats are mp3, opus, aac, flac, wav, and pcm."
        },
        "speed": {
          "required": false,
          "type": "number",
          "minimum": 0.25,
          "maximum": 4.0,
          "default": 1.0,
          "description": "The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default."
        }
      }
    },
    {
      "name": "OpenAI: TTS-1 HD",
      "description": "TTS is a model that converts text to natural sounding spoken text. The tts-1-hd model is optimized for high quality text-to-speech use cases.",
      "id": "tts-1-hd",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 4096,
      "pricing": {
        "type": "per_million_tokens",
        "input": 30
      },
      "endpoints": [
        "/v1/audio/speech"
      ],
      "premium_model": false,
      "voices": [
        "alloy",
        "ash",
        "ballad",
        "coral",
        "echo",
        "fable",
        "onyx",
        "nova",
        "sage",
        "shimmer",
        "verse"
      ],
      "parameters": {
        "input": {
          "required": true,
          "type": "string",
          "min_length": 1,
          "max_length": 4096,
          "description": "The text to generate audio for. The maximum length is 4096 characters."
        },
        "voice": {
          "required": true,
          "type": "string",
          "description": "The voice to use when generating the audio"
        },
        "response_format": {
          "required": false,
          "type": "string",
          "default": "mp3",
          "description": "The format to audio in. Supported formats are mp3, opus, aac, flac, wav, and pcm."
        },
        "speed": {
          "required": false,
          "type": "number",
          "minimum": 0.25,
          "maximum": 4.0,
          "default": 1.0,
          "description": "The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default."
        }
      }
    },
    {
      "name": "OpenAI: GPT-4o Mini TTS",
      "description": "GPT-4o mini TTS is a text-to-speech model built on GPT-4o mini, a fast and powerful language model. Use it to convert text to natural sounding spoken text.",
      "id": "gpt-4o-mini-tts",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 2000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 10
      },
      "endpoints": [
        "/v1/audio/speech"
      ],
      "premium_model": false,
      "voices": [
        "alloy",
        "ash",
        "ballad",
        "coral",
        "echo",
        "fable",
        "onyx",
        "nova",
        "sage",
        "shimmer",
        "verse"
      ],
      "parameters": {
        "input": {
          "required": true,
          "type": "string",
          "min_length": 1,
          "max_length": 4096,
          "description": "The text to generate audio for. The maximum length is 4096 characters."
        },
        "voice": {
          "required": true,
          "type": "string",
          "description": "The voice to use when generating the audio"
        },
        "instructions": {
          "required": false,
          "type": "string",
          "description": "Control the voice of your generated audio with additional instructions"
        },
        "response_format": {
          "required": false,
          "type": "string",
          "default": "mp3",
          "description": "The format to audio in. Supported formats are mp3, opus, aac, flac, wav, and pcm."
        }
      }
    },
    {
      "name": "Google: Gemini 2.5 Flash TTS Preview",
      "description": "Google's Gemini 2.5 Flash model optimized for text-to-speech generation. Supports single-speaker synthesis with 30 high-quality voice options for natural-sounding speech output.",
      "id": "gemini-2.5-flash-preview-tts",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 10
      },
      "endpoints": [
        "/v1/audio/speech"
      ],
      "premium_model": false,
      "voices": [
        "Zephyr",
        "Puck",
        "Charon",
        "Kore",
        "Fenrir",
        "Leda",
        "Orus",
        "Aoede",
        "Callirrhoe",
        "Autonoe",
        "Enceladus",
        "Iapetus",
        "Umbriel",
        "Algieba",
        "Despina",
        "Erinome",
        "Algenib",
        "Rasalgethi",
        "Laomedeia",
        "Achernar",
        "Alnilam",
        "Schedar",
        "Gacrux",
        "Pulcherrima",
        "Achird",
        "Zubenelgenubi",
        "Vindemiatrix",
        "Sadachbia",
        "Sadaltager",
        "Sulafat"
      ],
      "parameters": {
        "input": {
          "required": true,
          "type": "string",
          "min_length": 1,
          "max_length": 8000,
          "description": "Text to convert to speech",
          "original_field": "text"
        },
        "voice": {
          "required": false,
          "type": "string",
          "enum": [
            "Zephyr",
            "Puck",
            "Charon",
            "Kore",
            "Fenrir",
            "Leda",
            "Orus",
            "Aoede",
            "Callirrhoe",
            "Autonoe",
            "Enceladus",
            "Iapetus",
            "Umbriel",
            "Algieba",
            "Despina",
            "Erinome",
            "Algenib",
            "Rasalgethi",
            "Laomedeia",
            "Achernar",
            "Alnilam",
            "Schedar",
            "Gacrux",
            "Pulcherrima",
            "Achird",
            "Zubenelgenubi",
            "Vindemiatrix",
            "Sadachbia",
            "Sadaltager",
            "Sulafat"
          ],
          "default": "Kore",
          "description": "Voice name for speech synthesis"
        }
      }
    },
    {
      "name": "Google: Gemini 2.5 Pro TTS Preview",
      "description": "Google's Gemini 2.5 Pro model optimized for text-to-speech generation. Supports single-speaker synthesis with 30 high-quality voice options for natural-sounding speech output with enhanced quality and capabilities.",
      "id": "gemini-2.5-pro-preview-tts",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 20
      },
      "endpoints": [
        "/v1/audio/speech"
      ],
      "premium_model": true,
      "voices": [
        "Zephyr",
        "Puck",
        "Charon",
        "Kore",
        "Fenrir",
        "Leda",
        "Orus",
        "Aoede",
        "Callirrhoe",
        "Autonoe",
        "Enceladus",
        "Iapetus",
        "Umbriel",
        "Algieba",
        "Despina",
        "Erinome",
        "Algenib",
        "Rasalgethi",
        "Laomedeia",
        "Achernar",
        "Alnilam",
        "Schedar",
        "Gacrux",
        "Pulcherrima",
        "Achird",
        "Zubenelgenubi",
        "Vindemiatrix",
        "Sadachbia",
        "Sadaltager",
        "Sulafat"
      ],
      "parameters": {
        "input": {
          "required": true,
          "type": "string",
          "min_length": 1,
          "max_length": 8000,
          "description": "Text to convert to speech",
          "original_field": "text"
        },
        "voice": {
          "required": false,
          "type": "string",
          "enum": [
            "Zephyr",
            "Puck",
            "Charon",
            "Kore",
            "Fenrir",
            "Leda",
            "Orus",
            "Aoede",
            "Callirrhoe",
            "Autonoe",
            "Enceladus",
            "Iapetus",
            "Umbriel",
            "Algieba",
            "Despina",
            "Erinome",
            "Algenib",
            "Rasalgethi",
            "Laomedeia",
            "Achernar",
            "Alnilam",
            "Schedar",
            "Gacrux",
            "Pulcherrima",
            "Achird",
            "Zubenelgenubi",
            "Vindemiatrix",
            "Sadachbia",
            "Sadaltager",
            "Sulafat"
          ],
          "default": "Kore",
          "description": "Voice name for speech synthesis"
        }
      }
    },
    {
      "name": "MyShell: MeloTTS",
      "description": "MeloTTS is a high-quality multi-lingual text-to-speech library by MyShell.ai.",
      "id": "melotts",
      "object": "model",
      "created": 1776283310,
      "owned_by": "myshell",
      "tokens": 1000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 20
      },
      "endpoints": [
        "/v1/audio/speech"
      ],
      "premium_model": false,
      "voices": [
        "en",
        "fr",
        "es",
        "de",
        "it",
        "pt",
        "ru",
        "zh",
        "ja",
        "ko"
      ],
      "parameters": {
        "input": {
          "required": true,
          "type": "string",
          "min_length": 1,
          "description": "A text description of the audio you want to generate",
          "original_field": "prompt"
        },
        "voice": {
          "required": false,
          "type": "string",
          "default": "en",
          "description": "The speech language (e.g., 'en' for English, 'fr' for French). Defaults to 'en' if not specified",
          "original_field": "lang"
        }
      }
    },
    {
      "name": "Microsoft: Cognitive Speech TTS",
      "description": "Microsoft Azure Text-to-Speech service that converts text to natural sounding speech. Supports multiple languages, voices, and emotional styles with high-quality neural voices.",
      "id": "microsoft-tts",
      "object": "model",
      "created": 1776283310,
      "owned_by": "microsoft",
      "tokens": 5000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 10
      },
      "endpoints": [
        "/v1/audio/speech"
      ],
      "premium_model": false,
      "voices": [
        "af-ZA-AdriNeural",
        "af-ZA-WillemNeural",
        "am-ET-AmehaNeural",
        "am-ET-MekdesNeural",
        "ar-AE-FatimaNeural",
        "ar-AE-HamdanNeural",
        "ar-BH-AliNeural",
        "ar-BH-LailaNeural",
        "ar-DZ-AminaNeural",
        "ar-DZ-IsmaelNeural",
        "ar-EG-SalmaNeural",
        "ar-EG-ShakirNeural",
        "ar-IQ-BasselNeural",
        "ar-IQ-RanaNeural",
        "ar-JO-SanaNeural",
        "ar-JO-TaimNeural",
        "ar-KW-FahedNeural",
        "ar-KW-NouraNeural",
        "ar-LB-LaylaNeural",
        "ar-LB-RamiNeural",
        "ar-LY-ImanNeural",
        "ar-LY-OmarNeural",
        "ar-MA-JamalNeural",
        "ar-MA-MounaNeural",
        "ar-OM-AbdullahNeural",
        "ar-OM-AyshaNeural",
        "ar-QA-AmalNeural",
        "ar-QA-MoazNeural",
        "ar-SA-HamedNeural",
        "ar-SA-ZariyahNeural",
        "ar-SY-AmanyNeural",
        "ar-SY-LaithNeural",
        "ar-TN-HediNeural",
        "ar-TN-ReemNeural",
        "ar-YE-MaryamNeural",
        "ar-YE-SalehNeural",
        "as-IN-PriyomNeural",
        "as-IN-YashicaNeural",
        "az-AZ-BabekNeural",
        "az-AZ-BanuNeural",
        "bg-BG-BorislavNeural",
        "bg-BG-KalinaNeural",
        "bn-BD-NabanitaNeural",
        "bn-BD-PradeepNeural",
        "bn-IN-BashkarNeural",
        "bn-IN-TanishaaNeural",
        "bs-BA-GoranNeural",
        "bs-BA-VesnaNeural",
        "ca-ES-AlbaNeural",
        "ca-ES-EnricNeural",
        "ca-ES-JoanaNeural",
        "cs-CZ-AntoninNeural",
        "cs-CZ-VlastaNeural",
        "cy-GB-AledNeural",
        "cy-GB-NiaNeural",
        "da-DK-ChristelNeural",
        "da-DK-JeppeNeural",
        "de-AT-IngridNeural",
        "de-AT-JonasNeural",
        "de-CH-JanNeural",
        "de-CH-LeniNeural",
        "de-DE-AmalaNeural",
        "de-DE-BerndNeural",
        "de-DE-ChristophNeural",
        "de-DE-ConradNeural",
        "de-DE-ElkeNeural",
        "de-DE-FlorianMultilingualNeural",
        "de-DE-GiselaNeural",
        "de-DE-KasperNeural",
        "de-DE-KatjaNeural",
        "de-DE-KillianNeural",
        "de-DE-KlarissaNeural",
        "de-DE-KlausNeural",
        "de-DE-LouisaNeural",
        "de-DE-MajaNeural",
        "de-DE-RalfNeural",
        "de-DE-SeraphinaMultilingualNeural",
        "de-DE-TanjaNeural",
        "el-GR-AthinaNeural",
        "el-GR-NestorasNeural",
        "en-AU-AnnetteNeural",
        "en-AU-CarlyNeural",
        "en-AU-DarrenNeural",
        "en-AU-DuncanNeural",
        "en-AU-ElsieNeural",
        "en-AU-FreyaNeural",
        "en-AU-JoanneNeural",
        "en-AU-KenNeural",
        "en-AU-KimNeural",
        "en-AU-NatashaNeural",
        "en-AU-NeilNeural",
        "en-AU-TimNeural",
        "en-AU-TinaNeural",
        "en-AU-WilliamMultilingualNeural",
        "en-AU-WilliamNeural",
        "en-CA-ClaraNeural",
        "en-CA-LiamNeural",
        "en-GB-AbbiNeural",
        "en-GB-AdaMultilingualNeural",
        "en-GB-AlfieNeural",
        "en-GB-BellaNeural",
        "en-GB-ElliotNeural",
        "en-GB-EthanNeural",
        "en-GB-HollieNeural",
        "en-GB-LibbyNeural",
        "en-GB-MaisieNeural",
        "en-GB-MiaNeural",
        "en-GB-NoahNeural",
        "en-GB-OliverNeural",
        "en-GB-OliviaNeural",
        "en-GB-OllieMultilingualNeural",
        "en-GB-RyanNeural",
        "en-GB-SoniaNeural",
        "en-GB-ThomasNeural",
        "en-HK-SamNeural",
        "en-HK-YanNeural",
        "en-IE-ConnorNeural",
        "en-IE-EmilyNeural",
        "en-IN-AaravNeural",
        "en-IN-AartiIndicNeural",
        "en-IN-AartiNeural",
        "en-IN-AashiNeural",
        "en-IN-AnanyaNeural",
        "en-IN-ArjunIndicNeural",
        "en-IN-ArjunNeural",
        "en-IN-KavyaNeural",
        "en-IN-KunalNeural",
        "en-IN-NeerjaIndicNeural",
        "en-IN-NeerjaNeural",
        "en-IN-PrabhatIndicNeural",
        "en-IN-PrabhatNeural",
        "en-IN-RehaanNeural",
        "en-KE-AsiliaNeural",
        "en-KE-ChilembaNeural",
        "en-NG-AbeoNeural",
        "en-NG-EzinneNeural",
        "en-NZ-MitchellNeural",
        "en-NZ-MollyNeural",
        "en-PH-JamesNeural",
        "en-PH-RosaNeural",
        "en-SG-LunaNeural",
        "en-SG-WayneNeural",
        "en-TZ-ElimuNeural",
        "en-TZ-ImaniNeural",
        "en-US-AIGenerate1Neural",
        "en-US-AIGenerate2Neural",
        "en-US-AdamMultilingualNeural",
        "en-US-AlloyTurboMultilingualNeural",
        "en-US-AmandaMultilingualNeural",
        "en-US-AmberNeural",
        "en-US-AnaNeural",
        "en-US-AndrewMultilingualNeural",
        "en-US-AndrewNeural",
        "en-US-AriaNeural",
        "en-US-AshTurboMultilingualNeural",
        "en-US-AshleyNeural",
        "en-US-AvaMultilingualNeural",
        "en-US-AvaNeural",
        "en-US-BlueNeural",
        "en-US-BrandonMultilingualNeural",
        "en-US-BrandonNeural",
        "en-US-BrianMultilingualNeural",
        "en-US-BrianNeural",
        "en-US-ChristopherMultilingualNeural",
        "en-US-ChristopherNeural",
        "en-US-CoraMultilingualNeural",
        "en-US-CoraNeural",
        "en-US-DavisMultilingualNeural",
        "en-US-DavisNeural",
        "en-US-DerekMultilingualNeural",
        "en-US-DustinMultilingualNeural",
        "en-US-EchoTurboMultilingualNeural",
        "en-US-ElizabethNeural",
        "en-US-EmmaMultilingualNeural",
        "en-US-EmmaNeural",
        "en-US-EricNeural",
        "en-US-EvelynMultilingualNeural",
        "en-US-FableTurboMultilingualNeural",
        "en-US-GuyNeural",
        "en-US-JacobNeural",
        "en-US-JaneNeural",
        "en-US-JasonNeural",
        "en-US-JennyMultilingualNeural",
        "en-US-JennyNeural",
        "en-US-KaiNeural",
        "en-US-LewisMultilingualNeural",
        "en-US-LolaMultilingualNeural",
        "en-US-LunaNeural",
        "en-US-MichelleNeural",
        "en-US-MonicaNeural",
        "en-US-NancyMultilingualNeural",
        "en-US-NancyNeural",
        "en-US-NovaTurboMultilingualNeural",
        "en-US-OnyxTurboMultilingualNeural",
        "en-US-PhoebeMultilingualNeural",
        "en-US-RogerNeural",
        "en-US-RyanMultilingualNeural",
        "en-US-SamuelMultilingualNeural",
        "en-US-SaraNeural",
        "en-US-SerenaMultilingualNeural",
        "en-US-ShimmerTurboMultilingualNeural",
        "en-US-SteffanMultilingualNeural",
        "en-US-SteffanNeural",
        "en-US-TonyNeural",
        "en-ZA-LeahNeural",
        "en-ZA-LukeNeural",
        "es-AR-ElenaNeural",
        "es-AR-TomasNeural",
        "es-BO-MarceloNeural",
        "es-BO-SofiaNeural",
        "es-CL-CatalinaNeural",
        "es-CL-LorenzoNeural",
        "es-CO-GonzaloNeural",
        "es-CO-SalomeNeural",
        "es-CR-JuanNeural",
        "es-CR-MariaNeural",
        "es-CU-BelkysNeural",
        "es-CU-ManuelNeural",
        "es-DO-EmilioNeural",
        "es-DO-RamonaNeural",
        "es-EC-AndreaNeural",
        "es-EC-LuisNeural",
        "es-ES-AbrilNeural",
        "es-ES-AlvaroNeural",
        "es-ES-ArabellaMultilingualNeural",
        "es-ES-ArnauNeural",
        "es-ES-DarioNeural",
        "es-ES-EliasNeural",
        "es-ES-ElviraNeural",
        "es-ES-EstrellaNeural",
        "es-ES-IreneNeural",
        "es-ES-IsidoraMultilingualNeural",
        "es-ES-LaiaNeural",
        "es-ES-LiaNeural",
        "es-ES-NilNeural",
        "es-ES-SaulNeural",
        "es-ES-TeoNeural",
        "es-ES-TrianaNeural",
        "es-ES-TristanMultilingualNeural",
        "es-ES-VeraNeural",
        "es-ES-XimenaMultilingualNeural",
        "es-ES-XimenaNeural",
        "es-GQ-JavierNeural",
        "es-GQ-TeresaNeural",
        "es-GT-AndresNeural",
        "es-GT-MartaNeural",
        "es-HN-CarlosNeural",
        "es-HN-KarlaNeural",
        "es-MX-BeatrizNeural",
        "es-MX-CandelaNeural",
        "es-MX-CarlotaNeural",
        "es-MX-CecilioNeural",
        "es-MX-DaliaMultilingualNeural",
        "es-MX-DaliaNeural",
        "es-MX-GerardoNeural",
        "es-MX-JorgeMultilingualNeural",
        "es-MX-JorgeNeural",
        "es-MX-LarissaNeural",
        "es-MX-LibertoNeural",
        "es-MX-LucianoNeural",
        "es-MX-MarinaNeural",
        "es-MX-NuriaNeural",
        "es-MX-PelayoNeural",
        "es-MX-RenataNeural",
        "es-MX-YagoNeural",
        "es-NI-FedericoNeural",
        "es-NI-YolandaNeural",
        "es-PA-MargaritaNeural",
        "es-PA-RobertoNeural",
        "es-PE-AlexNeural",
        "es-PE-CamilaNeural",
        "es-PR-KarinaNeural",
        "es-PR-VictorNeural",
        "es-PY-MarioNeural",
        "es-PY-TaniaNeural",
        "es-SV-LorenaNeural",
        "es-SV-RodrigoNeural",
        "es-US-AlonsoNeural",
        "es-US-PalomaNeural",
        "es-UY-MateoNeural",
        "es-UY-ValentinaNeural",
        "es-VE-PaolaNeural",
        "es-VE-SebastianNeural",
        "et-EE-AnuNeural",
        "et-EE-KertNeural",
        "eu-ES-AinhoaNeural",
        "eu-ES-AnderNeural",
        "fa-IR-DilaraNeural",
        "fa-IR-FaridNeural",
        "fi-FI-HarriNeural",
        "fi-FI-NooraNeural",
        "fi-FI-SelmaNeural",
        "fil-PH-AngeloNeural",
        "fil-PH-BlessicaNeural",
        "fr-BE-CharlineNeural",
        "fr-BE-GerardNeural",
        "fr-CA-AntoineNeural",
        "fr-CA-JeanNeural",
        "fr-CA-SylvieNeural",
        "fr-CA-ThierryNeural",
        "fr-CH-ArianeNeural",
        "fr-CH-FabriceNeural",
        "fr-FR-AlainNeural",
        "fr-FR-BrigitteNeural",
        "fr-FR-CelesteNeural",
        "fr-FR-ClaudeNeural",
        "fr-FR-CoralieNeural",
        "fr-FR-DeniseNeural",
        "fr-FR-EloiseNeural",
        "fr-FR-HenriNeural",
        "fr-FR-JacquelineNeural",
        "fr-FR-JeromeNeural",
        "fr-FR-JosephineNeural",
        "fr-FR-LucienMultilingualNeural",
        "fr-FR-MauriceNeural",
        "fr-FR-RemyMultilingualNeural",
        "fr-FR-VivienneMultilingualNeural",
        "fr-FR-YvesNeural",
        "fr-FR-YvetteNeural",
        "ga-IE-ColmNeural",
        "ga-IE-OrlaNeural",
        "gl-ES-RoiNeural",
        "gl-ES-SabelaNeural",
        "gu-IN-DhwaniNeural",
        "gu-IN-NiranjanNeural",
        "he-IL-AvriNeural",
        "he-IL-HilaNeural",
        "hi-IN-AaravNeural",
        "hi-IN-AartiNeural",
        "hi-IN-AnanyaNeural",
        "hi-IN-ArjunNeural",
        "hi-IN-KavyaNeural",
        "hi-IN-KunalNeural",
        "hi-IN-MadhurNeural",
        "hi-IN-RehaanNeural",
        "hi-IN-SwaraNeural",
        "hr-HR-GabrijelaNeural",
        "hr-HR-SreckoNeural",
        "hu-HU-NoemiNeural",
        "hu-HU-TamasNeural",
        "hy-AM-AnahitNeural",
        "hy-AM-HaykNeural",
        "id-ID-ArdiNeural",
        "id-ID-GadisNeural",
        "is-IS-GudrunNeural",
        "is-IS-GunnarNeural",
        "it-IT-AlessioMultilingualNeural",
        "it-IT-BenignoNeural",
        "it-IT-CalimeroNeural",
        "it-IT-CataldoNeural",
        "it-IT-DiegoNeural",
        "it-IT-ElsaNeural",
        "it-IT-FabiolaNeural",
        "it-IT-FiammaNeural",
        "it-IT-GianniNeural",
        "it-IT-GiuseppeMultilingualNeural",
        "it-IT-GiuseppeNeural",
        "it-IT-ImeldaNeural",
        "it-IT-IrmaNeural",
        "it-IT-IsabellaMultilingualNeural",
        "it-IT-IsabellaNeural",
        "it-IT-LisandroNeural",
        "it-IT-MarcelloMultilingualNeural",
        "it-IT-PalmiraNeural",
        "it-IT-PierinaNeural",
        "it-IT-RinaldoNeural",
        "iu-Cans-CA-SiqiniqNeural",
        "iu-Cans-CA-TaqqiqNeural",
        "iu-Latn-CA-SiqiniqNeural",
        "iu-Latn-CA-TaqqiqNeural",
        "ja-JP-AoiNeural",
        "ja-JP-DaichiNeural",
        "ja-JP-KeitaNeural",
        "ja-JP-MasaruMultilingualNeural",
        "ja-JP-MayuNeural",
        "ja-JP-NanamiNeural",
        "ja-JP-NaokiNeural",
        "ja-JP-ShioriNeural",
        "jv-ID-DimasNeural",
        "jv-ID-SitiNeural",
        "ka-GE-EkaNeural",
        "ka-GE-GiorgiNeural",
        "kk-KZ-AigulNeural",
        "kk-KZ-DauletNeural",
        "km-KH-PisethNeural",
        "km-KH-SreymomNeural",
        "kn-IN-GaganNeural",
        "kn-IN-SapnaNeural",
        "ko-KR-BongJinNeural",
        "ko-KR-GookMinNeural",
        "ko-KR-HyunsuMultilingualNeural",
        "ko-KR-HyunsuNeural",
        "ko-KR-InJoonNeural",
        "ko-KR-JiMinNeural",
        "ko-KR-SeoHyeonNeural",
        "ko-KR-SoonBokNeural",
        "ko-KR-SunHiNeural",
        "ko-KR-YuJinNeural",
        "lo-LA-ChanthavongNeural",
        "lo-LA-KeomanyNeural",
        "lt-LT-LeonasNeural",
        "lt-LT-OnaNeural",
        "lv-LV-EveritaNeural",
        "lv-LV-NilsNeural",
        "mk-MK-AleksandarNeural",
        "mk-MK-MarijaNeural",
        "ml-IN-MidhunNeural",
        "ml-IN-SobhanaNeural",
        "mn-MN-BataaNeural",
        "mn-MN-YesuiNeural",
        "mr-IN-AarohiNeural",
        "mr-IN-ManoharNeural",
        "ms-MY-OsmanNeural",
        "ms-MY-YasminNeural",
        "mt-MT-GraceNeural",
        "mt-MT-JosephNeural",
        "my-MM-NilarNeural",
        "my-MM-ThihaNeural",
        "nb-NO-FinnNeural",
        "nb-NO-IselinNeural",
        "nb-NO-PernilleNeural",
        "ne-NP-HemkalaNeural",
        "ne-NP-SagarNeural",
        "nl-BE-ArnaudNeural",
        "nl-BE-DenaNeural",
        "nl-NL-ColetteNeural",
        "nl-NL-FennaNeural",
        "nl-NL-MaartenNeural",
        "or-IN-SubhasiniNeural",
        "or-IN-SukantNeural",
        "pa-IN-OjasNeural",
        "pa-IN-VaaniNeural",
        "pl-PL-AgnieszkaNeural",
        "pl-PL-MarekNeural",
        "pl-PL-ZofiaNeural",
        "ps-AF-GulNawazNeural",
        "ps-AF-LatifaNeural",
        "pt-BR-AntonioNeural",
        "pt-BR-BrendaNeural",
        "pt-BR-DonatoNeural",
        "pt-BR-ElzaNeural",
        "pt-BR-FabioNeural",
        "pt-BR-FranciscaNeural",
        "pt-BR-GiovannaNeural",
        "pt-BR-HumbertoNeural",
        "pt-BR-JulioNeural",
        "pt-BR-LeilaNeural",
        "pt-BR-LeticiaNeural",
        "pt-BR-MacerioMultilingualNeural",
        "pt-BR-ManuelaNeural",
        "pt-BR-NicolauNeural",
        "pt-BR-ThalitaMultilingualNeural",
        "pt-BR-ThalitaNeural",
        "pt-BR-ValerioNeural",
        "pt-BR-YaraNeural",
        "pt-PT-DuarteNeural",
        "pt-PT-FernandaNeural",
        "pt-PT-RaquelNeural",
        "ro-RO-AlinaNeural",
        "ro-RO-EmilNeural",
        "ru-RU-DariyaNeural",
        "ru-RU-DmitryNeural",
        "ru-RU-SvetlanaNeural",
        "si-LK-SameeraNeural",
        "si-LK-ThiliniNeural",
        "sk-SK-LukasNeural",
        "sk-SK-ViktoriaNeural",
        "sl-SI-PetraNeural",
        "sl-SI-RokNeural",
        "so-SO-MuuseNeural",
        "so-SO-UbaxNeural",
        "sq-AL-AnilaNeural",
        "sq-AL-IlirNeural",
        "sr-Latn-RS-NicholasNeural",
        "sr-Latn-RS-SophieNeural",
        "sr-RS-NicholasNeural",
        "sr-RS-SophieNeural",
        "su-ID-JajangNeural",
        "su-ID-TutiNeural",
        "sv-SE-HilleviNeural",
        "sv-SE-MattiasNeural",
        "sv-SE-SofieNeural",
        "sw-KE-RafikiNeural",
        "sw-KE-ZuriNeural",
        "sw-TZ-DaudiNeural",
        "sw-TZ-RehemaNeural",
        "ta-IN-PallaviNeural",
        "ta-IN-ValluvarNeural",
        "ta-LK-KumarNeural",
        "ta-LK-SaranyaNeural",
        "ta-MY-KaniNeural",
        "ta-MY-SuryaNeural",
        "ta-SG-AnbuNeural",
        "ta-SG-VenbaNeural",
        "te-IN-MohanNeural",
        "te-IN-ShrutiNeural",
        "th-TH-AcharaNeural",
        "th-TH-NiwatNeural",
        "th-TH-PremwadeeNeural",
        "tr-TR-AhmetNeural",
        "tr-TR-EmelNeural",
        "uk-UA-OstapNeural",
        "uk-UA-PolinaNeural",
        "ur-IN-GulNeural",
        "ur-IN-SalmanNeural",
        "ur-PK-AsadNeural",
        "ur-PK-UzmaNeural",
        "uz-UZ-MadinaNeural",
        "uz-UZ-SardorNeural",
        "vi-VN-HoaiMyNeural",
        "vi-VN-NamMinhNeural",
        "wuu-CN-XiaotongNeural",
        "wuu-CN-YunzheNeural",
        "yue-CN-XiaoMinNeural",
        "yue-CN-YunSongNeural",
        "zh-CN-XiaochenMultilingualNeural",
        "zh-CN-XiaochenNeural",
        "zh-CN-XiaohanNeural",
        "zh-CN-XiaomengNeural",
        "zh-CN-XiaomoNeural",
        "zh-CN-XiaoqiuNeural",
        "zh-CN-XiaorouNeural",
        "zh-CN-XiaoruiNeural",
        "zh-CN-XiaoshuangNeural",
        "zh-CN-XiaoxiaoDialectsNeural",
        "zh-CN-XiaoxiaoMultilingualNeural",
        "zh-CN-XiaoxiaoNeural",
        "zh-CN-XiaoyanNeural",
        "zh-CN-XiaoyiNeural",
        "zh-CN-XiaoyouNeural",
        "zh-CN-XiaoyuMultilingualNeural",
        "zh-CN-XiaozhenNeural",
        "zh-CN-YunfanMultilingualNeural",
        "zh-CN-YunfengNeural",
        "zh-CN-YunhaoNeural",
        "zh-CN-YunjianNeural",
        "zh-CN-YunjieNeural",
        "zh-CN-YunxiNeural",
        "zh-CN-YunxiaNeural",
        "zh-CN-YunxiaoMultilingualNeural",
        "zh-CN-YunyangNeural",
        "zh-CN-YunyeNeural",
        "zh-CN-YunyiMultilingualNeural",
        "zh-CN-YunzeNeural",
        "zh-CN-guangxi-YunqiNeural",
        "zh-CN-henan-YundengNeural",
        "zh-CN-liaoning-XiaobeiNeural",
        "zh-CN-liaoning-YunbiaoNeural",
        "zh-CN-shaanxi-XiaoniNeural",
        "zh-CN-shandong-YunxiangNeural",
        "zh-CN-sichuan-YunxiNeural",
        "zh-HK-HiuGaaiNeural",
        "zh-HK-HiuMaanNeural",
        "zh-HK-WanLungNeural",
        "zh-TW-HsiaoChenNeural",
        "zh-TW-HsiaoYuNeural",
        "zh-TW-YunJheNeural",
        "zu-ZA-ThandoNeural",
        "zu-ZA-ThembaNeural"
      ],
      "parameters": {
        "input": {
          "required": true,
          "type": "string",
          "min_length": 1,
          "max_length": 5000,
          "description": "Text to convert to speech",
          "original_field": "text"
        },
        "voice": {
          "required": false,
          "type": "string",
          "default": "zh-CN-XiaoxiaoNeural",
          "description": "Voice name using short_name format (e.g., zh-CN-XiaoxiaoNeural)",
          "original_field": "voice_name"
        },
        "speech_rate": {
          "required": false,
          "type": "integer",
          "minimum": -100,
          "maximum": 100,
          "default": 0,
          "description": "Speech rate adjustment, range: -100 to 100"
        },
        "pitch_adjustment": {
          "required": false,
          "type": "integer",
          "minimum": -100,
          "maximum": 100,
          "default": 0,
          "description": "Pitch adjustment, range: -100 to 100"
        },
        "emotional_style": {
          "required": false,
          "type": "string",
          "description": "Emotional style (depends on voice's style_list, e.g., 'cheerful', 'sad', 'angry')"
        }
      }
    },
    {
      "name": "whisper-large-v3",
      "description": "",
      "id": "whisper-large-v3",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": null,
      "pricing": {
        "type": "per_request",
        "coefficient": 0.006
      },
      "endpoints": [
        "/v1/audio/transcriptions",
        "/v1/audio/translations"
      ],
      "premium_model": false
    },
    {
      "name": "distil-large-v3",
      "description": "",
      "id": "distil-large-v3",
      "object": "model",
      "created": 1776283310,
      "owned_by": "distil-whisper",
      "tokens": null,
      "pricing": {
        "type": "per_request",
        "coefficient": 0.006
      },
      "endpoints": [
        "/v1/audio/transcriptions",
        "/v1/audio/translations"
      ],
      "premium_model": false
    },
    {
      "name": "OpenAI: Text Embedding Ada 002",
      "description": "Text-embedding-ada-002 is a powerful model by OpenAI designed for generating high-dimensional text embeddings. These embeddings are numerical representations of text data, enabling a variety of natural language processing (NLP) tasks such as semantic search, clustering, and text classification.",
      "id": "text-embedding-ada-002",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1
      },
      "endpoints": [
        "/v1/embeddings"
      ],
      "premium_model": false
    },
    {
      "name": "OpenAI: Text Embedding 3 Large",
      "description": "Text-embedding-3-large is a robust language model by OpenAI designed for generating high-dimensional text embeddings for a wide range of natural language processing (NLP) tasks including semantic search, text clustering, and classification.",
      "id": "text-embedding-3-large",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.13
      },
      "endpoints": [
        "/v1/embeddings"
      ],
      "premium_model": false
    },
    {
      "name": "OpenAI: Text Embedding 3 Small",
      "description": "Text-embedding-3-small is a compact and efficient model developed for generating high-quality text embeddings. These embeddings are numerical representations of text data, enabling a variety of natural language processing (NLP) tasks such as semantic search, clustering, and text classification",
      "id": "text-embedding-3-small",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.02
      },
      "endpoints": [
        "/v1/embeddings"
      ],
      "premium_model": false
    },
    {
      "name": "Google: Gemini Embedding Exp 03 07",
      "description": "Gemini-embedding-exp-03-07 is an advanced model by Google designed for generating high-dimensional text embeddings. These embeddings are numerical representations of text data, enabling a variety of natural language processing (NLP) tasks such as semantic search, clustering, and text classification.",
      "id": "gemini-embedding-exp-03-07",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 8000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15
      },
      "endpoints": [
        "/v1/embeddings"
      ],
      "premium_model": false
    },
    {
      "name": "Google: Gemini Embedding 001",
      "description": "Gemini-embedding-001 is a state-of-the-art model by Google designed for generating high-dimensional text embeddings. These embeddings are numerical representations of text data, enabling a variety of natural language processing (NLP) tasks such as semantic search, clustering, and text classification.",
      "id": "gemini-embedding-001",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 2000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15
      },
      "endpoints": [
        "/v1/embeddings"
      ],
      "premium_model": false
    },
    {
      "name": "Google: Text Embedding 004",
      "description": "Text-embedding-004 is a state-of-the-art model by Google designed for generating high-dimensional text embeddings. These embeddings are numerical representations of text data, enabling a variety of natural language processing (NLP) tasks such as semantic search, clustering, and text classification.",
      "id": "text-embedding-004",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 2000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.025
      },
      "endpoints": [
        "/v1/embeddings"
      ],
      "premium_model": false
    },
    {
      "name": "Google: Embedding 001",
      "description": "Embedding-001 is a foundational model by Google designed for generating high-dimensional text embeddings. These embeddings are numerical representations of text data, enabling a variety of natural language processing (NLP) tasks such as semantic search, clustering, and text classification.",
      "id": "embedding-001",
      "object": "model",
      "created": 1776283310,
      "owned_by": "google",
      "tokens": 2000,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.025
      },
      "endpoints": [
        "/v1/embeddings"
      ],
      "premium_model": false
    },
    {
      "name": "Mistral: Mistral Embed",
      "description": "Official mistral-embed-2312 Mistral AI model",
      "id": "mistral-embed",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 8192,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1
      },
      "endpoints": [
        "/v1/embeddings"
      ],
      "premium_model": false
    },
    {
      "name": "Mistral: Mistral Embed 2312",
      "description": "Official mistral-embed-2312 Mistral AI model",
      "id": "mistral-embed-2312",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 8192,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.1
      },
      "endpoints": [
        "/v1/embeddings"
      ],
      "premium_model": false
    },
    {
      "name": "Mistral: Codestral Embed 2505",
      "description": "Official codestral-embed Mistral AI model",
      "id": "codestral-embed-2505",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 8192,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15
      },
      "endpoints": [
        "/v1/embeddings"
      ],
      "premium_model": false
    },
    {
      "name": "Mistral: Codestral Embed",
      "description": "Official codestral-embed Mistral AI model",
      "id": "codestral-embed",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 8192,
      "pricing": {
        "type": "per_million_tokens",
        "input": 0.15
      },
      "endpoints": [
        "/v1/embeddings"
      ],
      "premium_model": false
    },
    {
      "name": "OpenAI: Omni Moderation Latest",
      "description": "Identify potentially harmful content in text and images.",
      "id": "omni-moderation-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 32000,
      "pricing": {
        "type": "per_request",
        "coefficient": 0.001
      },
      "endpoints": [
        "/v1/moderations"
      ],
      "premium_model": false
    },
    {
      "name": "OpenAI: Omni Moderation 2024-09-26",
      "description": "Identify potentially harmful content in text and images.",
      "id": "omni-moderation-2024-09-26",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 32000,
      "pricing": {
        "type": "per_request",
        "coefficient": 0.001
      },
      "endpoints": [
        "/v1/moderations"
      ],
      "premium_model": false
    },
    {
      "name": "OpenAI: Text Moderation Latest",
      "description": "Identify potentially harmful content in text.",
      "id": "text-moderation-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 32000,
      "pricing": {
        "type": "per_request",
        "coefficient": 0.001
      },
      "endpoints": [
        "/v1/moderations"
      ],
      "premium_model": false
    },
    {
      "name": "OpenAI: Text Moderation Stable",
      "description": "Identify potentially harmful content in text.",
      "id": "text-moderation-stable",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 32000,
      "pricing": {
        "type": "per_request",
        "coefficient": 0.001
      },
      "endpoints": [
        "/v1/moderations"
      ],
      "premium_model": false
    },
    {
      "name": "OpenAI: Text Moderation 007",
      "description": "Identify potentially harmful content in text.",
      "id": "text-moderation-007",
      "object": "model",
      "created": 1776283310,
      "owned_by": "openai",
      "tokens": 32000,
      "pricing": {
        "type": "per_request",
        "coefficient": 0.001
      },
      "endpoints": [
        "/v1/moderations"
      ],
      "premium_model": false
    },
    {
      "name": "Mistral: Mistral Moderation 24.11",
      "description": "Official mistral-moderation-2411 Mistral AI model",
      "id": "mistral-moderation-2411",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 8192,
      "pricing": {
        "type": "per_request",
        "coefficient": 0.005
      },
      "endpoints": [
        "/v1/moderations"
      ],
      "premium_model": false
    },
    {
      "name": "Mistral: Mistral Moderation Latest",
      "description": "Official mistral-moderation-2411 Mistral AI model",
      "id": "mistral-moderation-latest",
      "object": "model",
      "created": 1776283310,
      "owned_by": "mistralai",
      "tokens": 8192,
      "pricing": {
        "type": "per_request",
        "coefficient": 0.005
      },
      "endpoints": [
        "/v1/moderations"
      ],
      "premium_model": false
    }
  ]
}