Skip to content

OpenAI Special Usage

This page demonstrates special usage patterns for calling advanced features (such as Claude Reasoning) via the OpenAI SDK, including Extended Thinking, Adaptive Thinking, and streaming inference.

Basic Configuration

Before you begin, make sure you have obtained an API Key. If not, please refer to Create API Key.

Basic Information

  • API Base URL: https://api.agentsflare.com/v1
  • Authentication: Bearer Token
  • Content Type: application/json

Claude Reasoning Compatibility Test

The following script uses the OpenAI SDK to test whether the relay supports Claude Reasoning features. It covers a model-list query, a basic request, Extended Thinking, Adaptive Thinking, and streaming output.

python
#!/usr/bin/env python3
# ============================================================
# test_claude_reasoning.py
# Test if a third-party relay supports Claude Reasoning via OpenAI SDK
# Dependency: pip install openai   (httpx, used for the HTTP fallback, is installed as a dependency of openai)
# ============================================================

import os
import json
import sys
from openai import OpenAI
import httpx

# ==================== Config Area ====================
# Every setting below can be overridden by an environment variable of the
# same name; the second argument is the default used when it is not set.

# Root URL of the relay's OpenAI-compatible API.
BASE_URL = os.getenv("BASE_URL", "https://api.agentsflare.com/v1")
# Key passed to the SDK client; the default is a placeholder, not a real key.
API_KEY = os.getenv("API_KEY", "sk-********")
# Model name sent with every chat-completion request.
MODEL = os.getenv("MODEL", "claude-sonnet-4-6")
# max_tokens used by the thinking tests (the basic test uses its own value).
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "16384"))
# Value sent as thinking.budget_tokens by the "enabled" thinking tests.
BUDGET_TOKENS = int(os.getenv("BUDGET_TOKENS", "8000"))
# =====================================================

# ANSI Colors
GREEN = "\033[92m"
RED = "\033[91m"
YELLOW = "\033[93m"
CYAN = "\033[96m"
RESET = "\033[0m"


def _emit(color, text):
    """Print *text* wrapped in *color* and followed by the reset sequence."""
    print(f"{color}{text}{RESET}")


def ok(msg):
    """Print *msg* in green (success)."""
    _emit(GREEN, msg)


def fail(msg):
    """Print *msg* in red (failure)."""
    _emit(RED, msg)


def warn(msg):
    """Print *msg* in yellow, prefixed with a warning sign."""
    _emit(YELLOW, f"⚠️  {msg}")


def info(msg):
    """Print *msg* in cyan (informational)."""
    _emit(CYAN, msg)

# Shared OpenAI SDK client; base_url sends every request to the relay
# configured in BASE_URL instead of the SDK's default endpoint.
client = OpenAI(base_url=BASE_URL, api_key=API_KEY)

# Outcome of each test keyed by its display name (True = passed).
# Filled in by the test_* functions and read by print_summary().
results: dict[str, bool] = dict()

# ============================================================
# Utility: print response summary
# ============================================================
def _block_field(block, name, default=None):
    """Read *name* from a content block that is either a dict or an object."""
    if isinstance(block, dict):
        return block.get(name, default)
    return getattr(block, name, default)


def _preview(text, limit):
    """Return *text* cut to *limit* characters, with '...' appended if cut."""
    return f"{text[:limit]}{'...' if len(text) > limit else ''}"


def print_response_summary(response):
    """Print the model name, token usage and a preview of every choice.

    Args:
        response: A chat-completion response exposing ``model``, ``choices``
            and, optionally, ``usage``. Each choice must expose ``message``.

    The message content is handled in two shapes:

    * a list of blocks (``type == "thinking"`` or ``type == "text"``); each
      block may be a plain dict or an object with attributes;
    * anything else is treated as the answer text (``None`` prints as empty).

    A non-empty string in ``message.reasoning_content`` is also printed as
    thinking output.
    NOTE(review): which field a given relay uses for reasoning text is not
    visible from this script — confirm against the relay's responses.
    """
    print(f"  Model       : {response.model}")

    # Relays may omit the usage object; report that instead of raising.
    usage = getattr(response, "usage", None)
    if usage is None:
        print("  Usage       : (not reported)")
    else:
        print(f"  Usage       : prompt={usage.prompt_tokens}, "
              f"completion={usage.completion_tokens}, "
              f"total={usage.total_tokens}")

    for choice in response.choices:
        msg = choice.message

        reasoning = getattr(msg, "reasoning_content", None)
        if isinstance(reasoning, str) and reasoning:
            print(f"  [Thinking]  : {_preview(reasoning, 120)}")

        content = getattr(msg, "content", None)
        if isinstance(content, list):
            for block in content:
                # Blocks arrive as dicts when the payload was not converted
                # into typed objects, so read fields in a shape-agnostic way.
                block_type = _block_field(block, "type")
                if block_type == "thinking":
                    think_text = _block_field(block, "thinking", "") or ""
                    print(f"  [Thinking]  : {_preview(think_text, 120)}")
                elif block_type == "text":
                    answer_text = _block_field(block, "text", "") or ""
                    print(f"  [Answer]    : {_preview(answer_text, 200)}")
        else:
            print(f"  [Answer]    : {_preview(content or '', 200)}")


# ============================================================
# Test 0: List supported models
# ============================================================
def _parse_models(raw):
    """Defensively parse various /models response formats"""
    # Standard OpenAI format: {"object":"list","data":[{"id":"..."}]}
    if isinstance(raw, dict):
        data = raw.get("data", [])
        if isinstance(data, list):
            for item in data:
                if isinstance(item, dict):
                    if "id" in item:
                        yield item["id"]
                    elif "model" in item:          # Some relays use model field
                        yield item["model"]
                elif isinstance(item, str):
                    yield item
    # Some relays return a JSON array directly
    elif isinstance(raw, list):
        for item in raw:
            if isinstance(item, dict):
                if "id" in item:
                    yield item["id"]
                elif "model" in item:
                    yield item["model"]
            elif isinstance(item, str):
                yield item


def test_list_models():
    """Test 0: list the models the relay offers and report the Claude ones.

    Tries ``client.models.list()`` first. If that raises, falls back to a
    direct HTTP GET on ``<BASE_URL>/models`` parsed with ``_parse_models``.
    Records ``results["Model List Query"]`` as False only when both attempts
    fail; an empty Claude list produces a warning but still counts as a pass.
    """
    info("\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
    info("[Test 0/4] Query supported Claude models")
    info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")

    claude_models = []

    try:
        # Attempt 1: the SDK call. Prefer the page's .data list when it has
        # one; otherwise iterate the returned object itself.
        listing = client.models.list()
        has_data_list = hasattr(listing, "data") and isinstance(listing.data, list)
        entries = listing.data if has_data_list else listing
        claude_models = [
            entry.id
            for entry in entries
            if hasattr(entry, "id") and "claude" in entry.id.lower()
        ]
    except Exception as sdk_err:
        warn(f"SDK parsing failed ({sdk_err}), trying direct HTTP request...")

        try:
            # Attempt 2: raw GET so non-standard response shapes can be parsed.
            reply = httpx.get(
                f"{BASE_URL.rstrip('/')}/models",
                headers={"Authorization": f"Bearer {API_KEY}"},
                timeout=30,
            )
            reply.raise_for_status()
            claude_models = [
                model_id
                for model_id in _parse_models(reply.json())
                if "claude" in model_id.lower()
            ]
        except Exception as http_err:
            fail(f"Query failed: SDK error={sdk_err}; HTTP fallback error={http_err}")
            results["Model List Query"] = False
            return

    if not claude_models:
        warn("No Claude models found. Please check if the relay supports Claude.")
    else:
        ok(f"Found {len(claude_models)} Claude models:")
        for name in sorted(claude_models):
            print(f"    • {name}")
    results["Model List Query"] = True


# ============================================================
# Test 1: Basic request (no Thinking)
# ============================================================
def test_basic():
    """Test 1: send a plain chat completion with no thinking parameters.

    Records the outcome in ``results["Basic Request"]``. Any exception from
    the request or from printing the summary counts as a failure.
    """
    info("\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
    info("[Test 1/4] Basic request (no Thinking)")
    info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")

    prompt = {"role": "user", "content": "Hello, please introduce yourself in one sentence."}
    succeeded = False
    try:
        reply = client.chat.completions.create(
            model=MODEL,
            max_tokens=1024,
            messages=[prompt],
        )
        ok("Basic request succeeded")
        print_response_summary(reply)
        succeeded = True
    except Exception as exc:
        fail(f"Basic request failed: {exc}")
    results["Basic Request"] = succeeded


# ============================================================
# Test 2: Extended Thinking (type: enabled + budget_tokens)
#   For Claude 3.7 Sonnet / Claude Sonnet 4.5 / Claude Opus 4.5
# ============================================================
def test_extended_thinking():
    """Test 2: request a completion with ``thinking.type = "enabled"``.

    The thinking settings are not standard OpenAI parameters, so they are
    sent through the SDK's ``extra_body`` argument, with BUDGET_TOKENS as
    ``budget_tokens``. Records the outcome in ``results["Extended Thinking"]``.
    """
    info("\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
    info("[Test 2/4] Extended Thinking (type: enabled + budget_tokens)")
    info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")

    question = "Please reason step by step: A farmer has 17 sheep. All but 9 died. How many are left alive?"
    thinking_config = {"type": "enabled", "budget_tokens": BUDGET_TOKENS}
    succeeded = False
    try:
        reply = client.chat.completions.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,
            messages=[{"role": "user", "content": question}],
            extra_body={"thinking": thinking_config},
        )
        ok("Extended Thinking request succeeded")
        print_response_summary(reply)
        succeeded = True
    except Exception as exc:
        fail(f"Extended Thinking failed: {exc}")
    results["Extended Thinking"] = succeeded


# ============================================================
# Test 3: Adaptive Thinking (type: adaptive, Claude Sonnet/Opus 4.6+)
# ============================================================
def test_adaptive_thinking():
    """Test 3: request a completion with ``thinking.type = "adaptive"``.

    No token budget is sent in this mode. A failure is reported as a warning
    rather than an error, but it is still recorded as False in
    ``results["Adaptive Thinking"]``.
    """
    info("\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
    info("[Test 3/4] Adaptive Thinking (type: adaptive, Claude 4.6+)")
    info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")

    question = "Please think deeply: If 3^x + 3^x + 3^x = 3^12, what is the value of x?"
    succeeded = False
    try:
        reply = client.chat.completions.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,
            messages=[{"role": "user", "content": question}],
            extra_body={"thinking": {"type": "adaptive"}},
        )
        ok("Adaptive Thinking request succeeded")
        print_response_summary(reply)
        succeeded = True
    except Exception as exc:
        warn(f"Adaptive Thinking failed (model may not support it): {exc}")
    results["Adaptive Thinking"] = succeeded


# ============================================================
# Test 4: Streaming + Extended Thinking
# ============================================================
def test_streaming_thinking():
    """Test 4: stream a completion with ``thinking.type = "enabled"``.

    Prints ``delta.content`` from each chunk as it arrives and stops once
    more than 300 characters have been shown. Records the outcome in
    ``results["Streaming + Thinking"]``; any exception while opening or
    reading the stream counts as a failure.

    The stream is closed explicitly once reading stops. The preview normally
    ends with an early ``break``, which would otherwise leave the underlying
    HTTP response open.
    """
    info("\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
    info("[Test 4/4] Streaming + Extended Thinking")
    info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
    try:
        stream = client.chat.completions.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,
            stream=True,
            messages=[
                {
                    "role": "user",
                    "content": "Please briefly explain what quantum entanglement is.",
                }
            ],
            # OpenAI SDK passes non-standard parameters via extra_body
            extra_body={
                "thinking": {
                    "type": "enabled",
                    "budget_tokens": BUDGET_TOKENS,
                }
            },
        )
        ok("Stream request established, receiving data...")
        print("  [Stream Output]:", end=" ", flush=True)
        char_count = 0
        try:
            for chunk in stream:
                # Some chunks carry no choices or no text; skip those.
                delta = chunk.choices[0].delta if chunk.choices else None
                if delta and delta.content:
                    print(delta.content, end="", flush=True)
                    char_count += len(delta.content)
                    if char_count > 300:          # Preview first 300 chars only
                        print(" ...(truncated)", flush=True)
                        break
            else:
                print()   # Stream ended on its own: finish the output line
        finally:
            # Release the connection whether the loop finished, broke out
            # early, or raised. close() is looked up defensively in case the
            # returned object does not provide it.
            close_stream = getattr(stream, "close", None)
            if callable(close_stream):
                try:
                    close_stream()
                except Exception as close_err:
                    # Cleanup is best-effort; do not turn it into a test failure.
                    warn(f"Could not close stream cleanly: {close_err}")
        ok("Streaming + Thinking completed")
        results["Streaming + Thinking"] = True
    except Exception as e:
        fail(f"Streaming + Thinking failed: {e}")
        results["Streaming + Thinking"] = False


# ============================================================
# Summary Report
# ============================================================
def print_summary():
    """Print the pass/fail table, an overall verdict and the configuration.

    Reads the module-level ``results`` dict. The verdict is:

    * a warning that nothing ran, when ``results`` is empty (otherwise
      ``0 == 0`` would be reported as "All passed");
    * "All passed" when every recorded test passed;
    * "Partially passed" when at least two passed;
    * "Mostly failed" otherwise.
    """
    info("\n╔══════════════════════════════════════════╗")
    info("║           📊  Test Results Summary        ║")
    info("╚══════════════════════════════════════════╝")
    passed = sum(1 for v in results.values() if v)
    total  = len(results)
    for name, status in results.items():
        mark = f"{GREEN}✅ Pass{RESET}" if status else f"{RED}❌ Fail{RESET}"
        print(f"  {name:<24} {mark}")
    print()
    if total == 0:
        # Guard against a vacuous "all passed" verdict when no test recorded a result.
        warn("No tests were run — nothing to report")
    elif passed == total:
        ok(f"All passed {passed}/{total} — Relay fully supports Claude Reasoning 🎉")
    elif passed >= 2:
        warn(f"Partially passed {passed}/{total} — Relay supports basic Claude features, but Reasoning support is incomplete")
    else:
        fail(f"Mostly failed {passed}/{total} — Please check BASE_URL / API_KEY / MODEL configuration")

    # Echo the effective configuration (API_KEY is deliberately not printed).
    print(f"""
{YELLOW}📌 Configuration:{RESET}
  BASE_URL      = {BASE_URL}
  MODEL         = {MODEL}
  MAX_TOKENS    = {MAX_TOKENS}
  BUDGET_TOKENS = {BUDGET_TOKENS}

{YELLOW}📌 Notes:{RESET}
  • Extended Thinking (type:enabled)  → Claude 3.7 / 4.5 series
  • Adaptive Thinking (type:adaptive) → Claude 4.6+ series
  • OpenAI SDK passes thinking parameters via extra_body
""")


# ============================================================
# Main Entry
# ============================================================
if __name__ == "__main__":
    # Startup banner followed by the effective configuration.
    startup_lines = (
        "╔══════════════════════════════════════════╗",
        "║  Claude Reasoning Relay Compatibility Test ║",
        "╚══════════════════════════════════════════╝",
        f"🔗 API URL   : {BASE_URL}",
        f"🤖 Model     : {MODEL}",
        f"🧠 Budget    : {BUDGET_TOKENS} tokens",
    )
    for line in startup_lines:
        info(line)

    # Run the tests in order; each records its own outcome in `results`.
    for run_test in (
        test_list_models,
        test_basic,
        test_extended_thinking,
        test_adaptive_thinking,
        test_streaming_thinking,
    ):
        run_test()

    print_summary()

Key Parameters

| Parameter | Type | Description |
| --- | --- | --- |
| extra_body | dict | Field in the OpenAI SDK for passing non-standard parameters |
| thinking.type | string | enabled (Claude 3.7 / 4.5 series) or adaptive (Claude 4.6+ series) |
| thinking.budget_tokens | int | Maximum token budget allocated for the reasoning process |

Supported Models

The following Claude models support Reasoning via this method:

  • claude-sonnet-4-6 New
  • claude-opus-4-6
  • claude-sonnet-4-5
  • claude-opus-4-5
  • claude-3-7-sonnet

💡 Tip

  • Passing thinking parameters via extra_body is the recommended way to call Claude Reasoning with the OpenAI SDK
  • adaptive mode is only available for Claude 4.6 and above
  • During streaming output, thinking content blocks may be interleaved with regular content blocks

This documentation is licensed under CC BY-SA 4.0.