OpenAI Special Usage
This page demonstrates special usage patterns for calling advanced features (such as Claude Reasoning) via the OpenAI SDK, including Extended Thinking, Adaptive Thinking, and streaming inference.
Basic Configuration
Before you begin, make sure you have obtained an API Key. If not, please refer to Create API Key.
Basic Information
- API Base URL: https://api.agentsflare.com/v1
- Authentication: Bearer Token
- Content Type: application/json
Claude Reasoning Compatibility Test
The following script uses the OpenAI SDK to test whether the relay station supports Claude Reasoning features, covering model list queries, basic requests, Extended Thinking, Adaptive Thinking, and streaming output tests.
python
#!/usr/bin/env python3
# ============================================================
# test_claude_reasoning.py
# Test if a third-party relay supports Claude Reasoning via OpenAI SDK
# Dependency: pip install openai
# ============================================================
import os
import json
import sys
from openai import OpenAI
import httpx
# ==================== Config Area ====================
# Each setting is read from the environment variable of the same name; the
# literal is the fallback used when that variable is unset.
BASE_URL = os.getenv("BASE_URL", "https://api.agentsflare.com/v1")
API_KEY = os.getenv("API_KEY", "sk-********")  # masked placeholder value
MODEL = os.getenv("MODEL", "claude-sonnet-4-6")
# Sent as max_tokens by the thinking tests.
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "16384"))
# Sent as thinking.budget_tokens by the tests that use type "enabled".
BUDGET_TOKENS = int(os.getenv("BUDGET_TOKENS", "8000"))
# =====================================================
# ANSI Colors
GREEN = "\033[92m"
RED = "\033[91m"
YELLOW = "\033[93m"
CYAN = "\033[96m"
RESET = "\033[0m"


def _paint(color, text):
    """Print *text* wrapped in *color* and terminated by the reset sequence."""
    print(f"{color}{text}{RESET}")


def ok(msg):
    """Print *msg* in the GREEN colour behind a check-mark prefix."""
    _paint(GREEN, f"✅ {msg}")


def fail(msg):
    """Print *msg* in the RED colour behind a cross-mark prefix."""
    _paint(RED, f"❌ {msg}")


def warn(msg):
    """Print *msg* in the YELLOW colour behind a warning-sign prefix."""
    _paint(YELLOW, f"⚠️ {msg}")


def info(msg):
    """Print *msg* in the CYAN colour with no prefix."""
    _paint(CYAN, msg)
# Initialize OpenAI client (pointing to relay)
# One shared SDK client for every test; base_url sends its requests to BASE_URL.
client = OpenAI(api_key=API_KEY, base_url=BASE_URL)

# Outcome of each test keyed by its display name (True = passed).
# Filled in by the test_* functions and read by print_summary().
results: dict[str, bool] = {}
# ============================================================
# Utility: print response summary
# ============================================================
def _clip(text, limit):
    """Return at most *limit* characters of *text*, adding '...' when cut.

    ``None`` is treated as an empty string.
    """
    text = text or ""
    return f"{text[:limit]}{'...' if len(text) > limit else ''}"


def _block_attr(block, name):
    """Read field *name* from a content block given as a dict or an object.

    Returns ``None`` when the field is absent.
    """
    if isinstance(block, dict):
        return block.get(name)
    return getattr(block, name, None)


def print_response_summary(response):
    """Print the model name, token usage and a preview of every choice.

    Args:
        response: A chat-completion result exposing ``model`` and
            ``choices`` and, optionally, ``usage``. Each choice's
            ``message.content`` may be a string, ``None``, or a list of
            content blocks (dicts or objects) carrying a ``type`` field.

    Returns:
        None. The function only prints.
    """
    print(f" Model : {response.model}")

    # usage is optional on a completion; report its absence instead of raising.
    usage = getattr(response, "usage", None)
    if usage is None:
        print(" Usage : (not reported)")
    else:
        print(f" Usage : prompt={usage.prompt_tokens}, "
              f"completion={usage.completion_tokens}, "
              f"total={usage.total_tokens}")

    for choice in response.choices:
        content = getattr(choice.message, "content", None)
        if not isinstance(content, list):
            # Plain string (or missing) content: show it as the answer.
            print(f" [Answer] : {_clip(content, 200)}")
            continue
        # List content: thinking blocks and text blocks are shown separately;
        # blocks of any other type are ignored.
        for block in content:
            kind = _block_attr(block, "type")
            if kind == "thinking":
                thinking = _clip(_block_attr(block, "thinking"), 120)
                print(f" [Thinking] : {thinking}")
            elif kind == "text":
                answer = _clip(_block_attr(block, "text"), 200)
                print(f" [Answer] : {answer}")
# ============================================================
# Test 0: List supported models
# ============================================================
def _parse_models(raw):
"""Defensively parse various /models response formats"""
# Standard OpenAI format: {"object":"list","data":[{"id":"..."}]}
if isinstance(raw, dict):
data = raw.get("data", [])
if isinstance(data, list):
for item in data:
if isinstance(item, dict):
if "id" in item:
yield item["id"]
elif "model" in item: # Some relays use model field
yield item["model"]
elif isinstance(item, str):
yield item
# Some relays return a JSON array directly
elif isinstance(raw, list):
for item in raw:
if isinstance(item, dict):
if "id" in item:
yield item["id"]
elif "model" in item:
yield item["model"]
elif isinstance(item, str):
yield item
def test_list_models():
    """List the relay's models and print those whose id contains "claude".

    The SDK's ``client.models.list()`` is tried first. If it raises, the
    same endpoint is fetched with a plain HTTP GET and decoded by
    ``_parse_models``. ``results["Model List Query"]`` is set to False only
    when both attempts fail; an empty match list still counts as a pass
    (with a warning).
    """
    rule = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    info("\n" + rule)
    info("[Test 0/4] Query supported Claude models")
    info(rule)

    def is_claude(model_id):
        return "claude" in model_id.lower()

    claude_models = []
    try:
        # 1) Standard SDK call. Use the .data list when present, otherwise
        #    iterate the returned object directly.
        listing = client.models.list()
        has_data_list = hasattr(listing, "data") and isinstance(listing.data, list)
        entries = listing.data if has_data_list else listing
        claude_models = [
            entry.id for entry in entries
            if hasattr(entry, "id") and is_claude(entry.id)
        ]
    except Exception as sdk_err:
        warn(f"SDK parsing failed ({sdk_err}), trying direct HTTP request...")
        # 2) Fallback: direct HTTP GET to /v1/models. Kept inside this
        #    handler because sdk_err is only bound here.
        try:
            endpoint = BASE_URL.rstrip("/") + "/models"
            auth_header = {"Authorization": f"Bearer {API_KEY}"}
            http_resp = httpx.get(endpoint, headers=auth_header, timeout=30)
            http_resp.raise_for_status()
            payload = http_resp.json()
            claude_models = [
                name for name in list(_parse_models(payload)) if is_claude(name)
            ]
        except Exception as http_err:
            fail(f"Query failed: SDK error={sdk_err}; HTTP fallback error={http_err}")
            results["Model List Query"] = False
            return

    if not claude_models:
        warn("No Claude models found. Please check if the relay supports Claude.")
    else:
        ok(f"Found {len(claude_models)} Claude models:")
        for name in sorted(claude_models):
            print(f" • {name}")
    results["Model List Query"] = True
# ============================================================
# Test 1: Basic request (no Thinking)
# ============================================================
def test_basic():
    """Send one plain chat completion (no thinking parameters).

    ``results["Basic Request"]`` becomes True when the request and the
    summary printout both finish without raising, False otherwise.
    """
    rule = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    info("\n" + rule)
    info("[Test 1/4] Basic request (no Thinking)")
    info(rule)

    prompt = {
        "role": "user",
        "content": "Hello, please introduce yourself in one sentence.",
    }
    passed = False
    try:
        response = client.chat.completions.create(
            model=MODEL,
            max_tokens=1024,
            messages=[prompt],
        )
        ok("Basic request succeeded")
        print_response_summary(response)
        passed = True
    except Exception as e:
        fail(f"Basic request failed: {e}")
    results["Basic Request"] = passed
# ============================================================
# Test 2: Extended Thinking (type: enabled + budget_tokens)
# For Claude 3.7 Sonnet / Claude Sonnet 4.5 / Claude Opus 4.5
# ============================================================
def test_extended_thinking():
    """Send a chat completion with ``thinking.type = "enabled"``.

    The thinking settings are not part of the OpenAI request schema, so
    they are passed through the SDK's ``extra_body`` argument.
    ``results["Extended Thinking"]`` becomes True when the request and the
    summary printout both finish without raising, False otherwise.
    """
    rule = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    info("\n" + rule)
    info("[Test 2/4] Extended Thinking (type: enabled + budget_tokens)")
    info(rule)

    question = "Please reason step by step: A farmer has 17 sheep. All but 9 died. How many are left alive?"
    thinking_config = {"type": "enabled", "budget_tokens": BUDGET_TOKENS}
    passed = False
    try:
        response = client.chat.completions.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,
            messages=[{"role": "user", "content": question}],
            # OpenAI SDK passes non-standard parameters via extra_body
            extra_body={"thinking": thinking_config},
        )
        ok("Extended Thinking request succeeded")
        print_response_summary(response)
        passed = True
    except Exception as e:
        fail(f"Extended Thinking failed: {e}")
    results["Extended Thinking"] = passed
# ============================================================
# Test 3: Adaptive Thinking (type: adaptive, Claude Sonnet/Opus 4.6+)
# ============================================================
def test_adaptive_thinking():
    """Send a chat completion with ``thinking.type = "adaptive"``.

    No token budget is sent in this mode. A failure is reported with
    ``warn`` rather than ``fail`` because the configured model may simply
    not support it. ``results["Adaptive Thinking"]`` becomes True when the
    request and the summary printout both finish without raising, False
    otherwise.
    """
    rule = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    info("\n" + rule)
    info("[Test 3/4] Adaptive Thinking (type: adaptive, Claude 4.6+)")
    info(rule)

    question = "Please think deeply: If 3^x + 3^x + 3^x = 3^12, what is the value of x?"
    passed = False
    try:
        response = client.chat.completions.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,
            messages=[{"role": "user", "content": question}],
            extra_body={"thinking": {"type": "adaptive"}},
        )
        ok("Adaptive Thinking request succeeded")
        print_response_summary(response)
        passed = True
    except Exception as e:
        warn(f"Adaptive Thinking failed (model may not support it): {e}")
    results["Adaptive Thinking"] = passed
# ============================================================
# Test 4: Streaming + Extended Thinking
# ============================================================
def test_streaming_thinking():
    """Stream a chat completion with ``thinking.type = "enabled"``.

    Prints the streamed ``delta.content`` text as it arrives and stops
    after a little more than 300 characters. The stream is always closed
    afterwards, including when the preview is cut short.
    ``results["Streaming + Thinking"]`` becomes True when the stream is
    consumed without raising, False otherwise.
    """
    info("\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
    info("[Test 4/4] Streaming + Extended Thinking")
    info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
    try:
        stream = client.chat.completions.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,
            stream=True,
            messages=[
                {
                    "role": "user",
                    "content": "Please briefly explain what quantum entanglement is.",
                }
            ],
            # OpenAI SDK passes non-standard parameters via extra_body
            extra_body={
                "thinking": {
                    "type": "enabled",
                    "budget_tokens": BUDGET_TOKENS,
                }
            },
        )
        ok("Stream request established, receiving data...")
        print(" [Stream Output]:", end=" ", flush=True)
        char_count = 0
        try:
            for chunk in stream:
                # Some chunks carry no choices; skip them.
                delta = chunk.choices[0].delta if chunk.choices else None
                if delta and delta.content:
                    print(delta.content, end="", flush=True)
                    char_count += len(delta.content)
                    if char_count > 300:  # Preview first 300 chars only
                        print(" ...(truncated)", flush=True)
                        break
            else:
                # Stream ended on its own: finish the output line.
                print()
        finally:
            # After an early break the iterator is not exhausted, so close
            # the stream explicitly rather than leaving the response open.
            close = getattr(stream, "close", None)
            if callable(close):
                close()
        ok("Streaming + Thinking completed")
        results["Streaming + Thinking"] = True
    except Exception as e:
        fail(f"Streaming + Thinking failed: {e}")
        results["Streaming + Thinking"] = False
# ============================================================
# Summary Report
# ============================================================
def print_summary():
    """Print the pass/fail table for ``results`` and the active settings.

    The verdict line depends on how many entries passed:
      * no entries at all  -> warning that nothing was run
      * every entry passed -> success
      * at least two       -> partial-support warning
      * fewer than two     -> failure, pointing at the configuration
    """
    info("\n╔══════════════════════════════════════════╗")
    info("║ 📊 Test Results Summary ║")
    info("╚══════════════════════════════════════════╝")
    total = len(results)
    passed = sum(1 for outcome in results.values() if outcome)
    for name, status in results.items():
        mark = f"{GREEN}✅ Pass{RESET}" if status else f"{RED}❌ Fail{RESET}"
        print(f" {name:<24} {mark}")
    print()
    if total == 0:
        # 0 == 0 would otherwise be reported as "all passed".
        warn("No tests were run — nothing to report")
    elif passed == total:
        ok(f"All passed {passed}/{total} — Relay fully supports Claude Reasoning 🎉")
    elif passed >= 2:
        warn(f"Partially passed {passed}/{total} — Relay supports basic Claude features, but Reasoning support is incomplete")
    else:
        fail(f"Mostly failed {passed}/{total} — Please check BASE_URL / API_KEY / MODEL configuration")
    print(f"""
{YELLOW}📌 Configuration:{RESET}
BASE_URL = {BASE_URL}
MODEL = {MODEL}
MAX_TOKENS = {MAX_TOKENS}
BUDGET_TOKENS = {BUDGET_TOKENS}
{YELLOW}📌 Notes:{RESET}
• Extended Thinking (type:enabled) → Claude 3.7 / 4.5 series
• Adaptive Thinking (type:adaptive) → Claude 4.6+ series
• OpenAI SDK passes thinking parameters via extra_body
""")
# ============================================================
# Main Entry
# ============================================================
if __name__ == "__main__":
    # Banner with the effective settings.
    for banner_line in (
        "╔══════════════════════════════════════════╗",
        "║ Claude Reasoning Relay Compatibility Test ║",
        "╚══════════════════════════════════════════╝",
        f"🔗 API URL : {BASE_URL}",
        f"🤖 Model : {MODEL}",
        f"🧠 Budget : {BUDGET_TOKENS} tokens",
    ):
        info(banner_line)
    # Run every test in its numbered order; each records its own outcome.
    for run_test in (
        test_list_models,
        test_basic,
        test_extended_thinking,
        test_adaptive_thinking,
        test_streaming_thinking,
    ):
        run_test()
    print_summary()

Key Parameters
| Parameter | Type | Description |
|---|---|---|
| extra_body | dict | Field in OpenAI SDK for passing non-standard parameters |
| thinking.type | string | enabled (Claude 3.7 / 4.5 series) or adaptive (Claude 4.6+ series) |
| thinking.budget_tokens | int | Maximum token budget allocated for the reasoning process |
Supported Models
The following Claude models support Reasoning via this method:
- claude-sonnet-4-6 (New)
- claude-opus-4-6
- claude-sonnet-4-5
- claude-opus-4-5
- claude-3-7-sonnet
💡 Tip
- Passing `thinking` parameters via `extra_body` is the recommended way to call Claude Reasoning with the OpenAI SDK
- `adaptive` mode is only available for Claude 4.6 and above
- During streaming output, thinking content blocks may be interleaved with regular content blocks