Skip to content

GLM API Examples

This page provides examples of using the Agentsflare GLM API to help you quickly integrate and use Zhipu AI's GLM series models.

Basic Configuration

Before starting to use the API, please ensure you have obtained an API Key. If not, please refer to Create API Key.

Basic Information

  • API Base URL: https://api.agentsflare.com/v1/chat/completions
  • Authentication Method: Bearer Token
  • Content Type: application/json

Request Examples

bash
# Minimal non-streaming chat-completion request against the Agentsflare
# GLM endpoint. Replace YOUR_API_KEY with a real key (prefer reading it
# from an environment variable in real scripts — see "Important Notes").
curl -X POST "https://api.agentsflare.com/v1/chat/completions" \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "glm-5",
    "messages": [
      {
        "role": "user",
        "content": "Hello, please introduce the GLM model"
      }
    ],
    "max_tokens": 1024,
    "temperature": 0.7
  }'
python
import os

from openai import OpenAI

# Read the key from the environment instead of hardcoding a literal —
# matches the JS/Java/Go examples and "Important Notes" item 1
# (API Key Security).
client = OpenAI(
    base_url="https://api.agentsflare.com/v1",
    api_key=os.environ["AGENTSFLARE_API_KEY"],
)

# Non-streaming request: the complete answer is returned in one response.
response = client.chat.completions.create(
    model="glm-5",
    messages=[
        {"role": "user", "content": "Hello, please introduce the GLM model"}
    ],
    max_tokens=1024,
    temperature=0.7,
    stream=False,
)

print(response.choices[0].message.content)
python
import os

from openai import OpenAI

# Environment-based key, consistent with the other language examples.
client = OpenAI(
    base_url="https://api.agentsflare.com/v1",
    api_key=os.environ["AGENTSFLARE_API_KEY"],
)

stream = client.chat.completions.create(
    model="glm-5",
    messages=[
        {"role": "user", "content": "Hello, please introduce the GLM model"}
    ],
    max_tokens=1024,
    temperature=0.7,
    stream=True,
)

is_thinking = False   # has the "Thinking" header been printed?
is_answering = False  # has the "Answer" header been printed?

for chunk in stream:
    if not chunk.choices:
        continue

    delta = chunk.choices[0].delta

    # Reasoning process (GLM-5 emits reasoning_content before the answer).
    reasoning = getattr(delta, "reasoning_content", None)
    if reasoning is not None:
        # Bug fix: set a flag so the header prints once, not on every
        # reasoning chunk (the original never recorded that the thinking
        # phase had started).
        if not is_thinking:
            is_thinking = True
            print("🤔 Thinking...\n")
        print(reasoning, end="", flush=True)

    # Formal answer
    if delta.content is not None:
        if not is_answering:
            is_answering = True
            print("\n\n💬 Answer:\n")
        print(delta.content, end="", flush=True)

print()
javascript
import OpenAI from "openai";

// Client configured for the Agentsflare OpenAI-compatible endpoint.
const client = new OpenAI({
  apiKey: process.env.AGENTSFLARE_API_KEY,
  baseURL: "https://api.agentsflare.com/v1",
});

/**
 * Send one non-streaming chat request and print the model's answer.
 */
async function main() {
  try {
    const completion = await client.chat.completions.create({
      model: "glm-5",
      messages: [
        { role: "user", content: "Hello, please introduce the GLM model" },
      ],
      max_tokens: 1024,
      temperature: 0.7,
    });

    const [firstChoice] = completion.choices;
    console.log(firstChoice.message.content);
  } catch (err) {
    console.error(err?.response?.data ?? err);
  }
}

main();
javascript
import OpenAI from "openai";

const client = new OpenAI({
  apiKey: process.env.AGENTSFLARE_API_KEY,
  baseURL: "https://api.agentsflare.com/v1",
});

/**
 * Stream a chat completion and print tokens as they arrive.
 * GLM-5 may emit a reasoning phase (`reasoning_content`) before the
 * final answer (`content`); the original example dropped the reasoning
 * chunks entirely — both are handled here, matching the "Reasoning
 * Process" feature described on this page.
 */
async function main() {
  try {
    const stream = await client.chat.completions.create({
      model: "glm-5",
      messages: [{ role: "user", content: "Hello, please introduce the GLM model" }],
      max_tokens: 1024,
      temperature: 0.7,
      stream: true
    });

    for await (const chunk of stream) {
      const delta = chunk.choices[0]?.delta;
      if (!delta) continue;

      // Reasoning process (emitted before the formal answer).
      if (delta.reasoning_content) {
        process.stdout.write(delta.reasoning_content);
      }

      // Formal answer.
      if (delta.content) {
        process.stdout.write(delta.content);
      }
    }
  } catch (err) {
    console.error(err?.response?.data ?? err);
  }
}

main();
java
// Non-streaming chat completion via the OpenAI Java SDK pointed at the
// Agentsflare-compatible endpoint.
//
// NOTE(review): the message-builder API shown below
// (ChatCompletionCreateParams.Message.builder() with a Role enum) may not
// match current openai-java releases, which expose convenience methods
// such as addUserMessage(String) — verify against the SDK version in use.
import com.openai.client.OpenAIClient;
import com.openai.client.okhttp.OpenAIOkHttpClient;
import com.openai.models.chat.completions.ChatCompletionCreateParams;
import com.openai.models.chat.completions.ChatCompletion;

public class Main {
  public static void main(String[] args) {
    // Key comes from the environment (never hardcode it).
    String apiKey = System.getenv("AGENTSFLARE_API_KEY"); 
    if (apiKey == null || apiKey.isBlank()) {
      throw new IllegalStateException("Missing AGENTSFLARE_API_KEY env var");
    }

    // Client bound to the Agentsflare base URL instead of api.openai.com.
    OpenAIClient client = OpenAIOkHttpClient.builder()
        .apiKey(apiKey)
        .baseUrl("https://api.agentsflare.com/v1")
        .build();

    // Build the request: one user message, capped at 1024 output tokens.
    ChatCompletionCreateParams params = ChatCompletionCreateParams.builder()
        .model("glm-5")
        .addMessage(ChatCompletionCreateParams.Message.builder()
            .role(ChatCompletionCreateParams.Message.Role.USER)
            .content("Hello, please introduce the GLM model")
            .build())
        .maxTokens(1024)
        .temperature(0.7)
        .build();

    // Blocking call; throws on HTTP/transport errors.
    ChatCompletion res = client.chat().completions().create(params);

    // Print only the first choice's text content.
    String content = res.choices().get(0).message().content();
    System.out.println(content);
  }
}
go
// Non-streaming chat completion via the openai-go SDK pointed at the
// Agentsflare-compatible endpoint.
//
// NOTE(review): the openai.F(...) field-wrapper style shown here belongs
// to pre-v1 releases of github.com/openai/openai-go; v1+ uses plain
// struct fields (e.g. Model: "glm-5", openai.Int, openai.Float).
// Verify against the module version pinned in go.mod.
package main

import (
	"context"
	"fmt"
	"log"
	"os"

	openai "github.com/openai/openai-go"
	"github.com/openai/openai-go/option"
)

func main() {
	// Key comes from the environment (never hardcode it).
	apiKey := os.Getenv("AGENTSFLARE_API_KEY")
	if apiKey == "" {
		log.Fatal("missing env AGENTSFLARE_API_KEY")
	}

	// Client bound to the Agentsflare base URL instead of api.openai.com.
	client := openai.NewClient(
		option.WithAPIKey(apiKey),
		option.WithBaseURL("https://api.agentsflare.com/v1"),
	)

	ctx := context.Background()

	// One user message, capped at 1024 output tokens, temperature 0.7.
	resp, err := client.Chat.Completions.New(ctx, openai.ChatCompletionNewParams{
		Model: openai.F("glm-5"),
		Messages: openai.F([]openai.ChatCompletionMessageParamUnion{
			openai.UserMessage("Hello, please introduce the GLM model"),
		}),
		MaxTokens:   openai.F(int64(1024)),
		Temperature: openai.F(0.7),
	})
	if err != nil {
		log.Fatalf("chat completion failed: %v", err)
	}

	// Guard against an empty choices slice before indexing.
	if len(resp.Choices) > 0 && resp.Choices[0].Message.Content != "" {
		fmt.Println(resp.Choices[0].Message.Content)
	} else {
		fmt.Printf("empty response: %+v\n", resp)
	}
}
javascript
const { OpenAI } = require("openai");

// CommonJS variant of the non-streaming example; same endpoint and
// parameters as the ESM version above.
const client = new OpenAI({
  apiKey: process.env.AGENTSFLARE_API_KEY,
  baseURL: "https://api.agentsflare.com/v1",
});

/**
 * Request one completion and print the first choice's content.
 */
async function main() {
  try {
    const completion = await client.chat.completions.create({
      model: "glm-5",
      temperature: 0.7,
      max_tokens: 1024,
      messages: [
        { role: "user", content: "Hello, please introduce the GLM model" },
      ],
    });

    const answer = completion.choices[0].message.content;
    console.log(answer);
  } catch (err) {
    console.error(err?.response?.data ?? err);
  }
}

main();

Response Examples

Non-streaming Response

json
{
  "id": "chatcmpl-123456",
  "object": "chat.completion",
  "created": 1677652288,
  "model": "glm-5",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Hello! GLM (General Language Model) is a series of general language models developed by Zhipu AI. GLM-4 is its latest generation model, featuring powerful natural language understanding and generation capabilities, supporting multi-turn conversations, knowledge Q&A, content creation, and various application scenarios. The GLM model employs advanced training techniques and excels in both Chinese and English processing."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 15,
    "completion_tokens": 85,
    "total_tokens": 100
  }
}

Streaming Response

json
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"glm-5","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}

data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"glm-5","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}

data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"glm-5","choices":[{"index":0,"delta":{"content":" GLM"},"finish_reason":null}]}

data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"glm-5","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}

data: [DONE]

Request Parameters

| Parameter   | Type    | Required | Description                                    |
| ----------- | ------- | -------- | ---------------------------------------------- |
| model       | string  | Yes      | Model name, e.g., glm-5, glm-4                 |
| messages    | array   | Yes      | Array of messages with role and content        |
| max_tokens  | integer | No       | Maximum tokens to generate, default 1024       |
| temperature | float   | No       | Sampling temperature, range 0-2, default 0.95  |
| top_p       | float   | No       | Nucleus sampling parameter, default 0.7        |
| stream      | boolean | No       | Enable streaming response, default false       |

Features

Reasoning Process

GLM-5 supports displaying the reasoning process. In streaming responses, the model first outputs the thinking process (reasoning_content), then outputs the final answer (content). This helps understand the model's reasoning logic.

python
# Fragment from the streaming loop above: `delta` is one chunk's
# `choices[0].delta` object.
# Reasoning process
# getattr() is used because non-reasoning models omit reasoning_content.
reasoning = getattr(delta, "reasoning_content", None)
if reasoning is not None:
    print(reasoning, end="", flush=True)

# Formal answer
if delta.content is not None:
    print(delta.content, end="", flush=True)

Multi-turn Conversations

GLM supports multi-turn conversations by including history in the messages array:

python
# Multi-turn conversation: prior user/assistant turns are replayed in
# order so the model has context for the final question.
messages = [
    {"role": "user", "content": "What is artificial intelligence?"},
    {"role": "assistant", "content": "Artificial Intelligence (AI) is a branch of computer science..."},
    {"role": "user", "content": "What are its application fields?"}
]

# `client` is the OpenAI client configured earlier on this page.
completion = client.chat.completions.create(
    model="glm-5",
    messages=messages
)

Streaming Output

GLM API supports streaming output (SSE) by setting stream: true. Streaming responses allow real-time content generation, providing a better user experience.

Chinese Optimization

GLM models are deeply optimized for Chinese, excelling in Chinese understanding, generation, and reasoning tasks, making them particularly suitable for Chinese application scenarios.

Use Cases

  • Intelligent Dialogue: Customer service bots, virtual assistants
  • Content Creation: Article writing, copywriting generation
  • Knowledge Q&A: Knowledge base retrieval, question answering
  • Code Generation: Programming assistance, code explanation
  • Text Analysis: Sentiment analysis, text classification

Important Notes

  1. API Key Security: Do not hardcode API Keys in your code, use environment variables
  2. Request Rate: Please comply with API call rate limits
  3. Error Handling: Implement comprehensive error handling mechanisms
  4. Token Limits: Ensure total input and output tokens don't exceed model context limits

This documentation is licensed under CC BY-SA 4.0.