GLM API Examples
This page provides examples of using the Agentsflare GLM API to help you quickly integrate and use Zhipu AI's GLM series models.
Basic Configuration
Before starting to use the API, please ensure you have obtained an API Key. If not, please refer to Create API Key.
Basic Information
- API Base URL: https://api.agentsflare.com/v1/chat/completions
- Authentication Method: Bearer Token
- Content Type:
application/json
Request Examples
# Minimal chat-completion request via curl.
# Replace YOUR_API_KEY with your actual key; it is sent as a Bearer token.
curl -X POST "https://api.agentsflare.com/v1/chat/completions" \
-H "Authorization: Bearer YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "glm-5",
"messages": [
{
"role": "user",
"content": "Hello, please introduce the GLM model"
}
],
"max_tokens": 1024,
"temperature": 0.7
}'
from openai import OpenAI
# Create an OpenAI-compatible client pointed at the Agentsflare endpoint.
# NOTE(review): prefer loading the key from an environment variable
# (see "Important Notes" at the bottom of this page) over hardcoding it.
client = OpenAI(
base_url="https://api.agentsflare.com/v1",
api_key="YOUR_API_KEY"
)
# Non-streaming request: the complete reply arrives in one response object.
response = client.chat.completions.create(
model="glm-5",
messages=[
{"role": "user", "content": "Hello, please introduce the GLM model"}
],
max_tokens=1024,
temperature=0.7,
stream=False
)
print(response.choices[0].message.content)
from openai import OpenAI
# Streaming example: print GLM-5's reasoning process, then its final answer.
# NOTE(review): prefer loading the key from an environment variable
# (see "Important Notes" at the bottom of this page) over hardcoding it.
client = OpenAI(
    base_url="https://api.agentsflare.com/v1",
    api_key="YOUR_API_KEY"
)
stream = client.chat.completions.create(
    model="glm-5",
    messages=[
        {"role": "user", "content": "Hello, please introduce the GLM model"}
    ],
    max_tokens=1024,
    temperature=0.7,
    stream=True
)
is_thinking = False   # "Thinking" header already printed?
is_answering = False  # "Answer" header already printed?
for chunk in stream:
    # Some chunks (e.g. keep-alives) may carry no choices; skip them.
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    # Reasoning process (reasoning_content arrives before the answer).
    reasoning = getattr(delta, "reasoning_content", None)
    if reasoning is not None:
        if not is_thinking:
            # Bug fix: print the header once, before the first reasoning
            # chunk. The original guarded on is_answering (still False
            # here), so the header was reprinted before every chunk.
            is_thinking = True
            print("🤔 Thinking...\n")
        print(reasoning, end="", flush=True)
    # Formal answer
    if delta.content is not None:
        if not is_answering:
            is_answering = True
            print("\n\n💬 Answer:\n")
        print(delta.content, end="", flush=True)
print()
import OpenAI from "openai";
const client = new OpenAI({
  apiKey: process.env.AGENTSFLARE_API_KEY,
  baseURL: "https://api.agentsflare.com/v1"
});

/**
 * Send a single (non-streaming) chat completion request to GLM-5
 * and print the assistant's reply.
 */
async function main() {
  try {
    const completion = await client.chat.completions.create({
      model: "glm-5",
      temperature: 0.7,
      max_tokens: 1024,
      messages: [
        { role: "user", content: "Hello, please introduce the GLM model" }
      ]
    });
    console.log(completion.choices[0].message.content);
  } catch (err) {
    // Surface the structured API error payload when present.
    const detail = err?.response?.data;
    console.error(detail ?? err);
  }
}
main();
import OpenAI from "openai";
const client = new OpenAI({
  apiKey: process.env.AGENTSFLARE_API_KEY,
  baseURL: "https://api.agentsflare.com/v1"
});

/**
 * Stream a chat completion from GLM-5, writing each content delta
 * to stdout as it arrives.
 */
async function main() {
  try {
    const events = await client.chat.completions.create({
      model: "glm-5",
      temperature: 0.7,
      max_tokens: 1024,
      stream: true,
      messages: [
        { role: "user", content: "Hello, please introduce the GLM model" }
      ]
    });
    for await (const event of events) {
      const text = event.choices[0]?.delta?.content;
      if (text) {
        process.stdout.write(text);
      }
    }
  } catch (err) {
    // Surface the structured API error payload when present.
    console.error(err?.response?.data ?? err);
  }
}
main();
import com.openai.client.OpenAIClient;
import com.openai.client.okhttp.OpenAIOkHttpClient;
import com.openai.models.chat.completions.ChatCompletionCreateParams;
import com.openai.models.chat.completions.ChatCompletion;
public class Main {
    /**
     * Send one non-streaming chat completion request to GLM-5 and print
     * the assistant's reply.
     */
    public static void main(String[] args) {
        // Read the key from the environment; never hardcode API keys.
        String apiKey = System.getenv("AGENTSFLARE_API_KEY");
        if (apiKey == null || apiKey.isBlank()) {
            throw new IllegalStateException("Missing AGENTSFLARE_API_KEY env var");
        }
        OpenAIClient client = OpenAIOkHttpClient.builder()
                .apiKey(apiKey)
                .baseUrl("https://api.agentsflare.com/v1")
                .build();
        // Fix: the SDK has no ChatCompletionCreateParams.Message builder;
        // addUserMessage(...) is the convenience for a user-role message.
        ChatCompletionCreateParams params = ChatCompletionCreateParams.builder()
                .model("glm-5")
                .addUserMessage("Hello, please introduce the GLM model")
                .maxTokens(1024)
                .temperature(0.7)
                .build();
        ChatCompletion res = client.chat().completions().create(params);
        // Fix: content() returns Optional<String>; unwrap with a fallback
        // so the assignment to String compiles.
        String content = res.choices().get(0).message().content().orElse("");
        System.out.println(content);
    }
}
package main
import (
"context"
"fmt"
"log"
"os"
openai "github.com/openai/openai-go"
"github.com/openai/openai-go/option"
)
// Entry point: send one non-streaming chat completion request to GLM-5.
func main() {
// Read the key from the environment; never hardcode API keys.
apiKey := os.Getenv("AGENTSFLARE_API_KEY")
if apiKey == "" {
log.Fatal("missing env AGENTSFLARE_API_KEY")
}
client := openai.NewClient(
option.WithAPIKey(apiKey),
option.WithBaseURL("https://api.agentsflare.com/v1"),
)
ctx := context.Background()
// openai.F(...) wraps values into the SDK's request-field type
// (openai-go v0 API — verify against the SDK version in use).
resp, err := client.Chat.Completions.New(ctx, openai.ChatCompletionNewParams{
Model: openai.F("glm-5"),
Messages: openai.F([]openai.ChatCompletionMessageParamUnion{
openai.UserMessage("Hello, please introduce the GLM model"),
}),
MaxTokens: openai.F(int64(1024)),
Temperature: openai.F(0.7),
})
if err != nil {
log.Fatalf("chat completion failed: %v", err)
}
// Guard against an empty choices slice before indexing.
if len(resp.Choices) > 0 && resp.Choices[0].Message.Content != "" {
fmt.Println(resp.Choices[0].Message.Content)
} else {
fmt.Printf("empty response: %+v\n", resp)
}
}
const { OpenAI } = require("openai");
const client = new OpenAI({
  apiKey: process.env.AGENTSFLARE_API_KEY,
  baseURL: "https://api.agentsflare.com/v1"
});

/**
 * CommonJS variant: send one non-streaming chat completion request
 * to GLM-5 and print the assistant's reply.
 */
async function main() {
  try {
    const completion = await client.chat.completions.create({
      model: "glm-5",
      temperature: 0.7,
      max_tokens: 1024,
      messages: [
        { role: "user", content: "Hello, please introduce the GLM model" }
      ]
    });
    console.log(completion.choices[0].message.content);
  } catch (err) {
    // Surface the structured API error payload when present.
    console.error(err?.response?.data ?? err);
  }
}
main();
Response Examples
Non-streaming Response
{
"id": "chatcmpl-123456",
"object": "chat.completion",
"created": 1677652288,
"model": "glm-5",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! GLM (General Language Model) is a series of general language models developed by Zhipu AI. GLM-5 is its latest generation model, featuring powerful natural language understanding and generation capabilities, supporting multi-turn conversations, knowledge Q&A, content creation, and various application scenarios. The GLM model employs advanced training techniques and excels in both Chinese and English processing."
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 15,
"completion_tokens": 85,
"total_tokens": 100
}
}
Streaming Response
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"glm-5","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"glm-5","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"glm-5","choices":[{"index":0,"delta":{"content":" GLM"},"finish_reason":null}]}
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"glm-5","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}
data: [DONE]
Request Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
| model | string | Yes | Model name, e.g., glm-5, glm-4 |
| messages | array | Yes | Array of messages with role and content |
| max_tokens | integer | No | Maximum tokens to generate, default 1024 |
| temperature | float | No | Sampling temperature, range 0-2, default 0.95 |
| top_p | float | No | Nucleus sampling parameter, default 0.7 |
| stream | boolean | No | Enable streaming response, default false |
Features
Reasoning Process
GLM-5 supports displaying the reasoning process. In streaming responses, the model first outputs the thinking process (reasoning_content), then outputs the final answer (content). This helps understand the model's reasoning logic.
# Reasoning process
reasoning = getattr(delta, "reasoning_content", None)
if reasoning is not None:
print(reasoning, end="", flush=True)
# Formal answer
if delta.content is not None:
    print(delta.content, end="", flush=True)
Multi-turn Conversations
GLM supports multi-turn conversations by including history in the messages array:
# Multi-turn conversation: replay earlier turns in the messages array
# so the model has the full dialogue context for the new question.
messages = [
{"role": "user", "content": "What is artificial intelligence?"},
{"role": "assistant", "content": "Artificial Intelligence (AI) is a branch of computer science..."},
{"role": "user", "content": "What are its application fields?"}
]
completion = client.chat.completions.create(
model="glm-5",
messages=messages
)Streaming Output
GLM API supports streaming output (SSE) by setting stream: true. Streaming responses allow real-time content generation, providing a better user experience.
Chinese Optimization
GLM models are deeply optimized for Chinese, excelling in Chinese understanding, generation, and reasoning tasks, making them particularly suitable for Chinese application scenarios.
Use Cases
- Intelligent Dialogue: Customer service bots, virtual assistants
- Content Creation: Article writing, copywriting generation
- Knowledge Q&A: Knowledge base retrieval, question answering
- Code Generation: Programming assistance, code explanation
- Text Analysis: Sentiment analysis, text classification
Important Notes
- API Key Security: Do not hardcode API Keys in your code, use environment variables
- Request Rate: Please comply with API call rate limits
- Error Handling: Implement comprehensive error handling mechanisms
- Token Limits: Ensure total input and output tokens don't exceed model context limits