GLM API Examples
This page provides examples of using the Agentsflare GLM API to help you quickly integrate and use Zhipu AI's GLM series models.
Basic Configuration
Before starting to use the API, please ensure you have obtained an API Key. If not, please refer to Create API Key.
Basic Information
- API Base URL: https://api.agentsflare.com/v1/chat/completions
- Authentication Method: Bearer Token
- Content Type:
application/json
Request Examples
# Minimal chat-completion request via curl.
# Replace YOUR_API_KEY with your actual key; it is sent as a Bearer token.
curl -X POST "https://api.agentsflare.com/v1/chat/completions" \
-H "Authorization: Bearer YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "glm-5",
"messages": [
{
"role": "user",
"content": "Hello, please introduce the GLM model"
}
],
"max_tokens": 1024,
"temperature": 0.7
}'
from openai import OpenAI
# Create an OpenAI-compatible client pointed at the Agentsflare endpoint.
# NOTE(review): prefer loading the key from an environment variable
# (see "Important Notes" at the bottom of this page) over hardcoding it.
client = OpenAI(
base_url="https://api.agentsflare.com/v1",
api_key="YOUR_API_KEY"
)
# Non-streaming request: the complete reply arrives in one response object.
response = client.chat.completions.create(
model="glm-5",
messages=[
{"role": "user", "content": "Hello, please introduce the GLM model"}
],
max_tokens=1024,
temperature=0.7,
stream=False
)
print(response.choices[0].message.content)
from openai import OpenAI
# Streaming example: print GLM-5's reasoning process, then its final answer.
# NOTE(review): prefer loading the key from an environment variable
# (see "Important Notes" at the bottom of this page) over hardcoding it.
client = OpenAI(
    base_url="https://api.agentsflare.com/v1",
    api_key="YOUR_API_KEY"
)
stream = client.chat.completions.create(
    model="glm-5",
    messages=[
        {"role": "user", "content": "Hello, please introduce the GLM model"}
    ],
    max_tokens=1024,
    temperature=0.7,
    stream=True
)
is_thinking = False   # "Thinking" header already printed?
is_answering = False  # "Answer" header already printed?
for chunk in stream:
    # Some chunks (e.g. keep-alives) may carry no choices; skip them.
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    # Reasoning process (reasoning_content arrives before the answer).
    reasoning = getattr(delta, "reasoning_content", None)
    if reasoning is not None:
        if not is_thinking:
            # Bug fix: print the header once, before the first reasoning
            # chunk. The original guarded on is_answering (still False
            # here), so the header was reprinted before every chunk.
            is_thinking = True
            print("🤔 Thinking...\n")
        print(reasoning, end="", flush=True)
    # Formal answer
    if delta.content is not None:
        if not is_answering:
            is_answering = True
            print("\n\n💬 Answer:\n")
        print(delta.content, end="", flush=True)
print()
import OpenAI from "openai";
const client = new OpenAI({
  apiKey: process.env.AGENTSFLARE_API_KEY,
  baseURL: "https://api.agentsflare.com/v1"
});

/**
 * Send a single (non-streaming) chat completion request to GLM-5
 * and print the assistant's reply.
 */
async function main() {
  try {
    const completion = await client.chat.completions.create({
      model: "glm-5",
      temperature: 0.7,
      max_tokens: 1024,
      messages: [
        { role: "user", content: "Hello, please introduce the GLM model" }
      ]
    });
    console.log(completion.choices[0].message.content);
  } catch (err) {
    // Surface the structured API error payload when present.
    const detail = err?.response?.data;
    console.error(detail ?? err);
  }
}
main();
import OpenAI from "openai";
const client = new OpenAI({
  apiKey: process.env.AGENTSFLARE_API_KEY,
  baseURL: "https://api.agentsflare.com/v1"
});

/**
 * Stream a chat completion from GLM-5, writing each content delta
 * to stdout as it arrives.
 */
async function main() {
  try {
    const events = await client.chat.completions.create({
      model: "glm-5",
      temperature: 0.7,
      max_tokens: 1024,
      stream: true,
      messages: [
        { role: "user", content: "Hello, please introduce the GLM model" }
      ]
    });
    for await (const event of events) {
      const text = event.choices[0]?.delta?.content;
      if (text) {
        process.stdout.write(text);
      }
    }
  } catch (err) {
    // Surface the structured API error payload when present.
    console.error(err?.response?.data ?? err);
  }
}
main();
import com.openai.client.OpenAIClient;
import com.openai.client.okhttp.OpenAIOkHttpClient;
import com.openai.models.chat.completions.ChatCompletionCreateParams;
import com.openai.models.chat.completions.ChatCompletion;
public class Main {
    /**
     * Send one non-streaming chat completion request to GLM-5 and print
     * the assistant's reply.
     */
    public static void main(String[] args) {
        // Read the key from the environment; never hardcode API keys.
        String apiKey = System.getenv("AGENTSFLARE_API_KEY");
        if (apiKey == null || apiKey.isBlank()) {
            throw new IllegalStateException("Missing AGENTSFLARE_API_KEY env var");
        }
        OpenAIClient client = OpenAIOkHttpClient.builder()
                .apiKey(apiKey)
                .baseUrl("https://api.agentsflare.com/v1")
                .build();
        // Fix: the SDK has no ChatCompletionCreateParams.Message builder;
        // addUserMessage(...) is the convenience for a user-role message.
        ChatCompletionCreateParams params = ChatCompletionCreateParams.builder()
                .model("glm-5")
                .addUserMessage("Hello, please introduce the GLM model")
                .maxTokens(1024)
                .temperature(0.7)
                .build();
        ChatCompletion res = client.chat().completions().create(params);
        // Fix: content() returns Optional<String>; unwrap with a fallback
        // so the assignment to String compiles.
        String content = res.choices().get(0).message().content().orElse("");
        System.out.println(content);
    }
}
package main
import (
"context"
"fmt"
"log"
"os"
openai "github.com/openai/openai-go"
"github.com/openai/openai-go/option"
)
// Entry point: send one non-streaming chat completion request to GLM-5.
func main() {
// Read the key from the environment; never hardcode API keys.
apiKey := os.Getenv("AGENTSFLARE_API_KEY")
if apiKey == "" {
log.Fatal("missing env AGENTSFLARE_API_KEY")
}
client := openai.NewClient(
option.WithAPIKey(apiKey),
option.WithBaseURL("https://api.agentsflare.com/v1"),
)
ctx := context.Background()
// openai.F(...) wraps values into the SDK's request-field type
// (openai-go v0 API — verify against the SDK version in use).
resp, err := client.Chat.Completions.New(ctx, openai.ChatCompletionNewParams{
Model: openai.F("glm-5"),
Messages: openai.F([]openai.ChatCompletionMessageParamUnion{
openai.UserMessage("Hello, please introduce the GLM model"),
}),
MaxTokens: openai.F(int64(1024)),
Temperature: openai.F(0.7),
})
if err != nil {
log.Fatalf("chat completion failed: %v", err)
}
// Guard against an empty choices slice before indexing.
if len(resp.Choices) > 0 && resp.Choices[0].Message.Content != "" {
fmt.Println(resp.Choices[0].Message.Content)
} else {
fmt.Printf("empty response: %+v\n", resp)
}
}
const { OpenAI } = require("openai");
const client = new OpenAI({
  apiKey: process.env.AGENTSFLARE_API_KEY,
  baseURL: "https://api.agentsflare.com/v1"
});

/**
 * CommonJS variant: send one non-streaming chat completion request
 * to GLM-5 and print the assistant's reply.
 */
async function main() {
  try {
    const completion = await client.chat.completions.create({
      model: "glm-5",
      temperature: 0.7,
      max_tokens: 1024,
      messages: [
        { role: "user", content: "Hello, please introduce the GLM model" }
      ]
    });
    console.log(completion.choices[0].message.content);
  } catch (err) {
    // Surface the structured API error payload when present.
    console.error(err?.response?.data ?? err);
  }
}
main();
Response Examples
Non-streaming Response
{
"id": "chatcmpl-123456",
"object": "chat.completion",
"created": 1677652288,
"model": "glm-5",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! GLM (General Language Model) is a series of general language models developed by Zhipu AI. GLM-5 is its latest generation model, featuring powerful natural language understanding and generation capabilities, supporting multi-turn conversations, knowledge Q&A, content creation, and various application scenarios. The GLM model employs advanced training techniques and excels in both Chinese and English processing."
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 15,
"completion_tokens": 85,
"total_tokens": 100
}
}
Streaming Response
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"glm-5","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"glm-5","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"glm-5","choices":[{"index":0,"delta":{"content":" GLM"},"finish_reason":null}]}
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"glm-5","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}
data: [DONE]
Request Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
| model | string | Yes | Model name, e.g., glm-5, glm-4 |
| messages | array | Yes | Array of messages with role and content |
| max_tokens | integer | No | Maximum tokens to generate, default 1024 |
| temperature | float | No | Sampling temperature, range 0-2, default 0.95 |
| top_p | float | No | Nucleus sampling parameter, default 0.7 |
| stream | boolean | No | Enable streaming response, default false |
Features
Reasoning Process
GLM-5 supports displaying the reasoning process. In streaming responses, the model first outputs the thinking process (reasoning_content), then outputs the final answer (content). This helps understand the model's reasoning logic.
# Reasoning process
reasoning = getattr(delta, "reasoning_content", None)
if reasoning is not None:
print(reasoning, end="", flush=True)
# Formal answer
if delta.content is not None:
    print(delta.content, end="", flush=True)
Multi-turn Conversations
GLM supports multi-turn conversations by including history in the messages array:
# Multi-turn conversation: replay earlier turns in the messages array
# so the model has the full dialogue context for the new question.
messages = [
{"role": "user", "content": "What is artificial intelligence?"},
{"role": "assistant", "content": "Artificial Intelligence (AI) is a branch of computer science..."},
{"role": "user", "content": "What are its application fields?"}
]
completion = client.chat.completions.create(
model="glm-5",
messages=messages
)Streaming Output
GLM API supports streaming output (SSE) by setting stream: true. Streaming responses allow real-time content generation, providing a better user experience.
Chinese Optimization
GLM models are deeply optimized for Chinese, excelling in Chinese understanding, generation, and reasoning tasks, making them particularly suitable for Chinese application scenarios.
Use Cases
- Intelligent Dialogue: Customer service bots, virtual assistants
- Content Creation: Article writing, copywriting generation
- Knowledge Q&A: Knowledge base retrieval, question answering
- Code Generation: Programming assistance, code explanation
- Text Analysis: Sentiment analysis, text classification
Important Notes
- API Key Security: Do not hardcode API Keys in your code, use environment variables
- Request Rate: Please comply with API call rate limits
- Error Handling: Implement comprehensive error handling mechanisms
- Token Limits: Ensure total input and output tokens don't exceed model context limits