gpt模型图片识别
OpenAI 提供多个支持图片识别的模型,本文以 gpt-5-nano 为例。
基础配置
在开始使用 API 之前,请确保您已经获取了 API Key。如果还没有,请参考「创建 API Key」文档。
基础信息
- API Base URL: https://api.agentsflare.com/v1/chat/completions (使用 OpenAI SDK 时,base_url 填写 https://api.agentsflare.com/v1)
- 认证方式: Bearer Token
- 内容类型: application/json
请求示例
图片识别
python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import base64
import mimetypes
import os
import sys
from openai import OpenAI
# Base URL handed to the OpenAI client.
BASE_URL = "https://api.agentsflare.com/v1"
# API key, taken from the OPENAI_API_KEY environment variable so the secret
# does not have to be written into the script; empty when the variable is unset.
API_KEY = os.environ.get("OPENAI_API_KEY", "")
# Model name sent with each request.
MODEL = "gpt-5-nano"
def file_to_data_url(path: str) -> str:
    """Return the contents of the file at *path* as a base64 ``data:`` URL.

    The MIME type is guessed from the file name; when it cannot be guessed,
    ``image/jpeg`` is used.

    Args:
        path: Location of the file to encode.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        FileNotFoundError: If *path* is not an existing regular file.
    """
    if not os.path.isfile(path):
        raise FileNotFoundError(f"找不到文件:{path}")

    guessed_type = mimetypes.guess_type(path)[0]
    content_type = "image/jpeg" if guessed_type is None else guessed_type

    with open(path, "rb") as image_file:
        raw_bytes = image_file.read()
    payload = base64.b64encode(raw_bytes).decode("utf-8")

    return "data:" + content_type + ";base64," + payload
def main():
    """Send the image named on the command line to the model and print the reply.

    The image path is read from ``sys.argv[1]``, the file is encoded as a data
    URL, and it is submitted together with a fixed Chinese prompt through the
    chat completions API. The reply text is printed to stdout.

    Exits with status 1 (message on stderr) when the image path argument is
    missing or when no API key is available from ``API_KEY`` or the
    ``OPENAI_API_KEY`` environment variable.
    """
    if len(sys.argv) < 2:
        print(f"用法:{sys.argv[0]} /path/to/image.jpg", file=sys.stderr)
        sys.exit(1)

    # Fall back to the environment so the key need not be hard-coded, and fail
    # early with a clear message instead of sending a request without a key.
    api_key = API_KEY or os.environ.get("OPENAI_API_KEY", "")
    if not api_key:
        print("请设置 OPENAI_API_KEY 环境变量或在代码里填入 API_KEY。", file=sys.stderr)
        sys.exit(1)

    image_path = sys.argv[1]
    data_url = file_to_data_url(image_path)

    client = OpenAI(
        api_key=api_key,
        base_url=BASE_URL,
    )
    resp = client.chat.completions.create(
        model=MODEL,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "请详细分析这张图片,描述你看到的内容、场景、主体、可能的文字信息,以及任何值得注意的细节。"},
                    {"type": "image_url", "image_url": {"url": data_url}},
                ],
            }
        ],
    )

    # Guard against an empty choices list or a missing content field so the
    # script prints an empty line rather than crashing or printing "None".
    choices = resp.choices
    content = choices[0].message.content if choices else None
    print(content or "")
if __name__ == "__main__":
    main()
javascript
#!/usr/bin/env node
"use strict";
const fs = require("fs");
const path = require("path");
const OpenAI = require("openai");
// Base URL handed to the OpenAI client.
const BASE_URL = "https://api.agentsflare.com/v1";
const API_KEY = process.env.OPENAI_API_KEY || ""; // Prefer supplying the key through an environment variable.
// Model name sent with each request.
const MODEL = "gpt-5-nano";
/**
 * Read a local file and encode it as a base64 `data:` URL.
 *
 * The MIME type is chosen from the file extension (.png, .webp, .gif,
 * case-insensitive); any other extension falls back to image/jpeg.
 *
 * @param {string} filePath - Path of the image file to encode.
 * @returns {string} A `data:<mime>;base64,<payload>` URL.
 * @throws {Error} If the path does not exist or is not a regular file.
 */
function fileToDataUrl(filePath) {
  // statSync (rather than existsSync) lets a directory be rejected here with
  // the same "not found" message, instead of failing later in readFileSync
  // with a raw EISDIR error.
  let stats = null;
  try {
    stats = fs.statSync(filePath);
  } catch (err) {
    stats = null;
  }
  if (!stats || !stats.isFile()) {
    throw new Error(`找不到文件:${filePath}`);
  }

  // Minimal extension-to-MIME table (the `mime-types` package is a more
  // complete alternative).
  const mimeByExt = new Map([
    [".png", "image/png"],
    [".webp", "image/webp"],
    [".gif", "image/gif"],
  ]);
  const ext = path.extname(filePath).toLowerCase();
  const mime = mimeByExt.get(ext) ?? "image/jpeg";

  const b64 = fs.readFileSync(filePath).toString("base64");
  return `data:${mime};base64,${b64}`;
}
/**
 * Entry point: send the image named on the command line to the model and
 * log the reply.
 *
 * Exits with status 1 after printing a message to stderr when the image path
 * argument or the API key is missing. Logs an empty string when the response
 * carries no message content.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const [, scriptPath, imagePath] = process.argv;

  if (!imagePath) {
    console.error(`用法:${scriptPath} /path/to/image.jpg`);
    process.exit(1);
  }
  if (!API_KEY) {
    console.error("请设置 OPENAI_API_KEY 环境变量或在代码里填入 API_KEY。");
    process.exit(1);
  }

  const imageUrl = fileToDataUrl(imagePath);
  const client = new OpenAI({ apiKey: API_KEY, baseURL: BASE_URL });

  // One user message carrying the text prompt followed by the inline image.
  const prompt =
    "请详细分析这张图片,描述你看到的内容、场景、主体、可能的文字信息,以及任何值得注意的细节。";
  const userMessage = {
    role: "user",
    content: [
      { type: "text", text: prompt },
      { type: "image_url", image_url: { url: imageUrl } },
    ],
  };

  const resp = await client.chat.completions.create({
    model: MODEL,
    messages: [userMessage],
  });

  const answer = resp.choices?.[0]?.message?.content;
  console.log(answer ?? "");
}
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
go
package main
import (
"bytes"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"mime"
"net/http"
"os"
"path/filepath"
)
const (
	// BASE_URL is the API root; main appends "/chat/completions" to it.
	BASE_URL = "https://api.agentsflare.com/v1"
	// MODEL is the model name placed in the request body.
	MODEL = "gpt-5-nano"
)
func fileToDataURL(path string) (string, error) {
info, err := os.Stat(path)
if err != nil || info.IsDir() {
return "", fmt.Errorf("找不到文件:%s", path)
}
ext := filepath.Ext(path)
m := mime.TypeByExtension(ext)
if m == "" {
// 保底
m = "image/jpeg"
}
b, err := os.ReadFile(path)
if err != nil {
return "", err
}
b64 := base64.StdEncoding.EncodeToString(b)
return fmt.Sprintf("data:%s;base64,%s", m, b64), nil
}
func main() {
if len(os.Args) < 2 {
fmt.Printf("用法:%s /path/to/image.jpg\n", os.Args[0])
os.Exit(1)
}
imagePath := os.Args[1]
apiKey := os.Getenv("OPENAI_API_KEY")
if apiKey == "" {
fmt.Println("请设置 OPENAI_API_KEY 环境变量。")
os.Exit(1)
}
dataURL, err := fileToDataURL(imagePath)
if err != nil {
fmt.Println(err)
os.Exit(1)
}
// 构造 chat.completions 请求体
reqBody := map[string]any{
"model": MODEL,
"messages": []any{
map[string]any{
"role": "user",
"content": []any{
map[string]any{
"type": "text",
"text": "请详细分析这张图片,描述你看到的内容、场景、主体、可能的文字信息,以及任何值得注意的细节。",
},
map[string]any{
"type": "image_url",
"image_url": map[string]any{
"url": dataURL,
},
},
},
},
},
}
bodyBytes, _ := json.Marshal(reqBody)
req, err := http.NewRequest("POST", BASE_URL+"/chat/completions", bytes.NewReader(bodyBytes))
if err != nil {
fmt.Println(err)
os.Exit(1)
}
req.Header.Set("Authorization", "Bearer "+apiKey)
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
fmt.Println(err)
os.Exit(1)
}
defer resp.Body.Close()
respBytes, _ := io.ReadAll(resp.Body)
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
fmt.Printf("HTTP %d\n%s\n", resp.StatusCode, string(respBytes))
os.Exit(1)
}
// 解析 choices[0].message.content
var out struct {
Choices []struct {
Message struct {
Content any `json:"content"`
} `json:"message"`
} `json:"choices"`
}
if err := json.Unmarshal(respBytes, &out); err != nil {
// 如果解析失败,直接打印原文
fmt.Println(string(respBytes))
return
}
if len(out.Choices) == 0 {
fmt.Println("")
return
}
// content 有的网关可能返回 string,有的可能返回数组结构;这里尽量兼容
switch v := out.Choices[0].Message.Content.(type) {
case string:
fmt.Println(v)
default:
pretty, _ := json.MarshalIndent(v, "", " ")
fmt.Println(string(pretty))
}
}注意:单个图片大小不要超过20M,否则模型会返回错误。