Skip to content

gpt模型图片识别

OpenAI 提供多个支持图片识别的模型,本文以 gpt-5-nano 为例。

基础配置

在开始使用 API 之前,请确保您已经获取了 API Key。如果还没有,请参考「创建 API Key」文档。

基础信息

  • API Base URL: https://api.agentsflare.com/v1(图片识别使用 /chat/completions 接口,完整请求地址为 https://api.agentsflare.com/v1/chat/completions)
  • 认证方式: Bearer Token
  • 内容类型: application/json

请求示例

图片识别

python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import base64
import mimetypes
import os
import sys

from openai import OpenAI

# Base URL handed to the OpenAI client in main().
BASE_URL = "https://api.agentsflare.com/v1"
# Read the key from the OPENAI_API_KEY environment variable (same variable the
# Node.js and Go samples use) instead of hard-coding a secret in the source.
# Falls back to an empty string when the variable is not set.
API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Vision-capable model used for the request.
MODEL = "gpt-5-nano"

def file_to_data_url(path: str) -> str:
    """Read a local file and return it as a base64 ``data:`` URL.

    The MIME type is guessed from the file name; when it cannot be guessed,
    ``image/jpeg`` is used.

    Args:
        path: Path to the file to encode.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        FileNotFoundError: If ``path`` is not an existing regular file.
    """
    if not os.path.isfile(path):
        raise FileNotFoundError(f"找不到文件:{path}")

    guessed_type = mimetypes.guess_type(path)[0]
    mime_type = "image/jpeg" if guessed_type is None else guessed_type

    with open(path, "rb") as image_file:
        raw_bytes = image_file.read()
    payload = base64.b64encode(raw_bytes).decode("utf-8")

    return "data:{};base64,{}".format(mime_type, payload)

def main():
    """Command-line entry point: describe the image given as the first argument.

    Encodes the image as a data URL, sends it with a text prompt to the chat
    completions endpoint, and prints the model's reply.

    Exits with status 1 when no image path is given, when API_KEY is empty,
    or when the image file cannot be read.
    """
    if len(sys.argv) < 2:
        print(f"用法:{sys.argv[0]} /path/to/image.jpg")
        sys.exit(1)

    # Fail fast with a clear message instead of sending a request that has
    # no credentials (the Node.js sample performs the same check).
    if not API_KEY:
        print("请先配置 API_KEY(不能为空)。", file=sys.stderr)
        sys.exit(1)

    image_path = sys.argv[1]
    try:
        data_url = file_to_data_url(image_path)
    except OSError as exc:
        # Covers FileNotFoundError from the helper as well as read errors;
        # report the message rather than dumping a traceback.
        print(exc, file=sys.stderr)
        sys.exit(1)

    client = OpenAI(
        api_key=API_KEY,
        base_url=BASE_URL,
    )

    resp = client.chat.completions.create(
        model=MODEL,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "请详细分析这张图片,描述你看到的内容、场景、主体、可能的文字信息,以及任何值得注意的细节。"},
                    {"type": "image_url", "image_url": {"url": data_url}},
                ],
            }
        ],
    )

    # Print an empty line rather than crashing (or printing "None") when the
    # response carries no choices or no text content.
    choices = resp.choices
    content = choices[0].message.content if choices else None
    print(content if content is not None else "")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
javascript
#!/usr/bin/env node
"use strict";

const fs = require("fs");
const path = require("path");
const OpenAI = require("openai");

// Base URL passed to the OpenAI client in main().
const BASE_URL = "https://api.agentsflare.com/v1";
// Model used for the image-recognition request.
const MODEL = "gpt-5-nano";
// Prefer supplying the key through the OPENAI_API_KEY environment variable;
// an unset or empty variable yields an empty string.
const API_KEY = process.env.OPENAI_API_KEY ? process.env.OPENAI_API_KEY : "";

/**
 * Read a local file and return it as a base64 `data:` URL.
 *
 * The MIME type is inferred from the file extension (case-insensitive);
 * unrecognised extensions fall back to `image/jpeg`.
 *
 * @param {string} filePath - Path to the image file.
 * @returns {string} A string of the form `data:<mime>;base64,<payload>`.
 * @throws {Error} If `filePath` is not an existing regular file.
 */
function fileToDataUrl(filePath) {
  // existsSync() also returns true for directories, which would then fail in
  // readFileSync() with a raw EISDIR error. Require a regular file instead.
  // Any stat failure is treated as "not found", as existsSync() did.
  let isRegularFile = false;
  try {
    isRegularFile = fs.statSync(filePath).isFile();
  } catch {
    isRegularFile = false;
  }
  if (!isRegularFile) {
    throw new Error(`找不到文件:${filePath}`);
  }

  // Minimal extension-to-MIME table (the `mime-types` package is more complete).
  const mimeByExtension = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
    ".webp": "image/webp",
    ".gif": "image/gif",
  };
  const ext = path.extname(filePath).toLowerCase();
  const mime = mimeByExtension[ext] ?? "image/jpeg";

  const b64 = fs.readFileSync(filePath).toString("base64");
  return `data:${mime};base64,${b64}`;
}

/**
 * Command-line entry point: sends the image named by the first CLI argument,
 * together with a text prompt, to the chat completions endpoint and prints
 * the reply.
 *
 * Exits with status 1 when the image path or the API key is missing.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const [, scriptPath, imagePath] = process.argv;

  if (!imagePath) {
    console.error(`用法:${scriptPath} /path/to/image.jpg`);
    process.exit(1);
  }

  if (!API_KEY) {
    console.error("请设置 OPENAI_API_KEY 环境变量或在代码里填入 API_KEY。");
    process.exit(1);
  }

  // Encode the image before creating the client so a bad path fails early.
  const dataUrl = fileToDataUrl(imagePath);

  const client = new OpenAI({ apiKey: API_KEY, baseURL: BASE_URL });

  // One user message carrying two parts: the instruction and the image.
  const promptPart = {
    type: "text",
    text: "请详细分析这张图片,描述你看到的内容、场景、主体、可能的文字信息,以及任何值得注意的细节。",
  };
  const imagePart = {
    type: "image_url",
    image_url: { url: dataUrl },
  };

  const resp = await client.chat.completions.create({
    model: MODEL,
    messages: [{ role: "user", content: [promptPart, imagePart] }],
  });

  // Print an empty line when the response has no usable text content.
  const answer = resp.choices?.[0]?.message?.content;
  console.log(answer ?? "");
}

// Start the script; any rejection from main() is logged and turned into a
// non-zero exit status.
main().catch((error) => {
  console.error(error);
  process.exit(1);
});
go
package main

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"mime"
	"net/http"
	"os"
	"path/filepath"
)

// BASE_URL is the API root; main appends "/chat/completions" to it.
const BASE_URL = "https://api.agentsflare.com/v1"

// MODEL is the model name sent in the request body.
const MODEL = "gpt-5-nano"

func fileToDataURL(path string) (string, error) {
	info, err := os.Stat(path)
	if err != nil || info.IsDir() {
		return "", fmt.Errorf("找不到文件:%s", path)
	}

	ext := filepath.Ext(path)
	m := mime.TypeByExtension(ext)
	if m == "" {
		// 保底
		m = "image/jpeg"
	}

	b, err := os.ReadFile(path)
	if err != nil {
		return "", err
	}

	b64 := base64.StdEncoding.EncodeToString(b)
	return fmt.Sprintf("data:%s;base64,%s", m, b64), nil
}

// main is the command-line entry point. It encodes the image named by the
// first argument as a data URL, POSTs a chat.completions request containing a
// text prompt plus that image, and prints choices[0].message.content.
//
// The API key is read from the OPENAI_API_KEY environment variable. The
// process exits with status 1 on usage errors, a missing key, file errors,
// request/response I/O errors, and non-2xx HTTP responses.
func main() {
	if len(os.Args) < 2 {
		fmt.Printf("用法:%s /path/to/image.jpg\n", os.Args[0])
		os.Exit(1)
	}
	imagePath := os.Args[1]

	apiKey := os.Getenv("OPENAI_API_KEY")
	if apiKey == "" {
		fmt.Println("请设置 OPENAI_API_KEY 环境变量。")
		os.Exit(1)
	}

	dataURL, err := fileToDataURL(imagePath)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	// Build the chat.completions request body: one user message with a text
	// part and an image part.
	reqBody := map[string]any{
		"model": MODEL,
		"messages": []any{
			map[string]any{
				"role": "user",
				"content": []any{
					map[string]any{
						"type": "text",
						"text": "请详细分析这张图片,描述你看到的内容、场景、主体、可能的文字信息,以及任何值得注意的细节。",
					},
					map[string]any{
						"type": "image_url",
						"image_url": map[string]any{
							"url": dataURL,
						},
					},
				},
			},
		},
	}

	// Report a marshalling failure instead of sending an empty body.
	bodyBytes, err := json.Marshal(reqBody)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	req, err := http.NewRequest("POST", BASE_URL+"/chat/completions", bytes.NewReader(bodyBytes))
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	req.Header.Set("Authorization", "Bearer "+apiKey)
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	// Read the whole body and close it right away: deferred calls do not run
	// when the process leaves through os.Exit below.
	respBytes, err := io.ReadAll(resp.Body)
	resp.Body.Close()
	if err != nil {
		// A truncated or failed read must not be parsed as a valid response.
		fmt.Println(err)
		os.Exit(1)
	}
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		fmt.Printf("HTTP %d\n%s\n", resp.StatusCode, string(respBytes))
		os.Exit(1)
	}

	// Extract choices[0].message.content.
	var out struct {
		Choices []struct {
			Message struct {
				Content any `json:"content"`
			} `json:"message"`
		} `json:"choices"`
	}
	if err := json.Unmarshal(respBytes, &out); err != nil {
		// Best effort: if the body is not the expected JSON, print it verbatim.
		fmt.Println(string(respBytes))
		return
	}

	if len(out.Choices) == 0 {
		fmt.Println("")
		return
	}

	// Some gateways may return content as a string, others as a structured
	// array; print strings directly and pretty-print anything else as JSON.
	switch v := out.Choices[0].Message.Content.(type) {
	case string:
		fmt.Println(v)
	default:
		pretty, _ := json.MarshalIndent(v, "", "  ")
		fmt.Println(string(pretty))
	}
}

注意:单张图片大小不要超过 20 MB,否则模型会返回错误。

本文档遵循 CC BY-SA 4.0 协议。