Skip to content

GPT Model Image Recognition

OpenAI provides multiple image recognition models; this guide uses gpt-5-nano as an example.

Basic Configuration

Before you start using the API, make sure you have obtained an API Key. If you do not have one yet, please refer to Create API Key.

Basic Information

  • API Base URL: https://api.agentsflare.com/v1 (chat completions endpoint: https://api.agentsflare.com/v1/chat/completions)
  • Authentication Method: Bearer Token
  • Content Type: application/json

Request Example

Image Recognition

python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import base64
import mimetypes
import os
import sys

from openai import OpenAI

# Root URL of the OpenAI-compatible API (the chat endpoint is /chat/completions).
BASE_URL = "https://api.agentsflare.com/v1"
# Read the key from the environment so it never has to be stored in the script;
# this matches the Node.js and Go samples, which also use OPENAI_API_KEY.
API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Model used for the image recognition request.
MODEL = "gpt-5-nano"

def file_to_data_url(path: str) -> str:
    """Encode the file at *path* as a base64 ``data:`` URL.

    The MIME type is guessed from the file name; when no type can be
    guessed, ``image/jpeg`` is assumed.

    Args:
        path: Location of the file to encode.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        FileNotFoundError: If *path* is not an existing regular file.
    """
    if not os.path.isfile(path):
        raise FileNotFoundError(f"File not found: {path}")

    guessed_type = mimetypes.guess_type(path)[0]
    content_type = "image/jpeg" if guessed_type is None else guessed_type

    # Binary mode: the bytes are encoded as-is, with no text decoding.
    with open(path, "rb") as image_file:
        payload = base64.b64encode(image_file.read()).decode("utf-8")

    return "data:" + content_type + ";base64," + payload

def main():
    """Send the image named on the command line to the model and print the reply.

    Exits with status 1 when no image path is given, when no API key is
    configured, or when the image file cannot be read.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} /path/to/image.jpg")
        sys.exit(1)

    # Fall back to the environment so the key does not have to live in the
    # file; an empty key would otherwise only surface as an API-side failure.
    api_key = API_KEY or os.environ.get("OPENAI_API_KEY", "")
    if not api_key:
        print(
            "Please set the OPENAI_API_KEY environment variable "
            "or fill in API_KEY in the code.",
            file=sys.stderr,
        )
        sys.exit(1)

    image_path = sys.argv[1]
    try:
        data_url = file_to_data_url(image_path)
    except OSError as err:
        # FileNotFoundError and read failures are OSError subclasses; report
        # them as a one-line message instead of a traceback.
        print(err, file=sys.stderr)
        sys.exit(1)

    client = OpenAI(
        api_key=api_key,
        base_url=BASE_URL,
    )

    # A single user message carrying both the instruction text and the image.
    resp = client.chat.completions.create(
        model=MODEL,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Please analyze this image in detail, describe what you see, the scene, the subject, possible text information, and any noteworthy details."},
                    {"type": "image_url", "image_url": {"url": data_url}},
                ],
            }
        ],
    )

    print(resp.choices[0].message.content)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
javascript
#!/usr/bin/env node
"use strict";

const fs = require("fs");
const path = require("path");
const OpenAI = require("openai");

// Root URL of the OpenAI-compatible API passed to the client as baseURL.
const BASE_URL = "https://api.agentsflare.com/v1";
const API_KEY = process.env.OPENAI_API_KEY || ""; // Recommended to use environment variables
// Model name sent with the chat completion request.
const MODEL = "gpt-5-nano";

/**
 * Read a file and encode it as a base64 data URL.
 *
 * The MIME type is inferred from the extension (.png, .webp, .gif); any
 * other extension falls back to image/jpeg.
 *
 * @param {string} filePath - Path to the image file.
 * @returns {string} A `data:<mime>;base64,<payload>` URL.
 * @throws {Error} If filePath does not refer to an existing regular file.
 */
function fileToDataUrl(filePath) {
  // fs.existsSync() is also true for directories, which would only fail later
  // inside readFileSync() with EISDIR; require a regular file up front.
  let stats = null;
  try {
    stats = fs.statSync(filePath);
  } catch {
    stats = null;
  }
  if (!stats || !stats.isFile()) {
    throw new Error(`File not found: ${filePath}`);
  }

  // Simple mime inference (can also use `mime-types` package for more completeness)
  const mimeByExt = {
    ".png": "image/png",
    ".webp": "image/webp",
    ".gif": "image/gif",
  };
  const ext = path.extname(filePath).toLowerCase();
  const mime = mimeByExt[ext] ?? "image/jpeg";

  const b64 = fs.readFileSync(filePath).toString("base64");
  return `data:${mime};base64,${b64}`;
}

/**
 * CLI entry point: encode the image given as the first argument, send it to
 * the chat completions API with a fixed analysis prompt, and print the reply.
 *
 * Exits with status 1 when the image path or the API key is missing.
 */
async function main() {
  const [, scriptPath, imagePath] = process.argv;
  if (!imagePath) {
    console.error(`Usage: ${scriptPath} /path/to/image.jpg`);
    process.exit(1);
  }

  if (!API_KEY) {
    console.error("Please set the OPENAI_API_KEY environment variable or fill in API_KEY in the code.");
    process.exit(1);
  }

  const dataUrl = fileToDataUrl(imagePath);

  const client = new OpenAI({ apiKey: API_KEY, baseURL: BASE_URL });

  // One user message carrying both the instruction text and the image.
  const prompt =
    "Please analyze this image in detail, describe what you see, the scene, the subject, possible text information, and any noteworthy details.";
  const userMessage = {
    role: "user",
    content: [
      { type: "text", text: prompt },
      { type: "image_url", image_url: { url: dataUrl } },
    ],
  };

  const resp = await client.chat.completions.create({
    model: MODEL,
    messages: [userMessage],
  });

  // Print an empty line rather than "undefined" when no content came back.
  const reply = resp.choices?.[0]?.message?.content;
  console.log(reply ?? "");
}

// Start the CLI; log any rejected error and exit with a non-zero status.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
go
package main

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"mime"
	"net/http"
	"os"
	"path/filepath"
)

const (
	// BASE_URL is the root of the OpenAI-compatible API; main appends
	// "/chat/completions" to build the request URL.
	BASE_URL = "https://api.agentsflare.com/v1"
	// MODEL is the model name sent in the request body.
	MODEL    = "gpt-5-nano"
)

func fileToDataURL(path string) (string, error) {
	info, err := os.Stat(path)
	if err != nil || info.IsDir() {
		return "", fmt.Errorf("File not found: %s", path)
	}

	ext := filepath.Ext(path)
	m := mime.TypeByExtension(ext)
	if m == "" {
		// Fallback
		m = "image/jpeg"
	}

	b, err := os.ReadFile(path)
	if err != nil {
		return "", err
	}

	b64 := base64.StdEncoding.EncodeToString(b)
	return fmt.Sprintf("data:%s;base64,%s", m, b64), nil
}

// main sends the image named on the command line to the chat completions
// endpoint and prints the model's reply.
//
// It exits with status 1 on a missing argument, a missing OPENAI_API_KEY,
// an unreadable image, a request/transport failure, an unreadable response
// body, or a non-2xx HTTP status.
func main() {
	if len(os.Args) < 2 {
		fmt.Printf("Usage: %s /path/to/image.jpg\n", os.Args[0])
		os.Exit(1)
	}
	imagePath := os.Args[1]

	apiKey := os.Getenv("OPENAI_API_KEY")
	if apiKey == "" {
		fmt.Println("Please set the OPENAI_API_KEY environment variable.")
		os.Exit(1)
	}

	dataURL, err := fileToDataURL(imagePath)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	// Construct chat.completions request body
	reqBody := map[string]any{
		"model": MODEL,
		"messages": []any{
			map[string]any{
				"role": "user",
				"content": []any{
					map[string]any{
						"type": "text",
						"text": "Please analyze this image in detail, describe what you see, the scene, the subject, possible text information, and any noteworthy details.",
					},
					map[string]any{
						"type": "image_url",
						"image_url": map[string]any{
							"url": dataURL,
						},
					},
				},
			},
		},
	}

	bodyBytes, err := json.Marshal(reqBody)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	req, err := http.NewRequest("POST", BASE_URL+"/chat/completions", bytes.NewReader(bodyBytes))
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	req.Header.Set("Authorization", "Bearer "+apiKey)
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	defer resp.Body.Close()

	respBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		// A partially read body must not be parsed or printed as if complete.
		fmt.Println(err)
		os.Exit(1)
	}
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		fmt.Printf("HTTP %d\n%s\n", resp.StatusCode, string(respBytes))
		os.Exit(1)
	}

	// Parse choices[0].message.content
	var out struct {
		Choices []struct {
			Message struct {
				Content any `json:"content"`
			} `json:"message"`
		} `json:"choices"`
	}
	if err := json.Unmarshal(respBytes, &out); err != nil {
		// If parsing fails, print the original text directly
		fmt.Println(string(respBytes))
		return
	}

	if len(out.Choices) == 0 {
		fmt.Println("")
		return
	}

	// content may return string in some gateways, or array structure; try to be compatible here
	switch v := out.Choices[0].Message.Content.(type) {
	case string:
		fmt.Println(v)
	default:
		// v was produced by json.Unmarshal, so the marshal error is ignored here.
		pretty, _ := json.MarshalIndent(v, "", "  ")
		fmt.Println(string(pretty))
	}
}

Note: The size of a single image should not exceed 20 MB; otherwise, the model will return an error.

This documentation is licensed under CC BY-SA 4.0.