GPT Model Image Recognition

OpenAI provides multiple models that support image recognition. This guide uses gpt-5-nano as an example.

Basic Configuration

Before using the API, make sure you have obtained an API Key. If you do not have one yet, please refer to Create API Key.

Basic Information

API Base URL: https://api.agentsflare.com/v1 (chat completions endpoint: https://api.agentsflare.com/v1/chat/completions)
Authentication Method: Bearer Token
Content Type: application/json

Request Example

Image Recognition

Python | JavaScript | Go

python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import base64
import mimetypes
import os
import sys

from openai import OpenAI

# Root of the OpenAI-compatible API (without the /chat/completions suffix).
BASE_URL = "https://api.agentsflare.com/v1"
# Read the key from the OPENAI_API_KEY environment variable so it never has to
# be written into source code; an empty string means "not configured".
API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Model name sent with the chat completion request.
MODEL = "gpt-5-nano"

def file_to_data_url(path: str) -> str:
    """Return the contents of the file at *path* as a base64 ``data:`` URL.

    The MIME type is guessed from the file name; when it cannot be guessed,
    ``image/jpeg`` is used.

    Raises:
        FileNotFoundError: If *path* does not refer to an existing regular file.
    """
    if os.path.isfile(path):
        guessed = mimetypes.guess_type(path)[0]
        mime = "image/jpeg" if guessed is None else guessed

        with open(path, "rb") as image_file:
            raw = image_file.read()
        b64 = base64.b64encode(raw).decode("utf-8")

        return f"data:{mime};base64,{b64}"

    raise FileNotFoundError(f"File not found: {path}")

def main() -> None:
    """Send the image named on the command line to the model and print the reply.

    The image path is taken from ``sys.argv[1]``. The file is embedded as a
    base64 data URL in a single user message together with a text prompt, and
    the content of the first returned choice is printed to stdout.

    Diagnostics are written to stderr and the process exits with status 1 when
    the image path argument is missing, no API key is configured, or the image
    file does not exist.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} /path/to/image.jpg", file=sys.stderr)
        sys.exit(1)

    # Fall back to the environment so the key does not have to be hard-coded.
    api_key = API_KEY or os.environ.get("OPENAI_API_KEY", "")
    if not api_key:
        print(
            "Please set the OPENAI_API_KEY environment variable or fill in API_KEY in the code.",
            file=sys.stderr,
        )
        sys.exit(1)

    image_path = sys.argv[1]
    try:
        data_url = file_to_data_url(image_path)
    except FileNotFoundError as exc:
        # Report a missing file as a short message instead of a traceback.
        print(exc, file=sys.stderr)
        sys.exit(1)

    client = OpenAI(
        api_key=api_key,
        base_url=BASE_URL,
    )

    resp = client.chat.completions.create(
        model=MODEL,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Please analyze this image in detail, describe what you see, the scene, the subject, possible text information, and any noteworthy details."},
                    {"type": "image_url", "image_url": {"url": data_url}},
                ],
            }
        ],
    )

    print(resp.choices[0].message.content)


if __name__ == "__main__":
    main()

javascript

#!/usr/bin/env node
"use strict";

const fs = require("fs");
const path = require("path");
const OpenAI = require("openai");

// Root of the OpenAI-compatible API; passed to the SDK client as `baseURL`.
const BASE_URL = "https://api.agentsflare.com/v1";
// API key read from OPENAI_API_KEY; falls back to an empty string when unset.
const API_KEY = process.env.OPENAI_API_KEY || ""; // Recommended to use environment variables
// Model name sent with the chat completion request.
const MODEL = "gpt-5-nano";

/**
 * Read a file and encode it as a base64 `data:` URL.
 *
 * The MIME type is inferred from the (case-insensitive) file extension:
 * `.png`, `.webp` and `.gif` are recognised; anything else is treated as
 * `image/jpeg`.
 *
 * @param {string} filePath - Path to the image file.
 * @returns {string} A string of the form `data:<mime>;base64,<payload>`.
 * @throws {Error} "File not found: <path>" when the path does not exist or
 *   is not a regular file (for example, a directory).
 */
function fileToDataUrl(filePath) {
  // fs.existsSync() is also true for directories, which would then fail in
  // readFileSync() with EISDIR; require a regular file up front instead.
  let stats = null;
  try {
    stats = fs.statSync(filePath);
  } catch (err) {
    stats = null;
  }
  if (!stats || !stats.isFile()) {
    throw new Error(`File not found: ${filePath}`);
  }

  // Simple mime inference (can also use `mime-types` package for more completeness)
  const mimeByExt = {
    ".png": "image/png",
    ".webp": "image/webp",
    ".gif": "image/gif",
  };
  const ext = path.extname(filePath).toLowerCase();
  const mime = mimeByExt[ext] || "image/jpeg";

  const b64 = fs.readFileSync(filePath).toString("base64");
  return `data:${mime};base64,${b64}`;
}

/**
 * Command-line entry point: send the image named by the first CLI argument
 * to the model and print the content of the first returned choice.
 *
 * Exits with status 1 (after writing a message to stderr) when the image
 * path argument is missing or no API key is configured.
 */
async function main() {
  const [, , imagePath] = process.argv;

  if (!imagePath) {
    console.error(`Usage: ${process.argv[1]} /path/to/image.jpg`);
    process.exit(1);
  }

  if (!API_KEY) {
    console.error("Please set the OPENAI_API_KEY environment variable or fill in API_KEY in the code.");
    process.exit(1);
  }

  // One user message carrying the text prompt followed by the inlined image.
  const textPart = {
    type: "text",
    text:
      "Please analyze this image in detail, describe what you see, the scene, the subject, possible text information, and any noteworthy details.",
  };
  const imagePart = {
    type: "image_url",
    image_url: { url: fileToDataUrl(imagePath) },
  };
  const userMessage = { role: "user", content: [textPart, imagePart] };

  const client = new OpenAI({ apiKey: API_KEY, baseURL: BASE_URL });
  const completion = await client.chat.completions.create({
    model: MODEL,
    messages: [userMessage],
  });

  // Print an empty line rather than failing when no content came back.
  const firstChoice = completion.choices?.[0];
  console.log(firstChoice?.message?.content ?? "");
}

// Any rejection from main() is logged and turned into a non-zero exit status.
main().catch((error) => {
  console.error(error);
  process.exit(1);
});

package main

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"mime"
	"net/http"
	"os"
	"path/filepath"
)

const (
	// BASE_URL is the API root; the request URL is formed by appending
	// "/chat/completions" to it.
	BASE_URL = "https://api.agentsflare.com/v1"
	// MODEL is the model name sent in the request body.
	MODEL    = "gpt-5-nano"
)

// fileToDataURL reads the file at path and returns it as a base64 "data:" URL.
//
// The media type is looked up from the file extension and defaults to
// "image/jpeg" when the extension is unknown. A path that cannot be stat'ed
// or that names a directory yields a "File not found" error; a read failure
// is returned unchanged.
func fileToDataURL(path string) (string, error) {
	if stat, statErr := os.Stat(path); statErr != nil || stat.IsDir() {
		return "", fmt.Errorf("File not found: %s", path)
	}

	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		return "", readErr
	}

	mediaType := mime.TypeByExtension(filepath.Ext(path))
	if mediaType == "" {
		// Fallback
		mediaType = "image/jpeg"
	}

	encoded := base64.StdEncoding.EncodeToString(raw)
	return fmt.Sprintf("data:%s;base64,%s", mediaType, encoded), nil
}

// main reads the image path from the first command-line argument and the API
// key from the OPENAI_API_KEY environment variable, asks the model to
// describe the image, and prints the reply to stdout.
//
// Diagnostics are written to stderr and the process exits with status 1 when
// the argument or key is missing or the request fails.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintf(os.Stderr, "Usage: %s /path/to/image.jpg\n", os.Args[0])
		os.Exit(1)
	}
	imagePath := os.Args[1]

	apiKey := os.Getenv("OPENAI_API_KEY")
	if apiKey == "" {
		fmt.Fprintln(os.Stderr, "Please set the OPENAI_API_KEY environment variable.")
		os.Exit(1)
	}

	// Everything that registers deferred cleanup lives in describeImage, so
	// the os.Exit below cannot skip closing the response body.
	answer, err := describeImage(imagePath, apiKey)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println(answer)
}

// describeImage posts the image at imagePath to the chat completions endpoint
// and returns the text to print for the first choice. A non-2xx status is
// returned as an error containing the status code and the response body.
func describeImage(imagePath, apiKey string) (string, error) {
	dataURL, err := fileToDataURL(imagePath)
	if err != nil {
		return "", err
	}

	// Construct chat.completions request body
	reqBody := map[string]any{
		"model": MODEL,
		"messages": []any{
			map[string]any{
				"role": "user",
				"content": []any{
					map[string]any{
						"type": "text",
						"text": "Please analyze this image in detail, describe what you see, the scene, the subject, possible text information, and any noteworthy details.",
					},
					map[string]any{
						"type": "image_url",
						"image_url": map[string]any{
							"url": dataURL,
						},
					},
				},
			},
		},
	}

	bodyBytes, err := json.Marshal(reqBody)
	if err != nil {
		return "", fmt.Errorf("encode request body: %w", err)
	}

	req, err := http.NewRequest("POST", BASE_URL+"/chat/completions", bytes.NewReader(bodyBytes))
	if err != nil {
		return "", err
	}
	req.Header.Set("Authorization", "Bearer "+apiKey)
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	respBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("read response body: %w", err)
	}
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return "", fmt.Errorf("HTTP %d\n%s", resp.StatusCode, string(respBytes))
	}

	return extractContent(respBytes), nil
}

// extractContent returns choices[0].message.content from a chat completions
// response body. If the body is not valid JSON it is returned verbatim; if
// there are no choices the result is the empty string.
func extractContent(respBytes []byte) string {
	// Parse choices[0].message.content
	var out struct {
		Choices []struct {
			Message struct {
				Content any `json:"content"`
			} `json:"message"`
		} `json:"choices"`
	}
	if err := json.Unmarshal(respBytes, &out); err != nil {
		// If parsing fails, fall back to the original text
		return string(respBytes)
	}

	if len(out.Choices) == 0 {
		return ""
	}

	// content may return string in some gateways, or array structure; try to be compatible here
	switch v := out.Choices[0].Message.Content.(type) {
	case string:
		return v
	default:
		pretty, err := json.MarshalIndent(v, "", "  ")
		if err != nil {
			return fmt.Sprint(v)
		}
		return string(pretty)
	}
}

Note: The size of a single image should not exceed 20 MB; otherwise, the model will return an error.

GPT Model Image Recognition

Basic Configuration

Basic Information

Request Example

Image Recognition

GPT Model Image Recognition

Basic Configuration

Basic Information

Request Example

Image Recognition