This cookbook contains production-ready code patterns for advanced use cases.

Streaming Responses

Stream responses for better user experience in chat applications.
Python
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.applerouter.ai/v1"
)

stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Write a short story"}],
    stream=True
)

for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
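
If you need the complete reply afterwards (for logging, or to append to conversation history), collect the chunks while printing. A minimal variant of the loop above:
Python
full_reply = []
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
        full_reply.append(delta)

full_text = "".join(full_reply)  # join once; repeated += would be quadratic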

Function Calling (Tool Use)

Let the model call functions to interact with external systems.
Python
import json
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.applerouter.ai/v1"
)

# Define available tools
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city name, e.g. San Francisco"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"]
                    }
                },
                "required": ["location"]
            }
        }
    }
]

# Initial request
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
    tools=tools,
    tool_choice="auto"
)

# Check if the model wants to call a function
message = response.choices[0].message
if message.tool_calls:
    tool_call = message.tool_calls[0]
    function_name = tool_call.function.name
    arguments = json.loads(tool_call.function.arguments)

    # Call your actual function here with function_name and arguments;
    # a mocked result stands in for the real lookup:
    weather_result = {"temperature": 22, "condition": "sunny"}

    # Send the result back
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "user", "content": "What's the weather in Tokyo?"},
            message,
            {
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps(weather_result)
            }
        ],
        tools=tools
    )
    print(response.choices[0].message.content)
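
A single turn can also contain several tool calls, and each one needs its own tool message in the follow-up request. A sketch of a dispatch loop, reusing client, tools, and json from above (get_weather here is a hypothetical stand-in for your real implementation):
Python
def get_weather(location: str, unit: str = "celsius") -> dict:
    # Hypothetical stand-in; replace with a real weather lookup
    return {"temperature": 22, "condition": "sunny", "unit": unit}

available_functions = {"get_weather": get_weather}

messages = [{"role": "user", "content": "What's the weather in Tokyo and Paris?"}]
response = client.chat.completions.create(
    model="gpt-4o", messages=messages, tools=tools
)
message = response.choices[0].message

if message.tool_calls:
    messages.append(message)  # the assistant turn that requested the calls
    for tool_call in message.tool_calls:
        func = available_functions[tool_call.function.name]
        result = func(**json.loads(tool_call.function.arguments))
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps(result)
        })
    response = client.chat.completions.create(
        model="gpt-4o", messages=messages, tools=tools
    )
    print(response.choices[0].message.content)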

Vision: Analyzing Images

Send images to multimodal models for analysis.
Python
import base64
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.applerouter.ai/v1"
)

# From URL
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {
                    "type": "image_url",
                    "image_url": {"url": "https://example.com/image.jpg"}
                }
            ]
        }
    ]
)

# From local file (base64)
with open("image.jpg", "rb") as f:
    image_data = base64.standard_b64encode(f.read()).decode("utf-8")

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image"},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}
                }
            ]
        }
    ]
)
print(response.choices[0].message.content)
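
The MIME type in the data URL should match the actual file format (image/png vs image/jpeg). A small helper using only the standard library; the optional detail field is part of the OpenAI image API, though support may vary by routed model:
Python
import base64
import mimetypes

def image_content(path: str, detail: str = "auto") -> dict:
    # Guess the MIME type from the extension, defaulting to JPEG
    mime, _ = mimetypes.guess_type(path)
    with open(path, "rb") as f:
        data = base64.standard_b64encode(f.read()).decode("utf-8")
    return {
        "type": "image_url",
        "image_url": {
            "url": f"data:{mime or 'image/jpeg'};base64,{data}",
            "detail": detail
        }
    }

# image_content("photo.png") can replace the hand-built dict above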

Structured Output (JSON Mode)

Get structured JSON responses from the model.
Python
import json
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.applerouter.ai/v1"
)

# Using response_format
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "system",
            "content": "Extract product info and return as JSON with fields: name, price, category"
        },
        {
            "role": "user",
            "content": "iPhone 15 Pro costs $999 and is a smartphone"
        }
    ],
    response_format={"type": "json_object"}
)

data = json.loads(response.choices[0].message.content)
print(data)
# {"name": "iPhone 15 Pro", "price": 999, "category": "smartphone"}

Retry with Exponential Backoff

Handle rate limits and transient errors gracefully.
Python
import time
from openai import OpenAI, RateLimitError, APIError

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.applerouter.ai/v1"
)

def chat_with_retry(messages, max_retries=5):
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="gpt-4o",
                messages=messages
            )
            return response
        except RateLimitError:
            if attempt == max_retries - 1:
                raise
            wait_time = 2 ** attempt  # 1, 2, 4, 8 seconds
            print(f"Rate limited. Waiting {wait_time}s...")
            time.sleep(wait_time)
        except APIError:
            if attempt == max_retries - 1:
                raise
            time.sleep(1)
    raise RuntimeError("Max retries exceeded")

# Usage
response = chat_with_retry([
    {"role": "user", "content": "Hello!"}
])
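
The SDK also retries certain failures (including rate limits) on its own. If that is enough for your use case, its built-in max_retries and timeout options replace the manual loop; both are standard client parameters:
Python
client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.applerouter.ai/v1",
    max_retries=5,   # SDK-level retries with backoff
    timeout=30.0     # per-request timeout in seconds
)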

Parallel Batch Requests

Process multiple requests concurrently for better throughput.
Python
import asyncio
from openai import AsyncOpenAI

client = AsyncOpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.applerouter.ai/v1"
)

async def process_message(message: str) -> str:
    response = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": message}]
    )
    return response.choices[0].message.content

async def main():
    messages = [
        "Translate 'hello' to French",
        "Translate 'hello' to Spanish",
        "Translate 'hello' to Japanese",
    ]

    # Process all messages concurrently
    results = await asyncio.gather(*[
        process_message(msg) for msg in messages
    ])

    for msg, result in zip(messages, results):
        print(f"{msg} -> {result}")

asyncio.run(main())
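
asyncio.gather fires every request at once, which can itself trip rate limits on large batches. A common refinement is to cap concurrency with a semaphore; a sketch building on process_message above:
Python
semaphore = asyncio.Semaphore(10)  # at most 10 requests in flight

async def process_with_limit(message: str) -> str:
    async with semaphore:
        return await process_message(message)

# In main(), gather process_with_limit(msg) instead of process_message(msg)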

Conversation Memory

Maintain conversation history for multi-turn chats.
Python
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.applerouter.ai/v1"
)

class Conversation:
    def __init__(self, system_prompt: str | None = None):
        self.messages = []
        if system_prompt:
            self.messages.append({"role": "system", "content": system_prompt})

    def chat(self, user_message: str) -> str:
        self.messages.append({"role": "user", "content": user_message})

        response = client.chat.completions.create(
            model="gpt-4o",
            messages=self.messages
        )

        assistant_message = response.choices[0].message.content
        self.messages.append({"role": "assistant", "content": assistant_message})

        return assistant_message

# Usage
conv = Conversation("You are a helpful coding assistant.")
print(conv.chat("What is Python?"))
print(conv.chat("How do I install it?"))  # Remembers context
print(conv.chat("Write a hello world example"))

Model Fallback

Automatically fall back to alternative models on failure.
Python
from openai import OpenAI, APIError

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.applerouter.ai/v1"
)

def chat_with_fallback(messages, models=None):
    if models is None:
        models = ["gpt-4o", "claude-sonnet-4-20250514", "gemini-2.0-flash"]

    last_error = None
    for model in models:
        try:
            response = client.chat.completions.create(
                model=model,
                messages=messages
            )
            return response, model
        except APIError as e:
            last_error = e
            print(f"Model {model} failed: {e}")
            continue

    raise last_error or RuntimeError("No models to try")

# Usage
response, used_model = chat_with_fallback([
    {"role": "user", "content": "Hello!"}
])
print(f"Response from {used_model}: {response.choices[0].message.content}")

Next Steps