Streaming Responses
Stream responses for a better user experience in chat applications.
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.applerouter.ai/v1"
)

stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Write a short story"}],
    stream=True
)

for chunk in stream:
    # Some chunks (e.g. the final one) may have no choices or an empty delta
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
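If you also need the complete text afterward (for logging or storage), accumulate the deltas as they arrive. A minimal sketch, reusing the client above:

stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Write a short story"}],
    stream=True
)

collected = []
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        collected.append(chunk.choices[0].delta.content)
full_text = "".join(collected)  # the complete streamed response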
Function Calling (Tool Use)
Let the model call functions to interact with external systems.
import json
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.applerouter.ai/v1"
)

# Define available tools
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city name, e.g. San Francisco"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"]
                    }
                },
                "required": ["location"]
            }
        }
    }
]

# Initial request
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
    tools=tools,
    tool_choice="auto"
)

# Check if the model wants to call a function
message = response.choices[0].message
if message.tool_calls:
    tool_call = message.tool_calls[0]
    function_name = tool_call.function.name
    arguments = json.loads(tool_call.function.arguments)

    # Call your actual function here
    weather_result = {"temperature": 22, "condition": "sunny"}

    # Send the result back, including the assistant's tool-call message
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "user", "content": "What's the weather in Tokyo?"},
            message,
            {
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps(weather_result)
            }
        ],
        tools=tools
    )

print(response.choices[0].message.content)
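A response may contain more than one tool call, so production code usually loops over message.tool_calls and appends one tool message per call. A minimal sketch, where available_functions and the get_weather stub are illustrative stand-ins for your own implementations:

def get_weather(location, unit="celsius"):
    # Stub: replace with a real weather lookup
    return {"temperature": 22, "condition": "sunny"}

available_functions = {"get_weather": get_weather}

if message.tool_calls:
    followup = [{"role": "user", "content": "What's the weather in Tokyo?"}, message]
    for tool_call in message.tool_calls:
        fn = available_functions[tool_call.function.name]
        result = fn(**json.loads(tool_call.function.arguments))
        followup.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps(result)
        })
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=followup,
        tools=tools
    )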
Vision: Analyzing Images
Send images to multimodal models for analysis.
import base64
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.applerouter.ai/v1"
)

# From URL
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {
                    "type": "image_url",
                    "image_url": {"url": "https://example.com/image.jpg"}
                }
            ]
        }
    ]
)

# From local file (base64)
with open("image.jpg", "rb") as f:
    image_data = base64.standard_b64encode(f.read()).decode("utf-8")

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image"},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}
                }
            ]
        }
    ]
)
print(response.choices[0].message.content)
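The OpenAI image payload also accepts an optional detail field that trades fidelity against token cost; whether it has any effect depends on the upstream model. A minimal sketch:

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://example.com/image.jpg",
                        "detail": "low"  # "low", "high", or "auto" (default)
                    }
                }
            ]
        }
    ]
)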
Structured Output (JSON Mode)
Get structured JSON responses from the model.
import json
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.applerouter.ai/v1"
)

# Using response_format
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "system",
            "content": "Extract product info and return as JSON with fields: name, price, category"
        },
        {
            "role": "user",
            "content": "iPhone 15 Pro costs $999 and is a smartphone"
        }
    ],
    response_format={"type": "json_object"}
)

data = json.loads(response.choices[0].message.content)
print(data)
# {"name": "iPhone 15 Pro", "price": 999, "category": "smartphone"}
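json_object mode guarantees syntactically valid JSON but not a particular schema. If the upstream model supports OpenAI's structured outputs, the SDK's beta parse helper validates the response against a Pydantic model instead; the Product model here is illustrative:

from pydantic import BaseModel

class Product(BaseModel):
    name: str
    price: float
    category: str

completion = client.beta.chat.completions.parse(
    model="gpt-4o",
    messages=[
        {"role": "user", "content": "iPhone 15 Pro costs $999 and is a smartphone"}
    ],
    response_format=Product
)
product = completion.choices[0].message.parsed  # a validated Product instance
print(product.name, product.price, product.category)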
Retry with Exponential Backoff
Handle rate limits and transient errors gracefully.
import time
from openai import OpenAI, RateLimitError, APIError

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.applerouter.ai/v1"
)

def chat_with_retry(messages, max_retries=5):
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="gpt-4o",
                messages=messages
            )
            return response
        except RateLimitError:
            wait_time = 2 ** attempt  # 1, 2, 4, 8, 16 seconds
            print(f"Rate limited. Waiting {wait_time}s...")
            time.sleep(wait_time)
        except APIError:
            # Other API errors: brief pause, re-raise on the last attempt
            if attempt == max_retries - 1:
                raise
            time.sleep(1)
    raise Exception("Max retries exceeded")

# Usage
response = chat_with_retry([
    {"role": "user", "content": "Hello!"}
])
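When many clients hit a rate limit at once, pure exponential backoff makes them all retry in synchronized bursts; adding random jitter is a common refinement. A minimal sketch of the delay calculation, with illustrative base and cap values:

import random

def backoff_delay(attempt: int, base: float = 1.0, cap: float = 30.0) -> float:
    # "Full jitter": sleep a random amount up to the capped exponential delay
    return random.uniform(0, min(cap, base * 2 ** attempt))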
Parallel Batch Requests
Process multiple requests concurrently for better throughput.
import asyncio
from openai import AsyncOpenAI

client = AsyncOpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.applerouter.ai/v1"
)

async def process_message(message: str) -> str:
    response = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": message}]
    )
    return response.choices[0].message.content

async def main():
    messages = [
        "Translate 'hello' to French",
        "Translate 'hello' to Spanish",
        "Translate 'hello' to Japanese",
    ]
    # Process all messages concurrently
    results = await asyncio.gather(*[
        process_message(msg) for msg in messages
    ])
    for msg, result in zip(messages, results):
        print(f"{msg} -> {result}")

asyncio.run(main())
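With a large message list, an unbounded gather fires every request at once and can trip rate limits. A common refinement is capping in-flight requests with a semaphore; a minimal sketch reusing process_message from above:

async def process_all(messages: list[str], max_concurrent: int = 5) -> list[str]:
    semaphore = asyncio.Semaphore(max_concurrent)

    async def limited(msg: str) -> str:
        async with semaphore:  # at most max_concurrent requests in flight
            return await process_message(msg)

    return await asyncio.gather(*[limited(m) for m in messages])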
Conversation Memory
Maintain conversation history for multi-turn chats.
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.applerouter.ai/v1"
)

class Conversation:
    def __init__(self, system_prompt: str | None = None):
        self.messages = []
        if system_prompt:
            self.messages.append({"role": "system", "content": system_prompt})

    def chat(self, user_message: str) -> str:
        self.messages.append({"role": "user", "content": user_message})
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=self.messages
        )
        assistant_message = response.choices[0].message.content
        self.messages.append({"role": "assistant", "content": assistant_message})
        return assistant_message

# Usage
conv = Conversation("You are a helpful coding assistant.")
print(conv.chat("What is Python?"))
print(conv.chat("How do I install it?"))  # Remembers context
print(conv.chat("Write a hello world example"))
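The history grows with every turn and will eventually exceed the model's context window. A minimal trimming sketch that keeps the system prompt plus the most recent messages (a token-based budget would be more precise):

def trim_history(messages: list[dict], max_messages: int = 10) -> list[dict]:
    # Keep any system prompt, then only the most recent messages
    system = [m for m in messages if m["role"] == "system"]
    rest = [m for m in messages if m["role"] != "system"]
    return system + rest[-max_messages:]

Calling self.messages = trim_history(self.messages) at the start of chat keeps each request bounded.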
Model Fallback
Automatically fall back to alternative models on failure.
from openai import OpenAI, APIError

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.applerouter.ai/v1"
)

def chat_with_fallback(messages, models=None):
    if models is None:
        models = ["gpt-4o", "claude-sonnet-4-20250514", "gemini-2.0-flash"]
    last_error = None
    for model in models:
        try:
            response = client.chat.completions.create(
                model=model,
                messages=messages
            )
            return response, model
        except APIError as e:
            last_error = e
            print(f"Model {model} failed: {e}")
            continue
    raise last_error

# Usage
response, used_model = chat_with_fallback([
    {"role": "user", "content": "Hello!"}
])
print(f"Response from {used_model}: {response.choices[0].message.content}")
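Order the models list deliberately: most-capable-first optimizes for answer quality, cheapest-first for cost. Because every candidate is reached through the same OpenAI-compatible endpoint, the fallback logic itself stays the same either way.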