Hi Daria,
You’re observing this behavior because OpenAI and Azure OpenAI use the same models (e.g., gpt-4o-mini-realtime-preview) but have different deployment configurations, leading to differences in event behavior.
Below is example code that receives response.function_call_arguments.delta events using gpt-4o-mini-realtime-preview:
import asyncio
from openai import AsyncAzureOpenAI
from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider
async def main() -> None:
    """Print streamed function-call argument deltas from a Realtime session.

    Opens a Realtime connection for ``gpt-4o-mini-realtime-preview``,
    registers a single ``get_weather`` function tool on a text-only session,
    sends one hardcoded user message, requests a response, and prints each
    ``response.function_call_arguments.delta`` payload until the
    ``response.done`` event arrives.
    """
    # Authenticate using Entra ID
    credential = DefaultAzureCredential()
    try:
        token_provider = get_bearer_token_provider(
            credential, "https://cognitiveservices.azure.com/.default"
        )

        # Initialize Azure OpenAI Realtime client
        client = AsyncAzureOpenAI(
            azure_endpoint="<AI-FOUNDRY-ENDPOINT>",
            azure_ad_token_provider=token_provider,
            api_version="2024-10-01-preview",
        )

        # Connect to Realtime API session
        async with client.beta.realtime.connect(
            model="gpt-4o-mini-realtime-preview"
        ) as connection:
            # Configure session with function/tool definition
            await connection.session.update(
                session={
                    "modalities": ["text"],
                    "tools": [
                        {
                            "type": "function",
                            "name": "get_weather",
                            "description": "Get the current weather for a location",
                            "parameters": {
                                "type": "object",
                                "properties": {
                                    "location": {
                                        "type": "string",
                                        "description": "City name"
                                    },
                                    "unit": {
                                        "type": "string",
                                        "enum": ["celsius", "fahrenheit"]
                                    }
                                },
                                "required": ["location"]
                            }
                        }
                    ]
                }
            )

            # Send hardcoded user message that should trigger function call
            await connection.conversation.item.create(
                item={
                    "type": "message",
                    "role": "user",
                    "content": [
                        {"type": "input_text", "text": "What's the weather in San Francisco?"}
                    ],
                }
            )

            # Request response
            await connection.response.create()

            # Stream events - only print function_call_arguments.delta
            async for event in connection:
                if event.type == "response.function_call_arguments.delta":
                    print(event.delta, end="", flush=True)
                elif event.type == "response.done":
                    print("\n--- Done ---")
                    break
    finally:
        # Always release the credential, even when connecting or streaming
        # raises; the original only closed it on the success path.
        await credential.close()
# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())
Below is an example of the events returned by the server when the above script is executed:
SessionCreatedEvent(
event_id='event_001',
session=Session(
id='sess_001',
instructions='[SYSTEM_INSTRUCTIONS]',
model='gpt-4o-mini-realtime-preview-2024-12-17',
modalities=['audio', 'text'],
voice='alloy',
temperature=0.8,
tools=[]
),
type='session.created'
)
SessionUpdatedEvent(
event_id='event_002',
session=Session(
id='sess_001',
modalities=['text'],
tools=[
Tool(
name='get_weather',
description='Get the current weather for a location',
parameters={
'type': 'object',
'properties': {
'location': {'type': 'string', 'description': 'City name'},
'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}
},
'required': ['location']
}
)
]
),
type='session.updated'
)
ConversationItemCreatedEvent(
event_id='event_003',
item=ConversationItem(
id='item_001',
type='message',
role='user',
content=[ConversationItemContent(type='input_text', text="What's the weather in San Francisco?")],
status='completed'
),
type='conversation.item.created'
)
ResponseCreatedEvent(
event_id='event_004',
response=RealtimeResponse(
id='resp_001',
status='in_progress',
output=[]
),
type='response.created'
)
ResponseOutputItemAddedEvent(
event_id='event_005',
response_id='resp_001',
output_index=0,
item=ConversationItem(
id='item_002',
type='function_call',
name='get_weather',
call_id='call_001',
arguments='',
status='in_progress'
),
type='response.output_item.added'
)
ConversationItemCreatedEvent(
event_id='event_006',
previous_item_id='item_001',
item=ConversationItem(
id='item_002',
type='function_call',
name='get_weather',
call_id='call_001',
status='in_progress'
),
type='conversation.item.created'
)
ResponseFunctionCallArgumentsDeltaEvent(
event_id='event_007',
response_id='resp_001',
item_id='item_002',
output_index=0,
call_id='call_001',
delta='{"',
type='response.function_call_arguments.delta'
)
# [Additional delta events for: location, ":", San, Francisco, ", unit, ":", c, elsius, "}]
ResponseFunctionCallArgumentsDoneEvent(
event_id='event_008',
response_id='resp_001',
item_id='item_002',
output_index=0,
call_id='call_001',
name='get_weather',
arguments='{"location":"San Francisco","unit":"celsius"}',
type='response.function_call_arguments.done'
)
ResponseOutputItemDoneEvent(
event_id='event_009',
response_id='resp_001',
output_index=0,
item=ConversationItem(
id='item_002',
type='function_call',
name='get_weather',
call_id='call_001',
arguments='{"location":"San Francisco","unit":"celsius"}',
status='completed'
),
type='response.output_item.done'
)
ResponseDoneEvent(
event_id='event_010',
response=RealtimeResponse(
id='resp_001',
status='completed',
usage=RealtimeResponseUsage(
input_tokens=172,
output_tokens=21,
total_tokens=193
)
),
type='response.done'
)
Also note that:
The server response.text.delta event is returned when the model-generated text is updated.
The server response.function_call_arguments.delta event is returned when the model-generated function call arguments are updated.
Here is relevant documentation:
https://free.blessedness.top/en-us/azure/ai-foundry/openai/realtime-audio-reference#server-events
Feel free to accept this as an answer.
Thank you for reaching out to the Microsoft Q&A portal.