Demo Branch #111

Draft: wants to merge 11 commits into main

@@ -70,6 +70,8 @@
7BBE7EDE2B03718E0096A693 /* ChatFunctionCallProvider.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7BBE7EDD2B03718E0096A693 /* ChatFunctionCallProvider.swift */; };
7BBE7EE02B0372550096A693 /* ChatFunctionCallDemoView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7BBE7EDF2B0372550096A693 /* ChatFunctionCallDemoView.swift */; };
7BE802592D2878170080E06A /* ChatPredictedOutputDemoView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7BE802582D2878170080E06A /* ChatPredictedOutputDemoView.swift */; };
7BE802B02D3CD1A60080E06A /* RealTimeAPIDemoView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7BE802AF2D3CD1A60080E06A /* RealTimeAPIDemoView.swift */; };
7BE802B22D3CD4600080E06A /* RealTimeAPIViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7BE802B12D3CD4600080E06A /* RealTimeAPIViewModel.swift */; };
7BE9A5AF2B0B33E600CE8103 /* SwiftOpenAIExampleTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7BA788DE2AE23A49008825D5 /* SwiftOpenAIExampleTests.swift */; };
/* End PBXBuildFile section */

@@ -158,6 +160,8 @@
7BBE7EDD2B03718E0096A693 /* ChatFunctionCallProvider.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatFunctionCallProvider.swift; sourceTree = "<group>"; };
7BBE7EDF2B0372550096A693 /* ChatFunctionCallDemoView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatFunctionCallDemoView.swift; sourceTree = "<group>"; };
7BE802582D2878170080E06A /* ChatPredictedOutputDemoView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatPredictedOutputDemoView.swift; sourceTree = "<group>"; };
7BE802AF2D3CD1A60080E06A /* RealTimeAPIDemoView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RealTimeAPIDemoView.swift; sourceTree = "<group>"; };
7BE802B12D3CD4600080E06A /* RealTimeAPIViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RealTimeAPIViewModel.swift; sourceTree = "<group>"; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
@@ -380,6 +384,7 @@
isa = PBXGroup;
children = (
7BA788CC2AE23A48008825D5 /* SwiftOpenAIExampleApp.swift */,
7BE802AE2D3CD15B0080E06A /* RealTimeAPIDemo */,
7BE802572D2877D30080E06A /* PredictedOutputsDemo */,
7B50DD292C2A9D1D0070A64D /* LocalChatDemo */,
7B99C2E52C0718CD00E701B3 /* Files */,
@@ -491,6 +496,15 @@
path = PredictedOutputsDemo;
sourceTree = "<group>";
};
7BE802AE2D3CD15B0080E06A /* RealTimeAPIDemo */ = {
isa = PBXGroup;
children = (
7BE802AF2D3CD1A60080E06A /* RealTimeAPIDemoView.swift */,
7BE802B12D3CD4600080E06A /* RealTimeAPIViewModel.swift */,
);
path = RealTimeAPIDemo;
sourceTree = "<group>";
};
/* End PBXGroup section */

/* Begin PBXNativeTarget section */
@@ -631,6 +645,7 @@
buildActionMask = 2147483647;
files = (
7BBE7EA92B02E8E50096A693 /* ChatMessageView.swift in Sources */,
7BE802B02D3CD1A60080E06A /* RealTimeAPIDemoView.swift in Sources */,
7BE802592D2878170080E06A /* ChatPredictedOutputDemoView.swift in Sources */,
7B7239AE2AF9FF0000646679 /* ChatFunctionsCallStreamProvider.swift in Sources */,
7B436BA12AE25958003CE281 /* ChatProvider.swift in Sources */,
@@ -644,6 +659,7 @@
7B7239A22AF6260D00646679 /* ChatDisplayMessage.swift in Sources */,
0DF957862BB543F100DD2013 /* AIProxyIntroView.swift in Sources */,
7B1268052B08246400400694 /* AssistantConfigurationDemoView.swift in Sources */,
7BE802B22D3CD4600080E06A /* RealTimeAPIViewModel.swift in Sources */,
7B436BB72AE7A2EA003CE281 /* ImagesProvider.swift in Sources */,
7B436B962AE24A04003CE281 /* OptionsListView.swift in Sources */,
7BBE7EDE2B03718E0096A693 /* ChatFunctionCallProvider.swift in Sources */,
@@ -35,57 +35,59 @@ struct OptionsListView: View {
case chatStructuredOutput = "Chat Structured Output"
case chatStructuredOutputTool = "Chat Structured Output Tools"
case configureAssistant = "Configure Assistant"
case realTimeAPI = "Real time API"

var id: String { rawValue }
}

var body: some View {
List(options, id: \.self, selection: $selection) { option in
Text(option.rawValue)
.sheet(item: $selection) { selection in
VStack {
Text(selection.rawValue)
.font(.largeTitle)
.padding()
switch selection {
case .audio:
AudioDemoView(service: openAIService)
case .chat:
ChatDemoView(service: openAIService)
case .chatPredictedOutput:
ChatPredictedOutputDemoView(service: openAIService)
case .vision:
ChatVisionDemoView(service: openAIService)
case .embeddings:
EmbeddingsDemoView(service: openAIService)
case .fineTuning:
FineTuningJobDemoView(service: openAIService)
case .files:
FilesDemoView(service: openAIService)
case .images:
ImagesDemoView(service: openAIService)
case .localChat:
LocalChatDemoView(service: openAIService)
case .models:
ModelsDemoView(service: openAIService)
case .moderations:
ModerationDemoView(service: openAIService)
case .chatHistoryConversation:
ChatStreamFluidConversationDemoView(service: openAIService)
case .chatFunctionCall:
ChatFunctionCallDemoView(service: openAIService)
case .chatFunctionsCallStream:
ChatFunctionsCalllStreamDemoView(service: openAIService)
case .chatStructuredOutput:
ChatStructuredOutputDemoView(service: openAIService)
case .chatStructuredOutputTool:
ChatStructureOutputToolDemoView(service: openAIService)
case .configureAssistant:
AssistantConfigurationDemoView(service: openAIService)
}
}
}
.sheet(item: $selection) { selection in
VStack {
Text(selection.rawValue)
.font(.largeTitle)
.padding()
switch selection {
case .audio:
AudioDemoView(service: openAIService)
case .chat:
ChatDemoView(service: openAIService)
case .chatPredictedOutput:
ChatPredictedOutputDemoView(service: openAIService)
case .vision:
ChatVisionDemoView(service: openAIService)
case .embeddings:
EmbeddingsDemoView(service: openAIService)
case .fineTuning:
FineTuningJobDemoView(service: openAIService)
case .files:
FilesDemoView(service: openAIService)
case .images:
ImagesDemoView(service: openAIService)
case .localChat:
LocalChatDemoView(service: openAIService)
case .models:
ModelsDemoView(service: openAIService)
case .moderations:
ModerationDemoView(service: openAIService)
case .chatHistoryConversation:
ChatStreamFluidConversationDemoView(service: openAIService)
case .chatFunctionCall:
ChatFunctionCallDemoView(service: openAIService)
case .chatFunctionsCallStream:
ChatFunctionsCalllStreamDemoView(service: openAIService)
case .chatStructuredOutput:
ChatStructuredOutputDemoView(service: openAIService)
case .chatStructuredOutputTool:
ChatStructureOutputToolDemoView(service: openAIService)
case .configureAssistant:
AssistantConfigurationDemoView(service: openAIService)
case .realTimeAPI:
RealTimeAPIDemoView(service: openAIService)
}
}
}
}
}

@@ -0,0 +1,104 @@
//
// RealTimeAPIDemoView.swift
// SwiftOpenAIExample
//
// Created by James Rochabrun on 1/18/25.
//

import SwiftUI
import AVFoundation
import SwiftOpenAI

struct RealTimeAPIDemoView: View {

@State private var realTimeAPIViewModel: RealTimeAPIViewModel
@State private var microphonePermission: AVAudioSession.RecordPermission

init(service: OpenAIService) {
realTimeAPIViewModel = .init(service: service)
// TODO: Update this with latest API.
_microphonePermission = State(initialValue: AVAudioSession.sharedInstance().recordPermission)
}

var body: some View {
Group {
switch microphonePermission {
case .undetermined:
requestPermissionButton
case .denied:
deniedPermissionView
case .granted:
actionButtons
default:
Text("Unknown permission state")
}
}
.onAppear {
updateMicrophonePermission()
}
}

private var actionButtons: some View {
VStack(spacing: 40) {
startSessionButton
endSessionButton
}
}

private var startSessionButton: some View {
Button {
Task {
await realTimeAPIViewModel.testOpenAIRealtime()
}
} label: {
Label("Start session", systemImage: "microphone")
}
}

public var endSessionButton: some View {
Button {
Task {
await realTimeAPIViewModel.disconnect()
}
} label: {
Label("Stop session", systemImage: "stop")
}
}

private var requestPermissionButton: some View {
Button {
requestMicrophonePermission()
} label: {
Label("Allow microphone access", systemImage: "mic.slash")
}
}

private var deniedPermissionView: some View {
VStack(spacing: 12) {
Image(systemName: "mic.slash.circle")
.font(.largeTitle)
.foregroundColor(.red)

Text("Microphone access is required")
.font(.headline)

Button("Open Settings") {
if let settingsUrl = URL(string: UIApplication.openSettingsURLString) {
UIApplication.shared.open(settingsUrl)
}
}
}
}

private func updateMicrophonePermission() {
microphonePermission = AVAudioSession.sharedInstance().recordPermission
}

private func requestMicrophonePermission() {
AVAudioSession.sharedInstance().requestRecordPermission { granted in
DispatchQueue.main.async {
microphonePermission = granted ? .granted : .denied
}
}
}
}
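
Side note on the TODO in the initializer above: AVAudioSession.recordPermission is deprecated, and the replacement is presumably AVAudioApplication (iOS 17+). A minimal sketch of that variant, assuming the deployment target allows it; the helper names are hypothetical and not part of this PR:

import AVFoundation

// Hypothetical iOS 17+ permission helpers sketching the TODO above.
// Replaces AVAudioSession.sharedInstance().recordPermission.
func currentMicrophonePermission() -> AVAudioApplication.recordPermission {
    AVAudioApplication.shared.recordPermission
}

// Replaces AVAudioSession.sharedInstance().requestRecordPermission(_:).
func askForMicrophonePermission() async -> Bool {
    await AVAudioApplication.requestRecordPermission()
}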
@@ -0,0 +1,101 @@
//
// RealTimeAPIViewModel.swift
// SwiftOpenAI
//
// Created by James Rochabrun on 1/18/25.
//

import AVFoundation
import Foundation
import SwiftOpenAI

Owner Author:
RealTimeAPIViewModel and RealTimeAPIDemoView are how I try to test this. All the code has been copied from the demo branch.

@Observable
final class RealTimeAPIViewModel {

let service: OpenAIService

init(service: OpenAIService) {
self.service = service
}

var kMicrophoneSampleVendor: MicrophonePCMSampleVendor?
var kRealtimeSession: OpenAIRealtimeSession?

@RealtimeActor
func disconnect() {
kRealtimeSession?.disconnect()
}

@RealtimeActor
Owner Author:
@lzell, do you mind taking a look, in case you can tell off the top of your head why my web socket gets disconnected? I'm a bit lost on this one :/

Contributor:
Isn't that fix amazing :)

func testOpenAIRealtime() async {
let sessionConfiguration = OpenAIRealtimeSessionUpdate.SessionConfiguration(
inputAudioFormat: "pcm16",
inputAudioTranscription: .init(model: "whisper-1"),
instructions: "You are tour guide for Monument Valley, Utah",
maxResponseOutputTokens: .int(4096),
modalities: ["audio", "text"],
outputAudioFormat: "pcm16",
temperature: 0.7,
turnDetection: .init(prefixPaddingMs: 200, silenceDurationMs: 500, threshold: 0.5),
voice: "shimmer"
)

let microphoneSampleVendor = MicrophonePCMSampleVendor()
let audioStream: AsyncStream<AVAudioPCMBuffer>
do {
audioStream = try microphoneSampleVendor.start(useVoiceProcessing: true)
} catch {
fatalError("Could not start audio stream: \(error.localizedDescription)")
}

let realtimeSession: OpenAIRealtimeSession
do {
realtimeSession = try await service.realTimeSession(
sessionConfiguration: sessionConfiguration
)
} catch {
fatalError("Could not create an OpenAI realtime session")
}

var isOpenAIReadyForAudio = false
Task {
for await buffer in audioStream {
if isOpenAIReadyForAudio, let base64Audio = AudioUtils.base64EncodeAudioPCMBuffer(from: buffer) {
try await realtimeSession.sendMessage(
OpenAIRealtimeInputAudioBufferAppend(audio: base64Audio)
)
}
}
print("Done streaming microphone audio")
}

Task {
do {
print("Sending response create")
try await realtimeSession.sendMessage(OpenAIRealtimeResponseCreate())
} catch {
print("Could not send the session configuration instructions")
}
}

Task {
for await message in realtimeSession.receiver {
switch message {
case .sessionUpdated:
isOpenAIReadyForAudio = true
case .responseAudioDelta(let base64Audio):
InternalAudioPlayer.playPCM16Audio(from: base64Audio)
default:
break
}
}
print("Done listening for messages from OpenAI")
}

// Some time later
// microphoneSampleVendor.stop()

kMicrophoneSampleVendor = microphoneSampleVendor
kRealtimeSession = realtimeSession
}
}
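
On the disconnect question in the comment thread above: one common cause of realtime web sockets dropping is an idle connection timing out, which a periodic ping can mitigate. A minimal keep-alive sketch over URLSessionWebSocketTask, offered as a hypothetical helper rather than the fix referenced in this PR:

import Foundation

// Hypothetical keep-alive helper: pings the web socket every `interval` seconds
// and stops rescheduling once a ping fails (the socket is likely closed by then).
func keepAlive(_ task: URLSessionWebSocketTask, every interval: TimeInterval = 20) {
    task.sendPing { error in
        if let error {
            print("Keep-alive ping failed: \(error.localizedDescription)")
            return
        }
        DispatchQueue.global().asyncAfter(deadline: .now() + interval) {
            keepAlive(task, every: interval)
        }
    }
}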
19 changes: 19 additions & 0 deletions Sources/OpenAI/AIProxy/AIProxyService.swift
@@ -823,6 +823,25 @@ struct AIProxyService: OpenAIService
let request = try await OpenAIAPI.vectorStoreFileBatch(.list(vectorStoreID: vectorStoreID, batchID: batchID)).request(aiproxyPartialKey: partialKey, clientID: clientID, organizationID: organizationID, openAIEnvironment: openAIEnvironment, method: .get, queryItems: queryItems, betaHeaderField: Self.assistantsBetaV2)
return try await fetch(debugEnabled: debugEnabled, type: OpenAIResponse<VectorStoreFileObject>.self, with: request)
}

func realTimeSession(
sessionConfiguration: OpenAIRealtimeSessionUpdate.SessionConfiguration)
async throws -> OpenAIRealtimeSession
{

let request = try await OpenAIAPI.realTime(.realtime).request(
aiproxyPartialKey: partialKey,
clientID: clientID,
organizationID: organizationID,
openAIEnvironment: openAIEnvironment,
method: .get,
queryItems: [.init(name: "model", value: "gpt-4o-mini-realtime-preview-2024-12-17")],
betaHeaderField: "realtime=v1")
return await OpenAIRealtimeSession(
webSocketTask: self.session.webSocketTask(with: request),
sessionConfiguration: sessionConfiguration
)
}
}

