-
Notifications
You must be signed in to change notification settings - Fork 89
Demo Branch #111
New issue
Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? # to your account
Draft
jamesrochabrun
wants to merge
11
commits into
main
Choose a base branch
from
jroch-lz-realtime-api
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Demo Branch #111
Changes from all commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
f70c968
Udpating shared items
jamesrochabrun 04fd51e
Adding support for OpenAIAPI endpoint
jamesrochabrun d0a2e8b
Adding example usage
jamesrochabrun c7aca64
Adding Audio buffer
jamesrochabrun 41b1958
Fixing errors
jamesrochabrun ac43175
Adding demo in list
jamesrochabrun 800343c
debugging
jamesrochabrun 021d848
fix for sheet navigation
jamesrochabrun 5eb50ba
Updated with main
jamesrochabrun dbee7ab
Adding more logs
jamesrochabrun d001f59
fix
jamesrochabrun File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
104 changes: 104 additions & 0 deletions
104
Examples/SwiftOpenAIExample/SwiftOpenAIExample/RealTimeAPIDemo/RealTimeAPIDemoView.swift
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
// | ||
// RealTimeAPIDemoView.swift | ||
// SwiftOpenAIExample | ||
// | ||
// Created by James Rochabrun on 1/18/25. | ||
// | ||
|
||
import SwiftUI | ||
import AVFoundation | ||
import SwiftOpenAI | ||
|
||
/// Demo screen for the OpenAI Realtime API.
///
/// Shows a different UI depending on the current microphone permission:
/// a button to request access, a "denied" explanation with a shortcut to Settings,
/// or the start/stop session controls once access has been granted.
struct RealTimeAPIDemoView: View {

  @State private var realTimeAPIViewModel: RealTimeAPIViewModel
  @State private var microphonePermission: AVAudioSession.RecordPermission

  /// - Parameter service: The service handed to the view model that drives the realtime session.
  init(service: OpenAIService) {
    realTimeAPIViewModel = .init(service: service)
    // TODO: Update this with latest API.
    // `AVAudioSession.recordPermission` is deprecated as of iOS 17 in favor of `AVAudioApplication`.
    _microphonePermission = State(initialValue: AVAudioSession.sharedInstance().recordPermission)
  }

  var body: some View {
    Group {
      switch microphonePermission {
      case .undetermined:
        requestPermissionButton
      case .denied:
        deniedPermissionView
      case .granted:
        actionButtons
      default:
        Text("Unknown permission state")
      }
    }
    .onAppear {
      // Re-read the permission every time the view appears so the UI reflects the current state.
      updateMicrophonePermission()
    }
  }

  /// Start/stop controls, shown once microphone access has been granted.
  private var actionButtons: some View {
    VStack(spacing: 40) {
      startSessionButton
      endSessionButton
    }
  }

  /// Kicks off a realtime session on the view model.
  private var startSessionButton: some View {
    Button {
      Task {
        await realTimeAPIViewModel.testOpenAIRealtime()
      }
    } label: {
      // "mic" (not "microphone") matches the `mic.*` symbols used elsewhere in this view
      // and resolves on OS versions that predate the newer "microphone" symbol name.
      Label("Start session", systemImage: "mic")
    }
  }

  /// Asks the view model to disconnect the current realtime session.
  ///
  /// The enclosing struct is internal, so an explicit `public` modifier here had no effect.
  var endSessionButton: some View {
    Button {
      Task {
        await realTimeAPIViewModel.disconnect()
      }
    } label: {
      Label("Stop session", systemImage: "stop")
    }
  }

  /// Shown while the permission is still undetermined; triggers the system prompt.
  private var requestPermissionButton: some View {
    Button {
      requestMicrophonePermission()
    } label: {
      Label("Allow microphone access", systemImage: "mic.slash")
    }
  }

  /// Shown when access was denied; offers a shortcut to the app's page in Settings.
  private var deniedPermissionView: some View {
    VStack(spacing: 12) {
      Image(systemName: "mic.slash.circle")
        .font(.largeTitle)
        .foregroundColor(.red)

      Text("Microphone access is required")
        .font(.headline)

      Button("Open Settings") {
        if let settingsUrl = URL(string: UIApplication.openSettingsURLString) {
          UIApplication.shared.open(settingsUrl)
        }
      }
    }
  }

  /// Refreshes `microphonePermission` from the shared audio session.
  private func updateMicrophonePermission() {
    microphonePermission = AVAudioSession.sharedInstance().recordPermission
  }

  /// Presents the system microphone prompt and stores the user's answer.
  private func requestMicrophonePermission() {
    AVAudioSession.sharedInstance().requestRecordPermission { granted in
      // Hop to the main queue before mutating view state from the completion handler.
      DispatchQueue.main.async {
        microphonePermission = granted ? .granted : .denied
      }
    }
  }
}
101 changes: 101 additions & 0 deletions
101
Examples/SwiftOpenAIExample/SwiftOpenAIExample/RealTimeAPIDemo/RealTimeAPIViewModel.swift
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
// | ||
// RealTimeAPIViewModel.swift | ||
// SwiftOpenAI | ||
// | ||
// Created by James Rochabrun on 1/18/25. | ||
// | ||
|
||
import AVFoundation | ||
import Foundation | ||
import SwiftOpenAI | ||
|
||
@Observable | ||
final class RealTimeAPIViewModel { | ||
|
||
let service: OpenAIService | ||
|
||
/// Creates a view model that uses `service` to open realtime sessions.
/// - Parameter service: Stored as-is in the `service` property.
init(service: OpenAIService) { | ||
self.service = service | ||
} | ||
|
||
var kMicrophoneSampleVendor: MicrophonePCMSampleVendor? | ||
var kRealtimeSession: OpenAIRealtimeSession? | ||
|
||
/// Ends the active realtime session, if any, and stops microphone capture.
///
/// Both stored references are cleared afterwards so a later call is a no-op
/// and the objects can be released.
@RealtimeActor
func disconnect() {
  // Stop capturing first so no further audio buffers are produced for a closing socket.
  // NOTE(review): relies on `MicrophonePCMSampleVendor.stop()` being callable here — confirm.
  kMicrophoneSampleVendor?.stop()
  kMicrophoneSampleVendor = nil

  kRealtimeSession?.disconnect()
  kRealtimeSession = nil
}
|
||
@RealtimeActor | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @lzell do you mind taking a look, in case off the top of your head you can tell why my web socket gets disconnected? I am a bit lost on this one :/ There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Isn't that fix amazing :) |
||
/// Starts a realtime voice session.
///
/// Starts microphone capture, opens a realtime session through `service`, then spawns
/// three tasks: one forwards microphone buffers to the session once it reports
/// `.sessionUpdated`, one sends the initial `OpenAIRealtimeResponseCreate`, and one
/// plays back the audio deltas received from the session.
///
/// Failures to start the microphone or to create the session are logged and abort the
/// start instead of crashing the app.
func testOpenAIRealtime() async {
  // Tear down anything left over from a previous start so repeated taps
  // don't leak a running microphone tap or an open socket.
  // NOTE(review): assumes `stop()` / `disconnect()` tolerate an already-stopped target — confirm.
  kMicrophoneSampleVendor?.stop()
  kMicrophoneSampleVendor = nil
  kRealtimeSession?.disconnect()
  kRealtimeSession = nil

  let sessionConfiguration = OpenAIRealtimeSessionUpdate.SessionConfiguration(
    inputAudioFormat: "pcm16",
    inputAudioTranscription: .init(model: "whisper-1"),
    instructions: "You are tour guide for Monument Valley, Utah",
    maxResponseOutputTokens: .int(4096),
    modalities: ["audio", "text"],
    outputAudioFormat: "pcm16",
    temperature: 0.7,
    turnDetection: .init(prefixPaddingMs: 200, silenceDurationMs: 500, threshold: 0.5),
    voice: "shimmer"
  )

  let microphoneSampleVendor = MicrophonePCMSampleVendor()
  let audioStream: AsyncStream<AVAudioPCMBuffer>
  do {
    audioStream = try microphoneSampleVendor.start(useVoiceProcessing: true)
  } catch {
    // Recoverable (e.g. the audio hardware is busy): report and bail out rather than crash.
    print("Could not start audio stream: \(error.localizedDescription)")
    return
  }

  let realtimeSession: OpenAIRealtimeSession
  do {
    realtimeSession = try await service.realTimeSession(
      sessionConfiguration: sessionConfiguration
    )
  } catch {
    // Recoverable (e.g. no network): release the microphone we just started, then bail out.
    microphoneSampleVendor.stop()
    print("Could not create an OpenAI realtime session: \(error.localizedDescription)")
    return
  }

  // Keep both objects reachable so `disconnect()` can find them later.
  kMicrophoneSampleVendor = microphoneSampleVendor
  kRealtimeSession = realtimeSession

  // Flipped by the receiver task below; audio is dropped until the session reports it was updated.
  var isOpenAIReadyForAudio = false

  Task {
    do {
      for await buffer in audioStream {
        if isOpenAIReadyForAudio, let base64Audio = AudioUtils.base64EncodeAudioPCMBuffer(from: buffer) {
          try await realtimeSession.sendMessage(
            OpenAIRealtimeInputAudioBufferAppend(audio: base64Audio)
          )
        }
      }
    } catch {
      // A send failure ends streaming, as before, but is now reported instead of silently discarded.
      print("Could not send microphone audio: \(error.localizedDescription)")
    }
    print("Done streaming microphone audio")
  }

  Task {
    do {
      print("Sending response create")
      try await realtimeSession.sendMessage(OpenAIRealtimeResponseCreate())
    } catch {
      print("Could not send the session configuration instructions")
    }
  }

  Task {
    for await message in realtimeSession.receiver {
      switch message {
      case .sessionUpdated:
        isOpenAIReadyForAudio = true
      case .responseAudioDelta(let base64Audio):
        InternalAudioPlayer.playPCM16Audio(from: base64Audio)
      default:
        break
      }
    }
    print("Done listening for messages from OpenAI")
  }
}
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
RealTimeAPIViewModel and RealTimeAPIDemoView are how I am trying to test this. All the code has been copied from the demo branch.