argmaxinc · 1amageek · Oct 3, 2024 · Oct 3, 2024 · Oct 4, 2024 · Oct 5, 2024
diff --git a/Examples/WhisperAX/WhisperAX.xcodeproj/project.pbxproj b/Examples/WhisperAX/WhisperAX.xcodeproj/project.pbxproj
@@ -869,7 +869,7 @@
 				CURRENT_PROJECT_VERSION = 1;
 				DEAD_CODE_STRIPPING = YES;
 				DEVELOPMENT_ASSET_PATHS = "\"WhisperAX/Preview Content\"";
-				DEVELOPMENT_TEAM = PP83DTRKSA;
+				DEVELOPMENT_TEAM = 88ACA86N96;
 				ENABLE_HARDENED_RUNTIME = YES;
 				ENABLE_PREVIEWS = YES;
 				GENERATE_INFOPLIST_FILE = YES;

diff --git a/...s/WhisperAX/WhisperAX.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/...s/WhisperAX/WhisperAX.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved
@@ -1,6 +1,15 @@
 {
-  "originHash" : "cd17206b47bb810af9459722192530e3838d8e6629a970988e32a432aaa05f6e",
+  "originHash" : "420a1723357da21f9e31b01403fd3d66df6e400a752d242d05b2c3d5667e3c33",
   "pins" : [
+    {
+      "identity" : "jinja",
+      "kind" : "remoteSourceControl",
+      "location" : "https://github.com/maiqingqiang/Jinja",
+      "state" : {
+        "revision" : "b435eb62b0d3d5f34167ec70a128355486981712",
+        "version" : "1.0.5"
+      }
+    },
     {
       "identity" : "networkimage",
       "kind" : "remoteSourceControl",
@@ -15,26 +24,26 @@
       "kind" : "remoteSourceControl",
       "location" : "https://github.com/apple/swift-argument-parser.git",
       "state" : {
-        "revision" : "c8ed701b513cf5177118a175d85fbbbcd707ab41",
-        "version" : "1.3.0"
+        "revision" : "41982a3656a71c768319979febd796c6fd111d5c",
+        "version" : "1.5.0"
       }
     },
     {
       "identity" : "swift-markdown-ui",
       "kind" : "remoteSourceControl",
       "location" : "https://github.com/gonzalezreal/swift-markdown-ui.git",
       "state" : {
-        "revision" : "ae799d015a5374708f7b4c85f3294c05f2a564e2",
-        "version" : "2.3.0"
+        "revision" : "55441810c0f678c78ed7e2ebd46dde89228e02fc",
+        "version" : "2.4.0"
       }
     },
     {
       "identity" : "swift-transformers",
       "kind" : "remoteSourceControl",
       "location" : "https://github.com/huggingface/swift-transformers.git",
       "state" : {
-        "revision" : "74b94211bdc741694ed7e700a1104c72e5ba68fe",
-        "version" : "0.1.7"
+        "revision" : "0f2306713d48a75b862026ebb291926793773f52",
+        "version" : "0.1.12"
       }
     }
   ],

diff --git a/Examples/WhisperAX/WhisperAX/Views/ContentView.swift b/Examples/WhisperAX/WhisperAX/Views/ContentView.swift
@@ -1206,9 +1206,10 @@ struct ContentView: View {
                 #endif
 
                 try? audioProcessor.startRecordingLive(inputDeviceID: deviceId) { _ in
-                    DispatchQueue.main.async {
-                        bufferEnergy = whisperKit?.audioProcessor.relativeEnergy ?? []
-                        bufferSeconds = Double(whisperKit?.audioProcessor.audioSamples.count ?? 0) / Double(WhisperKit.sampleRate)
+                    Task { @MainActor in
+                        bufferEnergy = await whisperKit?.audioProcessor.getRelativeEnergy() ?? []
+                        let audioSamples = await whisperKit?.audioProcessor.getAudioSamples() ?? []
+                        bufferSeconds = Double(audioSamples.count) / Double(WhisperKit.sampleRate)
                     }
                 }
 
@@ -1406,7 +1407,7 @@ struct ContentView: View {
         guard let whisperKit = whisperKit else { return }
 
         // Retrieve the current audio buffer from the audio processor
-        let currentBuffer = whisperKit.audioProcessor.audioSamples
+        let currentBuffer = whisperKit.audioProcessor.getAudioSamples()
 
         // Calculate the size and duration of the next buffer segment
         let nextBufferSize = currentBuffer.count - lastBufferSize
@@ -1424,8 +1425,9 @@ struct ContentView: View {
         }
 
         if useVAD {
+            let relativeEnergy = whisperKit.audioProcessor.getRelativeEnergy()
             let voiceDetected = AudioProcessor.isVoiceDetected(
-                in: whisperKit.audioProcessor.relativeEnergy,
+                in: relativeEnergy,
                 nextBufferInSeconds: nextBufferSeconds,
                 silenceThreshold: Float(silenceThreshold)
             )

diff --git a/Package.resolved b/Package.resolved
@@ -1,21 +1,30 @@
 {
   "pins" : [
+    {
+      "identity" : "jinja",
+      "kind" : "remoteSourceControl",
+      "location" : "https://github.com/maiqingqiang/Jinja",
+      "state" : {
+        "revision" : "4ffa95ce02e013c992287e19e3bbd620b6cc233a",
+        "version" : "1.0.4"
+      }
+    },
     {
       "identity" : "swift-argument-parser",
       "kind" : "remoteSourceControl",
       "location" : "https://github.com/apple/swift-argument-parser.git",
       "state" : {
-        "revision" : "c8ed701b513cf5177118a175d85fbbbcd707ab41",
-        "version" : "1.3.0"
+        "revision" : "41982a3656a71c768319979febd796c6fd111d5c",
+        "version" : "1.5.0"
       }
     },
     {
       "identity" : "swift-transformers",
       "kind" : "remoteSourceControl",
       "location" : "https://github.com/huggingface/swift-transformers.git",
       "state" : {
-        "revision" : "74b94211bdc741694ed7e700a1104c72e5ba68fe",
-        "version" : "0.1.7"
+        "revision" : "0f2306713d48a75b862026ebb291926793773f52",
+        "version" : "0.1.12"
       }
     }
   ],

diff --git a/Package.swift b/Package.swift
@@ -20,8 +20,8 @@ let package = Package(
         ),
     ],
     dependencies: [
-        .package(url: "https://github.com/huggingface/swift-transformers.git", exact: "0.1.7"),
-        .package(url: "https://github.com/apple/swift-argument-parser.git", exact: "1.3.0"),
+        .package(url: "https://github.com/huggingface/swift-transformers.git", exact: "0.1.12"),
+        .package(url: "https://github.com/apple/swift-argument-parser.git", exact: "1.5.0"),
     ],
     targets: [
         .target(

diff --git a/Sources/WhisperKit/Core/Audio/AudioChunker.swift b/Sources/WhisperKit/Core/Audio/AudioChunker.swift
@@ -43,12 +43,12 @@ public extension AudioChunking {
 
 /// A audio chunker that splits audio into smaller pieces based on voice activity detection
 @available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
-open class VADAudioChunker: AudioChunking {
+public struct VADAudioChunker: AudioChunking {
     /// prevent hallucinations at the end of the clip by stopping up to 1.0s early
     private let windowPadding: Int
-    private let vad: VoiceActivityDetector
+    private let vad: any VoiceActivityDetectable
 
-    public init(windowPadding: Int = 16000, vad: VoiceActivityDetector? = nil) {
+    public init(windowPadding: Int = 16000, vad: (any VoiceActivityDetectable)? = nil) {
         self.windowPadding = windowPadding
         self.vad = vad ?? EnergyVAD()
     }
@@ -81,12 +81,12 @@ open class VADAudioChunker: AudioChunking {
             // Typically this will be the full audio file, unless seek points are explicitly provided
             var startIndex = seekClipStart
             while startIndex < seekClipEnd - windowPadding {
-                let currentFrameLength = startIndex - seekClipStart
-                if startIndex >= currentFrameLength, startIndex < 0 {
+                // Check that startIndex is within the bounds of the audio array
+                if startIndex >= audioArray.count || startIndex < 0 {
                     throw WhisperError.audioProcessingFailed("startIndex is outside the buffer size")
                 }
 
-                // Make sure we still need chunking for this seek clip, otherwise use the original seek clip end
+                // Default to the seek clip end; it is shortened below only when the remaining clip exceeds maxChunkLength
                 var endIndex = seekClipEnd
                 if startIndex + maxChunkLength < endIndex {
                     // Adjust the end index based on VAD
@@ -97,6 +97,8 @@ open class VADAudioChunker: AudioChunking {
                     )
                 }
 
+                // Ensure endIndex is within the array bounds
+                endIndex = min(endIndex, audioArray.count)
                 guard endIndex > startIndex else {
                     break
                 }
@@ -108,4 +110,5 @@ open class VADAudioChunker: AudioChunking {
         }
         return chunkedAudio
     }
+
 }