@@ -1720,18 +1720,28 @@ async def process_audio(
1720
1720
language : Optional [str ] = Query (
1721
1721
None , description = "Language code (e.g. 'en', 'fr') for transcription." ),
1722
1722
temperature : Optional [float ] = Query (
1723
- 0.0 , description = "Temperature for sampling." )
1723
+ 0.0 , description = "Temperature for sampling." ),
1724
+ tmp_file_dir : Optional [str ] = Query (
1725
+ None , description = "Directory to save temporary audio file. If not provided, uses system temp directory." )
1724
1726
):
1727
+ temp_audio_path = None
1725
1728
try :
1726
1729
if not whisper_model :
1727
1730
raise HTTPException (
1728
1731
status_code = 400 ,
1729
1732
detail = "Whisper model is not loaded. Please load a Whisper model first."
1730
1733
)
1731
1734
1732
- with tempfile .NamedTemporaryFile (delete = False , suffix = os .path .splitext (file .filename )[1 ]) as temp_audio :
1733
- temp_audio .write (await file .read ())
1734
- temp_audio_path = temp_audio .name
1735
+ # Modify temp file creation to use custom directory if provided
1736
+ if tmp_file_dir :
1737
+ os .makedirs (tmp_file_dir , exist_ok = True )
1738
+ temp_audio_path = os .path .join (tmp_file_dir , f"temp_{ file .filename } " )
1739
+ with open (temp_audio_path , 'wb' ) as temp_audio :
1740
+ temp_audio .write (await file .read ())
1741
+ else :
1742
+ with tempfile .NamedTemporaryFile (delete = False , suffix = os .path .splitext (file .filename )[1 ]) as temp_audio :
1743
+ temp_audio .write (await file .read ())
1744
+ temp_audio_path = temp_audio .name
1735
1745
1736
1746
# Set up parameters for Whisper or similar model
1737
1747
task_params = {
@@ -1753,9 +1763,13 @@ async def process_audio(
1753
1763
raise HTTPException (
1754
1764
status_code = 500 , detail = f"Error during { task } : { str (e )} " )
1755
1765
finally :
1756
- if 'temp_audio_path' in locals () and os .path .exists (temp_audio_path ):
1757
- os .unlink (temp_audio_path )
1758
-
1766
+ # Clean up temp file if it was created
1767
+ if temp_audio_path and os .path .exists (temp_audio_path ):
1768
+ try :
1769
+ os .unlink (temp_audio_path )
1770
+ logging .info (f"Cleaned up temporary file: { temp_audio_path } " )
1771
+ except Exception as e :
1772
+ logging .error (f"Error cleaning up temporary file { temp_audio_path } : { e } " )
1759
1773
1760
1774
@app .post ("/v1/audio/processing_stream" , tags = ["Audio" ])
1761
1775
async def processing_stream_audio (
@@ -1768,18 +1782,30 @@ async def processing_stream_audio(
1768
1782
"auto" , description = "Language code (e.g., 'en', 'fr')" ),
1769
1783
min_chunk : Optional [float ] = Query (
1770
1784
1.0 , description = "Minimum chunk duration for streaming" ),
1785
+ tmp_file_dir : Optional [str ] = Query (
1786
+ None , description = "Directory to save temporary audio file. If not provided, uses system memory." )
1771
1787
):
1788
+ temp_audio_path = None
1772
1789
try :
1773
1790
if not whisper_model :
1774
1791
raise HTTPException (
1775
1792
status_code = 400 ,
1776
1793
detail = "Whisper model is not loaded. Please load a Whisper model first."
1777
1794
)
1778
1795
1779
- # Read the entire file into memory
1780
- audio_bytes = await file .read ()
1781
- a_full = load_audio_from_bytes (audio_bytes )
1782
- duration = len (a_full ) / SAMPLING_RATE
1796
+ # Modify audio loading to optionally save to file
1797
+ if tmp_file_dir :
1798
+ os .makedirs (tmp_file_dir , exist_ok = True )
1799
+ temp_audio_path = os .path .join (tmp_file_dir , f"temp_{ file .filename } " )
1800
+ with open (temp_audio_path , 'wb' ) as temp_audio :
1801
+ audio_bytes = await file .read ()
1802
+ temp_audio .write (audio_bytes )
1803
+ # Decode from the bytes already in memory; the saved copy is not read back
1804
+ a_full = load_audio_from_bytes (audio_bytes )
1805
+ else :
1806
+ # Original in-memory processing
1807
+ audio_bytes = await file .read ()
1808
+ a_full = load_audio_from_bytes (audio_bytes )
1783
1809
1784
1810
# Only include language parameter if task is "transcribe"
1785
1811
# For "translate", the language is always defined as "en"
@@ -1798,13 +1824,13 @@ async def processing_stream_audio(
1798
1824
1799
1825
def stream_generator ():
1800
1826
nonlocal beg
1801
- while beg < duration :
1827
+ while beg < len ( a_full ) / SAMPLING_RATE :
1802
1828
now = time .time () - start
1803
1829
if now < beg + min_chunk :
1804
1830
time .sleep ((beg + min_chunk ) - now )
1805
1831
end = time .time () - start
1806
- if end > duration :
1807
- end = duration
1832
+ if end > len ( a_full ) / SAMPLING_RATE :
1833
+ end = len ( a_full ) / SAMPLING_RATE
1808
1834
1809
1835
chunk_samples = int ((end - beg )* SAMPLING_RATE )
1810
1836
chunk_audio = a_full [int (
@@ -1839,7 +1865,14 @@ def stream_generator():
1839
1865
except Exception as e :
1840
1866
logging .error (f"Error in audio processing stream: { e } " )
1841
1867
raise HTTPException (status_code = 500 , detail = str (e ))
1842
-
1868
+ finally :
1869
+ # Clean up temp file if it was created
1870
+ if temp_audio_path and os .path .exists (temp_audio_path ):
1871
+ try :
1872
+ os .unlink (temp_audio_path )
1873
+ logging .info (f"Cleaned up temporary file: { temp_audio_path } " )
1874
+ except Exception as e :
1875
+ logging .error (f"Error cleaning up temporary file { temp_audio_path } : { e } " )
1843
1876
1844
1877
@app .post ("/v1/audiolm/chat/completions" , tags = ["AudioLM" ])
1845
1878
async def audio_chat_completions (
0 commit comments