Skip to content

Commit 8443596

Browse files
committed
added file path params for server whisper API
1 parent a62fdce commit 8443596

File tree

1 file changed

+48
-15
lines changed

1 file changed

+48
-15
lines changed

nexa/gguf/server/nexa_service.py

+48-15
Original file line numberDiff line numberDiff line change
@@ -1720,18 +1720,28 @@ async def process_audio(
17201720
language: Optional[str] = Query(
17211721
None, description="Language code (e.g. 'en', 'fr') for transcription."),
17221722
temperature: Optional[float] = Query(
1723-
0.0, description="Temperature for sampling.")
1723+
0.0, description="Temperature for sampling."),
1724+
tmp_file_dir: Optional[str] = Query(
1725+
None, description="Directory to save temporary audio file. If not provided, uses system temp directory.")
17241726
):
1727+
temp_audio_path = None
17251728
try:
17261729
if not whisper_model:
17271730
raise HTTPException(
17281731
status_code=400,
17291732
detail="Whisper model is not loaded. Please load a Whisper model first."
17301733
)
17311734

1732-
with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1]) as temp_audio:
1733-
temp_audio.write(await file.read())
1734-
temp_audio_path = temp_audio.name
1735+
# Write the upload to tmp_file_dir if provided, otherwise to a system temp file
1736+
if tmp_file_dir:
1737+
os.makedirs(tmp_file_dir, exist_ok=True)
1738+
temp_audio_path = os.path.join(tmp_file_dir, f"temp_{file.filename}")
1739+
with open(temp_audio_path, 'wb') as temp_audio:
1740+
temp_audio.write(await file.read())
1741+
else:
1742+
with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1]) as temp_audio:
1743+
temp_audio.write(await file.read())
1744+
temp_audio_path = temp_audio.name
17351745

17361746
# Set up parameters for Whisper or similar model
17371747
task_params = {
@@ -1753,9 +1763,13 @@ async def process_audio(
17531763
raise HTTPException(
17541764
status_code=500, detail=f"Error during {task}: {str(e)}")
17551765
finally:
1756-
if 'temp_audio_path' in locals() and os.path.exists(temp_audio_path):
1757-
os.unlink(temp_audio_path)
1758-
1766+
# Clean up temp file if it was created
1767+
if temp_audio_path and os.path.exists(temp_audio_path):
1768+
try:
1769+
os.unlink(temp_audio_path)
1770+
logging.info(f"Cleaned up temporary file: {temp_audio_path}")
1771+
except Exception as e:
1772+
logging.error(f"Error cleaning up temporary file {temp_audio_path}: {e}")
17591773

17601774
@app.post("/v1/audio/processing_stream", tags=["Audio"])
17611775
async def processing_stream_audio(
@@ -1768,18 +1782,30 @@ async def processing_stream_audio(
17681782
"auto", description="Language code (e.g., 'en', 'fr')"),
17691783
min_chunk: Optional[float] = Query(
17701784
1.0, description="Minimum chunk duration for streaming"),
1785+
tmp_file_dir: Optional[str] = Query(
1786+
None, description="Directory to save temporary audio file. If not provided, uses system memory.")
17711787
):
1788+
temp_audio_path = None
17721789
try:
17731790
if not whisper_model:
17741791
raise HTTPException(
17751792
status_code=400,
17761793
detail="Whisper model is not loaded. Please load a Whisper model first."
17771794
)
17781795

1779-
# Read the entire file into memory
1780-
audio_bytes = await file.read()
1781-
a_full = load_audio_from_bytes(audio_bytes)
1782-
duration = len(a_full) / SAMPLING_RATE
1796+
# Load the upload into memory; also save a copy under tmp_file_dir if provided
1797+
if tmp_file_dir:
1798+
os.makedirs(tmp_file_dir, exist_ok=True)
1799+
temp_audio_path = os.path.join(tmp_file_dir, f"temp_{file.filename}")
1800+
with open(temp_audio_path, 'wb') as temp_audio:
1801+
audio_bytes = await file.read()
1802+
temp_audio.write(audio_bytes)
1803+
# Decode from the bytes already in memory (the saved copy is not re-read)
1804+
a_full = load_audio_from_bytes(audio_bytes)
1805+
else:
1806+
# Original in-memory processing
1807+
audio_bytes = await file.read()
1808+
a_full = load_audio_from_bytes(audio_bytes)
17831809

17841810
# Only include language parameter if task is "transcribe"
17851811
# For "translate", the language is always defined as "en"
@@ -1798,13 +1824,13 @@ async def processing_stream_audio(
17981824

17991825
def stream_generator():
18001826
nonlocal beg
1801-
while beg < duration:
1827+
while beg < len(a_full) / SAMPLING_RATE:
18021828
now = time.time() - start
18031829
if now < beg + min_chunk:
18041830
time.sleep((beg + min_chunk) - now)
18051831
end = time.time() - start
1806-
if end > duration:
1807-
end = duration
1832+
if end > len(a_full) / SAMPLING_RATE:
1833+
end = len(a_full) / SAMPLING_RATE
18081834

18091835
chunk_samples = int((end - beg)*SAMPLING_RATE)
18101836
chunk_audio = a_full[int(
@@ -1839,7 +1865,14 @@ def stream_generator():
18391865
except Exception as e:
18401866
logging.error(f"Error in audio processing stream: {e}")
18411867
raise HTTPException(status_code=500, detail=str(e))
1842-
1868+
finally:
1869+
# Clean up temp file if it was created
1870+
if temp_audio_path and os.path.exists(temp_audio_path):
1871+
try:
1872+
os.unlink(temp_audio_path)
1873+
logging.info(f"Cleaned up temporary file: {temp_audio_path}")
1874+
except Exception as e:
1875+
logging.error(f"Error cleaning up temporary file {temp_audio_path}: {e}")
18431876

18441877
@app.post("/v1/audiolm/chat/completions", tags=["AudioLM"])
18451878
async def audio_chat_completions(

0 commit comments

Comments
 (0)