# File: /home/ubuntu/codegamaai-test/tts_openvoice/app.py
# Description: FastAPI entry point for the OpenVoice service. It exposes endpoints that
# download reference audio, generate audio files from text input, and apply voice style transfer.
# =====================================================================================
import json
import os
import threading
import uuid
import warnings
import torch
from fastapi import (Depends, FastAPI, File, Form, HTTPException, Request,
UploadFile)
from typing import List
from pydantic import BaseModel, HttpUrl
from fastapi.staticfiles import StaticFiles
from src.s3_download import download_file_from_s3, direct_download
from main_tts import *
from main_voice_style import *
# Suppress every Python warning for the lifetime of the process.
warnings.filterwarnings("ignore")

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Get the current working directory
current_directory = os.getcwd()
print(f"Current working directory: {current_directory}")

# Create the application exactly once; all routes below register on this instance.
app = FastAPI()

# NOTE(review): this exposes the entire working directory under /static.
# The __main__ block loads "privkey.pem" via a relative path, so if the server
# is started from this directory the TLS private key (and the source files)
# would be downloadable. Consider mounting a dedicated output directory
# instead -- left unchanged here because generated file URLs may depend on it.
app.mount("/static", StaticFiles(directory=current_directory), name="static")
class AudioPayload(BaseModel):
    """Request body accepted by the ``/upload_audio/`` endpoint."""

    # Path components of the storage directory (see download_audios).
    user_id: str
    vid: str

    # URLs of the audio files to fetch; validated as HTTP(S) URLs by pydantic.
    audio_files: List[HttpUrl]

    # 's3' selects the S3 downloader; any other value selects a direct download.
    download_type: str
# Save Reference Audio
@app.post("/upload_audio/")
async def download_audios(payload: AudioPayload):
    """Download every audio file listed in the payload into local storage.

    Files are written to ``$dataset_store/<user_id>/<vid>/audio``; the
    directory is created when it does not exist yet.

    Args:
        payload: Owner identifiers, the list of audio URLs and the download
            type. ``'s3'`` uses ``download_file_from_s3``; any other value
            uses ``direct_download``.

    Returns:
        ``{"status": 1, ...}`` with the target directory once *all* files
        have been downloaded (also when the list is empty), or
        ``{"status": 0, ...}`` carrying the error text as soon as one
        download fails.

    Raises:
        KeyError: If the ``dataset_store`` environment variable is not set.
    """
    # NOTE(review): user_id and vid come straight from the request and are
    # joined into a filesystem path -- consider rejecting values containing
    # path separators or "..".
    directory = os.path.join(os.environ['dataset_store'], payload.user_id, payload.vid, "audio")
    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(directory, exist_ok=True)

    for url in payload.audio_files:
        try:
            if payload.download_type == 's3':
                download_file_from_s3(url, directory)
            else:
                direct_download(url, directory)
        except Exception as e:
            print(f"Failed to download {url}: {str(e)}")
            # Stop at the first failure and report it to the caller.
            return {"status": 0, "message": "Failed to download files", "directory": None, "error": str(e)}
        print(f"Downloaded and saved {url} to {directory}")

    # Report success only after the loop, so that every URL is processed
    # rather than just the first one.
    return {"status": 1, "message": "Files downloaded successfully", "directory": directory}
@app.post("/tts")
async def text_to_speech(json_data: dict):
    """Generate speech audio from the text supplied in the request body.

    Args:
        json_data: JSON object read for the keys ``text``, ``language``,
            ``speaker_id``, ``user_id``, ``vid`` and ``speed``. Absent keys
            resolve to ``None``, except ``speed`` which defaults to ``1.0``.

    Returns:
        Whatever ``main_tts`` returns, or a ``status: 0`` dict containing the
        error text when ``main_tts`` raises.
    """
    # Positional order expected by main_tts:
    # text, language, speaker_id, user_id, vid, speed.
    tts_arguments = (
        json_data.get('text'),
        json_data.get('language'),
        json_data.get('speaker_id'),
        json_data.get('user_id'),
        json_data.get('vid'),
        json_data.get('speed', 1.0),
    )

    try:
        return main_tts(*tts_arguments)
    except Exception as e:
        print(f"Failed to generate audio file: {str(e)}")
        return {
            "status": 0,
            "message": "Failed to generate audio file",
            "output_file": None,
            "output_file_url": None,
            "error": str(e),
        }
@app.post("/voice_style")
async def voice_style_transfer(json_data: dict):
    """Apply voice style transfer to an existing audio file.

    Args:
        json_data: JSON object read for the keys ``user_id``, ``vid``,
            ``input_audio_path``, ``reference_speaker_path``, ``speaker_key``
            and ``generation_type``. Absent keys resolve to ``None``.

    Returns:
        The result of ``base_voice_style`` when ``generation_type`` is
        ``'Base'``, otherwise the result of ``voice_style``; a ``status: 0``
        dict containing the error text when the chosen call raises.
    """
    owner_id = json_data.get('user_id')
    video_id = json_data.get('vid')
    source_audio = json_data.get('input_audio_path')
    reference_audio = json_data.get('reference_speaker_path')
    speaker = json_data.get('speaker_key')
    mode = json_data.get('generation_type')  # Base or Customized

    try:
        # Guard clause for the base style; everything else is customized.
        if mode == 'Base':
            return base_voice_style(source_audio, owner_id, video_id)
        return voice_style(reference_audio, speaker, source_audio, owner_id, video_id)
    except Exception as e:
        print(f"Failed to generate audio file: {str(e)}")
        return {
            "status": 0,
            "message": "Failed to generate audio file",
            "output_file": None,
            "output_file_url": None,
            "error": str(e),
        }
@app.post("/voice_style_text")
async def voice_style_transfer_text(json_data: dict):
    """Synthesize speech from text, then apply voice style transfer to it.

    Args:
        json_data: JSON object read for the keys ``user_id``, ``vid``,
            ``text``, ``language``, ``speaker_id``, ``reference_speaker_path``,
            ``speaker_key``, ``generation_type`` and, optionally, ``speed``.
            Absent keys resolve to ``None``. ``speed`` is forwarded to
            ``main_tts`` only when supplied, so requests without it behave
            as before.

    Returns:
        The result of ``base_voice_style`` (``generation_type == 'Base'``) or
        ``voice_style`` on success. A ``status: 0`` dict with the error text
        when either step raises or when the text-to-speech step yields no
        output file.
    """
    def failure(error):
        # Uniform error payload shared by every failure path of this endpoint.
        return {"status": 0, "message": "Failed to generate audio file", "output_file": None, "output_file_url": None, "error": error}

    user_id = json_data.get('user_id')
    vid = json_data.get('vid')
    text = json_data.get('text')
    language = json_data.get('language')
    speaker_id = json_data.get('speaker_id')
    reference_speaker_path = json_data.get('reference_speaker_path')
    speaker_key = json_data.get('speaker_key')
    generation_type = json_data.get('generation_type')
    speed = json_data.get('speed')

    # Convert text to audio
    try:
        if speed is None:
            output = main_tts(text, language, speaker_id, user_id, vid)
        else:
            output = main_tts(text, language, speaker_id, user_id, vid, speed)
        # Get the input audio path. Kept inside the try so that an unexpected
        # return value produces the error payload instead of an unhandled
        # exception.
        input_audio_path = output.get('output_file')
    except Exception as e:
        print(f"Failed to generate audio file: {str(e)}")
        return failure(str(e))

    if not input_audio_path:
        # The text-to-speech step produced no file; do not hand a missing path
        # to the style-transfer step. Reuse its error text when it gave one.
        reason = output.get('error') or "Text-to-speech step did not produce an output file"
        print(f"Failed to generate audio file: {reason}")
        return failure(reason)

    # Perform voice style transfer
    try:
        if generation_type == 'Base':
            output = base_voice_style(input_audio_path, user_id, vid)
        else:
            output = voice_style(reference_speaker_path, speaker_key, input_audio_path, user_id, vid)
        return output
    except Exception as e:
        print(f"Failed to generate audio file: {str(e)}")
        return failure(str(e))
if __name__ == "__main__":
    import uvicorn

    # Serve over HTTPS on all interfaces, port 8020. The key and certificate
    # are given as relative paths.
    server_options = {
        "host": '0.0.0.0',
        "port": 8020,
        "ssl_keyfile": "privkey.pem",
        "ssl_certfile": "fullchain.pem",
    }
    uvicorn.run(app, **server_options)
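# (File-manager page residue, not part of the program: "Back to Directory")
# (File-manager page residue, not part of the program: "File Manager")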