# Viewing File: /home/ubuntu/codegamaai-test/tts_openvoice/app.py

# Description: This file is used to run the main app of OpenVoice. It is used to generate audio files from text input.
# =====================================================================================
import json
import os
import threading
import uuid
import warnings
import torch
from fastapi import (Depends, FastAPI, File, Form, HTTPException, Request,
                     UploadFile)
from typing import List
from pydantic import BaseModel, HttpUrl
from fastapi.staticfiles import StaticFiles
from src.s3_download import download_file_from_s3, direct_download
from main_tts import *
from main_voice_style import *

# Suppress every warning category process-wide to keep the service logs quiet.
warnings.filterwarnings("ignore")

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Get the current working directory
current_directory = os.getcwd()
print(f"Current working directory: {current_directory}")

# Single application instance; every route below is registered on it.
app = FastAPI()

# NOTE(review): this serves every file under the working directory at /static.
# If the server is started from the directory that holds the source files and
# the relative "privkey.pem" passed to uvicorn at the bottom of this file,
# those become downloadable too -- consider mounting a dedicated output
# directory instead. Left unchanged here because the URLs that clients use
# may depend on the current layout (TODO confirm).
app.mount("/static", StaticFiles(directory=current_directory), name="static")

class AudioPayload(BaseModel):
    # Request body accepted by the /upload_audio/ endpoint.

    # user_id and vid are used as path components of the directory the
    # downloaded files are written to.
    user_id: str
    vid: str
    # URLs of the audio files to fetch; each entry is validated as HttpUrl.
    audio_files: List[HttpUrl]
    # 's3' selects download_file_from_s3; any other value selects
    # direct_download.
    download_type: str



# Save Reference Audio
@app.post("/upload_audio/")
async def download_audios(payload: AudioPayload):
    """Download every reference audio file listed in the payload.

    Files are written to ``<dataset_store>/<user_id>/<vid>/audio``, where
    ``dataset_store`` is read from the environment. The directory is created
    when missing.

    Args:
        payload: Request body holding the user/voice identifiers, the list of
            audio URLs and the download type (``'s3'`` uses
            ``download_file_from_s3``; anything else uses ``direct_download``).

    Returns:
        dict: ``{"status": 1, "message": ..., "directory": ...}`` once all
        files were downloaded (also for an empty URL list), or
        ``{"status": 0, "message": ..., "directory": None, "error": ...}``
        as soon as one download fails or the target path is rejected.

    Raises:
        KeyError: If the ``dataset_store`` environment variable is not set.
    """
    root = os.environ['dataset_store']
    directory = os.path.join(root, payload.user_id, payload.vid, "audio")

    # user_id and vid come straight from the request body; refuse values
    # (e.g. "../.." or an absolute path) that would place the target
    # directory outside of the dataset root.
    root_abs = os.path.abspath(root)
    try:
        inside_root = os.path.commonpath([root_abs, os.path.abspath(directory)]) == root_abs
    except ValueError:
        # commonpath raises when the paths cannot be compared
        # (e.g. different drives on Windows).
        inside_root = False
    if not inside_root:
        print(f"Rejected download directory outside dataset store: {directory}")
        return {"status": 0, "message": "Failed to download files", "directory": None, "error": "Invalid user_id or vid"}

    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(directory, exist_ok=True)

    use_s3 = payload.download_type == 's3'
    for url in payload.audio_files:
        try:
            if use_s3:
                download_file_from_s3(url, directory)
            else:
                direct_download(url, directory)
        except Exception as e:
            # Endpoint boundary: report the failure in the response body
            # instead of surfacing a 500 to the client.
            print(f"Failed to download {url}: {str(e)}")
            return {"status": 0, "message": "Failed to download files", "directory": None, "error": str(e)}
        print(f"Downloaded and saved {url} to {directory}")

    # Report success only after *all* URLs were processed; returning from
    # inside the loop would stop after the first file.
    return {"status": 1, "message": "Files downloaded successfully", "directory": directory}

@app.post("/tts")
async def text_to_speech(json_data: dict):
    """Generate speech for the text in the request body.

    Reads ``text``, ``language``, ``speaker_id``, ``user_id``, ``vid`` and the
    optional ``speed`` (default ``1.0``) from ``json_data`` and forwards them
    to ``main_tts``.

    Returns:
        Whatever ``main_tts`` returns, or a ``status: 0`` failure dict carrying
        the exception text when ``main_tts`` raises.
    """
    # Positional order expected by main_tts; speed is appended last.
    required_keys = ('text', 'language', 'speaker_id', 'user_id', 'vid')
    tts_args = [json_data.get(key) for key in required_keys]
    tts_args.append(json_data.get('speed', 1.0))

    try:
        return main_tts(*tts_args)
    except Exception as err:
        print(f"Failed to generate audio file: {str(err)}")
        return {
            "status": 0,
            "message": "Failed to generate audio file",
            "output_file": None,
            "output_file_url": None,
            "error": str(err),
        }
    
@app.post("/voice_style")
async def voice_style_transfer(json_data: dict):
    """Apply voice style transfer to an existing audio file.

    ``generation_type == 'Base'`` routes the request to ``base_voice_style``;
    any other value routes it to ``voice_style`` together with the reference
    speaker path and speaker key from the request body.

    Returns:
        The result of the selected style function, or a ``status: 0`` failure
        dict carrying the exception text when that function raises.
    """
    owner = json_data.get('user_id')
    voice = json_data.get('vid')
    source_audio = json_data.get('input_audio_path')
    reference_audio = json_data.get('reference_speaker_path')
    speaker = json_data.get('speaker_key')
    # Base or Customized
    use_base_style = json_data.get('generation_type') == 'Base'

    try:
        if use_base_style:
            return base_voice_style(source_audio, owner, voice)
        return voice_style(reference_audio, speaker, source_audio, owner, voice)
    except Exception as err:
        print(f"Failed to generate audio file: {str(err)}")
        return {
            "status": 0,
            "message": "Failed to generate audio file",
            "output_file": None,
            "output_file_url": None,
            "error": str(err),
        }


@app.post("/voice_style_text")
async def voice_style_transfer_text(json_data: dict):
    """Synthesize speech from text, then apply voice style transfer to it.

    Step 1 calls ``main_tts`` with ``text``, ``language``, ``speaker_id``,
    ``user_id`` and ``vid`` (plus ``speed`` when the request supplies it).
    Step 2 feeds the ``output_file`` of that result into ``base_voice_style``
    (``generation_type == 'Base'``) or ``voice_style`` (any other value).

    Returns:
        The result of the style-transfer function, or a ``status: 0`` failure
        dict carrying the error text when either step fails or the TTS step
        yields no output file.
    """
    user_id = json_data.get('user_id')
    vid = json_data.get('vid')
    text = json_data.get('text')
    language = json_data.get('language')
    speaker_id = json_data.get('speaker_id')
    reference_speaker_path = json_data.get('reference_speaker_path')
    speaker_key = json_data.get('speaker_key')
    generation_type = json_data.get('generation_type')

    def _failure(error_text):
        # Shared failure payload, same shape as the other endpoints in this file.
        print(f"Failed to generate audio file: {error_text}")
        return {"status": 0, "message": "Failed to generate audio file", "output_file": None, "output_file_url": None, "error": error_text}

    # Convert text to audio. Forward speed only when the caller sent it, so
    # requests without it keep using main_tts's own default.
    tts_args = [text, language, speaker_id, user_id, vid]
    if 'speed' in json_data:
        tts_args.append(json_data['speed'])
    try:
        output = main_tts(*tts_args)
        # Get the input audio path; kept inside the try so a result that is
        # not a dict is reported as a failure rather than a 500.
        input_audio_path = output.get('output_file')
    except Exception as e:
        return _failure(str(e))

    # Presumably main_tts can report a failure without raising (TODO confirm);
    # do not hand a missing path to the style-transfer step.
    if not input_audio_path:
        return _failure("TTS step returned no output file")

    # Perform voice style transfer
    try:
        if generation_type == 'Base':
            output = base_voice_style(input_audio_path, user_id, vid)
        else:
            output = voice_style(reference_speaker_path, speaker_key, input_audio_path, user_id, vid)
        return output
    except Exception as e:
        return _failure(str(e))


if __name__ == "__main__":
    import uvicorn

    # Serve over TLS on every interface, port 8020. The key and certificate
    # paths are relative, so they are looked up from the process's working
    # directory.
    server_options = {
        "host": '0.0.0.0',
        "port": 8020,
        "ssl_keyfile": "privkey.pem",
        "ssl_certfile": "fullchain.pem",
    }
    uvicorn.run(app, **server_options)
# Back to Directory File Manager