# /home/ubuntu/codegamaai-test/voice_clone/src/rvc_implementation/train_index.py

import numpy as np
import faiss
import os
from random import shuffle
import json
import os
import pathlib
from subprocess import Popen, PIPE, STDOUT
import traceback
import shutil
# Add all folders to path from previous directory
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from utils import *

def train_index(exp_dir1, version19):
    """Build a faiss IVF index over the extracted features of a model.

    Generator: yields the accumulated progress log (newline-joined) after
    each stage.  On success the trained and fully-populated index files are
    written under ``logs/<exp_dir1>/`` and the model's ``<model_name>.json``
    config is updated with the produced index file name.

    Args:
        exp_dir1: experiment/model name; features are read from
            ``logs/<exp_dir1>/3_feature256`` (v1) or ``.../3_feature768``.
        version19: "v1" selects 256-dim features, anything else 768-dim.
    """
    exp_dir = "logs/%s" % (exp_dir1)
    model_name = exp_dir1
    os.makedirs(exp_dir, exist_ok=True)
    feature_dir = (
        "%s/3_feature256" % (exp_dir)
        if version19 == "v1"
        else "%s/3_feature768" % (exp_dir)
    )
    if not os.path.exists(feature_dir):
        # BUG FIX: this function is a generator, so `return "msg"` only sets
        # the StopIteration value and callers iterating the generator never
        # see the message -- yield it, then stop.
        yield "feature_dir path doesn't exist"
        return
    listdir_res = list(os.listdir(feature_dir))
    if len(listdir_res) == 0:
        yield "feature_dir path doesn't contain any data"
        return
    infos = []
    npys = []
    for name in sorted(listdir_res):
        phone = np.load("%s/%s" % (feature_dir, name))
        npys.append(phone)
    big_npy = np.concatenate(npys, 0)
    # Shuffle rows so kmeans / IVF training sees an unbiased sample.
    big_npy_idx = np.arange(big_npy.shape[0])
    np.random.shuffle(big_npy_idx)
    big_npy = big_npy[big_npy_idx]
    if big_npy.shape[0] > 2e5:
        # Too many vectors to train on directly: compress to 10k centroids.
        infos.append("Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0])
        yield "\n".join(infos)
        try:
            # NOTE(review): MiniBatchKMeans, config and logger are assumed to
            # come from `from utils import *` above -- confirm.
            big_npy = (
                MiniBatchKMeans(
                    n_clusters=10000,
                    verbose=True,
                    batch_size=256 * config.n_cpu,
                    compute_labels=False,
                    init="random",
                )
                .fit(big_npy)
                .cluster_centers_
            )
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; surface the traceback to the caller.
            info = traceback.format_exc()
            logger.info(info)
            infos.append(info)
            yield "\n".join(infos)

    np.save("%s/total_fea.npy" % exp_dir, big_npy)
    # Heuristic cell count, capped so every IVF cell holds >= 39 points.
    n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
    infos.append("%s,%s" % (big_npy.shape, n_ivf))
    yield "\n".join(infos)
    index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
    infos.append("training")
    yield "\n".join(infos)
    index_ivf = faiss.extract_index_ivf(index)
    index_ivf.nprobe = 1
    index.train(big_npy)
    faiss.write_index(
        index,
        "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
    )

    infos.append("adding")
    yield "\n".join(infos)
    # Add vectors in batches to bound peak memory inside faiss.
    batch_size_add = 8192
    for i in range(0, big_npy.shape[0], batch_size_add):
        index.add(big_npy[i : i + batch_size_add])
    faiss.write_index(
        index,
        "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
    )
    infos.append(
        "Index built successfully ,added_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
    )
    # BUG FIX: this success message was appended but never yielded.
    yield "\n".join(infos)

    index_file_update = "added_IVF%s_Flat_nprobe_%s_%s_%s.index" % (
        n_ivf, index_ivf.nprobe, exp_dir1, version19
    )
    # Record the produced index file name in the model's JSON config.
    # NOTE(review): `user_config_files` is presumably defined in `utils`;
    # elsewhere in this file os.environ['user_config_files'] is used instead
    # -- confirm they agree.
    config_file = os.path.join(user_config_files, model_name + '.json')
    with open(config_file, 'r') as f:
        data = json.load(f)
    data['index_file'] = index_file_update
    with open(config_file, 'w') as f:
        json.dump(data, f)



def click_train(
    exp_dir1,
    sr2,
    if_f0_3,
    spk_id5,
    save_epoch10,
    total_epoch11,
    batch_size12,
    if_save_latest13,
    pretrained_G14,
    pretrained_D15,
    gpus16,
    if_cache_gpu17,
    if_save_every_weights18,
    version19,
):
    """Prepare the training filelist/config and launch RVC training.

    Writes ``logs/<exp_dir1>/filelist.txt`` (the intersection of samples that
    have all required artifacts), copies the matching training config to
    ``logs/<exp_dir1>/config.json`` if absent, then runs
    ``infer/modules/train/train.py`` as a subprocess and streams its output.

    Args:
        exp_dir1: experiment/model name under ``logs/``.
        sr2: sample-rate tag, e.g. "40k".
        if_f0_3: truthy when pitch (f0) features were extracted.
        spk_id5: speaker id written into each filelist row.
        save_epoch10 / total_epoch11 / batch_size12: training hyperparams.
        if_save_latest13 / if_cache_gpu17 / if_save_every_weights18: flags.
        pretrained_G14 / pretrained_D15: pretrained checkpoint paths ("" = none).
        gpus16: GPU spec string passed through to the trainer.
        version19: "v1" or "v2" model version.

    Returns:
        A human-readable status string once the subprocess exits.
    """
    now_dir = os.getcwd()
    exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
    os.makedirs(exp_dir, exist_ok=True)
    gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir)
    feature_dir = (
        "%s/3_feature256" % (exp_dir)
        if version19 == "v1"
        else "%s/3_feature768" % (exp_dir)
    )
    # Keep only sample names present in every required artifact directory.
    if if_f0_3:
        f0_dir = "%s/2a_f0" % (exp_dir)
        f0nsf_dir = "%s/2b-f0nsf" % (exp_dir)
        names = (
            set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
            & set([name.split(".")[0] for name in os.listdir(feature_dir)])
            & set([name.split(".")[0] for name in os.listdir(f0_dir)])
            & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
        )
    else:
        names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
            [name.split(".")[0] for name in os.listdir(feature_dir)]
        )
    opt = []
    for name in names:
        # Backslashes doubled so Windows paths survive the filelist format.
        if if_f0_3:
            opt.append(
                "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
                % (
                    gt_wavs_dir.replace("\\", "\\\\"),
                    name,
                    feature_dir.replace("\\", "\\\\"),
                    name,
                    f0_dir.replace("\\", "\\\\"),
                    name,
                    f0nsf_dir.replace("\\", "\\\\"),
                    name,
                    spk_id5,
                )
            )
        else:
            opt.append(
                "%s/%s.wav|%s/%s.npy|%s"
                % (
                    gt_wavs_dir.replace("\\", "\\\\"),
                    name,
                    feature_dir.replace("\\", "\\\\"),
                    name,
                    spk_id5,
                )
            )
    fea_dim = 256 if version19 == "v1" else 768
    # Two "mute" entries pad the dataset (standard RVC practice).
    if if_f0_3:
        for _ in range(2):
            opt.append(
                "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
                % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
            )
    else:
        for _ in range(2):
            opt.append(
                "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
                % (now_dir, sr2, now_dir, fea_dim, spk_id5)
            )
    shuffle(opt)
    with open("%s/filelist.txt" % exp_dir, "w") as f:
        f.write("\n".join(opt))

    print("Write filelist done")
    print("Use gpus:", str(gpus16))
    if pretrained_G14 == "":
        print("No pretrained Generator")
    if pretrained_D15 == "":
        print("No pretrained Discriminator")
    # v1 models and all 40k models share the v1 config layout.
    if version19 == "v1" or sr2 == "40k":
        config_path = "configs/v1/%s.json" % sr2
    else:
        config_path = "configs/v2/%s.json" % sr2
    config_save_path = os.path.join(exp_dir, "config.json")
    if not pathlib.Path(config_save_path).exists():
        with open(config_save_path, "w", encoding="utf-8") as f:
            with open(config_path, "r") as config_file:
                config_data = json.load(config_file)
                json.dump(
                    config_data,
                    f,
                    ensure_ascii=False,
                    indent=4,
                    sort_keys=True,
                )
            f.write("\n")
    # BUG FIX: typo "Creted" -> "Created" in the status message.
    print("Created config_save_path File")
    # SECURITY NOTE(review): the command is built by string interpolation and
    # run with shell=True; exp_dir1 / checkpoint paths containing shell
    # metacharacters would be interpreted by the shell.  Left as-is because
    # the optional "-pg %s"/"-pd %s" fragments rely on shell word-splitting;
    # consider subprocess.run([...], shell=False) with a proper arg list.
    cmd = (
        'python infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s'
        % (
            exp_dir1,
            sr2,
            1 if if_f0_3 else 0,
            batch_size12,
            gpus16,
            total_epoch11,
            save_epoch10,
            "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
            "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
            # IDIOM FIX: `x == True` -> truthiness test (same result for the
            # boolean flags these parameters carry).
            1 if if_save_latest13 else 0,
            1 if if_cache_gpu17 else 0,
            1 if if_save_every_weights18 else 0,
            version19,
        )
    )
    # Stream the trainer's combined stdout/stderr as it runs.
    p = Popen(cmd, shell=True, cwd=now_dir, stdout=PIPE, stderr=STDOUT, bufsize=1, universal_newlines=True)
    try:
        for line in p.stdout:
            print(line.strip())
    finally:
        # BUG FIX: close the pipe so the fd is not leaked, then reap the child.
        p.stdout.close()
        p.wait()
    return "After the training is completed, you can view the console training log or check out train.log"


def rvc_train_and_save(model_name, f0method, epochs, save_frequency, user_id, vid, dataset_folder):
    """Run the full RVC pipeline: preprocess, extract features, index, train, publish.

    Drives the preprocessing and feature-extraction scripts for
    ``dataset_folder``, builds the faiss index via :func:`train_index`, trains
    the model via :func:`click_train`, then copies the resulting weights and
    index into the user's model store and notifies the frontend.

    Args:
        model_name: name of the model / logs subfolder.
        f0method: pitch extraction method; "rmvpe_gpu" selects the rmvpe path.
        epochs: total training epochs.
        save_frequency: checkpoint save interval (epochs).
        user_id, vid: identifiers used to build the destination paths and
            the frontend callback.
        dataset_folder: folder containing the raw training audio.

    Returns:
        None on the normal path; a ``{'status': 2, ...}`` dict when training
        fails.
    """
    import time  # hoisted from mid-function; stdlib, used for copy settle waits

    # 1. Preprocess -- create the logs/<model_name> folder if needed.
    logs_directory = os.path.join('./logs', model_name)
    if not os.path.exists(logs_directory):
        os.makedirs(logs_directory)
        print(f"Logs folder '{model_name}' created successfully.")
    else:
        print(f"Logs folder '{model_name}' already exists.")

    # Opening in 'w' truncates any stale preprocess.log from a previous run.
    with open(f'./logs/{model_name}/preprocess.log', 'w') as f:
        print("Starting...")

    os.system(f'python infer/modules/train/preprocess.py {dataset_folder} 40000 2 ./logs/{model_name} False 3.0 > /dev/null 2>&1')

    with open(f'./logs/{model_name}/preprocess.log', 'r') as f:
        if 'end preprocess' in f.read():
            print("Data Preprocessing successful.")
        else:
            print("Error preprocessing data... Make sure your dataset folder is correct.")

    # 2. Extract features (truncate the log first, same as above).
    with open(f'./logs/{model_name}/extract_f0_feature.log', 'w') as f:
        print("Starting...")
    if f0method != "rmvpe_gpu":
        os.system(f'python infer/modules/train/extract/extract_f0_print.py ./logs/{model_name} 2 {f0method}')
    else:
        os.system(f'python infer/modules/train/extract/extract_f0_rmvpe.py 1 0 0 ./logs/{model_name} True')
    os.system(f'python infer/modules/train/extract_feature_print.py cuda:0 1 0 0 ./logs/{model_name} v2')
    with open(f'./logs/{model_name}/extract_f0_feature.log', 'r') as f:
        if 'all-feature-done' in f.read():
            print("Feature Extraction successful.")
        else:
            print("Error preprocessing data... Make sure your data was preprocessed.")

    # 3. Train index -- train_index is a generator of progress strings.
    training_log = train_index(model_name, 'v2')
    for line in training_log:
        print(line)
        if 'adding' in line:
            print("Creating index successful.")

    # 4. Train model.
    cache = False  # whether to cache the dataset on GPU during training
    try:
        training_log = click_train(
            model_name,
            '40k',
            True,
            0,
            save_frequency,
            epochs,
            7,
            True,
            'assets/pretrained_v2/f0G40k.pth',
            'assets/pretrained_v2/f0D40k.pth',
            0,
            cache,
            True,
            'v2',
        )
        print(training_log)
    except Exception:
        # Narrowed from a bare `except:`; log the traceback instead of
        # silently discarding the real failure cause.
        print("Error training model...")
        traceback.print_exc()
        return {'status': 2, 'message': 'Error training model...'}

    # 5. Publish: mark the model ready and record its remote index path.
    # NOTE(review): reads env vars (user_config_files, model_base_dir, ...)
    # that must be set by the deployment -- a missing one raises KeyError.
    with open(os.path.join(os.environ['user_config_files'], model_name + '.json'), 'r') as f:
        data = json.load(f)
        data['status'] = 1
        index_file = data['index_file']
        model_weights = data['model_weights']
        data['index_file'] = os.environ['model_base_dir'] + user_id + '/' + vid + '/' + 'model' + '/' + index_file

    # Copy the index file and model weights into the user's model folder.
    weights_file_path = os.path.join(os.environ['model_weights_dir'], model_weights)
    index_file_path = os.path.join(os.environ['model_index_dir'] + model_name, index_file)

    weights_move_path_1 = os.environ['dataset_store'] + user_id + '/' + vid + '/' + 'model'
    index_move_path_1 = os.environ['dataset_store'] + user_id + '/' + vid + '/' + 'model'

    if not os.path.exists(weights_move_path_1):
        os.makedirs(weights_move_path_1)
    shutil.copy(weights_file_path, weights_move_path_1)
    # Brief pause between copies (presumably to let a network mount settle
    # -- TODO confirm this wait is actually needed).
    time.sleep(2)
    shutil.copy(index_file_path, index_move_path_1)
    time.sleep(2)

    with open(os.path.join(os.environ['user_config_files'], model_name + '.json'), 'w') as f:
        json.dump(data, f)

    print("Training successful.")
    print("Model weights saved to: ", weights_move_path_1)
    print("Index file saved to: ", index_move_path_1)

    # Notify the frontend: 1 = artifacts in place, 0 = something went missing.
    if os.path.exists(weights_move_path_1) and os.path.exists(index_move_path_1):
        status = 1
        send_callback_to_frontend(user_id, vid, status)
    else:
        status = 0
        send_callback_to_frontend(user_id, vid, status)

# (end of file)