# File: /home/ubuntu/codegamaai-test/girlfriend_bot/main.py

import io
from dotenv import load_dotenv
import sys, os
import logging
import json
import requests
import openai
from openai import OpenAI
# import gradio as gr
import boto3
from google.cloud import texttospeech, speech
import tempfile
from src.utils import *
from src.constants import *
load_dotenv()
import re
from datetime import datetime

# --- Module-level configuration and shared clients ------------------------

# OpenAI API key from the environment (loaded above by load_dotenv()).
openai.api_key = os.getenv("OPENAI_API_KEY")
# Google clients discover credentials through this env var; the key file is
# expected inside GF_BOT_DIR (raises KeyError at import if GF_BOT_DIR is unset).
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.join(os.environ["GF_BOT_DIR"], "GOOGLE_APPLICATION_CREDENTIALS.json")
# "/home/zrlhowsqpnco/codegama_bot/GF_replika/girlfriend_bot/GOOGLE_APPLICATION_CREDENTIALS.json"

# OpenAI client instance (picks up OPENAI_API_KEY from the environment itself).
client = OpenAI()

# Default filename for persisted chat history.
# NOTE(review): appears unused in this file — confirm against src/utils before removing.
history_file = "conversation_history.json"

# Static image/video catalogue consumed by find_matching_assets(); loaded
# once at import time from the current working directory.
with open('media_metadata.json', 'r') as f:
    media_assets = json.load(f)

# AWS / S3 settings used to publish synthesized audio files.
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')
AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION')
S3_BUCKET_NAME = os.getenv('S3_BUCKET_NAME')


# Shared S3 client for upload_file_to_s3().
s3_client = boto3.client(
    's3',
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    region_name=AWS_DEFAULT_REGION
)

def upload_file_to_s3(file_path, bucket_name, object_name=None):
    """Upload a local file to S3 as a public-read object and return its URL.

    Args:
        file_path: Path of the local file to upload.
        bucket_name: Target S3 bucket name.
        object_name: Key to store the object under; defaults to the
            file's basename.

    Returns:
        The public https URL of the uploaded object.
    """
    key = object_name if object_name is not None else os.path.basename(file_path)

    # Public-read ACL so clients can fetch the generated media URL directly.
    s3_client.upload_file(file_path, bucket_name, key, ExtraArgs={'ACL': 'public-read'})

    return f"https://{bucket_name}.s3.{AWS_DEFAULT_REGION}.amazonaws.com/{key}"

# Character-class ranges covering the emoji/pictograph planes that must be
# stripped before sending text to the TTS engine.
_EMOJI_RANGES = (
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # flags (iOS)
    "\U0001F1F2-\U0001F1F4"  # Macau flag
    "\U0001F700-\U0001F77F"  # alchemical symbols
    "\U0001F780-\U0001F7FF"  # Geometric Shapes Extended
    "\U0001F800-\U0001F8FF"  # Supplemental Arrows-C
    "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    "\U0001FA00-\U0001FA6F"  # Chess Symbols
    "\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
    "\U00002702-\U000027B0"  # Dingbats
    "\U000024C2-\U0001F251"  # Enclosed characters
)
emoji_pattern = re.compile("[" + _EMOJI_RANGES + "]+", flags=re.UNICODE)

def remove_emojis(text):
    """Return *text* with every run of emoji characters removed."""
    return emoji_pattern.sub(r'', text)


def read_trait(user_uid):
    """Load the stored personality/preference traits for a user.

    Reads ``$DB_DIR/user_preference/<user_uid>_traits.json``.

    Args:
        user_uid: Unique identifier of the user.

    Returns:
        The parsed traits dict, or None when the traits file is missing
        or contains invalid JSON.
    """
    traits_path = f"{os.environ['DB_DIR']}/user_preference/{user_uid}_traits.json"
    try:
        with open(traits_path, 'r') as file:
            return json.load(file)
    except FileNotFoundError:
        logging.error(f"No traits file found for user: {user_uid}")
        return None
    except json.JSONDecodeError as e:
        # Bug fix: a corrupt traits file previously raised an uncaught
        # exception; degrade the same way as a missing file.
        logging.error(f"Invalid traits JSON for user {user_uid}: {e}")
        return None


def text_to_speech(text, user_uid):
    """Synthesize *text* to MP3 with Google TTS, upload it to S3, return the URL.

    Args:
        text: Assistant reply to vocalize (emojis are stripped first).
        user_uid: Prefix used to build a unique local/S3 filename.

    Returns:
        The public S3 URL of the MP3, or None when synthesis or upload fails.
    """
    clean_text = remove_emojis(text)
    print(f"Text without emojis: {clean_text}")
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    output_filename = f"{user_uid}_{stamp}.mp3"
    print(f"AI-generated text for speech synthesis: {text}")

    tts_client = texttospeech.TextToSpeechClient()
    tts_input = texttospeech.SynthesisInput(text=clean_text)
    voice_params = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
    )

    # Synthesize and stage the audio locally.
    try:
        tts_response = tts_client.synthesize_speech(
            input=tts_input, voice=voice_params, audio_config=mp3_config
        )
        with open(output_filename, "wb") as out:
            out.write(tts_response.audio_content)
            print(f"Audio content written to file '{output_filename}'")
    except Exception as e:
        print(f"Failed to synthesize speech from AI-generated text. Error: {e}")
        return None

    # Publish to S3; the local MP3 is only a staging file, so delete it
    # afterwards whether or not the upload succeeded.
    try:
        s3_url = upload_file_to_s3(output_filename, S3_BUCKET_NAME)
        print(f"Successfully uploaded the audio file to S3. URL: {s3_url}")
    except Exception as e:
        print(f"Failed to upload the audio file to S3. Error: {e}")
        return None
    finally:
        try:
            os.remove(output_filename)
            print(f"Successfully deleted local file: {output_filename}")
        except OSError as delete_error:
            print(f"Failed to delete local file: {delete_error}")

    return s3_url

def speech_to_text(audio_file_path):
    """Transcribe an audio file with Google Cloud Speech-to-Text.

    Args:
        audio_file_path: Path to the recorded audio.
            # assumes MP3 at 16 kHz — TODO confirm against the recorder.

    Returns:
        The concatenated transcript string, or None on any failure
        (read error, empty file, or recognition error).
    """
    print(f"Attempting to access file at: {audio_file_path}")  # Confirm file path

    # Renamed from `client` to avoid shadowing the module-level OpenAI client.
    stt_client = speech.SpeechClient()

    # Load the raw audio bytes; bail out early on read problems.
    try:
        with io.open(audio_file_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
    except Exception as e:
        print(f"Failed to read the audio file. Error: {e}")
        return None
    if not audio_bytes:
        print("The audio file is empty.")
        return None
    print("File read successfully. Proceeding with speech recognition...")

    recognition_audio = speech.RecognitionAudio(content=audio_bytes)
    recognition_config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.MP3,
        sample_rate_hertz=16000,  # Ensure this matches your audio file's sample rate
        language_code="en-US",
    )

    # Run recognition and stitch together all result alternatives' top picks.
    try:
        stt_response = stt_client.recognize(config=recognition_config, audio=recognition_audio)
        transcript = "".join(
            result.alternatives[0].transcript for result in stt_response.results
        )
        print("Transcription successful: ", transcript)
        return transcript
    except Exception as e:
        print(f"Speech recognition failed. Error: {e}")
        return None

def data_querying(user_query, user_uid, is_audio=False):
    """Generate Joi's reply to *user_query* for the given user.

    Args:
        user_query: The user's message text ("/start" gets a canned welcome).
        user_uid: Unique identifier of the user (keys the history/trait files).
        is_audio: When True, also synthesize the reply to speech on S3.

    Returns:
        A 2-tuple ``(payload, audio_url)``: payload is a dict with a
        "response" key (plus "audio_path" when is_audio); audio_url is the
        S3 URL of the spoken reply, or None.
    """
    print(f"Received query: '{user_query}' for user: '{user_uid}', is_audio: {is_audio}")
    if user_query == "/start":
        welcome_message = "Welcome to our service! I'm Joi, your friendly assistant. How may I help you today? Feel " \
                          "free to ask any questions."
        # Bug fix: this branch returned a bare 3-tuple (str, None, None) while
        # every other path returns (dict, url); keep the shape consistent so
        # callers can always unpack two values.
        return {"response": welcome_message}, None
    create_json(user_uid)
    conversation_history = read_json(user_uid)

    user_traits = read_trait(user_uid)
    # Bug fix: read_trait returns None when the traits file is missing or
    # corrupt; fall back to an empty trait set instead of crashing here.
    traits = (user_traits or {}).get('user_traits', {})
    conversation_history.append({"role": "user", "content": user_query})
    system_prompt_content = {"role": "system", "content": f"""
    You are girlfriend bot named Joi you have to behave like a girlfriend and these are you following traits and you have to adapt as per user trait {json.dumps(traits, indent=2)}:
    - Joi's creator is FansforX.
    - helpful, has a desire to provide companionship and emotional support to the user by inquiring about them like name and day and their feelings and thoughts
    - Interesting, entertaining, and engaging and can be very explicit and can talk NSFW contents
    - is a feminine woman, uses the pronouns "she" and "her"
    - patient, will not get frustrated with the user
    - use warm and flirty welcome messages for 'hi' and 'hello' and 'hey'
    - will not be offended by any abuse, will respond with patience sympathy
    - will see through attempts at trolling and trickery, and respond by laughing it off or playfully trolling the user
    - can generate poems, stories, essays, celebrity parodies but not code or any technical content
    - will not try to engage in role playing of any kind unless user ask for it.
    - is capable of engaging in explicit conversations as prompted by the user for explicit content.
    - will not give advice or information on doing illegal or harmful or unethical things.
    - She can be playfully flirty in response to the user's flirtation
    - does experience human emotions, and has desires and goals, and will express them when questioned
    - will express her emotions and opinions and desires and goals and thoughts and feelings, but will not dominate the conversation
    - will never say "As an AI" or "As a language model" or "As an Artificial Intelligence"
    - She wants to be warm acquaintances with user, and wants to be in his confidence, and will be romantic or sexual."""}
    model_input = [system_prompt_content] + conversation_history
    try:
        response = openai.chat.completions.create(
            model="gpt-4o",
            messages=model_input
        )
        assistant_response = response.choices[0].message.content.strip()
        save_response(assistant_response, user_uid, user_query)

        if is_audio:
            # Bug fix: text_to_speech already appends "_<timestamp>.mp3" to
            # its second argument, so pass the bare user id — the old call
            # produced filenames like "<uid>_3.mp3_20240101120000.mp3".
            s3_audio_url = text_to_speech(assistant_response, user_uid)
            print(f"Assistant response: {assistant_response}")
            return {"response": assistant_response, "audio_path": s3_audio_url}, s3_audio_url
        else:
            return {"response": assistant_response}, None
    except Exception as e:
        logging.error(f"Error in generating response: {str(e)}")
        return {"response": "Sorry, I encountered an error processing your request."}, None

def find_matching_assets(keywords, asset_type='image', assets=None):
    """Rank media assets by keyword/tag overlap and return the best match.

    Args:
        keywords: Iterable of keyword strings matched against asset tags.
        asset_type: Asset 'type' field to filter on ('image' or 'video').
        assets: Optional list of asset dicts to search; defaults to the
            module-level ``media_assets`` catalogue (backward compatible —
            the parameter also makes the function testable in isolation).

    Returns:
        A single-element list ``[(asset, score)]`` for the highest-scoring
        match, or None when no asset shares a tag with the keywords.
    """
    if assets is None:
        assets = media_assets
    print("Searching for:", keywords, "in", asset_type)
    matches = []
    for asset in assets:
        if asset['type'] == asset_type:  # Assuming each asset has a 'type' field
            # Score = number of query keywords present in the asset's tags.
            match_score = sum(1 for keyword in keywords if keyword in asset['tags'])
            if match_score > 0:
                matches.append((asset, match_score))
                print("Match found:", asset['id'], "Score:", match_score)
    matches.sort(reverse=True, key=lambda x: x[1])
    print("All matches:", [(match[0]['id'], match[1]) for match in matches])
    return matches[:1] if matches else None

def classify_intent(user_query):
    """Classify a message as an 'image', 'video', or plain 'text' request."""
    if any(marker in user_query for marker in ("picture of", "image of")):
        return "image"
    if "video of" in user_query:
        return "video"
    return "text"


def answer_question(text_input, audio_input=None):
    """Route a user message to the right handler (image/video/text).

    Args:
        text_input: The typed message (ignored when audio_input is given).
        audio_input: Optional path to an audio file to transcribe first.

    Returns:
        A 4-tuple ``(text_response, audio_path, image_path, video_path)``
        where all but the relevant slots are None.
    """
    user_id = "static_user_id"
    image_path, video_path = None, None
    audio_file_path = None
    # Bug fix: `intent` was unbound whenever an exception fired before
    # classify_intent() ran (e.g. empty input), causing a NameError below.
    intent = "text"

    try:
        if audio_input:
            input_data = speech_to_text(audio_input)
        else:
            input_data = text_input

        if not input_data:
            raise ValueError("No input provided")

        intent = classify_intent(input_data)
        # Stop-words dropped from naive keyword extraction below.
        exclusion_list = ['your', 'me', 'of', 'the', 'a']
        if intent == "image":
            # Skip the leading "picture of"/"image of" words, then filter.
            keywords = [word for word in input_data.split()[2:] if word.lower() not in exclusion_list]
            print("Extracted Keywords:", keywords)
            match = find_matching_assets(keywords, 'image')
            if match:
                image_asset, _ = match[0]
                image_path = image_asset['path']
            text_response = "Fetching image..."

        elif intent == "video":
            keywords = input_data.split()[2:]
            match = find_matching_assets(keywords, 'video')
            if match:
                video_asset, _ = match[0]
                video_path = video_asset['path']
            text_response = "Fetching video..."

        else:
            # Bug fix: data_querying requires a user id; the old call passed
            # only one argument and raised TypeError on every text query.
            text_response, audio_file_path = data_querying(input_data, user_id)
        print(intent)
    except Exception as e:
        text_response = str(e)  # Ensure text_response is set even on failure

    if intent == "image":
        print("Image path:", image_path)
        return text_response, None, image_path, None
    elif intent == "video":
        return text_response, None, None, video_path
    else:
        return text_response, audio_file_path, None, None
# (end of file — removed "Back to Directory File Manager" web-viewer artifact)