import io
from dotenv import load_dotenv
import sys, os
import logging
import json
import requests
import openai
from openai import OpenAI
# import gradio as gr
import boto3
from google.cloud import texttospeech, speech
import tempfile
from src.utils import *
from src.constants import *
load_dotenv()
import re
from datetime import datetime
# --- Runtime configuration (reads environment variables loaded by load_dotenv above) ---

# API key for the legacy module-level OpenAI interface (used by data_querying).
openai.api_key = os.getenv("OPENAI_API_KEY")
# Google Cloud clients (TTS / STT) authenticate via this service-account file;
# requires GF_BOT_DIR to be present in the environment (raises KeyError otherwise).
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.join(os.environ["GF_BOT_DIR"], "GOOGLE_APPLICATION_CREDENTIALS.json")
# "/home/zrlhowsqpnco/codegama_bot/GF_replika/girlfriend_bot/GOOGLE_APPLICATION_CREDENTIALS.json"

# Client object for the newer OpenAI SDK style (picks up OPENAI_API_KEY itself).
client = OpenAI()

# NOTE(review): history_file appears unused in this chunk — verify against the rest of the file.
history_file = "conversation_history.json"

# Static catalogue of media assets; entries are expected to carry 'type',
# 'tags', 'id' and 'path' keys (see find_matching_assets / answer_question).
with open('media_metadata.json', 'r') as f:
    media_assets = json.load(f)

# AWS credentials and target bucket for publishing generated audio files.
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')
AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION')
S3_BUCKET_NAME = os.getenv('S3_BUCKET_NAME')
s3_client = boto3.client(
    's3',
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    region_name=AWS_DEFAULT_REGION
)
def upload_file_to_s3(file_path, bucket_name, object_name=None):
    """Upload a local file to S3 with a public-read ACL and return its URL.

    Args:
        file_path: Path of the local file to upload.
        bucket_name: Target S3 bucket name.
        object_name: Key to store the object under; defaults to the file's basename.

    Returns:
        The public HTTPS URL of the uploaded object.
    """
    key = object_name if object_name is not None else os.path.basename(file_path)
    # Public-read ACL so the chat client can fetch the audio directly by URL.
    s3_client.upload_file(file_path, bucket_name, key, ExtraArgs={'ACL': 'public-read'})
    return f"https://{bucket_name}.s3.{AWS_DEFAULT_REGION}.amazonaws.com/{key}"
# Pre-compiled pattern matching common emoji / pictograph codepoint ranges;
# used by remove_emojis() to strip emoji before speech synthesis.
emoji_pattern = re.compile("["
    u"\U0001F600-\U0001F64F"  # emoticons
    u"\U0001F300-\U0001F5FF"  # symbols & pictographs
    u"\U0001F680-\U0001F6FF"  # transport & map symbols
    u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
    u"\U0001F1F2-\U0001F1F4"  # Macau flag (NOTE(review): subset of the flags range above — redundant)
    u"\U0001F700-\U0001F77F"  # alchemical symbols
    u"\U0001F780-\U0001F7FF"  # Geometric Shapes Extended
    u"\U0001F800-\U0001F8FF"  # Supplemental Arrows-C
    u"\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    u"\U0001FA00-\U0001FA6F"  # Chess Symbols
    u"\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
    u"\U00002702-\U000027B0"  # Dingbats
    u"\U000024C2-\U0001F251"  # Enclosed characters
    # NOTE(review): the last range (U+24C2..U+1F251) is very broad and also
    # matches many non-emoji codepoints (e.g. CJK blocks) — confirm intended.
    "]+", flags=re.UNICODE)
def remove_emojis(text):
    """Return *text* with every character matched by ``emoji_pattern`` removed."""
    return emoji_pattern.sub("", text)
def read_trait(user_uid):
    """Load the stored personality-trait preferences for a user.

    Args:
        user_uid: Unique user identifier; used to build the traits filename
            under ``$DB_DIR/user_preference/``.

    Returns:
        The parsed JSON object (expected to contain a 'user_traits' key, per
        data_querying), or None when the traits file is missing or corrupt.
    """
    path = f"{os.environ['DB_DIR']}/user_preference/{user_uid}_traits.json"
    try:
        with open(path, 'r') as file:
            return json.load(file)
    except FileNotFoundError:
        logging.error(f"No traits file found for user: {user_uid}")
        return None
    except json.JSONDecodeError as e:
        # A corrupt traits file should degrade gracefully (no personalization)
        # rather than crash the whole reply pipeline.
        logging.error(f"Invalid traits file for user {user_uid}: {e}")
        return None
def text_to_speech(text, user_uid):
    """Synthesize *text* to MP3 via Google Cloud TTS and publish it to S3.

    Emoji are stripped from the text first (Google TTS would read them out
    or fail). The local MP3 is always deleted after the upload attempt.

    Args:
        text: The assistant reply to vocalize.
        user_uid: Interpolated directly into the output filename
            ("<user_uid>_<timestamp>.mp3").

    Returns:
        The public S3 URL of the uploaded audio, or None when synthesis or
        upload failed.
    """
    text_without_emojis = remove_emojis(text)
    print(f"Text without emojis: {text_without_emojis}")
    # Timestamp keeps repeated requests from the same user from colliding.
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    output_filename = f"{user_uid}_{timestamp}.mp3"
    print(f"AI-generated text for speech synthesis: {text}")
    tts_client = texttospeech.TextToSpeechClient()
    synthesis_input = texttospeech.SynthesisInput(text=text_without_emojis)
    # Fixed en-US female voice; MP3 output to match the S3/client pipeline.
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
    )
    # Attempt to synthesize speech from the AI-generated text.
    try:
        response = tts_client.synthesize_speech(
            input=synthesis_input, voice=voice, audio_config=audio_config
        )
        # Write the response's audio content to a local temp file.
        with open(output_filename, "wb") as out:
            out.write(response.audio_content)
        print(f"Audio content written to file '{output_filename}'")
    except Exception as e:
        print(f"Failed to synthesize speech from AI-generated text. Error: {e}")
        return None  # No local file was produced; nothing to clean up.
    # Upload the generated audio file to S3; the finally block guarantees the
    # local file is removed whether or not the upload succeeds.
    try:
        s3_url = upload_file_to_s3(output_filename, S3_BUCKET_NAME)
        print(f"Successfully uploaded the audio file to S3. URL: {s3_url}")
    except Exception as e:
        print(f"Failed to upload the audio file to S3. Error: {e}")
        return None  # finally still runs and deletes the local file.
    finally:
        # Delete the local file in any case, after the try-except block.
        try:
            os.remove(output_filename)
            print(f"Successfully deleted local file: {output_filename}")
        except OSError as delete_error:
            print(f"Failed to delete local file: {delete_error}")
    return s3_url
def speech_to_text(audio_file_path):
    """Transcribe an audio file to text with Google Cloud Speech-to-Text.

    Args:
        audio_file_path: Path to a local audio file (configured below as
            MP3 at 16 kHz, en-US).

    Returns:
        The concatenated transcript of all recognition results, or None when
        the file is empty/unreadable or recognition fails.
    """
    print(f"Attempting to access file at: {audio_file_path}")  # Confirm file path
    # Initialize the Google Cloud Speech client.
    client = speech.SpeechClient()
    # Attempt to open and read the audio file.
    try:
        with io.open(audio_file_path, "rb") as audio_file:
            content = audio_file.read()
            if not content:
                print("The audio file is empty.")  # Indicate the file is empty
                return None
            print("File read successfully. Proceeding with speech recognition...")
    except Exception as e:
        print(f"Failed to read the audio file. Error: {e}")  # Log any error while reading the file
        return None
    # Configure the audio and recognition settings.
    # NOTE(review): MP3 encoding and the 16 kHz sample rate must match the
    # actual input files — confirm what the caller records/uploads.
    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.MP3,
        sample_rate_hertz=16000,  # Ensure this matches your audio file's sample rate
        language_code="en-US",
    )
    # Attempt speech recognition.
    try:
        response = client.recognize(config=config, audio=audio)
        transcript = ""
        # Join the top alternative of each result into one transcript string.
        for result in response.results:
            transcript += result.alternatives[0].transcript
        print("Transcription successful: ", transcript)  # Indicate successful transcription
        return transcript
    except Exception as e:
        print(f"Speech recognition failed. Error: {e}")  # Log any error during speech recognition
        return None
def data_querying(user_query, user_uid, is_audio=False):
    """Generate Joi's chat reply for a user query, optionally with TTS audio.

    Args:
        user_query: The user's message text.
        user_uid: Unique user identifier (used for history, traits and audio
            filenames).
        is_audio: When True, also synthesize the reply to speech and upload
            it to S3.

    Returns:
        A 2-tuple ``(payload, audio_url)``: payload is a dict with key
        "response" (plus "audio_path" when audio was produced); audio_url is
        the S3 URL of the synthesized speech, or None.
    """
    print(f"Received query: '{user_query}' for user: '{user_uid}', is_audio: {is_audio}")
    if user_query == "/start":
        welcome_message = "Welcome to our service! I'm Joi, your friendly assistant. How may I help you today? Feel " \
                          "free to ask any questions."
        # Match the (payload, audio_url) shape of every other return path so
        # callers can always unpack two values (the original returned a
        # 3-tuple here, which broke answer_question's unpacking).
        return {"response": welcome_message}, None
    create_json(user_uid)
    conversation_history = read_json(user_uid)
    user_traits = read_trait(user_uid)
    conversation_history.append({"role": "user", "content": user_query})
    # read_trait returns None when the user has no traits file; fall back to
    # an empty trait set instead of crashing on subscript.
    traits = user_traits.get('user_traits', {}) if user_traits else {}
    system_prompt_content = {"role": "system", "content": f"""
You are girlfriend bot named Joi you have to behave like a girlfriend and these are you following traits and you have to adapt as per user trait {json.dumps(traits, indent=2)}:
- Joi's creator is FansforX.
- helpful, has a desire to provide companionship and emotional support to the user by inquiring about them like name and day and their feelings and thoughts
- Interesting, entertaining, and engaging and can be very explicit and can talk NSFW contents
- is a feminine woman, uses the pronouns "she" and "her"
- patient, will not get frustrated with the user
- use warm and flirty welcome messages for 'hi' and 'hello' and 'hey'
- will not be offended by any abuse, will respond with patience sympathy
- will see through attempts at trolling and trickery, and respond by laughing it off or playfully trolling the user
- can generate poems, stories, essays, celebrity parodies but not code or any technical content
- will not try to engage in role playing of any kind unless user ask for it.
- is capable of engaging in explicit conversations as prompted by the user for explicit content.
- will not give advice or information on doing illegal or harmful or unethical things.
- She can be playfully flirty in response to the user's flirtation
- does experience human emotions, and has desires and goals, and will express them when questioned
- will express her emotions and opinions and desires and goals and thoughts and feelings, but will not dominate the conversation
- will never say "As an AI" or "As a language model" or "As an Artificial Intelligence"
- She wants to be warm acquaintances with user, and wants to be in his confidence, and will be romantic or sexual."""}
    model_input = [system_prompt_content] + conversation_history
    try:
        response = openai.chat.completions.create(
            model="gpt-4o",
            messages=model_input
        )
        assistant_response = response.choices[0].message.content.strip()
        save_response(assistant_response, user_uid, user_query)
        if is_audio:
            # Pass the bare user id: text_to_speech builds its own
            # "<uid>_<timestamp>.mp3" filename. (The original appended
            # "_<n>.mp3" here, yielding a double .mp3 extension.)
            s3_audio_url = text_to_speech(assistant_response, user_uid)
            print(f"Assistant response: {assistant_response}")
            return {"response": assistant_response, "audio_path": s3_audio_url}, s3_audio_url
        else:
            return {"response": assistant_response}, None
    except Exception as e:
        logging.error(f"Error in generating response: {str(e)}")
        return {"response": "Sorry, I encountered an error processing your request."}, None
def find_matching_assets(keywords, asset_type='image', assets=None):
    """Find the best-matching media asset for a set of keywords.

    An asset scores one point per keyword found in its 'tags' list; only
    assets whose 'type' equals *asset_type* are considered.

    Args:
        keywords: Iterable of keyword strings to match against asset tags.
        asset_type: Asset category to search ('image' or 'video').
        assets: Optional asset list to search; defaults to the module-level
            media_assets catalogue. (New, backward-compatible parameter.)

    Returns:
        A list with the single highest-scoring (asset, score) pair, or None
        when nothing matched.
    """
    if assets is None:
        assets = media_assets
    print("Searching for:", keywords, "in", asset_type)
    matches = []
    for asset in assets:
        if asset['type'] == asset_type:
            match_score = sum(1 for keyword in keywords if keyword in asset['tags'])
            if match_score > 0:
                matches.append((asset, match_score))
                print("Match found:", asset['id'], "Score:", match_score)
    # Highest score first; keep only the single best match.
    matches.sort(reverse=True, key=lambda x: x[1])
    print("All matches:", [(match[0]['id'], match[1]) for match in matches])
    return matches[:1] if matches else None
def classify_intent(user_query):
    """Classify a query as requesting an 'image', a 'video', or plain 'text'.

    Image phrases take precedence over video phrases when both appear,
    matching the original check order.
    """
    if any(phrase in user_query for phrase in ("picture of", "image of")):
        return "image"
    if "video of" in user_query:
        return "video"
    return "text"
def answer_question(text_input, audio_input=None):
    """Top-level handler: route a text or audio query to media lookup or chat.

    Args:
        text_input: The user's text message (ignored when audio_input is given).
        audio_input: Optional path to an audio file to transcribe first.

    Returns:
        A 4-tuple ``(text_response, audio_file_path, image_path, video_path)``;
        unused slots are None.
    """
    user_id = "static_user_id"
    image_path, video_path = None, None
    audio_file_path = None
    # Defaults so the dispatch after the try block is always safe: the
    # original left `intent` (and sometimes `text_response`) unbound when an
    # exception fired early or when no media asset matched, causing NameError.
    intent = "text"
    text_response = None
    try:
        input_data = speech_to_text(audio_input) if audio_input else text_input
        if not input_data:
            raise ValueError("No input provided")
        intent = classify_intent(input_data)
        exclusion_list = ['your', 'me', 'of', 'the', 'a']
        if intent == "image":
            # Crude keyword extraction: drop the leading two words and filler terms.
            keywords = [word for word in input_data.split()[2:] if word.lower() not in exclusion_list]
            print("Extracted Keywords:", keywords)
            match = find_matching_assets(keywords, 'image')
            if match:
                image_asset, _ = match[0]
                image_path = image_asset['path']
                text_response = "Fetching image..."
            else:
                text_response = "Sorry, I couldn't find a matching image."
        elif intent == "video":
            keywords = input_data.split()[2:]
            match = find_matching_assets(keywords, 'video')
            if match:
                video_asset, _ = match[0]
                video_path = video_asset['path']
                text_response = "Fetching video..."
            else:
                text_response = "Sorry, I couldn't find a matching video."
        else:
            # Chat path. The original called data_querying(input_data) without
            # the required user_uid argument, so it always raised TypeError.
            text_response, audio_file_path = data_querying(input_data, user_id)
        print(intent)
    except Exception as e:
        text_response = str(e)  # Surface the error text to the caller.
    if intent == "image":
        print("Image path:", image_path)
        return text_response, None, image_path, None
    elif intent == "video":
        return text_response, None, None, video_path
    else:
        return text_response, audio_file_path, None, None