from dotenv import load_dotenv
import gradio as gr
import openai
import sys, os
import logging
from llama_index import (
    SimpleDirectoryReader,
    GPTVectorStoreIndex,
    load_index_from_storage,
    set_global_service_context,
)
from llama_index.storage.storage_context import StorageContext
from llama_index.prompts import Prompt
from llama_index.indices.service_context import ServiceContext
from llama_index.memory import ChatMemoryBuffer
from llama_index.llms import OpenAI

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
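
# gpt-4 at low temperature for more deterministic answers. Note: llama_index's
# OpenAI wrapper takes `model` (not langchain's `model_name`), and the service
# context must be registered globally to take effect.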
service_context = ServiceContext.from_defaults(
    llm=OpenAI(model="gpt-4", temperature=0.2, max_tokens=1500)
)
set_global_service_context(service_context)

directory_path = r'C:\Users\tejas\PycharmProjects\tax_help_bot\efi_docs'
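
# Loads the EFI docs, then builds or restores a persisted vector index over them.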
class Llama:
    index = None

    @staticmethod
    def init():
        Llama.index = Llama.load_index(directory_path)

    @staticmethod
    def load_index(directory_path):
        documents = SimpleDirectoryReader(directory_path, filename_as_id=True).load_data()
        print(f"Loaded {len(documents)} document pages")
        try:
            storage_context = StorageContext.from_defaults(persist_dir="./storage")
            index = load_index_from_storage(storage_context)
            logging.info("Index loaded from storage.")
        except FileNotFoundError:
            logging.info("Index not found. Creating a new one...")
            index = GPTVectorStoreIndex.from_documents(documents)
            index.storage_context.persist()
            logging.info("New index created and persisted to storage.")
        refreshed_docs = index.refresh_ref_docs(
            documents,
            update_kwargs={"delete_kwargs": {"delete_from_docstore": True}},
        )
        print(refreshed_docs)
        print("Number of newly inserted/refreshed docs:", sum(refreshed_docs))
        index.storage_context.persist()
        logging.info("Index refreshed and persisted to storage.")
        return index

Llama.init()
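
# Per-user session state (interaction count and running conversation history).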
user_sessions = {}
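
# Lightweight triage: ask gpt-3.5-turbo whether the query is a support/help
# request (to be escalated to a human) rather than a documentation question.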
def is_customer_support_related(query):
    try:
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": query},
                {"role": "assistant",
                 "content": """Is the above query related to customer support, such as asking for help? Return 'yes' if it is,
                 otherwise return 'no'. Return 'yes' for examples like: "hi, api is not working", "Hi webhook is not received",
                 or API-key-related issues. Return 'no' for examples like: "What is the format for creating a signature
                 for API requests?" or any other documentation-related queries."""}
            ]
        )
        print(response)
        assistant_response = response.choices[0].message.content.strip().lower()
        print("Assistant response:", assistant_response)
        return "yes" in assistant_response
    except Exception as e:
        print(f"Error in OpenAI call: {e}")
        return False

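
# Route a user query: track session state, escalate support requests, and
# answer documentation questions from the vector index.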
def data_querying(input_text, user_id):
    global user_sessions
    if user_id not in user_sessions:
        user_sessions[user_id] = {
            "interaction_count": 0,
            "conversation_history": [],
        }
    user_session = user_sessions[user_id]
    user_session["interaction_count"] += 1
    print(f"Debug: User {user_id} interaction count: {user_session['interaction_count']}")
    user_session["conversation_history"].append(input_text)

    if is_customer_support_related(input_text):
        response_message = "Hi, we will get back to you as soon as possible to resolve your query."
        user_sessions[user_id] = user_session
        return response_message
    else:
        text_qa_template_str = (
            "Context information is below.\n"
            "---------------------\n"
            "{context_str}\n"
            "---------------------\n"
            "Using both the context information and your own knowledge, "
            "answer the question: {query_str}\n"
            "If the context isn't helpful, respond with: I don't understand, please try to rephrase your question.\n"
        )
        refine_template_str = (
            "The original question is as follows: {query_str}\n"
            "We have provided an existing answer: {existing_answer}\n"
            "We have the opportunity to refine the existing answer "
            "(only if needed) with some more context below.\n"
            "------------\n"
            "{context_msg}\n"
            "------------\n"
            "Using both the new context and your own knowledge, update or repeat the existing answer.\n"
        )
        memory = ChatMemoryBuffer.from_defaults(token_limit=1500)
        refine_template = Prompt(refine_template_str)
        text_qa_template = Prompt(text_qa_template_str)
        system_prompt = (
            "You are a highly knowledgeable customer support assistant with expertise in EFI API queries. "
            "Your primary goal is to assist users by providing accurate, clear, and concise information. "
            "You should base your responses on the provided context, ensuring relevance and utility. "
            "If a query falls outside of your available context, respond with: "
            "'I'm not sure how to answer that. Could you provide more details or try rephrasing your question?' "
            "Keep the conversation professional and focused on providing solutions."
        )
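        # "context" chat mode retrieves relevant index chunks each turn and
        # injects them into the system prompt; note the QA/refine templates are
        # primarily used by query engines and may be ignored by this chat mode.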
        response = Llama.index.as_chat_engine(
            text_qa_template=text_qa_template,
            refine_template=refine_template,
            chat_mode="context",
            memory=memory,
            system_prompt=system_prompt,
        ).chat(input_text)
        user_sessions[user_id] = user_session
        return response.response

def answer_question(question):
    user_id = "static_user_id"
    try:
        response = data_querying(question, user_id)
        return response
    except Exception as e:
        return str(e)

iface = gr.Interface(
    fn=answer_question,
    inputs="text",
    outputs="text",
    title="Customer Support Assistant",
    description="Here's an assistant that can help you with your queries.",
)

if __name__ == "__main__":
    iface.launch(share=True)