# /home/ubuntu/combine_ai/sentiment_analysis/main.py
import os

import fitz  # PyMuPDF
from docx import Document
from dotenv import load_dotenv
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse
from openai import OpenAI
from transformers import pipeline

app = FastAPI()

# DistilBERT fine-tuned on SST-2; classifies text as POSITIVE or NEGATIVE with a confidence score.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Load OPENAI_API_KEY from .env and hand it to the v1-style OpenAI client.
load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
def analyze_sentiment(texts):
    # Each result is a dict like {"label": "POSITIVE", "score": 0.99}.
    # truncation=True guards against chunks longer than the model's 512-token limit.
    return sentiment_pipeline(texts, truncation=True)
def extract_text_from_pdf(pdf_path):
    # Concatenate the text of every page in the PDF.
    text = ""
    with fitz.open(pdf_path) as doc:
        for page in doc:
            text += page.get_text()
    return text
def extract_text_from_docx_path(docx_path):
    # Join the text of every paragraph in the DOCX file.
    doc = Document(docx_path)
    full_text = [para.text for para in doc.paragraphs]
    return "\n".join(full_text)
def generate_summary(text):
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Summarize this document: {text}"},
        ],
    )
    return response.choices[0].message.content.strip()
def extract_insight(text):
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Extract the key insights from this document: {text}"},
        ],
    )
    return response.choices[0].message.content.strip()
def process_document(file_path):
    file_type = file_path.split(".")[-1].lower()
    if file_type == "pdf":
        text = extract_text_from_pdf(file_path)
    elif file_type == "docx":
        text = extract_text_from_docx_path(file_path)
    else:
        raise ValueError("Unsupported file type")

    # Split the text into fixed-size chunks so each piece stays within model limits.
    MAX_LENGTH = 1500
    text_parts = [text[i:i + MAX_LENGTH] for i in range(0, len(text), MAX_LENGTH)]

    # Only the first chunk's results are returned, so analyze only that chunk
    # rather than spending one API call per chunk and discarding the rest.
    first_part = text_parts[0]
    sentiment_result = analyze_sentiment([first_part])[0]
    summary = generate_summary(first_part)
    insight = extract_insight(first_part)
    return sentiment_result, summary, insight
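# A possible extension (a sketch, not wired into the endpoint): aggregate the
# sentiment over every chunk instead of only the first. The helper name and
# the majority-vote/mean-score scheme are illustrative assumptions.
#
# def aggregate_sentiment(parts):
#     results = analyze_sentiment(parts)
#     positives = sum(1 for r in results if r["label"] == "POSITIVE")
#     label = "POSITIVE" if positives >= len(results) / 2 else "NEGATIVE"
#     mean_score = sum(r["score"] for r in results) / len(results)
#     return {"label": label, "score": mean_score}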
@app.post("/analyze-document/")
async def analyze_document(file: UploadFile = File(...)):
    # Check the file extension before reading anything.
    file_extension = file.filename.split(".")[-1].lower()
    if file_extension not in ["pdf", "docx"]:
        raise HTTPException(
            status_code=400,
            detail="Unsupported file type. Please upload a PDF or DOCX file.",
        )

    # Write the upload to a temporary file so the extractors can open it by path.
    file_content = await file.read()
    file_path = f"temp.{file_extension}"
    with open(file_path, "wb") as temp_file:
        temp_file.write(file_content)

    # Process the document, removing the temp file even if processing fails.
    try:
        sentiment_result, summary, insights = process_document(file_path)
    finally:
        os.remove(file_path)

    # Prepare the response.
    result = {
        "Sentiment": sentiment_result["label"],
        "Sentiment Score": float(sentiment_result["score"]),
        "Summary": summary,
        "Insights": insights,
    }
    return JSONResponse(content=result)
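# Example client call (a minimal sketch: the URL, filename, and verify=False
# are illustrative assumptions; a real deployment should verify the server's
# TLS certificate):
#
#   import requests
#   with open("report.pdf", "rb") as f:
#       resp = requests.post(
#           "https://localhost:8007/analyze-document/",
#           files={"file": ("report.pdf", f, "application/pdf")},
#           verify=False,  # only for self-signed certificates during testing
#       )
#   print(resp.json())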
if __name__ == "__main__":
    import uvicorn

    # Serve over HTTPS; privkey.pem and fullchain.pem must exist in the working directory.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8007,
        ssl_keyfile="privkey.pem",
        ssl_certfile="fullchain.pem",
    )