import os
import tempfile

import fitz  # PyMuPDF
from docx import Document
from dotenv import load_dotenv
from openai import OpenAI
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
# Load the OpenAI API key from .env and create a single client used for all calls.
load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

app = FastAPI()

# A local Hugging Face pipeline could be used instead of the OpenAI API, e.g.:
# sentiment_pipeline = pipeline("sentiment-analysis")
def analyze_sentiment(text):
    """Ask the model for a BERT-style sentiment label and score, then parse the reply."""
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant that provides a sentiment score for the given "
                    "content, formatted like a BERT classifier, e.g. "
                    "'Sentiment: NEGATIVE, Sentiment Score: 0.9917946457862854'."
                ),
            },
            {"role": "user", "content": f"Provide a sentiment score for this document: {text}"},
        ],
    )
    response_content = response.choices[0].message.content
    # Expected format: "Sentiment: <LABEL>, Sentiment Score: <float>"
    try:
        parts = response_content.split(', ')
        sentiment_label = parts[0].split(': ')[1]
        sentiment_score = float(parts[1].split(': ')[1])
    except (IndexError, ValueError):
        # Fall back to a neutral result if the model strays from the requested format.
        sentiment_label, sentiment_score = "NEUTRAL", 0.0
    return {'label': sentiment_label, 'score': sentiment_score}
def extract_text_from_pdf(pdf_path):
    """Concatenate the text of every page in the PDF."""
    text = ""
    with fitz.open(pdf_path) as doc:
        for page in doc:
            text += page.get_text()
    return text
def extract_text_from_docx_path(docx_path):
    """Join the text of every paragraph in the DOCX file."""
    doc = Document(docx_path)
    full_text = [para.text for para in doc.paragraphs]
    return '\n'.join(full_text)
def generate_summary(text):
    """Summarize the document in under 200 words."""
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Summarize this document in fewer than 200 words: {text}"},
        ],
    )
    return response.choices[0].message.content.strip()
def extract_insight(text):
    """Ask the model for the key insights in the document."""
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Extract insights from this document: {text}"},
        ],
    )
    return response.choices[0].message.content.strip()
def process_document(file_path):
    """Extract text from a PDF or DOCX file and return its sentiment, summary, and insights."""
    file_type = file_path.split('.')[-1].lower()
    if file_type == "pdf":
        text = extract_text_from_pdf(file_path)
    elif file_type == "docx":
        text = extract_text_from_docx_path(file_path)
    else:
        raise ValueError("Unsupported file type")

    MAX_LENGTH = 1500
    if len(text) > MAX_LENGTH:
        # If the text exceeds MAX_LENGTH, summarize and analyze it in chunks.
        parts = [text[i:i + MAX_LENGTH] for i in range(0, len(text), MAX_LENGTH)]
        summaries = [generate_summary(part) for part in parts]
        insights = [extract_insight(part) for part in parts]
        # Simple aggregation: concatenate the per-chunk results.
        summary = " ".join(summaries)
        insight = " ".join(insights)
    else:
        # Process the entire text at once if it fits within MAX_LENGTH.
        summary = generate_summary(text)
        insight = extract_insight(text)

    sentiment_result = analyze_sentiment(text)
    return sentiment_result, summary, insight
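
# Local usage sketch (illustrative only; assumes a file named "sample.pdf" exists
# next to this script):
#   sentiment, summary, insight = process_document("sample.pdf")
#   print(sentiment, summary, insight, sep="\n")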
@app.post("/analyze-document/")
async def analyze_document(file: UploadFile = File(...)):
    file_extension = file.filename.split('.')[-1].lower()
    if file_extension not in ["pdf", "docx"]:
        raise HTTPException(status_code=400, detail="Unsupported file type. Please upload a PDF or DOCX file.")

    # Write the upload to a unique temporary file so concurrent requests don't collide.
    file_content = await file.read()
    with tempfile.NamedTemporaryFile(suffix=f".{file_extension}", delete=False) as temp_file:
        temp_file.write(file_content)
        file_path = temp_file.name

    try:
        sentiment_result, summary, insights = process_document(file_path)
    finally:
        os.remove(file_path)  # Clean up the temporary file

    result = {
        "Sentiment": sentiment_result['label'],
        "Sentiment Score": sentiment_result['score'],
        "Summary": summary,
        "Insights": insights,
    }
    return JSONResponse(content=result)
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8007, ssl_keyfile="privkey.pem", ssl_certfile="fullchain.pem")
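
# Example client call (a minimal sketch; it assumes the server above is running on
# https://localhost:8007 and that "report.pdf" is an existing local file, both of
# which are illustrative assumptions):
#
#   import requests
#   with open("report.pdf", "rb") as f:
#       resp = requests.post(
#           "https://localhost:8007/analyze-document/",
#           files={"file": ("report.pdf", f, "application/pdf")},
#           verify="fullchain.pem",
#       )
#   print(resp.json())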