# /home/ubuntu/combine_ai/sentiment_analysis/main.py

import fitz
from docx import Document
from transformers import pipeline
from collections import defaultdict
import numpy as np
import openai
from openai import OpenAI
import gradio as gr
from dotenv import load_dotenv
import os
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import os
from io import BytesIO

app = FastAPI()

# Hugging Face sentiment classifier (binary POSITIVE/NEGATIVE labels with a
# confidence score); the model is downloaded on first run.
sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
load_dotenv()  # load OPENAI_API_KEY (and any other vars) from a local .env file

openai.api_key = os.getenv("OPENAI_API_KEY")  # configure the legacy module-level client

# OpenAI() reads OPENAI_API_KEY from the environment on construction,
# so this must come after load_dotenv().
client = OpenAI()


def analyze_sentiment(texts):
    """Classify *texts* with the DistilBERT sentiment pipeline.

    texts: a string or list of strings.
    Returns the pipeline output: a list of {"label", "score"} dicts.
    """
    return sentiment_pipeline(texts)


def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in the PDF at *pdf_path*."""
    with fitz.open(pdf_path) as doc:
        pages = [page.get_text() for page in doc]
    return "".join(pages)


def extract_text_from_docx_path(docx_path):
    """Return the text of a .docx file, one paragraph per line."""
    document = Document(docx_path)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)


def generate_summary(text):
    """Ask gpt-3.5-turbo to summarize *text*.

    Returns the model's reply with surrounding whitespace stripped.
    Propagates openai API errors to the caller.
    """
    # Use the instantiated `client` (created at module level) instead of the
    # legacy module-global openai API — the file builds the client but the
    # original code never used it.
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Summarize this document: {text}"}
        ]
    )
    return response.choices[0].message.content.strip()


def extract_insight(text):
    """Ask gpt-3.5-turbo for the key insights in *text*.

    Returns the model's reply with surrounding whitespace stripped.
    Propagates openai API errors to the caller.
    """
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            # BUG FIX: the original reused the "Summarize this document" prompt
            # from generate_summary, so the API's "Insights" field was just a
            # duplicate of "Summary".
            {"role": "user", "content": f"Extract the key insights from this document: {text}"}
        ]
    )
    return response.choices[0].message.content.strip()


def process_document(file_path):
    """Extract text from a PDF or DOCX file and analyze its first chunk.

    Returns a (sentiment_result, summary, insight) tuple for the first
    MAX_LENGTH characters of the document, or the string
    "unsupported file type" for any other extension.
    """
    file_type = file_path.split('.')[-1].lower()

    if file_type == "pdf":
        text = extract_text_from_pdf(file_path)
    elif file_type == "docx":
        text = extract_text_from_docx_path(file_path)
    else:
        return "unsupported file type"

    # Only the first chunk's results are ever returned, so analyze just that
    # chunk instead of paying for OpenAI calls on every chunk and discarding
    # all but index 0 (which is what the original loop did). Slicing also
    # handles an empty document gracefully ("" instead of an IndexError).
    MAX_LENGTH = 1500  # rough character budget per model call (was "MAX_lENGHT")
    first_part = text[:MAX_LENGTH]

    sentiment_result = analyze_sentiment([first_part])[0]
    summary = generate_summary(first_part)
    insight = extract_insight(first_part)

    return sentiment_result, summary, insight


@app.post("/analyze-document/")
async def analyze_document(file: UploadFile = File(...)):
    """Accept a PDF/DOCX upload; return sentiment, summary and insights.

    Raises HTTP 400 for any other file extension.
    """
    import tempfile  # function-scope import; keeps the module import block untouched

    # Validate the extension before reading the body.
    file_extension = file.filename.split('.')[-1].lower()
    if file_extension not in ["pdf", "docx"]:
        raise HTTPException(status_code=400, detail="Unsupported file type. Please upload a PDF or DOCX file.")

    file_content = await file.read()

    # BUG FIX: the original wrote every upload to the fixed name
    # f"temp.{ext}", so concurrent requests overwrote (and deleted) each
    # other's files. A NamedTemporaryFile gives each request a unique path;
    # delete=False because process_document reopens the file by path.
    with tempfile.NamedTemporaryFile(suffix=f".{file_extension}", delete=False) as temp_file:
        temp_file.write(file_content)
        file_path = temp_file.name

    # Process the document, always cleaning up the temp file.
    try:
        sentiment_results, summary, insights = process_document(file_path)
    finally:
        os.remove(file_path)

    # Prepare the response
    result = {
        "Sentiment": sentiment_results['label'],
        "Sentiment Score": float(sentiment_results['score']),
        "Summary": summary,
        "Insights": insights
    }

    return JSONResponse(content=result)

if __name__ == "__main__":
    import uvicorn

    # Serve over HTTPS with the key/cert pair expected in the working directory.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8007,
        ssl_keyfile="privkey.pem",
        ssl_certfile="fullchain.pem",
    )
# (file-manager footer removed — was "Back to Directory File Manager")