# Source file: /home/ubuntu/codegamaai-test/efimarket_bot/src/Chunk_store.py

from langchain.vectorstores import Qdrant
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import SpacyTextSplitter
from langchain.text_splitter import MarkdownTextSplitter
from langchain.document_loaders import DirectoryLoader
import src.constants
import shutil
import os
from src.utils import *

# Name of the OpenAI embedding model handed to OpenAIEmbeddings below.
model_name = 'text-embedding-ada-002'

# Module-level embedding client, constructed once when this module is imported
# and reused by method_1 for every collection it builds.
embedding = OpenAIEmbeddings(model=model_name)

def method_1(user_id, bot_id, chunk_size, chunk_overlap):
    """Load a bot's documents, split them into chunks and index them in Qdrant.

    Documents are read from ``$DB_DIR/<user_id>/<bot_id>/data`` with
    ``DirectoryLoader``, split with ``SpacyTextSplitter`` and written to the
    Qdrant collection named ``"<user_id>_<bot_id>"`` using the module-level
    ``embedding`` client.

    Args:
        user_id: Identifier of the owning user; converted with ``str()``.
        bot_id: Identifier of the bot; converted with ``str()``.
        chunk_size: Forwarded to ``SpacyTextSplitter`` as ``chunk_size``.
        chunk_overlap: Forwarded to ``SpacyTextSplitter`` as ``chunk_overlap``.

    Returns:
        None.

    Raises:
        KeyError: If the ``DB_DIR`` environment variable is not set.
    """
    # Normalise the ids once so the directory path and the collection name
    # are built from the same strings. Previously only the collection name
    # tolerated non-string ids; os.path.join would raise TypeError first.
    user_key = str(user_id)
    bot_key = str(bot_id)

    data_dir = os.path.join(os.environ['DB_DIR'], user_key, bot_key, 'data')
    collection_id = user_key + "_" + bot_key

    documents = DirectoryLoader(data_dir).load()
    splitter = SpacyTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    texts = splitter.split_documents(documents)

    # `qdrant_url` is not defined in this file; presumably it is provided by
    # `from src.utils import *` -- TODO confirm.
    # force_recreate=True asks LangChain to recreate the collection, so a
    # previous index under the same name is presumably replaced -- verify
    # against the installed LangChain version.
    Qdrant.from_documents(
        documents=texts,
        embedding=embedding,
        url=qdrant_url,
        collection_name=collection_id,
        force_recreate=True,
    )

    # NOTE: this function does not delete anything on disk. An earlier
    # revision had (disabled) shutil.rmtree calls for the 'data' and
    # 'markdown_document' directories under $DB_DIR/<user_id>/<bot_id>.

# Sample parameters for a manual ingestion run of method_1.
user_id, bot_id = "efimarket", "web_data"

# Splitter settings passed as chunk_size / chunk_overlap.
chunk_size, chunk_overlap = 1000, 200

# Not referenced anywhere in this file's visible code.
fine_tune_model_id = "87"

# Uncomment to rebuild the collection with the parameters above:
# method_1(user_id, bot_id, chunk_size, chunk_overlap)
# (End of file; "Back to Directory File Manager" was page navigation text, not code.)