Viewing File: /home/ubuntu/codegamaai-test/efimarket_bot/src/Chunk_store.py
from langchain.vectorstores import Qdrant
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import SpacyTextSplitter
from langchain.text_splitter import MarkdownTextSplitter
from langchain.document_loaders import DirectoryLoader
import src.constants
import shutil
import os
from src.utils import *
# Identifier of the OpenAI embedding model used for every chunk in this module.
model_name = 'text-embedding-ada-002'

# Shared embedding client, built once at import time and referenced by
# method_1 when documents are pushed to the vector store.
embedding = OpenAIEmbeddings(model=model_name)
def method_1(user_id, bot_id, chunk_size, chunk_overlap):
    """Chunk a bot's documents and index them into a Qdrant collection.

    Loads every document under ``$DB_DIR/<user_id>/<bot_id>/data``, splits
    them with ``SpacyTextSplitter`` and uploads the resulting chunks to the
    collection named ``"<user_id>_<bot_id>"`` using the module-level
    ``embedding`` client.

    Args:
        user_id: Identifier of the owning user; converted with ``str()``.
        bot_id: Identifier of the bot; converted with ``str()``.
        chunk_size: Forwarded to ``SpacyTextSplitter(chunk_size=...)``.
        chunk_overlap: Forwarded to ``SpacyTextSplitter(chunk_overlap=...)``.

    Returns:
        None.

    Raises:
        KeyError: If the ``DB_DIR`` environment variable is not set.
    """
    # Coerce the ids once so the filesystem path and the collection name are
    # built from the same strings; previously only the collection name was
    # coerced, so non-string ids failed in os.path.join.
    user_key = str(user_id)
    bot_key = str(bot_id)

    bot_root = os.path.join(os.environ['DB_DIR'], user_key, bot_key)
    data_dir = os.path.join(bot_root, 'data')
    collection_id = user_key + "_" + bot_key

    documents = DirectoryLoader(data_dir).load()
    splitter = SpacyTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_documents(documents)

    # NOTE(review): qdrant_url is not defined in this file; presumably it is
    # supplied by `from src.utils import *` -- confirm.
    # force_recreate=True is passed through unchanged from the original call.
    Qdrant.from_documents(
        documents=chunks,
        embedding=embedding,
        url=qdrant_url,
        collection_name=collection_id,
        force_recreate=True,
    )

    # Cleanup of the on-disk inputs is currently disabled; to re-enable:
    # shutil.rmtree(data_dir)
    # shutil.rmtree(os.path.join(bot_root, 'markdown_document'))
# Run configuration for a manual invocation of method_1 (see the
# commented-out call below).
user_id, bot_id = "efimarket", "web_data"

# Splitter settings passed to method_1 as chunk_size / chunk_overlap.
chunk_size, chunk_overlap = 1000, 200

# Not referenced anywhere in the visible part of this file.
fine_tune_model_id = "87"

# Uncomment to rebuild the collection with the settings above.
# method_1(user_id, bot_id, chunk_size, chunk_overlap)
Back to Directory
File Manager