I am building my first chatbot trained on my database. I'm stuck and can't move forward with the last part of my code :( My code looks as follows:
import os
from llama_index import SimpleDirectoryReader, GPTListIndex, GPTVectorStoreIndex, LLMPredictor, PromptHelper
from langchain import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch, FAISS
from langchain.document_loaders import TextLoader, PyPDFLoader, DirectoryLoader
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.memory import VectorStoreRetrieverMemory
from langchain.chat_models import ChatOpenAI
import gradio as gr

os.environ["OPENAI_API_KEY"] = "key"  # my API key
llm_name = "gpt-3.5-turbo"

from llama_index import ServiceContext, StorageContext, load_index_from_storage

def create_index(path):
    max_input = 4096
    num_output = 4096
    tokens = 200
    chunk_size = 500  # for the LLM, we need to define a chunk size

    # define prompt helper
    promptHelper = PromptHelper(max_input, num_output, chunk_overlap_ratio=0.1, chunk_size_limit=1024)

    # define LLM - there are many models we could use, but in this example let's go with an OpenAI model
    llmPredictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", max_tokens=tokens))

    # load data - it will take all the .txt files, if there is more than one
    csv_docs = SimpleDirectoryReader("/Python_script/llama/llama").load_data()
    pdf_docs = SimpleDirectoryReader("/Python_script/llama/llama").load_data()
    docs = csv_docs + pdf_docs

    # create the vector index
    service_context = ServiceContext.from_defaults(llm_predictor=llmPredictor, prompt_helper=promptHelper)
    vectorIndex = GPTVectorStoreIndex.from_documents(documents=docs, service_context=service_context)
    vectorIndex.storage_context.persist(persist_dir="store_test")

import gradio as gr
from llama_index import GPTVectorStoreIndex, StorageContext, LLMPredictor, load_index_from_storage, SimpleDirectoryReader

storage_context = StorageContext.from_defaults(persist_dir="/Users/renia/Praca_dyplomowa/Python_script/store_test")
index = load_index_from_storage(storage_context)

# Define the LLMPredictor with an OpenAI model
tokens = 200
predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", max_tokens=tokens))

# Get the documents from the index
documents = SimpleDirectoryReader("/Python_script/store_test").load_data()

# Create a new GPTVectorStoreIndex using the loaded documents
vector_index = GPTVectorStoreIndex.from_documents(documents=documents, predictor=predictor)

# Load the previously created vector index from storage using the specified index ID (or default ID)
vectorIndex = load_index_from_storage(storage_context)

# Create OpenAIEmbeddings
embedding_size = 1536  # dimensions of the OpenAI embeddings
embedding_fn = OpenAIEmbeddings().embed_query

# Create DocArrayInMemorySearch retriever
def to_vector_store(vector_store_index):
    return VectorStore(vector_store_index.documents, vector_store_index.embeddings)

retriever = {
    "name": "DocArrayInMemorySearch",
    "params": {
        "vector_index": vectorIndex,
        "embedding_fn": embedding_fn,
    },
    "vectorstore": to_vector_store(vectorIndex),
}

# Create VectorStoreRetrieverMemory with the retriever
memory = VectorStoreRetrieverMemory(retriever=retriever)

# Define the chat function
def chatbot_response(message, history):
    # Convert history to a list of (user_input, bot_response) tuples
    history_list = []
    for user_input, bot_response in history:
        history_list.append((f"User: {user_input}", f"Bot: {bot_response}"))

    # Retrieve relevant memories based on previous conversations
    relevant_memories = memory.predict_new_summary(messages=history_list, previous_summary="")

    # Concatenate all relevant memories into a single string
    relevant_memories_str = "\n".join(relevant_memories)

    # Get the bot's response using the vector index, considering both the message and relevant memories
    bot_response = vectorIndex.query(relevant_memories_str + f"\nUser: {message}\n")

    # Only take the last part of the response, which is the bot's reply
    bot_response = bot_response.split("Bot:")[-1].strip()
    return bot_response

# Create the Gradio chat interface
gr.ChatInterface(
    chatbot_response,
    title="Iron Ladies Chatbot",
    description="Ask the Iron Ladies chatbot any question!",
    theme="dark",
    examples=[
        ["Tell me about Iron Ladies.", "The Iron Ladies is a team of powerful female superheroes :)"],
        ["What are their powers?", "Each member of the Iron Ladies has unique superpowers."],
        ["Who is the leader?", "The leader of the Iron Ladies is Maja - natural born leader :D."],
    ],
)
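As a side note, I suspect the vectorIndex.query(...) call near the end is also outdated: from the llama_index docs I understand that newer versions want you to create a query engine first, roughly like this (my guess based on the docs, not verified):

query_engine = vectorIndex.as_query_engine()
response = query_engine.query(f"User: {message}\n")
bot_response = str(response)

But the error I'm stuck on happens before that point.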
When I execute the code, I get the following error:
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[32], line 37
     27 def to_vector_store(vector_store_index):
     28     return VectorStore(vector_store_index.documents, vector_store_index.embeddings)
     31 retriever = {
     32     "name": "DocArrayInMemorySearch",
     33     "params": {
     34         "vector_index": vectorIndex,
     35         "embedding_fn": embedding_fn,
     36     },
---> 37     "vectorstore": to_vector_store(vectorIndex),
     38 }
     40 # Create VectorStoreRetrieverMemory with the retriever
     41 memory = VectorStoreRetrieverMemory(retriever=retriever)

Cell In[32], line 28, in to_vector_store(vector_store_index)
     27 def to_vector_store(vector_store_index):
---> 28     return VectorStore(vector_store_index.documents, vector_store_index.embeddings)

AttributeError: 'VectorStoreIndex' object has no attribute 'documents'
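From what I can tell, a llama_index VectorStoreIndex simply has no .documents or .embeddings attributes, and VectorStoreRetrieverMemory expects a real LangChain retriever object rather than a plain dict. The closest pattern I found is the FAISS example from the LangChain docs for VectorStoreRetrieverMemory; the sketch below is adapted from there (the faiss index and empty docstore are straight from the docs example, not something I've verified against my own index):

import faiss
from langchain.docstore import InMemoryDocstore
from langchain.vectorstores import FAISS

embedding_size = 1536  # dimensions of OpenAI embeddings
faiss_index = faiss.IndexFlatL2(embedding_size)
embedding_fn = OpenAIEmbeddings().embed_query

# An (initially empty) vector store that will hold conversation snippets
vectorstore = FAISS(embedding_fn, faiss_index, InMemoryDocstore({}), {})

# A real retriever object, instead of the dict I tried to pass above
retriever = vectorstore.as_retriever(search_kwargs=dict(k=1))
memory = VectorStoreRetrieverMemory(retriever=retriever)

I think something like this avoids the AttributeError, but I still don't see how to connect the memory to the index I loaded with load_index_from_storage.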
Does anyone have a clue how I can move forward, load the vectorIndex, and give my chatbot memory?
I've tried to update my code but I'm unable to find a solution. Each time I make any amendments, I get new errors relating to vectorIndex. I tried getting help from GPT and Bard, but they are not that smart ;)
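For completeness, this is roughly how I imagine the memory being used inside chatbot_response, based on the save_context / load_memory_variables methods from the LangChain docs (predict_new_summary was my guess and probably belongs to a different memory class) - again just a sketch of what I'm aiming for, not working code:

def chatbot_response(message, history):
    # Pull memories relevant to the current message
    relevant = memory.load_memory_variables({"input": message})["history"]

    # Query the llama_index index with the memories prepended
    query_engine = vectorIndex.as_query_engine()
    response = query_engine.query(f"{relevant}\nUser: {message}\n")
    bot_response = str(response)

    # Store this exchange so it can be retrieved in later turns
    memory.save_context({"input": message}, {"output": bot_response})
    return bot_response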