Quantcast
Channel: Recent Questions - Stack Overflow
Viewing all articles
Browse latest Browse all 12111

WARNING:langchain_community.vectorstores.pinecone: Found document with no `text` key. Skipping. No matched documents

$
0
0

I am trying to create a chat app through which i can query pdf documents. The code for upserting document in pinecone is as follows:

def handler(event, context):dotenv.load_dotenv()s3 = boto3.client("s3")   client = OpenAI(api_key=os.environ["OPENAI_API_KEY"]) pinecone.init(api_key=os.environ["PINECONE_API_KEY"], environment=os.environ["PINECONE_ENV"])index_name = "golf-knowledge-base"bucket_name = 'aidata-pdf-files'logger.info(event)for record in event["Records"]:    s3_key = unquote_plus(record["s3"]["object"]["key"])     logger.info("Extracting text:"+ s3_key)        object = s3.get_object(Bucket=bucket_name,Key=s3_key)     fileObject = object["Body"].read()    reader = PdfReader(io.BytesIO(fileObject))    pages  = []    for page in reader.pages:        pages.append(               Document(                   page_content=page.extract_text(),                   metadata={'source':s3_key,'page': reader.get_page_number(page)                       }                   )                 )    document_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=20)    documents = document_splitter.split_documents(pages)    # test pinecone index     logger.info("checking pinecone index")    pinecone.whoami()    # create index if it does not exists    if index_name not in pinecone.list_indexes():        logger.info("Creating Pinecone Index : "+index_name)        pinecone.create_index(name=index_name,dimension=1536, metric="cosine")    # connect to index    index = pinecone.Index(index_name)    logger.info(index.describe_index_stats())    logger.info("creating embeddings")    vectors = []    for doc in documents:        embeds = get_embedding(client, text=doc.page_content)        # Generate consistant keys to avoid duplicate indigest         id = doc.metadata["source"].split(".")[:-1][0].replace(" ","-").lower() +"-" + str(doc.metadata["page"])        vector = {'id': id , 'values': embeds, 'metadata' : doc.metadata}        vectors.append(vector)    # Store embeddings in pinecone    response = index.upsert(vectors)    print(response)return {"status":200, "message":"stored to db"} def get_embedding(client, text, model="text-embedding-ada-002"):text = text.replace("\n", " ")return client.embeddings.create(input = [text], model=model).data[0].embedding

And this is how i am querying document.

def pinecone_query(query) :pc = Pinecone(api_key="Key")index = pc.Index("name")OPENAI_API_KEY="Key"client = OpenAI(api_key=OPENAI_API_KEY) # print(pc.whoami())print(pc.list_indexes())embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)vectorstore = vs_Pinecone(index, embeddings.embed_query, "text")    docs = vectorstore.similarity_search(query)print('docs len', len(docs)) # this returns 0for doc in docs:    print(f"{doc.page_content}\n")chat_model_name = "gpt-3.5-turbo"llm = ChatOpenAI(model_name=chat_model_name, openai_api_key=OPENAI_API_KEY, temperature=0.0 )chain = load_qa_chain(llm, chain_type="stuff")res = chain.run(input_documents=docs, question=query)return res

It gives me the answer. But strangely returned matched documents are empty.

And i get a warning

WARNING:langchain_community.vectorstores.pinecone: Found documentwith no text key. Skipping.


Viewing all articles
Browse latest Browse all 12111

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>