-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
61 lines (49 loc) · 2 KB
/
Copy pathmain.py
File metadata and controls
61 lines (49 loc) · 2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_pinecone import PineconeVectorStore
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.chains import RetrievalQA
from pinecone import Pinecone, ServerlessSpec
import streamlit as st
import time
import NO_COPY_keys_file
# API credentials are read from attributes of the local NO_COPY_keys_file
# module (`opn` for OpenAI, `pnc` for Pinecone).
# NOTE(review): presumably that module is kept out of version control — confirm.
OPENAI_API_KEY = NO_COPY_keys_file.opn
PINECONE_API_KEY = NO_COPY_keys_file.pnc
# Document Intelligence and Compliance Bot
def create_docbot(pdf: str):
    """Build a retrieval question-answering chain over a single PDF.

    The PDF is loaded page by page, split into overlapping chunks, embedded
    with OpenAI embeddings and upserted into the ``desired-index`` Pinecone
    index, which is created (and waited on until ready) if it does not exist
    yet. Chunk IDs are derived from the PDF path, the chunk position and the
    chunk text, so ingesting the same PDF again overwrites the existing
    vectors instead of adding duplicates.

    Args:
        pdf: Path of the PDF file to ingest.

    Returns:
        A ``RetrievalQA`` chain whose retriever is backed by the Pinecone
        vector store.
    """
    import hashlib

    # Load one Document per page and split exactly once. ``load_and_split()``
    # would already apply a default splitter, so splitting its output again
    # chunked the text twice.
    pages = PyPDFLoader(pdf).load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = text_splitter.split_documents(pages)

    # Connect to Pinecone and make sure the target index exists.
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
    pc = Pinecone(api_key=PINECONE_API_KEY)
    index_name = 'desired-index'
    existing_indices = [index_info["name"] for index_info in pc.list_indexes()]
    if index_name not in existing_indices:
        pc.create_index(
            name=index_name,
            # Must equal the embedding vector size; assumes the default
            # OpenAIEmbeddings model produces 1536-dim vectors — TODO confirm
            # if the embedding model is ever changed.
            dimension=1536,
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
            deletion_protection="enabled",
        )
        # Index creation is asynchronous; poll until it can accept writes.
        while not pc.describe_index(index_name).status["ready"]:
            time.sleep(1)
    index = pc.Index(index_name)
    vectorstore = PineconeVectorStore(index=index, embedding=embeddings)

    # Embed and store the chunks. Deterministic IDs make the upsert
    # idempotent: without them every call stored a new copy of each chunk
    # under a random ID, polluting retrieval results with duplicates.
    chunk_ids = [
        hashlib.sha256(
            f"{pdf}:{position}:{doc.page_content}".encode("utf-8")
        ).hexdigest()
        for position, doc in enumerate(docs)
    ]
    if docs:
        vectorstore.add_documents(docs, ids=chunk_ids)

    # Create the QA chain on top of the vector store retriever.
    qa_chain = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(openai_api_key=OPENAI_API_KEY),
        retriever=vectorstore.as_retriever(),
    )
    return qa_chain
if __name__ == '__main__':
    # Streamlit re-executes this whole script on every widget interaction.
    # Caching the builder keeps a single chain per PDF path, so the document
    # is not re-loaded, re-embedded and re-upserted on each rerun.
    build_docbot = st.cache_resource(create_docbot)
    docbot = build_docbot("SampleDocument.pdf")

    # The field collects the user's question (it was previously mislabelled
    # "Name" although every surrounding message refers to a question).
    question = st.text_input("Question")
    if not question:
        st.warning('Please input a question.')
        st.stop()  # Halt this run until the user has typed something.

    st.success("Question received.")
    st.write(docbot.invoke(question)['result'])