1 Star 0 Fork 0

非洲挖井队之猎天/Medical-Graph-RAG

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
data_chunk.py 1.58 KB
一键复制 编辑 原始数据 按行查看 历史
WuJunde 提交于 2024-08-21 08:46 +08:00 . update
from langchain.output_parsers.openai_tools import JsonOutputToolsParser
from langchain_community.chat_models import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda
from langchain.chains import create_extraction_chain
from typing import Optional, List
from langchain.chains import create_extraction_chain_pydantic
from langchain_core.pydantic_v1 import BaseModel
from langchain import hub
import os
from dataloader import load_high
from agentic_chunker import AgenticChunker
# Pydantic data class
class Sentences(BaseModel):
    """Extraction schema holding the sentences pulled out of one passage.

    Supplied as the ``pydantic_schema`` of the extraction chain that
    ``run_chunk`` builds; ``get_propositions`` reads the ``sentences`` field
    of the extracted record.
    """

    # The extracted sentences, one string per sentence.
    sentences: List[str]
def get_propositions(text, runnable, extraction_chain):
    """Turn one passage of text into a list of proposition sentences.

    The passage is sent through ``runnable``; the ``content`` of its reply is
    then handed to ``extraction_chain`` and the ``sentences`` of the first
    extracted record are returned.

    Args:
        text: The passage to process; passed to ``runnable`` under the
            ``"input"`` key.
        runnable: Object exposing ``invoke(dict)`` whose result has a
            ``content`` attribute.
        extraction_chain: Object exposing ``run(str)`` that returns a
            sequence of records, each with a ``sentences`` attribute.

    Returns:
        The ``sentences`` of the first extracted record, or an empty list
        when the extraction step yields no records.
    """
    runnable_output = runnable.invoke({"input": text}).content
    extracted = extraction_chain.run(runnable_output)
    # The extraction step can come back empty when there is nothing to
    # extract; indexing [0] unconditionally would raise IndexError then.
    if not extracted:
        return []
    return extracted[0].sentences
def run_chunk(essay, *, model='gpt-4-1106-preview'):
    """Break an essay into chunks of related propositions.

    The essay is split into paragraphs on blank lines, each paragraph is
    converted to propositions with ``get_propositions``, and all propositions
    are then grouped by an ``AgenticChunker``.

    Args:
        essay: Full text to chunk; paragraphs are separated by ``"\\n\\n"``.
        model: Name of the OpenAI chat model to use. Defaults to the model
            this function originally hard-coded.

    Returns:
        Whatever ``AgenticChunker.get_chunks(get_type='list_of_strings')``
        returns (presumably a list of chunk strings; confirm against
        ``agentic_chunker``).

    Side effects:
        Prints a progress line per processed paragraph and pretty-prints the
        resulting chunks. Reads the API key from ``OPENAI_API_KEY``.
    """
    obj = hub.pull("wfh/proposal-indexing")
    llm = ChatOpenAI(model=model, openai_api_key=os.getenv("OPENAI_API_KEY"))
    runnable = obj | llm

    # Extraction
    extraction_chain = create_extraction_chain_pydantic(pydantic_schema=Sentences, llm=llm)

    # split() emits empty pieces for leading/trailing separators and runs of
    # blank lines; drop those so no LLM call is spent on an empty paragraph.
    paragraphs = [para for para in essay.split("\n\n") if para.strip()]

    essay_propositions = []
    for i, para in enumerate(paragraphs):
        essay_propositions.extend(get_propositions(para, runnable, extraction_chain))
        print(f"Done with {i}")

    ac = AgenticChunker()
    ac.add_propositions(essay_propositions)
    ac.pretty_print_chunks()
    # The original had a print(chunks) after the return; it could never run
    # and has been removed.
    return ac.get_chunks(get_type='list_of_strings')
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/rengm/Medical-Graph-RAG.git
git@gitee.com:rengm/Medical-Graph-RAG.git
rengm
Medical-Graph-RAG
Medical-Graph-RAG
main

搜索帮助