代码拉取完成,页面将自动刷新
from langchain.output_parsers.openai_tools import JsonOutputToolsParser
from langchain_community.chat_models import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda
from langchain.chains import create_extraction_chain
from typing import Optional, List
from langchain.chains import create_extraction_chain_pydantic
from langchain_core.pydantic_v1 import BaseModel
from langchain import hub
import os
from dataloader import load_high
from agentic_chunker import AgenticChunker
# Pydantic schema handed to the extraction chain in run_chunk: a single object
# wrapping a list of sentence strings.
# NOTE(review): documented with `#` comments on purpose -- a class docstring
# would presumably surface as the "description" in the pydantic-generated
# schema passed to the LLM; confirm before converting to a docstring.
class Sentences(BaseModel):
    # Extracted sentences; read back through `.sentences` in get_propositions.
    sentences: List[str]
def get_propositions(text, runnable, extraction_chain) -> List[str]:
    """Turn one piece of text into a flat list of proposition sentences.

    Args:
        text: The text to decompose; sent to ``runnable`` under the
            ``"input"`` key.
        runnable: Object whose ``invoke(dict)`` returns a message exposing a
            ``.content`` attribute.
        extraction_chain: Object whose ``run(str)`` returns a sequence of
            items, each exposing a ``.sentences`` list.

    Returns:
        Every sentence from every extracted item, in order. An empty list
        when the extraction chain produced no items.
    """
    runnable_output = runnable.invoke({"input": text}).content
    extracted = extraction_chain.run(runnable_output)

    # Flatten all extracted items instead of indexing ``[0]``: indexing raised
    # IndexError on an empty result and silently discarded any item after the
    # first one.
    return [sentence for item in extracted for sentence in item.sentences]
def run_chunk(essay):
    """Chunk *essay* by grouping LLM-extracted propositions.

    Steps, as performed below:
      1. Pull the ``wfh/proposal-indexing`` prompt from the LangChain hub and
         pipe it into a ``gpt-4-1106-preview`` chat model.
      2. Build a pydantic extraction chain over the ``Sentences`` schema.
      3. Split the essay on blank lines and collect the propositions returned
         by ``get_propositions`` for each non-blank paragraph.
      4. Feed all propositions to an ``AgenticChunker``, pretty-print its
         chunks, and return them.

    Args:
        essay: Full text to chunk; must support ``str.split``.

    Returns:
        Whatever ``AgenticChunker.get_chunks(get_type='list_of_strings')``
        yields -- presumably a list of strings, one per chunk (confirm
        against agentic_chunker).

    Note:
        Makes network calls (hub pull and OpenAI requests) and prints progress
        to stdout. The API key is read from the ``OPENAI_API_KEY`` environment
        variable.
    """
    prompt = hub.pull("wfh/proposal-indexing")
    llm = ChatOpenAI(
        model="gpt-4-1106-preview",
        openai_api_key=os.getenv("OPENAI_API_KEY"),
    )
    runnable = prompt | llm

    # Extraction
    extraction_chain = create_extraction_chain_pydantic(pydantic_schema=Sentences, llm=llm)

    # Runs of blank lines make split("\n\n") emit empty / whitespace-only
    # pieces; skip them so no LLM round trip is spent on text with no content.
    paragraphs = [para for para in essay.split("\n\n") if para.strip()]

    essay_propositions = []
    for i, para in enumerate(paragraphs):
        essay_propositions.extend(get_propositions(para, runnable, extraction_chain))
        print(f"Done with {i}")

    ac = AgenticChunker()
    ac.add_propositions(essay_propositions)
    ac.pretty_print_chunks()

    # The original had a print(chunks) after the return; it could never run
    # and has been dropped.
    return ac.get_chunks(get_type="list_of_strings")
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。