first commit
AIEC-RAG/retriver/langsmith/langsmith_example.py (new file, 547 lines)
@@ -0,0 +1,547 @@
"""
LangSmith monitoring usage example
Edit the hard-coded query in main(), run the full retrieval pipeline, and monitor it in LangSmith
"""

import os
import sys
import json
import pickle
from typing import Dict, Any, List
from datetime import datetime

# Add the project root to the import path
project_root = os.path.join(os.path.dirname(__file__), '..', '..')
sys.path.append(project_root)

from retriver.langsmith.langsmith_retriever import create_langsmith_retriever, check_langsmith_connection


def load_graph(pkl_path: str):
    """Load the graph data from a pickle file."""
    try:
        with open(pkl_path, 'rb') as f:
            graph = pickle.load(f)
        return graph
    except Exception as e:
        print(f"[WARNING] Failed to load graph data: {e}")
        return None


def extract_evidences_path(passage_ids: List[str], pkl_path: str = "test_with_concept.pkl") -> List[List[List[str]]]:
    """
    Extract the relation triplets associated with the given passage IDs.

    For each passage, this extracts relation triplets between the entities and events
    connected to that passage, rather than the direct passage-to-entity relations.

    Args:
        passage_ids: List of passage IDs
        pkl_path: Path to the graph data file

    Returns:
        A list of triplets per passage, in the format
        [[[head entity, relation, tail entity], ...], ...]
        (see the usage sketch after this function)
    """
    graph = load_graph(pkl_path)
    if graph is None:
        return []

    all_evidences = []

    for passage_id in passage_ids:
        passage_evidences = []

        # Find the corresponding text node in the graph
        text_node_id = None

        # Method 1: use passage_id directly as the node ID
        if passage_id in graph.nodes:
            text_node_id = passage_id
        else:
            # Method 2: search all text nodes for a matching ID
            for node_id, node_data in graph.nodes(data=True):
                if isinstance(node_data, dict):
                    node_type = node_data.get('type', '').lower()
                    if 'text' in node_type:
                        # Check the various possible ID fields
                        if (node_data.get('id') == passage_id or
                                node_data.get('name') == passage_id or
                                node_id == passage_id):
                            text_node_id = node_id
                            break

        if text_node_id is None:
            print(f"[WARNING] No text node found for passage ID: {passage_id}")
            passage_evidences = []
        else:
            # Step 1: collect all entity and event nodes connected to this text node
            connected_entities_events = []
            neighbors = list(graph.neighbors(text_node_id))

            for neighbor_id in neighbors:
                neighbor_data = graph.nodes.get(neighbor_id, {})
                if isinstance(neighbor_data, dict):
                    neighbor_type = neighbor_data.get('type', '').lower()
                    if 'entity' in neighbor_type or 'event' in neighbor_type:
                        connected_entities_events.append(neighbor_id)

            print(f"[OK] Passage {passage_id[:20]}... is connected to {len(connected_entities_events)} entity/event nodes")

            # Step 2: look for relations between these entity and event nodes
            seen_triplets = set()  # used for deduplication

            for i, entity1_id in enumerate(connected_entities_events):
                entity1_data = graph.nodes.get(entity1_id, {})
                entity1_name = entity1_data.get('name', entity1_data.get('id', str(entity1_id)))
                entity1_type = entity1_data.get('type', '').lower()

                # Check connections between entity1 and the other entities/events
                for j, entity2_id in enumerate(connected_entities_events):
                    if i >= j:  # avoid duplicate pairs and self-connections
                        continue

                    entity2_data = graph.nodes.get(entity2_id, {})
                    entity2_name = entity2_data.get('name', entity2_data.get('id', str(entity2_id)))
                    entity2_type = entity2_data.get('type', '').lower()

                    # Check whether the two nodes are connected by an edge
                    edge_data = graph.get_edge_data(entity1_id, entity2_id)
                    reverse_edge_data = graph.get_edge_data(entity2_id, entity1_id)

                    if edge_data:
                        # entity1 -> entity2
                        relation = "连接"  # default relation label ("connected")
                        if isinstance(edge_data, dict):
                            relation = edge_data.get('relation', edge_data.get('label', '连接'))

                        triplet = [entity1_name, relation, entity2_name]
                        triplet_key = (entity1_name, relation, entity2_name)

                        if triplet_key not in seen_triplets:
                            seen_triplets.add(triplet_key)
                            passage_evidences.append(triplet)

                    if reverse_edge_data and edge_data != reverse_edge_data:
                        # entity2 -> entity1 (if different from the forward edge)
                        relation = "连接"  # default relation label ("connected")
                        if isinstance(reverse_edge_data, dict):
                            relation = reverse_edge_data.get('relation', reverse_edge_data.get('label', '连接'))

                        triplet = [entity2_name, relation, entity1_name]
                        triplet_key = (entity2_name, relation, entity1_name)

                        if triplet_key not in seen_triplets:
                            seen_triplets.add(triplet_key)
                            passage_evidences.append(triplet)

            print(f"   Extracted {len(passage_evidences)} entity-to-entity triplets")

        all_evidences.append(passage_evidences)

    return all_evidences
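
# Usage sketch (hedged): the passage IDs below are hypothetical placeholders, and
# "test_with_concept.pkl" is simply the default graph file assumed by this script.
#
#   evidences = extract_evidences_path(["passage_0001", "passage_0002"])
#   # Expected shape: one list of [head, relation, tail] triplets per passage, e.g.
#   # [
#   #     [["<entity 1>", "连接", "<entity 2>"]],   # triplets for the first passage
#   #     []                                         # no triplets found for the second passage
#   # ]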


def main():
    """Main entry point - hard-coded query test with monitoring."""

    # ================================
    # Edit the question to test and the debug mode here
    # ================================
    query = "混沌工程的定义是什么?DataOps是什么?"  # simplified query for quick testing ("What is the definition of chaos engineering? What is DataOps?")
    debug_mode = "complex"  # options: "0" (automatic judgment), "simple" (force the simple path), "complex" (force the complex path)

    print("[STARTING] LangSmith-monitored retrieval system (based on Alibaba Cloud DashScope)")
    print("="*50)
    print("[TIP] This system will automatically:")
    print("   • Analyze query complexity (Alibaba Cloud Tongyi Qianwen)")
    print("   • Select the optimal retrieval path")
    print("   • Run hybrid retrieval (event nodes + passage nodes)")
    print("   • Perform iterative reasoning retrieval and sufficiency checks")
    print("   • Generate and retrieve sub-queries over multiple rounds")
    print("   • Provide detailed execution monitoring")
    print("   • Record the full process in LangSmith")

    # Check the LangSmith connection
    print("\n[INFO] Checking LangSmith connection...")
    langsmith_ok = check_langsmith_connection()
    if not langsmith_ok:
        print("[WARNING] LangSmith connection failed, but the retriever still works")
    else:
        print("[OK] LangSmith connection is OK")

    print(f"\n[INFO] Test query: {query}")
    print(f"[BUG] Debug mode: {debug_mode}")
    print(f"   {'automatic complexity judgment' if debug_mode == '0' else 'forced simple path' if debug_mode == 'simple' else 'forced complex path' if debug_mode == 'complex' else 'unknown mode'}")

    try:
        # Generate a timestamped project name
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        project_name = f"{timestamp}-{query}"
        print(f"\n[PACKAGE] Creating new LangSmith project: {project_name}")

        # Create a retriever with LangSmith monitoring
        print("[CONFIG] Initializing retriever...")
        retriever = create_langsmith_retriever(
            keyword="test",
            top_k=13,  # matches the new retriever settings (10 events + 3 passages)
            max_iterations=1,  # reduce the number of iterations
            max_parallel_retrievals=1,  # reduce parallelism
            langsmith_project=project_name
        )
        print("[OK] Retriever created successfully")

        # Run retrieval (full pipeline tracing)
        print(f"\n[TARGET] Starting retrieval...")
        print("[TIP] The full process will be traced in LangSmith, including:")
        print("   • Query complexity judgment")
        print("   • Path selection (simple vector retrieval vs. complex reasoning retrieval)")
        print("   • Hybrid retrieval (TOP-10 event nodes + TOP-3 passage nodes)")
        print("   • Iterative retrieval and sufficiency checks")
        print("   • Sub-query generation and parallel retrieval")
        print("   • Final answer generation")

        result = retriever.retrieve(query, debug_mode)

        # Print detailed results
        print("\n" + "="*60)
        print("[INFO] Retrieval complete - detailed results")
        print("="*60)

        # 1. Query path analysis
        complexity = result.get('query_complexity', {})
        debug_override = complexity.get('debug_override', {})

        print(f"[?] Query path analysis:")
        print(f"   Query complexity: {complexity.get('complexity_level', 'unknown').upper()}")
        print(f"   Confidence: {complexity.get('confidence', 0):.2f}")
        print(f"   Execution path: {result.get('retrieval_path', 'unknown')}")
        print(f"   Judgment basis: {complexity.get('reason', '')[:120]}{'...' if len(complexity.get('reason', '')) > 120 else ''}")

        if debug_override:
            print(f"   [BUG] Debug override: {debug_override.get('original_complexity')} → {result.get('is_complex_query')}")

        # 2. Execution flow overview
        debug_info = result.get('debug_info', {})
        iterations = result.get('iterations', 0)
        is_sufficient = result.get('is_sufficient', False)

        print(f"\n[CHART] Execution flow overview:")
        print(f"   Total time: {debug_info.get('total_time', 0):.2f}s")
        print(f"   Iterations: {iterations}")
        print(f"   LLM calls: {debug_info.get('llm_calls', 0)}")
        print(f"   Retrieved items: {result.get('total_passages', 0)} (events + passages)")
        print(f"   Final status: {'[OK] information sufficient' if is_sufficient else '[WARNING] information insufficient but iteration limit reached'}")

        # 3. Query decomposition and sub-query generation
        decomposed_queries = result.get('decomposed_sub_queries', [])
        all_sub_queries = result.get('sub_queries', [])

        if decomposed_queries or all_sub_queries:
            print(f"\n[TARGET] Query decomposition and sub-queries:")
            if decomposed_queries:
                print(f"   [INFO] Initial decomposition ({len(decomposed_queries)}):")
                for i, sub_q in enumerate(decomposed_queries, 1):
                    print(f"     {i}. {sub_q}")

            additional_queries = [q for q in all_sub_queries if q not in decomposed_queries]
            if additional_queries:
                print(f"   [RELOAD] Generated during iteration ({len(additional_queries)}):")
                for i, sub_q in enumerate(additional_queries, 1):
                    print(f"     {i}. {sub_q}")

        # 4. Sufficiency check progression
        sufficiency_analysis = debug_info.get('sufficiency_analysis', {})
        sufficiency_history = sufficiency_analysis.get('iteration_sufficiency_history', [])
        final_sufficiency = result.get('sufficiency_check', {})

        if sufficiency_history or final_sufficiency:
            print(f"\n[THINK] Sufficiency check progression:")
            if sufficiency_history:
                for hist in sufficiency_history:
                    iter_num = hist.get('iteration', 0)
                    is_suff = hist.get('is_sufficient', False)
                    conf = hist.get('confidence', 0)
                    status = '[OK] sufficient' if is_suff else '[ERROR] insufficient'
                    print(f"   Iteration {iter_num}: {status} (confidence: {conf:.2f})")

            if final_sufficiency:
                final_reason = final_sufficiency.get('reason', '')
                print(f"   Final reason: {final_reason[:150]}{'...' if len(final_reason) > 150 else ''}")

        # 5. Token usage statistics
        token_info = debug_info.get('token_usage_summary', {})
        if token_info and not token_info.get('error'):
            total_usage = token_info.get('total_usage', {})
            print(f"\n[NUMBER] Resource usage statistics:")
            print(f"   Model: {token_info.get('model_name', 'unknown')}")
            print(f"   Prompt tokens: {total_usage.get('prompt_tokens', 0):,}")
            print(f"   Completion tokens: {total_usage.get('completion_tokens', 0):,}")
            print(f"   Total tokens: {total_usage.get('total_tokens', 0):,}")
            print(f"   Call count: {total_usage.get('call_count', 0)}")

        # 6. Retrieval state summary
        state_summary = result.get('state_summary', {})
        if state_summary:
            print(f"\n[FILE] Retrieval state summary:")
            retrieval_path = state_summary.get('retrieval_path', '')
            final_state = state_summary.get('final_state', '')
            total_queries_processed = state_summary.get('total_queries_processed', 0)

            if retrieval_path:
                print(f"   Execution path: {retrieval_path}")
            if final_state:
                print(f"   Final state: {final_state}")
            if total_queries_processed > 0:
                print(f"   Queries processed: {total_queries_processed}")

            # Show the iteration history
            iteration_history = result.get('iteration_history', [])
            if iteration_history:
                print(f"   Iteration history:")
                for hist in iteration_history[-3:]:  # only show the last 3
                    iter_num = hist.get('iteration', 0)
                    action = hist.get('action', 'unknown')
                    print(f"     Iteration {iter_num}: {action}")

        # 7. Final answer
        print(f"\n[NOTE] Final answer:")
        print("─" * 60)
        answer = result.get('answer', 'No answer generated')
        print(answer)
        print("─" * 60)

        # 8. Retrieval result statistics (shown before the routing analysis)
        print(f"\n[INFO] Retrieval result statistics:")

        # Get retrieval content statistics from the lightweight result
        total_passages = result.get('total_passages', 0)
        print(f"   Items actually retrieved: {total_passages} (event nodes + passage nodes)")

        if total_passages > 0:
            print(f"   Retrieval mode: LangSmith lightweight statistics")
            print(f"   Content composition: TOP-10 event nodes + TOP-3 passage nodes")
            print(f"   Full details have been saved to local files")
            print(f"   [FOLDER] Full data location: json_langsmith/ directory")
        else:
            print(f"   [WARNING] No content retrieved")

        # 9. Routing decision analysis (advanced info)
        routing_analysis = debug_info.get('routing_analysis', {})
        sufficiency_progression = debug_info.get('sufficiency_analysis', {}).get('sufficiency_progression', {})

        if routing_analysis or sufficiency_progression:
            print(f"\n[?] Routing decision analysis:")
            if routing_analysis:
                print(f"   Total routing decisions: {routing_analysis.get('total_routing_decisions', 0)}")
                print(f"   Sub-query generation: {routing_analysis.get('sub_query_generation_count', 0)} times")
                print(f"   Parallel retrieval: {routing_analysis.get('parallel_retrieval_count', 0)} times")

            if sufficiency_progression.get('pattern'):
                pattern = sufficiency_progression['pattern']
                pattern_desc = {
                    'improved_to_sufficient': 'gradually improved to sufficient',
                    'consistently_sufficient': 'consistently sufficient',
                    'consistently_insufficient': 'consistently insufficient',
                    'mixed': 'mixed results'
                }.get(pattern, pattern)
                print(f"   Sufficiency pattern: {pattern_desc}")

                conf_improvement = sufficiency_progression.get('confidence_improvement', 0)
                if conf_improvement > 0:
                    print(f"   Confidence improvement: +{conf_improvement:.2f}")
                elif conf_improvement < 0:
                    print(f"   Confidence decrease: {conf_improvement:.2f}")

        # 10. LangSmith tracing info
        print(f"\n[LINK] LangSmith detailed tracing:")
        print(f"   Project name: {project_name}")
        print("   URL: https://smith.langchain.com")
        print("   Detailed monitoring includes:")
        print("     • Node-level execution time and data flow")
        print("     • Token usage details for every step")
        print("     • Detailed reasoning behind the complexity judgment")
        print("     • Node filtering during hybrid retrieval (events + passages)")
        print("     • Iteration and progression of sufficiency checks")
        print("     • Context and feedback for sub-query generation")
        print("     • Full trace of routing decisions")
        print("     • Performance comparison of the different retrieval paths")
        print("     • Execution of parallel retrievals")
        print("     • Complete tracing of errors and exceptions")

        print(f"\n[OK] Query processing complete!")

        # ================================
        # Data extraction and JSON generation
        # ================================
        print(f"\n[FILE] Starting to extract key data...")

        try:
            # Extract the final answer
            final_answer = result.get('answer', '')

            # Extract supporting passage and event information
            supporting_facts = []  # keeps the original passage extraction
            supporting_events = []  # newly added event extraction

            # Get the real retrieval data directly from the result
            print(f"[INFO] Extracting document data from the retrieval result...")
            all_documents = result.get('all_documents', [])

            print(f"   Documents in the retrieval result: {len(all_documents)}")

            seen_docs = set()

            if all_documents:
                for doc in all_documents:
                    if hasattr(doc, 'page_content') and hasattr(doc, 'metadata'):
                        # Take the first 30 characters
                        content_preview = doc.page_content[:30] if doc.page_content else ""

                        # Extract the document ID (prefer node_id as the text_id)
                        doc_id = (doc.metadata.get('node_id') or  # node ID in HippoRAG, corresponds to the text_id in the graph
                                  doc.metadata.get('passage_id') or  # passage ID in the ES retriever
                                  doc.metadata.get('id') or
                                  doc.metadata.get('document_id') or
                                  doc.metadata.get('chunk_id') or
                                  doc.metadata.get('source') or
                                  f"doc_{hash(doc.page_content) % 100000}")

                        # Get the node type
                        node_type = doc.metadata.get('node_type', 'unknown')

                        # Deduplicate by the combination of doc_id and content
                        doc_key = (str(doc_id), content_preview)
                        if doc_key not in seen_docs:
                            seen_docs.add(doc_key)

                            # Add to the appropriate list depending on the node type
                            if node_type == 'event':
                                supporting_events.append([content_preview, str(doc_id)])
                                print(f"   [OK] Added event: {content_preview}... (ID: {doc_id})")
                            else:
                                # Passage nodes and all other types go into supporting_facts
                                supporting_facts.append([content_preview, str(doc_id)])
                                print(f"   [OK] Added passage: {content_preview}... (ID: {doc_id})")
                        else:
                            print(f"   [RELOAD] Skipping duplicate document: {content_preview}...")

            # If there is no real data, fall back to the lightweight simulation
            if not all_documents:
                total_passages = result.get('total_passages', 0)
                print(f"[INFO] Falling back to lightweight simulation: {total_passages} content items")

                # Generate simulated event data (TOP-10)
                expected_events = min(10, total_passages)
                for i in range(expected_events):
                    content_preview = f"[event {i+1} content - lightweight]"
                    event_id = f"langsmith_event_{i}"
                    supporting_events.append([content_preview, event_id])
                    print(f"   [OK] Simulated event: {content_preview} (ID: {event_id})")

                # Generate simulated passage data (TOP-3)
                expected_passages = min(3, max(0, total_passages - expected_events))
                for i in range(expected_passages):
                    content_preview = f"[passage {i+1} content - lightweight]"
                    passage_id = f"langsmith_passage_{i}"
                    supporting_facts.append([content_preview, passage_id])
                    print(f"   [OK] Simulated passage: {content_preview} (ID: {passage_id})")

                if total_passages > 13:
                    print(f"   [INFO] {total_passages - 13} additional items not shown")

            # Skip triplet extraction for now; only record passage and event information
            print(f"\n[INFO] Skipping triplet extraction, saving passage and event information")
            pred_evidences_path = []  # left empty; triplet extraction is not performed

            # Build the JSON data
            json_data = {
                "query": query,
                "pred_answer": final_answer,
                "pred_supporting_facts": supporting_facts,  # original passage field
                "pred_supporting_events": supporting_events,  # newly added event field
                "pred_evidences_path": pred_evidences_path,
                "extraction_timestamp": timestamp,
                "langsmith_project": project_name,
                "total_passages": len(supporting_facts),  # number of passages
                "total_events": len(supporting_events),  # number of events
                "total_content": len(supporting_facts) + len(supporting_events),  # total number of content items
                "total_triplets": sum(len(evidence_list) for evidence_list in pred_evidences_path),
                "answer_length": len(final_answer)
            }
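
            # Hedged sketch of the JSON document built above (field values shown here
            # are illustrative assumptions, not real retrieval output):
            # {
            #     "query": "...",
            #     "pred_answer": "...",
            #     "pred_supporting_facts": [["<first 30 chars of passage>", "<text_id>"], ...],
            #     "pred_supporting_events": [["<first 30 chars of event>", "<node_id>"], ...],
            #     "pred_evidences_path": [],
            #     "extraction_timestamp": "20250101_120000",
            #     "langsmith_project": "20250101_120000-<query>",
            #     "total_passages": 3,
            #     "total_events": 10,
            #     "total_content": 13,
            #     "total_triplets": 0,
            #     "answer_length": 256
            # }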

            # Save the JSON file
            # Create the json_output directory if it does not exist
            json_output_dir = os.path.join(os.path.dirname(__file__), "json_output")
            os.makedirs(json_output_dir, exist_ok=True)

            output_file = os.path.join(json_output_dir, f"output_{timestamp}.json")
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(json_data, f, ensure_ascii=False, indent=2)

            print(f"[OK] Data extraction complete!")
            print(f"[FILE] Output file: {output_file}")
            print(f"[INFO] Final statistics:")
            print(f"   Supporting passages: {len(supporting_facts)}")
            print(f"   Supporting events: {len(supporting_events)}")
            print(f"   Total content items: {len(supporting_facts) + len(supporting_events)}")
            print(f"   Answer length: {len(final_answer)} characters")
            print(f"   Triplet extraction: skipped")

            # Check data consistency
            if len(supporting_facts) == 0 and len(supporting_events) == 0:
                print(f"[WARNING] No supporting information (passages or events) was extracted; check the retrieval result format")
                print(f"   Fields in the retrieval result:")
                print(f"   - all_documents: {len(result.get('all_documents', []))}")
                print(f"   - all_passages: {len(result.get('all_passages', []))}")
                if result.get('all_documents'):
                    first_doc = result.get('all_documents')[0]
                    print(f"   - type of the first document: {type(first_doc)}")
                    if hasattr(first_doc, 'metadata'):
                        print(f"   - metadata of the first document: {first_doc.metadata}")
            else:
                print(f"[OK] Information extracted successfully; no triplet extraction needed")
                if len(supporting_facts) > 0:
                    print(f"   - passages: {len(supporting_facts)}")
                if len(supporting_events) > 0:
                    print(f"   - events: {len(supporting_events)}")

            # Show a preview of the extracted data
            print(f"\n[INFO] Extracted data preview:")
            print(f"   Answer: {final_answer[:100]}{'...' if len(final_answer) > 100 else ''}")

            # Show supporting passages
            if supporting_facts:
                print(f"   Supporting passages ({len(supporting_facts)}):")
                for i, fact in enumerate(supporting_facts[:3]):  # only show the first 3
                    if len(fact) >= 2:
                        print(f"     {i+1}. '{fact[0]}...' (ID: {fact[1]})")

                if len(supporting_facts) > 3:
                    print(f"     ... {len(supporting_facts) - 3} more passages")

            # Show supporting events
            if supporting_events:
                print(f"   Supporting events ({len(supporting_events)}):")
                for i, event in enumerate(supporting_events[:3]):  # only show the first 3
                    if len(event) >= 2:
                        print(f"     {i+1}. '{event[0]}...' (ID: {event[1]})")

                if len(supporting_events) > 3:
                    print(f"     ... {len(supporting_events) - 3} more events")

            print(f"\n   Triplet extraction: skipped (passage and event information saved)")

        except Exception as extract_error:
            print(f"[WARNING] Data extraction failed: {extract_error}")
            import traceback
            traceback.print_exc()

    except KeyboardInterrupt:
        print(f"\n[WARNING] Retrieval interrupted by user")
        print(f"[TIP] Retrieval stopped safely")
    except Exception as e:
        print(f"[ERROR] Retrieval failed: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    main()
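
# Run sketch (hedged):
#
#   python AIEC-RAG/retriver/langsmith/langsmith_example.py
#
# The sys.path setup at the top of this file resolves the retriver package relative
# to the file location. The extracted data is written to json_output/output_<timestamp>.json
# next to this file, and the corresponding LangSmith project is named "<timestamp>-<query>".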