"""
Elasticsearch configuration file.

Contains the cloud Elasticsearch connection settings and related constants.
"""
|
|||
|
|
|
|||
|
|
import os
|
|||
|
|
from typing import Optional
|
|||
|
|
|
|||
|
|
class ElasticsearchConfig:
    """Elasticsearch connection settings and related constants.

    The connection values are resolved once, when this class body runs, from
    the ``ELASTICSEARCH_*`` environment variables; the literals are fallbacks
    used only when a variable is unset.
    """

    # ES connection settings (environment variables take precedence).
    # NOTE(review): the fallback host and password are committed in source;
    # consider dropping these defaults and rotating the credential.
    ES_HOST = os.getenv("ELASTICSEARCH_HOST", "http://101.200.154.78:9200")
    ES_USERNAME = os.getenv("ELASTICSEARCH_USERNAME", "elastic")
    ES_PASSWORD = os.getenv("ELASTICSEARCH_PASSWORD", "Abcd123456")

    # Fallback when the ES port is not open: go through an SSH tunnel mapped
    # to the local machine, i.e. use "http://localhost:9200" as the host.

    # Index naming
    INDEX_PREFIX = "hipporag"
    EDGES_INDEX_SUFFIX = "edges"
    PASSAGES_INDEX_SUFFIX = "passages"

    # Vector settings
    VECTOR_DIMENSION = 1024  # dimension of text-embedding-v3
    SIMILARITY_METRIC = "cosine"

    # Batch sizes
    DEFAULT_BATCH_SIZE = 100
    DEFAULT_TEXT_BATCH_SIZE = 40
    DEFAULT_EDGE_BATCH_SIZE = 256

    # Search settings
    DEFAULT_TOP_K = 10
    DEFAULT_NUM_CANDIDATES_MULTIPLIER = 2

    @classmethod
    def _build_index_name(cls, keyword: str, suffix: str) -> str:
        """Return ``<INDEX_PREFIX>_<keyword>_<suffix>``.

        The keyword is inserted verbatim; no normalisation or validation is
        applied here.
        """
        return f"{cls.INDEX_PREFIX}_{keyword}_{suffix}"

    @classmethod
    def get_edges_index_name(cls, keyword: str) -> str:
        """Return the name of the edges index for ``keyword``."""
        return cls._build_index_name(keyword, cls.EDGES_INDEX_SUFFIX)

    @classmethod
    def get_passages_index_name(cls, keyword: str) -> str:
        """Return the name of the text-passages index for ``keyword``."""
        return cls._build_index_name(keyword, cls.PASSAGES_INDEX_SUFFIX)

    @classmethod
    def get_es_config(cls) -> dict:
        """Return the ES connection settings as a new dict on every call.

        NOTE(review): the dict carries both ``basic_auth`` and ``http_auth``,
        both ``request_timeout`` and ``timeout``, plus ``connection_class`` /
        ``selector_class`` / ``maxsize``. Presumably this is for compatibility
        across client versions -- confirm that the consumer filters the keys
        before handing them to the client constructor.
        """
        credentials = (cls.ES_USERNAME, cls.ES_PASSWORD)
        return {
            "hosts": [cls.ES_HOST],
            "basic_auth": credentials,
            "verify_certs": False,  # in case a self-signed certificate is used
            "ssl_show_warn": False,
            "request_timeout": 300,  # request timeout, raised to 5 minutes
            "retry_on_timeout": True,
            "max_retries": 5,  # retry count, raised to 5
            "sniff_on_start": False,  # node sniffing off to avoid network issues
            "sniff_on_connection_fail": False,  # no sniffing on connection failure
            "sniff_timeout": 10,
            "http_compress": True,  # enable HTTP compression
            "http_auth": credentials,
            "timeout": 300,  # connection timeout
            "connection_class": None,  # use the default connection class
            "selector_class": None,  # use the default selector
            "dead_timeout": 60,  # dead-connection timeout
            "retry_on_status": {502, 503, 504, 408, 429},  # HTTP statuses to retry
            "maxsize": 25,  # connection pool size
        }
|
|||
|
|
|
|||
|
|
|
|||
|
|
class SSHTunnelConfig:
    """SSH tunnel settings."""

    # Placeholder value shipped for SSH_KEY_PATH. While SSH_KEY_PATH still
    # equals it, get_ssh_config() does not emit a key path.
    _KEY_PATH_PLACEHOLDER = "path/to/your/private_key.pem"

    # SSH connection settings (real values must be obtained from the admin).
    SSH_HOST = "101.200.154.78"  # SSH server address
    SSH_PORT = 22  # SSH port
    SSH_USERNAME = "your_username"  # replace with the real user name
    SSH_KEY_PATH = _KEY_PATH_PLACEHOLDER  # replace with the real private-key path
    # Optional: define SSH_PASSWORD = "your_password" for password authentication.

    # Tunnel mapping
    LOCAL_PORT = 9200  # local port
    REMOTE_HOST = "localhost"  # ES address inside the server
    REMOTE_PORT = 9200  # ES port inside the server

    @classmethod
    def get_ssh_config(cls) -> dict:
        """Return the SSH tunnel settings as a new dict.

        The dict always holds the host/port/username and tunnel-mapping keys.
        Exactly one credential key is added when available:

        * ``ssh_key_path`` when ``SSH_KEY_PATH`` differs from the placeholder;
        * otherwise ``ssh_password`` when the class defines ``SSH_PASSWORD``.

        If neither applies, no credential key is present.
        """
        config = {
            'ssh_host': cls.SSH_HOST,
            'ssh_port': cls.SSH_PORT,
            'ssh_username': cls.SSH_USERNAME,
            'local_port': cls.LOCAL_PORT,
            'remote_host': cls.REMOTE_HOST,
            'remote_port': cls.REMOTE_PORT,
        }

        # Add authentication info; a configured key takes priority over a password.
        key_configured = (
            hasattr(cls, 'SSH_KEY_PATH')
            and cls.SSH_KEY_PATH != cls._KEY_PATH_PLACEHOLDER
        )
        if key_configured:
            config['ssh_key_path'] = cls.SSH_KEY_PATH
        elif hasattr(cls, 'SSH_PASSWORD'):
            config['ssh_password'] = cls.SSH_PASSWORD

        return config
|
|||
|
|
|
|||
|
|
|
|||
|
|
class HippoRAGConfig:
    """HippoRAG retrieval settings."""

    # Inference settings
    TOPK_EDGES = 10
    TOPK_NODES = 30
    WEIGHT_ADJUST = 0.05
    PPR_ALPHA = 0.85
    PPR_MAX_ITER = 100
    PPR_TOL = 1e-6

    # Retrieval mode; one of: query2edge, query2node, ner2node
    RETRIEVAL_MODE = "query2edge"

    @classmethod
    def get_oneapi_config(cls) -> dict:
        """Return the OneAPI settings, read from the environment at call time.

        ``api_key`` and ``base_url`` are ``None`` when ``ONEAPI_KEY`` /
        ``ONEAPI_BASE_URL`` are unset; the two model names fall back to
        built-in defaults.
        """
        return {
            "api_key": os.getenv('ONEAPI_KEY'),
            "base_url": os.getenv('ONEAPI_BASE_URL'),
            "model_embed": os.getenv('ONEAPI_MODEL_EMBED', 'text-embedding-v3'),
            "model_gen": os.getenv('ONEAPI_MODEL_GEN', 'qwen2-7b-instruct'),
        }
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Index mapping templates (compatible version).
# Index body (mappings + settings) for the edges index.
EDGES_INDEX_MAPPING = {
    "mappings": {
        "properties": {
            "edge_index": {"type": "integer"},
            "head_node_id": {"type": "keyword"},
            "tail_node_id": {"type": "keyword"},
            "head_entity": {"type": "keyword"},
            "relation": {"type": "keyword"},
            "tail_entity": {"type": "keyword"},
            "triple_text": {"type": "text"},
            # dims / similarity repeat ElasticsearchConfig.VECTOR_DIMENSION and
            # ElasticsearchConfig.SIMILARITY_METRIC; keep them in sync.
            "embedding": {
                "type": "dense_vector",
                "dims": 1024,
                "index": True,
                "similarity": "cosine",
            },
            "created_at": {"type": "date"},
            "keyword": {"type": "keyword"},
        },
    },
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0,
    },
}
|
|||
|
|
|
|||
|
|
# Index body (mappings + settings) for the text-passages index.
PASSAGES_INDEX_MAPPING = {
    "mappings": {
        "properties": {
            "passage_id": {"type": "keyword"},
            "content": {"type": "text"},
            "file_id": {"type": "keyword"},
            "evidence": {"type": "keyword"},
            # dims / similarity repeat ElasticsearchConfig.VECTOR_DIMENSION and
            # ElasticsearchConfig.SIMILARITY_METRIC; keep them in sync.
            "embedding": {
                "type": "dense_vector",
                "dims": 1024,
                "index": True,
                "similarity": "cosine",
            },
            "created_at": {"type": "date"},
            "keyword": {"type": "keyword"},
        },
    },
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0,
    },
}
|
|||
|
|
|
|||
|
|
# Added: ES index mapping for graph nodes.
NODES_INDEX_MAPPING = {
    "mappings": {
        "properties": {
            "node_id": {"type": "keyword"},
            "name": {"type": "text"},
            "type": {"type": "keyword"},
            "concepts": {"type": "text"},
            "synsets": {"type": "text"},
            # dims / similarity repeat ElasticsearchConfig.VECTOR_DIMENSION and
            # ElasticsearchConfig.SIMILARITY_METRIC; keep them in sync.
            "embedding": {
                "type": "dense_vector",
                "dims": 1024,
                "index": True,
                "similarity": "cosine",
            },
            "created_at": {"type": "date"},
            "keyword": {"type": "keyword"},
        },
    },
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0,  # set back to 0 to avoid yellow status
    },
}
|