""" Elasticsearch配置文件 包含云端ES连接配置和相关常量 """ import os from typing import Optional class ElasticsearchConfig: """Elasticsearch配置类""" # ES连接配置(优先从环境变量读取) ES_HOST = os.getenv("ELASTICSEARCH_HOST", "http://101.200.154.78:9200") ES_USERNAME = os.getenv("ELASTICSEARCH_USERNAME", "elastic") ES_PASSWORD = os.getenv("ELASTICSEARCH_PASSWORD", "Abcd123456") # 备用SSH隧道方式(如果端口未开放时使用) # ES_HOST = "http://localhost:9200" # 通过SSH隧道映射到本地 # 索引配置 INDEX_PREFIX = "hipporag" EDGES_INDEX_SUFFIX = "edges" PASSAGES_INDEX_SUFFIX = "passages" # 向量配置 VECTOR_DIMENSION = 1024 # text-embedding-v3的维度 SIMILARITY_METRIC = "cosine" # 批处理配置 DEFAULT_BATCH_SIZE = 100 DEFAULT_TEXT_BATCH_SIZE = 40 DEFAULT_EDGE_BATCH_SIZE = 256 # 搜索配置 DEFAULT_TOP_K = 10 DEFAULT_NUM_CANDIDATES_MULTIPLIER = 2 @classmethod def get_edges_index_name(cls, keyword: str) -> str: """获取边索引名称""" return f"{cls.INDEX_PREFIX}_{keyword}_{cls.EDGES_INDEX_SUFFIX}" @classmethod def get_passages_index_name(cls, keyword: str) -> str: """获取文本段落索引名称""" return f"{cls.INDEX_PREFIX}_{keyword}_{cls.PASSAGES_INDEX_SUFFIX}" @classmethod def get_es_config(cls) -> dict: """获取ES连接配置""" return { "hosts": [cls.ES_HOST], "basic_auth": (cls.ES_USERNAME, cls.ES_PASSWORD), "verify_certs": False, # 如果使用自签名证书 "ssl_show_warn": False, "request_timeout": 300, # 请求超时时间,增加至5分钟 "retry_on_timeout": True, "max_retries": 5, # 重试次数,增加至5次 "sniff_on_start": False, # 禁用节点嗅探,避免网络问题 "sniff_on_connection_fail": False, # 禁用连接失败时的嗅探 "sniff_timeout": 10, "http_compress": True, # 启用HTTP压缩 "http_auth": (cls.ES_USERNAME, cls.ES_PASSWORD), "timeout": 300, # 连接超时 "connection_class": None, # 使用默认连接类 "selector_class": None, # 使用默认选择器 "dead_timeout": 60, # 死连接超时时间 "retry_on_status": {502, 503, 504, 408, 429}, # 重试的HTTP状态码 "maxsize": 25, # 连接池大小 } class SSHTunnelConfig: """SSH隧道配置类""" # SSH连接配置(需要向管理员获取) SSH_HOST = "101.200.154.78" # SSH服务器地址 SSH_PORT = 22 # SSH端口 SSH_USERNAME = "your_username" # 需要替换为实际用户名 SSH_KEY_PATH = "path/to/your/private_key.pem" # 需要替换为实际私钥路径 # SSH_PASSWORD = "your_password" # 如果使用密码认证 # 隧道映射配置 LOCAL_PORT = 9200 # 本地端口 REMOTE_HOST = "localhost" # 服务器内部ES地址 REMOTE_PORT = 9200 # 服务器内部ES端口 @classmethod def get_ssh_config(cls) -> dict: """获取SSH隧道配置""" config = { 'ssh_host': cls.SSH_HOST, 'ssh_port': cls.SSH_PORT, 'ssh_username': cls.SSH_USERNAME, 'local_port': cls.LOCAL_PORT, 'remote_host': cls.REMOTE_HOST, 'remote_port': cls.REMOTE_PORT } # 添加认证信息 if hasattr(cls, 'SSH_KEY_PATH') and cls.SSH_KEY_PATH != "path/to/your/private_key.pem": config['ssh_key_path'] = cls.SSH_KEY_PATH elif hasattr(cls, 'SSH_PASSWORD'): config['ssh_password'] = cls.SSH_PASSWORD return config class HippoRAGConfig: """HippoRAG检索配置""" # 推理配置 TOPK_EDGES = 10 TOPK_NODES = 30 WEIGHT_ADJUST = 0.05 PPR_ALPHA = 0.85 PPR_MAX_ITER = 100 PPR_TOL = 1e-6 # 检索模式 RETRIEVAL_MODE = "query2edge" # query2edge, query2node, ner2node # OneAPI配置(从环境变量读取) @classmethod def get_oneapi_config(cls) -> dict: """获取OneAPI配置""" return { "api_key": os.getenv('ONEAPI_KEY'), "base_url": os.getenv('ONEAPI_BASE_URL'), "model_embed": os.getenv('ONEAPI_MODEL_EMBED', 'text-embedding-v3'), "model_gen": os.getenv('ONEAPI_MODEL_GEN', 'qwen2-7b-instruct') } # 索引映射模板(兼容版本) EDGES_INDEX_MAPPING = { "mappings": { "properties": { "edge_index": {"type": "integer"}, "head_node_id": {"type": "keyword"}, "tail_node_id": {"type": "keyword"}, "head_entity": {"type": "keyword"}, "relation": {"type": "keyword"}, "tail_entity": {"type": "keyword"}, "triple_text": {"type": "text"}, "embedding": { "type": "dense_vector", "dims": 1024, "index": True, "similarity": "cosine" }, "created_at": {"type": "date"}, "keyword": {"type": "keyword"} } }, "settings": { "number_of_shards": 1, "number_of_replicas": 0 } } PASSAGES_INDEX_MAPPING = { "mappings": { "properties": { "passage_id": {"type": "keyword"}, "content": {"type": "text"}, "file_id": {"type": "keyword"}, "evidence": {"type": "keyword"}, "embedding": { "type": "dense_vector", "dims": 1024, "index": True, "similarity": "cosine" }, "created_at": {"type": "date"}, "keyword": {"type": "keyword"} } }, "settings": { "number_of_shards": 1, "number_of_replicas": 0 } } # 新增:Node节点ES索引mapping NODES_INDEX_MAPPING = { "mappings": { "properties": { "node_id": {"type": "keyword"}, "name": {"type": "text"}, "type": {"type": "keyword"}, "concepts": {"type": "text"}, "synsets": {"type": "text"}, "embedding": { "type": "dense_vector", "dims": 1024, "index": True, "similarity": "cosine" }, "created_at": {"type": "date"}, "keyword": {"type": "keyword"} } }, "settings": { "number_of_shards": 1, "number_of_replicas": 0 # 改回0,避免黄色状态 } }