Files
AIEC-new/AIEC-RAG/DEPLOYMENT_GUIDE.md
2025-10-17 09:31:28 +08:00

11 KiB
Raw Permalink Blame History

AIEC-RAG 部署指南

目录

  1. 部署架构
  2. 单机部署
  3. Docker部署
  4. 生产环境部署
  5. 性能调优
  6. 监控配置
  7. 备份恢复
  8. 故障处理
  9. 安全建议

部署架构

推荐架构

                    [负载均衡器]
                         |
            ┌────────────┼────────────┐
            ↓            ↓            ↓
      [AIEC-RAG-1] [AIEC-RAG-2] [AIEC-RAG-3]
            ↓            ↓            ↓
            └────────────┼────────────┘
                         ↓
                  [Elasticsearch集群]
                         ↓
                   [向量数据库]

最小配置要求

组件 CPU 内存 存储 说明
API服务 4核 8GB 50GB 单实例最小配置
Elasticsearch 4核 16GB 200GB 推荐使用SSD
整体系统 8核 32GB 500GB 生产环境推荐

单机部署

1. 系统准备

# Ubuntu/Debian: refresh the package index, then install Python 3.8, pip and basic tools
sudo apt update
sudo apt install -y python3.8 python3-pip git curl wget

# CentOS/RHEL: update installed packages, then install the equivalent packages
sudo yum update -y
sudo yum install -y python38 python38-pip git curl wget

2. 安装Elasticsearch

# Download and unpack the Elasticsearch 8.11.0 tarball (Linux x86_64)
wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-8.11.0-linux-x86_64.tar.gz
tar -xzf elasticsearch-8.11.0-linux-x86_64.tar.gz
cd elasticsearch-8.11.0

# Configure Elasticsearch: append the settings below to config/elasticsearch.yml
# (listen on all interfaces, single-node discovery, security and API keys enabled)
cat >> config/elasticsearch.yml << EOF
network.host: 0.0.0.0
discovery.type: single-node
xpack.security.enabled: true
xpack.security.authc.api_key.enabled: true
EOF

# Start Elasticsearch as a daemon (-d)
./bin/elasticsearch -d

3. 部署AIEC-RAG

# Clone the project (replace <repository_url> with the real repository address)
git clone <repository_url>
cd AIEC-RAG

# Create and activate a virtual environment
python3 -m venv venv
source venv/bin/activate

# Install dependencies
pip install -r requirements.txt

# Create the environment file from the template
cp .env.example .env
# Edit .env and fill in the actual configuration values

# Start the service
python rag_api_server_production.py

4. 设置系统服务

创建 /etc/systemd/system/aiec-rag.service:

# systemd unit for the AIEC-RAG API server.
# Runs rag_api_server_production.py with the project virtualenv interpreter as user "aiec".
[Unit]
Description=AIEC-RAG Service
# Order startup after the network and after elasticsearch.service when that unit exists locally.
After=network.target elasticsearch.service

[Service]
Type=simple
User=aiec
WorkingDirectory=/opt/AIEC-RAG
# Put the virtualenv first but keep the standard system directories on PATH.
# A PATH that contains only venv/bin hides every system binary from the
# service and from any subprocess it starts.
Environment="PATH=/opt/AIEC-RAG/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
ExecStart=/opt/AIEC-RAG/venv/bin/python /opt/AIEC-RAG/rag_api_server_production.py
# Restart the process whenever it exits, waiting 10 seconds between attempts.
Restart=always
RestartSec=10

[Install]
WantedBy=multi-user.target

启用服务:

# Make systemd re-read unit files so the new aiec-rag.service is picked up
sudo systemctl daemon-reload
# Start the service automatically at boot
sudo systemctl enable aiec-rag
# Start it now and show its current state
sudo systemctl start aiec-rag
sudo systemctl status aiec-rag

Docker部署

1. 使用预构建镜像

# Pull the image (if a private registry is available)
docker pull your-registry/aiec-rag:latest

# Or build the image locally from the Dockerfile in the current directory
docker build -t aiec-rag:latest .

2. Docker Compose部署

创建 docker-compose.yml:

# Docker Compose stack: a single-node Elasticsearch plus the AIEC-RAG API.
# Replace every "your_password" placeholder before use; the Elasticsearch
# password and the one handed to aiec-rag must match.
version: '3.8'

services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.11.0
    container_name: aiec-elasticsearch
    environment:
      - discovery.type=single-node
      - "ES_JAVA_OPTS=-Xms2g -Xmx2g"
      - xpack.security.enabled=true
      - ELASTIC_PASSWORD=your_password
    volumes:
      - es_data:/usr/share/elasticsearch/data
    ports:
      - "9200:9200"
    networks:
      - aiec_network
    healthcheck:
      # Security is enabled, so an anonymous request gets HTTP 401 and
      # `curl -f` would fail forever. Authenticate as the built-in "elastic"
      # user. `$$` is Compose escaping for a literal `$`, so the variable is
      # expanded by the shell inside the container, where ELASTIC_PASSWORD is set.
      test: ["CMD-SHELL", "curl -fsS -u \"elastic:$${ELASTIC_PASSWORD}\" http://localhost:9200 >/dev/null || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5

  aiec-rag:
    build: .
    container_name: aiec-rag
    # Start only after the Elasticsearch healthcheck reports healthy.
    depends_on:
      elasticsearch:
        condition: service_healthy
    environment:
      - ELASTICSEARCH_HOST=http://elasticsearch:9200
      - ELASTICSEARCH_USERNAME=elastic
      - ELASTICSEARCH_PASSWORD=your_password
    env_file:
      - .env
    ports:
      - "8100:8100"
    volumes:
      - ./rag_config_production.yaml:/app/rag_config_production.yaml
      - ./api_outputs:/app/api_outputs
    networks:
      - aiec_network
    restart: unless-stopped

volumes:
  es_data:
    driver: local

networks:
  aiec_network:
    driver: bridge

启动服务:

# Start the stack in the background (-d)
docker-compose up -d
# Follow the logs of the services (-f)
docker-compose logs -f

3. Kubernetes部署

创建 k8s-deployment.yaml:

# Deployment: three replicas of the AIEC-RAG API container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: aiec-rag
  labels:
    app: aiec-rag
spec:
  replicas: 3
  selector:
    matchLabels:
      app: aiec-rag
  template:
    metadata:
      labels:
        app: aiec-rag
    spec:
      containers:
      - name: aiec-rag
        image: your-registry/aiec-rag:latest
        ports:
        - containerPort: 8100
        env:
        - name: ELASTICSEARCH_HOST
          value: "http://elasticsearch-service:9200"
        # Remaining environment variables come from the "aiec-secrets" Secret.
        envFrom:
        - secretRef:
            name: aiec-secrets
        resources:
          requests:
            memory: "4Gi"
            cpu: "2"
          limits:
            memory: "8Gi"
            cpu: "4"
        # Liveness: GET /health on port 8100 every 10s, first check after 30s.
        livenessProbe:
          httpGet:
            path: /health
            port: 8100
          initialDelaySeconds: 30
          periodSeconds: 10
        # Readiness: GET /health on port 8100 every 5s, first check after 5s.
        readinessProbe:
          httpGet:
            path: /health
            port: 8100
          initialDelaySeconds: 5
          periodSeconds: 5
---
# Service: exposes the pods labelled app=aiec-rag on port 80 via a LoadBalancer,
# forwarding to container port 8100.
apiVersion: v1
kind: Service
metadata:
  name: aiec-rag-service
spec:
  selector:
    app: aiec-rag
  ports:
    - protocol: TCP
      port: 80
      targetPort: 8100
  type: LoadBalancer

部署到Kubernetes:

# Create the "aiec-secrets" Secret from the key=value pairs in .env
kubectl create secret generic aiec-secrets --from-env-file=.env

# Deploy the application
kubectl apply -f k8s-deployment.yaml

# Check status
kubectl get pods
kubectl get services

生产环境部署

1. 负载均衡配置

使用Nginx作为负载均衡器

# Upstream pool of three API instances; least_conn sends each request to the
# backend with the fewest active connections. A backend is marked unavailable
# for 30s after 3 failed attempts.
upstream aiec_backend {
    least_conn;
    server 10.0.1.10:8100 weight=1 max_fails=3 fail_timeout=30s;
    server 10.0.1.11:8100 weight=1 max_fails=3 fail_timeout=30s;
    server 10.0.1.12:8100 weight=1 max_fails=3 fail_timeout=30s;
}

# Plain-HTTP virtual host that proxies everything to the upstream pool.
server {
    listen 80;
    server_name api.aiec-rag.com;

    location / {
        proxy_pass http://aiec_backend;
        proxy_http_version 1.1;
        # Forward protocol-upgrade headers to the backend.
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection 'upgrade';
        proxy_set_header Host $host;
        proxy_cache_bypass $http_upgrade;
        # Pass the original client address and scheme to the backend.
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        
        # Timeouts
        proxy_connect_timeout 60s;
        proxy_send_timeout 120s;
        proxy_read_timeout 120s;
    }
    
    # Health-check endpoint
    location /health {
        proxy_pass http://aiec_backend/health;
    }
}

2. SSL/TLS配置

# HTTPS virtual host (HTTP/2 enabled) for the same server name.
server {
    listen 443 ssl http2;
    server_name api.aiec-rag.com;

    # Certificate and private key
    ssl_certificate /etc/nginx/ssl/aiec-rag.crt;
    ssl_certificate_key /etc/nginx/ssl/aiec-rag.key;
    
    # Allow only TLS 1.2 / 1.3 and exclude anonymous and MD5-based cipher suites.
    ssl_protocols TLSv1.2 TLSv1.3;
    ssl_ciphers HIGH:!aNULL:!MD5;
    ssl_prefer_server_ciphers on;
    
    # ... remaining configuration is the same as the HTTP server block
}

3. 数据库优化

Elasticsearch优化配置

# elasticsearch.yml
cluster.name: aiec-rag-cluster
node.name: node-1

# Memory: lock the process memory so the heap is not swapped out.
bootstrap.memory_lock: true

# Thread pools
# NOTE(review): Elasticsearch caps some fixed pool sizes by the number of
# allocated processors - confirm that 8 / 16 are accepted on the target hardware.
thread_pool:
  write:
    size: 8
    queue_size: 1000
  search:
    size: 16
    queue_size: 1000

# Index settings
# Index-level settings (index.number_of_shards, index.number_of_replicas,
# index.refresh_interval) must NOT be placed in elasticsearch.yml: since
# Elasticsearch 5.0 a node refuses to start when its configuration contains
# them. Apply them through an index template (or per index) instead, e.g.:
#
#   PUT _index_template/aiec-rag-defaults
#   {
#     "index_patterns": ["aiec-*"],
#     "template": {
#       "settings": {
#         "index.number_of_shards": 3,
#         "index.number_of_replicas": 1,
#         "index.refresh_interval": "30s"
#       }
#     }
#   }
#
# ("aiec-*" is a placeholder - use the pattern that matches your index names.)

性能调优

1. Python应用优化

# Run the app under Gunicorn (Linux) with 4 Uvicorn worker processes.
# Each worker is recycled after about 1000 requests (plus up to 50 of jitter).
gunicorn -w 4 -k uvicorn.workers.UvicornWorker \
    --bind 0.0.0.0:8100 \
    --timeout 120 \
    --keep-alive 5 \
    --max-requests 1000 \
    --max-requests-jitter 50 \
    rag_api_server_production:app

2. 系统参数优化

# /etc/sysctl.conf
# TCP tuning for a host that handles many short-lived connections.
net.ipv4.tcp_fin_timeout = 30
net.ipv4.tcp_tw_reuse = 1
# net.ipv4.tcp_tw_recycle is intentionally not set: the option was removed in
# Linux 4.12 (sysctl -p reports an error for it on newer kernels) and, where it
# still existed, it broke connections from clients behind NAT.
net.ipv4.tcp_max_syn_backlog = 8192
net.ipv4.tcp_max_tw_buckets = 10000
net.core.somaxconn = 65535
net.core.netdev_max_backlog = 65535

# Apply the settings
sudo sysctl -p

3. 缓存策略

配置Redis缓存

# Add caching support in application code
import redis
from functools import lru_cache

# Redis client for localhost:6379 with decode_responses enabled and
# max_connections set to 50.
redis_client = redis.Redis(
    host='localhost',
    port=6379,
    decode_responses=True,
    max_connections=50
)

# In-process LRU memoization (up to 128 distinct inputs). Note that this
# placeholder does not use redis_client.
@lru_cache(maxsize=128)
def get_cached_embedding(text: str):
    """Placeholder for an embedding lookup whose result is memoized per input text."""
    # Cache the embedding vector
    pass

监控配置

1. Prometheus监控

# prometheus.yml
# Scrape the AIEC-RAG service at localhost:8100/metrics every 15 seconds.
scrape_configs:
  - job_name: 'aiec-rag'
    static_configs:
      - targets: ['localhost:8100']
    metrics_path: '/metrics'
    scrape_interval: 15s

2. 日志管理

配置日志轮转:

# /etc/logrotate.d/aiec-rag
# Rotate the AIEC-RAG logs daily and keep 30 generations; all but the most
# recent rotated file are compressed (delaycompress).
/opt/AIEC-RAG/logs/*.log {
    daily
    rotate 30
    compress
    delaycompress
    missingok
    notifempty
    # Copy the log and truncate the original in place, so the running process
    # keeps writing to the same open file. This replaces the former
    # "create" + "postrotate systemctl reload aiec-rag" pair: the
    # aiec-rag.service unit defines no ExecReload, so that reload fails.
    # A few lines written between the copy and the truncate can be lost.
    copytruncate
}

3. 告警配置

# alerting_rules.yml
groups:
- name: aiec_alerts
  rules:
  - alert: HighResponseTime
    expr: http_request_duration_seconds{quantile="0.99"} > 5
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "High response time on {{ $labels.instance }}"
      
  - alert: ServiceDown
    expr: up{job="aiec-rag"} == 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "AIEC-RAG service is down"

备份恢复

1. 数据备份

#!/bin/bash
# backup.sh
# Snapshot Elasticsearch and archive the AIEC-RAG configuration files.
#
# Optional environment:
#   ES_URL       Elasticsearch base URL (default: localhost:9200)
#   ES_USER      user for basic auth (default: elastic)
#   ES_PASSWORD  password for basic auth; when unset, no credentials are sent
#
# The snapshot location must be listed under path.repo in elasticsearch.yml
# and be writable by the Elasticsearch process, otherwise the repository
# registration is rejected.
set -euo pipefail

# The config archive contains .env (secrets): keep it private to the owner.
umask 077

DATE=$(date +%Y%m%d_%H%M%S)
BACKUP_DIR="/backup/aiec-rag"
ES_URL="${ES_URL:-localhost:9200}"

# curl wrapper. -f turns HTTP errors (401, 404, 5xx) into a non-zero exit so
# that a failed request aborts the script instead of being reported as success.
# Credentials are passed through a curl config on a file descriptor rather
# than on the command line, so they do not show up in `ps` output.
es_curl() {
  if [[ -n "${ES_PASSWORD:-}" ]]; then
    curl -fsS -K <(printf 'user = "%s:%s"\n' "${ES_USER:-elastic}" "${ES_PASSWORD}") "$@"
  else
    curl -fsS "$@"
  fi
}

mkdir -p -- "${BACKUP_DIR}"

# Back up Elasticsearch data: register (or update) the filesystem repository ...
es_curl -X PUT "${ES_URL}/_snapshot/backup_repo" \
  -H 'Content-Type: application/json' \
  --data-binary @- <<EOF
{
  "type": "fs",
  "settings": {
    "location": "${BACKUP_DIR}/elasticsearch"
  }
}
EOF

# ... then take a snapshot and wait until it has finished.
es_curl -X PUT "${ES_URL}/_snapshot/backup_repo/snapshot_${DATE}?wait_for_completion=true"

# Back up the configuration files
tar -czf "${BACKUP_DIR}/config_${DATE}.tar.gz" \
    /opt/AIEC-RAG/.env \
    /opt/AIEC-RAG/rag_config_production.yaml

echo "Backup completed: $DATE"

2. 恢复流程

#!/bin/bash
# restore.sh
# Restore an Elasticsearch snapshot and the archived configuration files,
# then restart the aiec-rag service.
#
# Usage: restore.sh <snapshot_name>
#
# Optional environment:
#   ES_URL       Elasticsearch base URL (default: localhost:9200)
#   ES_USER      user for basic auth (default: elastic)
#   ES_PASSWORD  password for basic auth; when unset, no credentials are sent
set -euo pipefail

SNAPSHOT_NAME=${1:-}
BACKUP_DIR="/backup/aiec-rag"
ES_URL="${ES_URL:-localhost:9200}"

die() { printf '%s\n' "$*" >&2; exit 1; }

if [[ -z "${SNAPSHOT_NAME}" ]]; then
  printf 'Usage: %s <snapshot_name>\n' "${0##*/}" >&2
  exit 2
fi
# The name is placed into a URL path; refuse characters that would change the request.
if [[ "${SNAPSHOT_NAME}" =~ [/?#[:space:]] ]]; then
  die "invalid snapshot name: ${SNAPSHOT_NAME}"
fi

# curl wrapper. -f makes HTTP errors fatal; credentials go through a curl
# config on a file descriptor so they are not visible in `ps` output.
es_curl() {
  if [[ -n "${ES_PASSWORD:-}" ]]; then
    curl -fsS -K <(printf 'user = "%s:%s"\n' "${ES_USER:-elastic}" "${ES_PASSWORD}") "$@"
  else
    curl -fsS "$@"
  fi
}

# Pick the configuration archive: config_latest.tar.gz when it exists,
# otherwise the newest timestamped config_<YYYYMMDD_HHMMSS>.tar.gz (the
# timestamp format sorts chronologically, so the last glob match is the newest).
config_archive="${BACKUP_DIR}/config_latest.tar.gz"
if [[ ! -f "${config_archive}" ]]; then
  shopt -s nullglob
  archives=("${BACKUP_DIR}"/config_[0-9]*.tar.gz)
  shopt -u nullglob
  (( ${#archives[@]} > 0 )) || die "no configuration archive found in ${BACKUP_DIR}"
  config_archive="${archives[${#archives[@]}-1]}"
fi

# Restore Elasticsearch data. With set -e a failed restore stops the script
# here, before the configuration is overwritten and the service restarted.
es_curl -X POST "${ES_URL}/_snapshot/backup_repo/${SNAPSHOT_NAME}/_restore"

# Restore the configuration files (archive paths are relative to /)
tar -xzf "${config_archive}" -C /

# Restart the service
systemctl restart aiec-rag

echo "Restore completed from: $SNAPSHOT_NAME"

故障处理

常见问题处理

  1. 服务无响应

    # Check the service status
    systemctl status aiec-rag
    # Show the last 100 journal lines for the service
    journalctl -u aiec-rag -n 100
    # Restart the service
    systemctl restart aiec-rag
    
  2. Elasticsearch连接失败

    # Check the Elasticsearch cluster health
    curl -X GET "localhost:9200/_cluster/health?pretty"
    # Check that port 9200 is reachable
    telnet localhost 9200
    
  3. 内存溢出

    # NOTE(review): the original comment says "increase the memory limit", but
    # these exports do not raise any limit. PYTHONUNBUFFERED=1 only disables
    # Python output buffering; OMP_NUM_THREADS=4 caps OpenMP threads at 4,
    # which presumably lowers peak memory use - confirm the intended remedy.
    export PYTHONUNBUFFERED=1
    export OMP_NUM_THREADS=4
    

安全建议

  1. API密钥管理

    • 使用密钥管理服务如HashiCorp Vault
    • 定期轮换API密钥
    • 不要在代码中硬编码密钥
  2. 网络安全

    • 使用防火墙限制访问
    • 配置SSL/TLS加密
    • 实施速率限制
  3. 数据安全

    • 加密敏感数据
    • 定期备份
    • 实施访问控制

更多部署问题请参考项目Wiki或联系技术支持。