#!/usr/bin/env python3
"""
Dependency extraction script.

Usage: python extract_dependencies.py [project_path]
Output: dependency information in JSON format
"""

import ast
import json
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Set

# Directory names that never contain the project's own source files.
_SKIPPED_DIRS = frozenset({'venv', 'env', '.venv', 'node_modules', '__pycache__'})

# Used when the interpreter does not expose sys.stdlib_module_names (< 3.10);
# always unioned with it otherwise, so listing a name twice is harmless.
_FALLBACK_STDLIB = frozenset({
    'os', 'sys', 'json', 'time', 'datetime', 'collections', 'itertools',
    'functools', 'pathlib', 're', 'math', 'random', 'typing', 'abc',
    'asyncio', 'logging', 'unittest', 'argparse', 'subprocess', 'io',
    'copy', 'pickle', 'sqlite3', 'http', 'urllib', 'email', 'uuid',
    '__future__', 'ast', 'base64', 'contextlib', 'csv', 'dataclasses',
    'enum', 'glob', 'hashlib', 'importlib', 'inspect', 'operator',
    'shutil', 'socket', 'string', 'struct', 'tempfile', 'textwrap',
    'threading', 'traceback', 'warnings',
})

# Leading distribution name of a requirement specifier. The lookahead demands
# that the name is followed by something that can legally follow a name
# (end, extras, version operator, marker, direct reference, comment), so bare
# URLs and local paths are rejected instead of being truncated to "https".
_REQUIREMENT_RE = re.compile(
    r"^([A-Za-z0-9][A-Za-z0-9._-]*)\s*(?=$|[\[<>=!~;@#])"
)


def _requirement_name(spec: str) -> Optional[str]:
    """Return the package name of one requirement line, or None to skip it."""
    spec = spec.strip()
    # Blank lines, comments and pip options (-r, -e, --hash, ...) carry no name.
    if not spec or spec.startswith(('#', '-')):
        return None
    match = _REQUIREMENT_RE.match(spec)
    return match.group(1) if match else None


def _normalize(name: str) -> str:
    """Fold case and -/_/. runs so distribution and import names compare equal."""
    return re.sub(r"[-_.]+", "_", name).lower()


def _table(mapping, key) -> Dict:
    """Return mapping[key] when it is a table (dict), else an empty dict."""
    value = mapping.get(key) if isinstance(mapping, dict) else None
    return value if isinstance(value, dict) else {}


def _read_requirements_txt(requirements_txt: Path) -> Set[str]:
    """Collect the package names declared in a requirements.txt file."""
    names = set()
    for line in requirements_txt.read_text(encoding='utf-8').splitlines():
        name = _requirement_name(line)
        if name:
            names.add(name)
    return names


def _scan_pyproject_lines(content: str) -> Set[str]:
    """Line-based pyproject.toml scan used when no TOML parser is usable.

    Only recognizes ``key = value`` entries inside a
    ``[tool.poetry.dependencies]`` or ``[project.dependencies]`` table.
    """
    deps = set()
    in_deps = False
    for line in content.split('\n'):
        stripped = line.strip()
        if '[tool.poetry.dependencies]' in line or '[project.dependencies]' in line:
            in_deps = True
            continue
        if not in_deps:
            continue
        if stripped.startswith('['):
            # The next table header ends the dependency section.
            break
        if stripped.startswith('#'):
            continue
        if '=' in line:
            dep = line.split('=')[0].strip().strip('"')
            if dep != 'python':
                deps.add(dep)
    return deps


def _read_pyproject_dependencies(pyproject_toml: Path) -> Set[str]:
    """Collect the package names declared in a pyproject.toml file.

    Reads the PEP 621 ``[project] dependencies`` array and the Poetry
    ``[tool.poetry.dependencies]`` table. Falls back to a plain line scan
    when ``tomllib`` is unavailable or the file is not valid TOML.
    """
    content = pyproject_toml.read_text(encoding='utf-8')
    try:
        import tomllib  # standard library since Python 3.11
    except ImportError:
        return _scan_pyproject_lines(content)
    try:
        data = tomllib.loads(content)
    except tomllib.TOMLDecodeError:
        return _scan_pyproject_lines(content)

    deps = set()
    declared = _table(data, "project").get("dependencies")
    if isinstance(declared, list):
        # PEP 621: a list of requirement strings such as "requests>=2".
        for spec in declared:
            name = _requirement_name(spec) if isinstance(spec, str) else None
            if name:
                deps.add(name)
    elif isinstance(declared, dict):
        # Non-standard table form; keep accepting it as the line scan does.
        deps.update(name for name in declared if name != 'python')

    poetry_deps = _table(_table(_table(data, "tool"), "poetry"), "dependencies")
    # Poetry lists the interpreter constraint under the same table.
    deps.update(name for name in poetry_deps if name != 'python')
    return deps


def extract_python_imports(file_path: Path) -> Set[str]:
    """Extract the top-level names of absolutely imported modules from a file.

    Args:
        file_path: Path of the Python source file to inspect.

    Returns:
        The set of top-level package names imported anywhere in the file.
        Relative imports (``from . import x``, ``from .mod import y``) are
        ignored because they always refer to the project itself. The set is
        empty when the file cannot be read or parsed; a warning is then
        written to stderr.
    """
    imports = set()
    try:
        # Passing bytes lets the parser honor a BOM or PEP 263 encoding cookie.
        tree = ast.parse(file_path.read_bytes(), filename=str(file_path))
    except (OSError, SyntaxError, ValueError, RecursionError) as e:
        print(f"Warning: Failed to parse {file_path}: {e}", file=sys.stderr)
        return imports

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                # Keep only the top-level package name.
                imports.add(alias.name.split('.')[0])
        elif isinstance(node, ast.ImportFrom):
            if node.module and node.level == 0:
                imports.add(node.module.split('.')[0])
    return imports


def analyze_python_project(project_path: Path) -> Dict:
    """Analyze the dependencies of a Python project.

    Walks every ``*.py`` file below ``project_path`` (skipping virtual
    environments, ``node_modules`` and ``__pycache__``), collects imported
    top-level packages and compares them with the dependencies declared in
    ``requirements.txt`` and ``pyproject.toml``.

    Args:
        project_path: Root directory of the project.

    Returns:
        A dict with the keys ``language``, ``files_analyzed``,
        ``total_imports``, ``third_party_imports``, ``declared_dependencies``,
        ``undeclared_usage`` and ``unused_dependencies``. Declared and
        imported names are matched case-insensitively with ``-``, ``_`` and
        ``.`` treated alike; packages whose import name differs from the
        distribution name (e.g. ``yaml`` / ``PyYAML``) still show up in both
        difference lists. Modules belonging to the project itself are not
        distinguished from third-party packages.
    """
    all_imports = set()
    file_count = 0

    for py_file in project_path.rglob("*.py"):
        # Test parts relative to the project root so that an ancestor
        # directory named e.g. "env" does not exclude the whole project.
        relative_parts = py_file.relative_to(project_path).parts
        if not _SKIPPED_DIRS.isdisjoint(relative_parts):
            continue
        file_count += 1
        all_imports.update(extract_python_imports(py_file))

    # Read the declared dependencies.
    declared_deps = set()
    requirements_txt = project_path / "requirements.txt"
    if requirements_txt.exists():
        declared_deps.update(_read_requirements_txt(requirements_txt))

    pyproject_toml = project_path / "pyproject.toml"
    if pyproject_toml.exists():
        declared_deps.update(_read_pyproject_dependencies(pyproject_toml))

    # Separate third-party packages from the standard library.
    stdlib = set(_FALLBACK_STDLIB) | set(getattr(sys, "stdlib_module_names", ()))
    third_party = all_imports - stdlib

    imported_keys = {_normalize(name) for name in third_party}
    declared_keys = {_normalize(name) for name in declared_deps}

    return {
        "language": "Python",
        "files_analyzed": file_count,
        "total_imports": len(all_imports),
        "third_party_imports": sorted(third_party),
        "declared_dependencies": sorted(declared_deps),
        "undeclared_usage": sorted(
            name for name in third_party if _normalize(name) not in declared_keys
        ),
        "unused_dependencies": sorted(
            name for name in declared_deps if _normalize(name) not in imported_keys
        ),
    }


def analyze_javascript_project(project_path: Path) -> Optional[Dict]:
    """Analyze the dependencies of a JavaScript/TypeScript project.

    Args:
        project_path: Root directory of the project.

    Returns:
        A dict with the keys ``language``, ``dependencies``,
        ``dev_dependencies`` and ``total_dependencies``, or ``None`` when
        there is no ``package.json`` or it cannot be read as a JSON object
        (an error is then written to stderr).
    """
    package_json = project_path / "package.json"
    if not package_json.exists():
        return None

    try:
        content = json.loads(package_json.read_text(encoding='utf-8'))
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        print(f"Error parsing package.json: {e}", file=sys.stderr)
        return None

    if not isinstance(content, dict):
        print("Error parsing package.json: top-level value is not an object",
              file=sys.stderr)
        return None

    # A missing, null or otherwise malformed section counts as empty.
    dependencies = _table(content, "dependencies")
    dev_dependencies = _table(content, "devDependencies")

    return {
        "language": "JavaScript/TypeScript",
        "dependencies": list(dependencies),
        "dev_dependencies": list(dev_dependencies),
        "total_dependencies": len(dependencies) + len(dev_dependencies),
    }


def main():
    """Analyze the project given on the command line and print JSON to stdout.

    Uses the first command-line argument as the project path, or the current
    working directory when none is given. Exits with status 1 when the path
    does not exist or is not a directory.
    """
    project_path = Path(sys.argv[1]) if len(sys.argv) > 1 else Path.cwd()

    if not project_path.exists():
        print(f"Error: Path {project_path} does not exist", file=sys.stderr)
        sys.exit(1)
    if not project_path.is_dir():
        print(f"Error: Path {project_path} is not a directory", file=sys.stderr)
        sys.exit(1)

    results = []

    # Python analysis is only reported when at least one file was found.
    python_result = analyze_python_project(project_path)
    if python_result["files_analyzed"] > 0:
        results.append(python_result)

    js_result = analyze_javascript_project(project_path)
    if js_result:
        results.append(js_result)

    output = {
        "project_path": str(project_path),
        "analyses": results,
    }
    print(json.dumps(output, indent=2, ensure_ascii=False))


if __name__ == "__main__":
    main()