AIEC_Skills/codebase_architecture_analyzer_v1/scripts/extract_dependencies.py

#!/usr/bin/env python3
"""
依赖提取脚本

用法: python extract_dependencies.py [项目路径]

输出: JSON 格式的依赖信息
"""

import ast
import json
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Set


def extract_python_imports(file_path: Path) -> Set[str]:
    """Collect the top-level package names imported by one Python file.

    Both ``import a.b`` and ``from a.b import c`` contribute ``"a"``.
    Relative imports (``from . import x``, ``from .pkg import y``) always
    refer to the importing package itself, so they are not reported.

    Args:
        file_path: Path of the Python source file to inspect.

    Returns:
        The set of top-level module names imported absolutely by the file.
        An empty set is returned (and a warning is written to stderr) when
        the file cannot be read or parsed.
    """
    imports: Set[str] = set()

    try:
        # Pass raw bytes so the parser applies the file's own encoding rules
        # (UTF-8 BOM, PEP 263 coding cookie) instead of forcing strict UTF-8.
        source = Path(file_path).read_bytes()
        tree = ast.parse(source, filename=str(file_path))
    except Exception as e:
        # Deliberate best-effort boundary: one unreadable file must not abort
        # the analysis of the whole project.
        print(f"Warning: Failed to parse {file_path}: {e}", file=sys.stderr)
        return imports

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            # Keep only the top-level package name.
            imports.update(alias.name.split('.')[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            # level > 0 marks a relative import; module is None for
            # ``from . import x``.
            if node.level == 0 and node.module:
                imports.add(node.module.split('.')[0])

    return imports


# Directory names that never contain project source code.
_SKIPPED_DIR_NAMES = frozenset({'venv', 'env', '.venv', 'node_modules', '__pycache__'})

# Used in addition to sys.stdlib_module_names, which only exists on 3.10+.
_FALLBACK_STDLIB = frozenset({
    'os', 'sys', 'json', 'time', 'datetime', 'collections', 'itertools',
    'functools', 'pathlib', 're', 'math', 'random', 'typing', 'abc',
    'asyncio', 'logging', 'unittest', 'argparse', 'subprocess', 'io',
    'copy', 'pickle', 'sqlite3', 'http', 'urllib', 'email', 'uuid',
    '__future__',
})

# A "#" starts a comment only at line start or after whitespace, so URL
# fragments such as "#egg=name" are left alone.
_REQ_COMMENT = re.compile(r'(^|\s+)#.*$')
# Distribution name followed by end of text or the start of extras, a version
# specifier, an environment marker, or a direct reference ("name @ url").
_REQ_NAME = re.compile(r'^([A-Za-z0-9][A-Za-z0-9._-]*)\s*(?=$|[\[<>=!~;@])')
# Tokens of interest inside a TOML array: quoted string, comment start, "]".
_TOML_TOKEN = re.compile(r'"([^"]*)"|\'([^\']*)\'|(#)|(\])')
_PEP621_DEPS = re.compile(r'^dependencies\s*=\s*\[(.*)$')
# "[project.dependencies]" is not a PEP 621 table; it is still accepted
# because earlier versions of this script accepted it.
_KEY_VALUE_DEP_TABLES = frozenset({'[tool.poetry.dependencies]', '[project.dependencies]'})


def _normalize_name(name: str) -> str:
    """Return a comparison key that ignores case and ``-``/``_``/``.`` differences."""
    return re.sub(r'[-_.]+', '_', name).lower()


def _requirement_name(spec: str) -> str:
    """Return the distribution name of a requirement line, or ``''`` if it has none."""
    spec = _REQ_COMMENT.sub('', spec.strip()).strip()
    match = _REQ_NAME.match(spec)
    return match.group(1) if match else ''


def _scan_array_fragment(fragment: str, names: Set[str]) -> bool:
    """Add requirement names quoted in *fragment*; return True once ``]`` closes the array."""
    for token in _TOML_TOKEN.finditer(fragment):
        if token.group(3):
            return False  # rest of the line is a comment
        if token.group(4):
            return True
        quoted = token.group(1) if token.group(1) is not None else token.group(2)
        name = _requirement_name(quoted)
        if name:
            names.add(name)
    return False


def _pyproject_dependency_names(text: str) -> Set[str]:
    """Extract dependency names from pyproject.toml text without a TOML parser."""
    names: Set[str] = set()
    section = ''
    in_array = False

    for raw_line in text.splitlines():
        line = raw_line.strip()
        if in_array:
            in_array = not _scan_array_fragment(line, names)
            continue
        if not line or line.startswith('#'):
            continue
        if line.startswith('['):
            section = line.split('#', 1)[0].strip()
            continue
        if section == '[project]':
            # PEP 621: dependencies = ["pkg>=1", ...], possibly over several lines.
            array_start = _PEP621_DEPS.match(line)
            if array_start:
                in_array = not _scan_array_fragment(array_start.group(1), names)
        elif section in _KEY_VALUE_DEP_TABLES and '=' in line:
            key = line.split('=', 1)[0].strip().strip('"\'')
            name = _requirement_name(key)
            # "python" is the interpreter constraint, not a package.
            if name and name != 'python':
                names.add(name)

    return names


def analyze_python_project(project_path: Path) -> Dict:
    """Compare the imports used by a Python project with its declared dependencies.

    Every ``*.py`` file below *project_path* is scanned, except files inside
    virtual-environment, ``node_modules`` or ``__pycache__`` directories of
    the project. Declared dependencies are read from ``requirements.txt``
    and from ``pyproject.toml`` (the PEP 621 ``[project]`` ``dependencies``
    array and the ``[tool.poetry.dependencies]`` table).

    Imports of standard-library modules and of the project's own modules
    (top-level names matching a scanned ``.py`` file or a package directory
    holding an ``__init__.py``) are not counted as third-party. A local file
    that shares its name with a real third-party package will therefore hide
    that package from the report.

    Args:
        project_path: Root directory of the project.

    Returns:
        A dict with the keys ``language``, ``files_analyzed``,
        ``total_imports``, ``third_party_imports``, ``declared_dependencies``,
        ``undeclared_usage`` and ``unused_dependencies``. The list values are
        sorted. Declared and imported names are matched ignoring case and
        ``-``/``_``/``.`` differences; an import name that differs from its
        distribution name (``yaml`` vs ``PyYAML``) is still reported.
    """
    all_imports: Set[str] = set()
    local_modules: Set[str] = set()
    file_count = 0

    for py_file in project_path.rglob("*.py"):
        # Only look at the path below the project root, so a project that
        # itself lives under e.g. ".../env/" is not skipped wholesale.
        relative_parts = py_file.relative_to(project_path).parts
        if _SKIPPED_DIR_NAMES.intersection(relative_parts):
            continue

        file_count += 1
        all_imports.update(extract_python_imports(py_file))

        if py_file.name == '__init__.py':
            if len(relative_parts) > 1:
                local_modules.add(relative_parts[-2])
        else:
            local_modules.add(py_file.stem)

    # Read the declared dependencies.
    declared_deps: Set[str] = set()

    requirements_txt = project_path / "requirements.txt"
    if requirements_txt.is_file():
        for line in requirements_txt.read_text(encoding='utf-8').splitlines():
            name = _requirement_name(line)
            if name:
                declared_deps.add(name)

    pyproject_toml = project_path / "pyproject.toml"
    if pyproject_toml.is_file():
        declared_deps |= _pyproject_dependency_names(
            pyproject_toml.read_text(encoding='utf-8')
        )

    # Separate third-party packages from the standard library and local code.
    stdlib = _FALLBACK_STDLIB | set(getattr(sys, 'stdlib_module_names', ()))
    third_party = all_imports - stdlib - local_modules

    declared_keys = {_normalize_name(dep) for dep in declared_deps}
    imported_keys = {_normalize_name(module) for module in third_party}

    return {
        "language": "Python",
        "files_analyzed": file_count,
        "total_imports": len(all_imports),
        "third_party_imports": sorted(third_party),
        "declared_dependencies": sorted(declared_deps),
        "undeclared_usage": sorted(
            module for module in third_party
            if _normalize_name(module) not in declared_keys
        ),
        "unused_dependencies": sorted(
            dep for dep in declared_deps
            if _normalize_name(dep) not in imported_keys
        ),
    }


def analyze_javascript_project(project_path: Path) -> Optional[Dict]:
    """Summarize the dependencies declared in a project's ``package.json``.

    Args:
        project_path: Root directory of the project.

    Returns:
        ``None`` when there is no ``package.json``, or when it cannot be read
        or is not a JSON object (an error is then written to stderr).
        Otherwise a dict with the keys ``language``, ``dependencies``,
        ``dev_dependencies`` and ``total_dependencies``. A missing, ``null``
        or non-object ``dependencies`` / ``devDependencies`` entry counts as
        empty instead of discarding the whole analysis.
    """
    package_json = project_path / "package.json"

    if not package_json.exists():
        return None

    try:
        content = json.loads(package_json.read_text(encoding='utf-8'))
    except (OSError, ValueError, RecursionError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error parsing package.json: {e}", file=sys.stderr)
        return None

    if not isinstance(content, dict):
        print("Error parsing package.json: top-level value is not an object",
              file=sys.stderr)
        return None

    def section_names(key: str) -> List[str]:
        # Anything other than a JSON object is treated as "no entries".
        section = content.get(key)
        return list(section) if isinstance(section, dict) else []

    dependencies = section_names("dependencies")
    dev_dependencies = section_names("devDependencies")

    return {
        "language": "JavaScript/TypeScript",
        "dependencies": dependencies,
        "dev_dependencies": dev_dependencies,
        "total_dependencies": len(dependencies) + len(dev_dependencies)
    }


def main():
    """Analyze a project directory and print the result as JSON on stdout.

    The project path is taken from the first command-line argument and
    defaults to the current working directory. The process exits with
    status 1 when the path does not exist or is not a directory.
    """
    # Resolve the project path.
    project_path = Path(sys.argv[1]) if len(sys.argv) > 1 else Path.cwd()

    if not project_path.exists():
        print(f"Error: Path {project_path} does not exist", file=sys.stderr)
        sys.exit(1)

    # A regular file would otherwise yield an empty, misleading report.
    if not project_path.is_dir():
        print(f"Error: Path {project_path} is not a directory", file=sys.stderr)
        sys.exit(1)

    results = []

    # Python analysis is reported only when at least one file was scanned.
    python_result = analyze_python_project(project_path)
    if python_result["files_analyzed"] > 0:
        results.append(python_result)

    # JavaScript analysis is reported only when package.json was usable.
    js_result = analyze_javascript_project(project_path)
    if js_result:
        results.append(js_result)

    # Emit the combined report.
    output = {
        "project_path": str(project_path),
        "analyses": results
    }

    print(json.dumps(output, indent=2, ensure_ascii=False))


if __name__ == "__main__":
    main()
项目理解skill-v1 2025-11-12 10:27:56 +08:00			`#!/usr/bin/env python3`
			`"""`
			`依赖提取脚本`

			`用法: python extract_dependencies.py [项目路径]`

			`输出: JSON 格式的依赖信息`
			`"""`

			`import ast`
			`import json`
			`import sys`
			`from pathlib import Path`
			`from typing import Dict, List, Set`


			`def extract_python_imports(file_path: Path) -> Set[str]:`
			`"""从 Python 文件提取 import"""`
			`imports = set()`

			`try:`
			`with open(file_path, 'r', encoding='utf-8') as f:`
			`tree = ast.parse(f.read(), filename=str(file_path))`

			`for node in ast.walk(tree):`
			`if isinstance(node, ast.Import):`
			`for alias in node.names:`
			`# 只保留顶层包名`
			`module = alias.name.split('.')[0]`
			`imports.add(module)`

			`elif isinstance(node, ast.ImportFrom):`
			`if node.module:`
			`module = node.module.split('.')[0]`
			`imports.add(module)`

			`except Exception as e:`
			`print(f"Warning: Failed to parse {file_path}: {e}", file=sys.stderr)`

			`return imports`


			`def analyze_python_project(project_path: Path) -> Dict:`
			`"""分析 Python 项目的依赖"""`
			`all_imports = set()`
			`file_count = 0`

			`# 遍历所有 Python 文件`
			`for py_file in project_path.rglob("*.py"):`
			`# 跳过虚拟环境和 node_modules`
			`if any(part in py_file.parts for part in ['venv', 'env', '.venv', 'node_modules', '__pycache__']):`
			`continue`

			`file_count += 1`
			`imports = extract_python_imports(py_file)`
			`all_imports.update(imports)`

			`# 读取声明的依赖`
			`declared_deps = set()`

			`requirements_txt = project_path / "requirements.txt"`
			`if requirements_txt.exists():`
			`for line in requirements_txt.read_text(encoding='utf-8').split('\n'):`
			`line = line.strip()`
			`if line and not line.startswith('#'):`
			`# 去掉版本号`
			`dep = line.split('==')[0].split('>=')[0].split('~=')[0].strip()`
			`declared_deps.add(dep)`

			`pyproject_toml = project_path / "pyproject.toml"`
			`if pyproject_toml.exists():`
			`content = pyproject_toml.read_text(encoding='utf-8')`
			`# 简单提取（不使用 toml 库）`
			`in_deps = False`
			`for line in content.split('\n'):`
			`if '[tool.poetry.dependencies]' in line or '[project.dependencies]' in line:`
			`in_deps = True`
			`continue`
			`if in_deps and line.strip().startswith('['):`
			`break`
			`if in_deps and '=' in line:`
			`dep = line.split('=')[0].strip().strip('"')`
			`if dep != 'python':`
			`declared_deps.add(dep)`

			`# Python 标准库（部分常见的）`
			`stdlib = {`
			`'os', 'sys', 'json', 'time', 'datetime', 'collections', 'itertools',`
			`'functools', 'pathlib', 're', 'math', 'random', 'typing', 'abc',`
			`'asyncio', 'logging', 'unittest', 'argparse', 'subprocess', 'io',`
			`'copy', 'pickle', 'sqlite3', 'http', 'urllib', 'email', 'uuid'`
			`}`

			`# 区分第三方包和标准库`
			`third_party = all_imports - stdlib`

			`return {`
			`"language": "Python",`
			`"files_analyzed": file_count,`
			`"total_imports": len(all_imports),`
			`"third_party_imports": list(sorted(third_party)),`
			`"declared_dependencies": list(sorted(declared_deps)),`
			`"undeclared_usage": list(sorted(third_party - declared_deps)),`
			`"unused_dependencies": list(sorted(declared_deps - third_party))`
			`}`


			`def analyze_javascript_project(project_path: Path) -> Dict:`
			`"""分析 JavaScript 项目的依赖"""`
			`package_json = project_path / "package.json"`

			`if not package_json.exists():`
			`return None`

			`try:`
			`import json as json_module`
			`content = json_module.loads(package_json.read_text(encoding='utf-8'))`

			`dependencies = content.get("dependencies", {})`
			`dev_dependencies = content.get("devDependencies", {})`

			`return {`
			`"language": "JavaScript/TypeScript",`
			`"dependencies": list(dependencies.keys()),`
			`"dev_dependencies": list(dev_dependencies.keys()),`
			`"total_dependencies": len(dependencies) + len(dev_dependencies)`
			`}`

			`except Exception as e:`
			`print(f"Error parsing package.json: {e}", file=sys.stderr)`
			`return None`


			`def main():`
			`# 获取项目路径`
			`if len(sys.argv) > 1:`
			`project_path = Path(sys.argv[1])`
			`else:`
			`project_path = Path.cwd()`

			`if not project_path.exists():`
			`print(f"Error: Path {project_path} does not exist", file=sys.stderr)`
			`sys.exit(1)`

			`results = []`

			`# 分析 Python`
			`python_result = analyze_python_project(project_path)`
			`if python_result["files_analyzed"] > 0:`
			`results.append(python_result)`

			`# 分析 JavaScript`
			`js_result = analyze_javascript_project(project_path)`
			`if js_result:`
			`results.append(js_result)`

			`# 输出结果`
			`output = {`
			`"project_path": str(project_path),`
			`"analyses": results`
			`}`

			`print(json.dumps(output, indent=2, ensure_ascii=False))`


			`if __name__ == "__main__":`
			`main()`