diff --git a/.github/workflows/measure-disk-usage.yml b/.github/workflows/measure-disk-usage.yml new file mode 100644 index 0000000000000..35c41c3fcb775 --- /dev/null +++ b/.github/workflows/measure-disk-usage.yml @@ -0,0 +1,125 @@ +name: Measure Disk Usage + +on: + push: + branches: + - master +env: + PYTHON_VERSION: "3.12" + +jobs: + measure-disk-usage: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + fetch-depth: 0 + - name: Set up Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + with: + python-version: ${{ env.PYTHON_VERSION }} + - name: Install ddev + run: | + pip install -e ./datadog_checks_dev[cli] + pip install -e ./ddev + + - name: Configure ddev + run: | + ddev config set repos.core . + ddev config set repo core + - name: Measure disk usage (uncompressed) + run: | + mkdir -p status_visualizations + ddev size status --csv > size-uncompressed.csv + ddev size status --save_to_png_path status_visualizations/uncompressed.png > size-uncompressed.txt + cat size-uncompressed.txt + echo "# Size (uncompressed)" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + cat size-uncompressed.txt >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + + - name: Measure disk usage (compressed) + run: | + mkdir -p status_visualizations + ddev size status --csv --compressed > size-compressed.csv + ddev size status --compressed --save_to_png_path status_visualizations/compressed.png > size-compressed.txt + cat size-compressed.txt + echo "# Size (compressed)" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + cat size-compressed.txt >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + + + - name: Measure disk usage differences from last commit (uncompressed) + if: false # Disabled for now: size difference can be misleading due to dependencies not being built in the same PR + run: | + mkdir -p diff_visualizations + BEFORE=$(git rev-parse HEAD^) + AFTER=$(git rev-parse HEAD) + ddev size diff $BEFORE $AFTER --csv > diff-uncompressed.csv + ddev size diff $BEFORE $AFTER --save_to_png_path diff_visualizations/diff-uncompressed-linux.png > diff-uncompressed.txt + cat diff-uncompressed.txt + echo "# Size diff (uncompressed)" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + cat diff-uncompressed.txt >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + + - name: Measure disk usage differences from last commit (compressed) + if: false # Disabled for now: size difference can be misleading due to dependencies not being built in the same PR + run: | + mkdir -p diff_visualizations + BEFORE=$(git rev-parse HEAD^) + AFTER=$(git rev-parse HEAD) + ddev size diff $BEFORE $AFTER --compressed --csv > diff-compressed.csv + ddev size diff $BEFORE $AFTER --compressed --save_to_png_path diff_visualizations/diff-compressed-linux.png > diff-compressed.txt + cat diff-compressed.txt + echo "# Size diff (compressed)" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + cat diff-compressed.txt >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + + - name: Upload file sizes (uncompressed) + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: size-uncompressed.csv + path: size-uncompressed.csv + if-no-files-found: error + + - name: Upload file sizes (compressed) + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: size-compressed.csv 
+ path: size-compressed.csv + if-no-files-found: error + + - name: Upload file sizes diff (uncompressed) + if: false # Disabled for now: size difference can be misleading due to dependencies not being built in the same PR + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: diff-uncompressed.csv + path: diff-uncompressed.csv + if-no-files-found: error + + - name: Upload file sizes diff (compressed) + if: false # Disabled for now: size difference can be misleading due to dependencies not being built in the same PR + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: diff-compressed.csv + path: diff-compressed.csv + if-no-files-found: error + + - name: Upload status PNGs + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: size-visuals + path: status_visualizations/ + if-no-files-found: error + + - name: Upload diff PNGs + if: false # Disabled for now: size difference can be misleading due to dependencies not being built in the same PR + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: diff-visuals + path: diff_visualizations/ + if-no-files-found: error + diff --git a/.github/workflows/slapr.yml b/.github/workflows/slapr.yml index 98ee0448e5614..dbec90dedbd4c 100644 --- a/.github/workflows/slapr.yml +++ b/.github/workflows/slapr.yml @@ -27,4 +27,4 @@ jobs: SLAPR_EMOJI_APPROVED: "approved2" SLAPR_EMOJI_CHANGES_REQUESTED: "changes_requested" SLAPR_EMOJI_MERGED: "merged" - SLAPR_EMOJI_CLOSED: "closed" + SLAPR_EMOJI_CLOSED: "closed" \ No newline at end of file diff --git a/ddev/changelog.d/20128.added b/ddev/changelog.d/20128.added new file mode 100644 index 0000000000000..6b95fbdfccc6f --- /dev/null +++ b/ddev/changelog.d/20128.added @@ -0,0 +1,4 @@ +Added new commands to track and analyze size changes in integrations and dependencies: +- **`ddev size status`**: Shows current sizes of all modules. +- **`ddev size diff [COMMIT_BEFORE] [COMMIT_AFTER]`**: Compares size changes between two commits. +- **`ddev size timeline {integration | dependency} [INTEGRATION_NAME/DEPENDENCY_NAME]`**: Visualizes the size evolution of a module over time. diff --git a/ddev/pyproject.toml b/ddev/pyproject.toml index 7218fa7a26a98..78378e34e80de 100644 --- a/ddev/pyproject.toml +++ b/ddev/pyproject.toml @@ -40,6 +40,9 @@ dependencies = [ "tomli-w", "tomlkit", "tqdm", + "requests", + "matplotlib", + "squarify" ] dynamic = ["version"] diff --git a/ddev/src/ddev/cli/__init__.py b/ddev/src/ddev/cli/__init__.py index 302f859cd9f54..e16dc80db4146 100644 --- a/ddev/src/ddev/cli/__init__.py +++ b/ddev/src/ddev/cli/__init__.py @@ -18,6 +18,7 @@ from ddev.cli.env import env from ddev.cli.meta import meta from ddev.cli.release import release +from ddev.cli.size import size from ddev.cli.status import status from ddev.cli.test import test from ddev.cli.validate import validate @@ -149,6 +150,7 @@ def ddev( ddev.add_command(status) ddev.add_command(test) ddev.add_command(validate) +ddev.add_command(size) __management_command = os.environ.get('PYAPP_COMMAND_NAME', '') if __management_command: diff --git a/ddev/src/ddev/cli/size/__init__.py b/ddev/src/ddev/cli/size/__init__.py new file mode 100644 index 0000000000000..63ddba4fa4af8 --- /dev/null +++ b/ddev/src/ddev/cli/size/__init__.py @@ -0,0 +1,30 @@ +# (C) Datadog, Inc. 
2022-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +import click + +from ddev.cli.size.diff import diff +from ddev.cli.size.status import status +from ddev.cli.size.timeline import timeline + + +@click.group() +def size(): + """ + Analyze the download size of integrations and dependencies in various modes. + + This command provides tools to inspect the current status, compare commits and monitor size changes of modules + across different commits, platforms, and Python versions. + + """ + + pass + + +size.add_command(status) +size.add_command(diff) +size.add_command(timeline) + +if __name__ == "__main__": + size() diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py new file mode 100644 index 0000000000000..74408f50115e9 --- /dev/null +++ b/ddev/src/ddev/cli/size/common.py @@ -0,0 +1,757 @@ +# (C) Datadog, Inc. 2022-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +import json +import os +import re +import shutil +import subprocess +import tempfile +import zipfile +import zlib +from datetime import date +from pathlib import Path +from types import TracebackType +from typing import Literal, Optional, Type, TypedDict + +import matplotlib.pyplot as plt +import requests +import squarify +from matplotlib.patches import Patch + +from ddev.cli.application import Application + + +class FileDataEntry(TypedDict): + Name: str # Integration/Dependency name + Version: str # Version of the Integration/Dependency + Size_Bytes: int # Size in bytes + Size: str # Human-readable size + Type: str # Integration/Dependency + + +class FileDataEntryPlatformVersion(FileDataEntry): + Platform: str # Target platform (e.g. linux-aarch64) + Python_Version: str # Target Python version (e.g. 3.12) + + +class CommitEntry(TypedDict): + Size_Bytes: int # Total size in bytes at commit + Version: str # Version of the Integration/Dependency at commit + Date: date # Commit date + Author: str # Commit author + Commit_Message: str # Commit message + Commit_SHA: str # Commit SHA hash + + +class CommitEntryWithDelta(CommitEntry): + Delta_Bytes: int # Size change in bytes compared to previous commit + Delta: str # Human-readable size change + + +class CommitEntryPlatformWithDelta(CommitEntryWithDelta): + Platform: str # Target platform (e.g. linux-aarch64) + + +class CLIParameters(TypedDict): + app: Application + platform: str + version: str + compressed: bool + csv: bool + markdown: bool + json: bool + save_to_png_path: Optional[str] + show_gui: bool + + +class CLIParametersTimeline(TypedDict): + app: Application + module: str + threshold: Optional[int] + compressed: bool + csv: bool + markdown: bool + json: bool + save_to_png_path: Optional[str] + show_gui: bool + + +class InitialParametersTimelineIntegration(CLIParametersTimeline): + type: Literal["integration"] + first_commit: str + platform: None + + +class InitialParametersTimelineDependency(CLIParametersTimeline): + type: Literal["dependency"] + first_commit: None + platform: str + + +def get_valid_platforms(repo_path: Path | str) -> set[str]: + """ + Extracts the platforms we support from the .deps/resolved file names. 
+ """ + resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved")) + platforms = [] + for file in os.listdir(resolved_path): + platforms.append("_".join(file.split("_")[:-1])) + return set(platforms) + + +def get_valid_versions(repo_path: Path | str) -> set[str]: + """ + Extracts the Python versions we support from the .deps/resolved file names. + """ + resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved")) + versions = [] + for file in os.listdir(resolved_path): + match = re.search(r"\d+\.\d+", file) + if match: + versions.append(match.group()) + return set(versions) + + +def is_correct_dependency(platform: str, version: str, name: str) -> bool: + return platform in name and version in name + + +def is_valid_integration(path: str, included_folder: str, ignored_files: set[str], git_ignore: list[str]) -> bool: + # It is not an integration + if path.startswith("."): + return False + # It is part of an integration and it is not in the datadog_checks folder + elif included_folder not in path: + return False + # It is an irrelevant file + elif any(ignore in path for ignore in ignored_files): + return False + # This file is contained in .gitignore + elif any(ignore in path for ignore in git_ignore): + return False + else: + return True + + +def get_gitignore_files(repo_path: str | Path) -> list[str]: + gitignore_path = os.path.join(repo_path, ".gitignore") + with open(gitignore_path, "r", encoding="utf-8") as file: + gitignore_content = file.read() + ignored_patterns = [ + line.strip() for line in gitignore_content.splitlines() if line.strip() and not line.startswith("#") + ] + return ignored_patterns + + +def convert_to_human_readable_size(size_bytes: float) -> str: + for unit in [" B", " KB", " MB", " GB"]: + if abs(size_bytes) < 1024: + return str(round(size_bytes, 2)) + unit + size_bytes /= 1024 + return str(round(size_bytes, 2)) + " TB" + + +def compress(file_path: str) -> int: + compressor = zlib.compressobj() + compressed_size = 0 + chunk_size = 8192 + with open(file_path, "rb") as f: + while chunk := f.read(chunk_size): + compressed_chunk = compressor.compress(chunk) + compressed_size += len(compressed_chunk) + compressed_size += len(compressor.flush()) + return compressed_size + + +def get_files(repo_path: str | Path, compressed: bool) -> list[FileDataEntry]: + """ + Calculates integration file sizes and versions from a repository. 
+ """ + ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} + git_ignore = get_gitignore_files(repo_path) + included_folder = "datadog_checks" + os.sep + + integration_sizes: dict[str, int] = {} + integration_versions: dict[str, str] = {} + + for root, _, files in os.walk(repo_path): + for file in files: + file_path = os.path.join(root, file) + relative_path = os.path.relpath(file_path, repo_path) + + if not is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): + continue + path = Path(relative_path) + parts = path.parts + + integration_name = parts[0] + + size = compress(file_path) if compressed else os.path.getsize(file_path) + integration_sizes[integration_name] = integration_sizes.get(integration_name, 0) + size + + if integration_name not in integration_versions and file == "__about__.py": + version = extract_version_from_about_py(file_path) + integration_versions[integration_name] = version + + return [ + { + "Name": name, + "Version": integration_versions.get(name, ""), + "Size_Bytes": size, + "Size": convert_to_human_readable_size(size), + "Type": "Integration", + } + for name, size in integration_sizes.items() + ] + + +def extract_version_from_about_py(path: str) -> str: + """ + Extracts the __version__ string from a given __about__.py file. + """ + try: + with open(path, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if line.startswith("__version__"): + return line.split("=")[1].strip().strip("'\"") + except Exception: + pass + return "" + + +def get_dependencies(repo_path: str | Path, platform: str, version: str, compressed: bool) -> list[FileDataEntry]: + """ + Gets the list of dependencies for a given platform and Python version and returns a FileDataEntry that includes: + Name, Version, Size_Bytes, Size, and Type. + """ + resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved")) + + for filename in os.listdir(resolved_path): + file_path = os.path.join(resolved_path, filename) + + if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): + deps, download_urls, versions = get_dependencies_list(file_path) + return get_dependencies_sizes(deps, download_urls, versions, compressed) + return [] + + +def get_dependencies_list(file_path: str) -> tuple[list[str], list[str], list[str]]: + """ + Parses a dependency file and extracts the dependency names, download URLs, and versions. + """ + download_urls = [] + deps = [] + versions = [] + with open(file_path, "r", encoding="utf-8") as file: + file_content = file.read() + for line in file_content.splitlines(): + match = re.search(r"([\w\-\d\.]+) @ (https?://[^\s#]+)", line) + if not match: + raise WrongDependencyFormat("The dependency format 'name @ link' is no longer supported.") + name = match.group(1) + url = match.group(2) + + deps.append(name) + download_urls.append(url) + version_match = re.search(rf"{re.escape(name)}/[^/]+?-([0-9]+(?:\.[0-9]+)*)-", url) + if version_match: + versions.append(version_match.group(1)) + else: + versions.append("") + + return deps, download_urls, versions + + +def get_dependencies_sizes( + deps: list[str], download_urls: list[str], versions: list[str], compressed: bool +) -> list[FileDataEntry]: + """ + Calculates the sizes of dependencies, either compressed or uncompressed. + + Args: + deps: List of dependency names. + download_urls: Corresponding download URLs for the dependencies. + versions: Corresponding version strings for the dependencies. 
+ compressed: If True, use the Content-Length from the HTTP headers. + If False, download, extract, and compute actual uncompressed size. + """ + file_data: list[FileDataEntry] = [] + for dep, url, version in zip(deps, download_urls, versions, strict=False): + if compressed: + response = requests.head(url) + response.raise_for_status() + size_str = response.headers.get("Content-Length") + if size_str is None: + raise ValueError(f"Missing size for {dep}") + size = int(size_str) + + else: + with requests.get(url, stream=True) as response: + response.raise_for_status() + wheel_data = response.content + + with tempfile.TemporaryDirectory() as tmpdir: + wheel_path = Path(tmpdir) / "package.whl" + with open(wheel_path, "wb") as f: + f.write(wheel_data) + extract_path = Path(tmpdir) / "extracted" + with zipfile.ZipFile(wheel_path, "r") as zip_ref: + zip_ref.extractall(extract_path) + + size = 0 + for dirpath, _, filenames in os.walk(extract_path): + for name in filenames: + file_path = os.path.join(dirpath, name) + size += os.path.getsize(file_path) + file_data.append( + { + "Name": str(dep), + "Version": version, + "Size_Bytes": int(size), + "Size": convert_to_human_readable_size(size), + "Type": "Dependency", + } + ) + + return file_data + + +def format_modules( + modules: list[FileDataEntry], + platform: str, + py_version: str, + multiple_plats_and_vers: bool, +) -> list[FileDataEntryPlatformVersion] | list[FileDataEntry]: + """ + Formats the modules list, adding platform and Python version information. + + If the modules list is empty, returns a default empty entry. + """ + if modules == [] and not multiple_plats_and_vers: + empty_entry: FileDataEntry = { + "Name": "", + "Version": "", + "Size_Bytes": 0, + "Size": "", + "Type": "", + } + return [empty_entry] + elif modules == []: + empty_entry_with_platform: FileDataEntryPlatformVersion = { + "Name": "", + "Version": "", + "Size_Bytes": 0, + "Size": "", + "Type": "", + "Platform": "", + "Python_Version": "", + } + return [empty_entry_with_platform] + elif multiple_plats_and_vers: + new_modules: list[FileDataEntryPlatformVersion] = [ + {**entry, "Platform": platform, "Python_Version": py_version} for entry in modules + ] + return new_modules + else: + return modules + + +def print_json( + app: Application, + modules: ( + list[FileDataEntry] + | list[FileDataEntryPlatformVersion] + | list[CommitEntryWithDelta] + | list[CommitEntryPlatformWithDelta] + ), +) -> None: + printed_yet = False + app.display("[") + for row in modules: + if any(str(value).strip() not in ("", "0", "0001-01-01") for value in row.values()): + if printed_yet: + app.display(",") + app.display(json.dumps(row, default=str)) + printed_yet = True + + app.display("]") + + +def print_csv( + app: Application, + modules: ( + list[FileDataEntry] + | list[FileDataEntryPlatformVersion] + | list[CommitEntryWithDelta] + | list[CommitEntryPlatformWithDelta] + ), +) -> None: + headers = [k for k in modules[0].keys() if k not in ["Size", "Delta"]] + app.display(",".join(headers)) + + for row in modules: + if any(str(value).strip() not in ("", "0", "0001-01-01") for value in row.values()): + app.display(",".join(format(str(row.get(h, ""))) for h in headers)) + + +def format(s: str) -> str: + """ + Wraps the string in double quotes if it contains a comma, for safe CSV formatting. 
+ """ + return f'"{s}"' if "," in s else s + + +def print_markdown( + app: Application, + title: str, + modules: ( + list[FileDataEntry] + | list[FileDataEntryPlatformVersion] + | list[CommitEntryWithDelta] + | list[CommitEntryPlatformWithDelta] + ), +) -> None: + if all(str(value).strip() in ("", "0", "0001-01-01") for value in modules[0].values()): + return # skip empty table + + headers = [k for k in modules[0].keys() if "Bytes" not in k] + + lines = [] + lines.append(f"### {title}") + lines.append("| " + " | ".join(headers) + " |") + lines.append("| " + " | ".join("---" for _ in headers) + " |") + for row in modules: + lines.append("| " + " | ".join(str(row.get(h, "")) for h in headers) + " |") + + markdown = "\n".join(lines) + app.display_markdown(markdown) + + +def print_table( + app: Application, + mode: str, + modules: ( + list[FileDataEntry] + | list[FileDataEntryPlatformVersion] + | list[CommitEntryWithDelta] + | list[CommitEntryPlatformWithDelta] + ), +) -> None: + columns = [col for col in modules[0].keys() if "Bytes" not in col] + modules_table: dict[str, dict[int, str]] = {col: {} for col in columns} + for i, row in enumerate(modules): + if any(str(value).strip() not in ("", "0", "0001-01-01") for value in row.values()): + for key in columns: + modules_table[key][i] = str(row.get(key, "")) + + app.display_table(mode, modules_table) + + +def plot_treemap( + modules: list[FileDataEntry] | list[FileDataEntryPlatformVersion], + title: str, + show: bool, + mode: Literal["status", "diff"] = "status", + path: Optional[str] = None, +) -> None: + if not any(str(value).strip() not in ("", "0") for value in modules[0].values()): + # table is empty + return + + # Initialize figure and axis + plt.figure(figsize=(12, 8)) + ax = plt.gca() + ax.set_axis_off() + + # Calculate the rectangles + if mode == "status": + rects, colors, legend_handles = plot_status_treemap(modules) + + if mode == "diff": + rects, colors, legend_handles = plot_diff_treemap(modules) + + draw_treemap_rects_with_labels(ax, rects, modules, colors) + + # Finalize layout and show/save plot + ax.set_xlim(0, 100) + ax.set_ylim(0, 100) + + plt.title(title, fontsize=16) + + plt.legend(handles=legend_handles, title="Type", loc="center left", bbox_to_anchor=(1.0, 0.5)) + plt.subplots_adjust(right=0.8) + plt.tight_layout() + + if show: + plt.show() + if path: + plt.savefig(path, bbox_inches="tight", format="png") + + +def plot_status_treemap( + modules: list[FileDataEntry] | list[FileDataEntryPlatformVersion], +) -> tuple[list[dict[str, float]], list[tuple[float, float, float, float]], list[Patch]]: + # Calculate the area of the rectangles + sizes = [mod["Size_Bytes"] for mod in modules] + norm_sizes = squarify.normalize_sizes(sizes, 100, 100) + rects = squarify.squarify(norm_sizes, 0, 0, 100, 100) + + # Define the colors for each type + cmap_int = plt.get_cmap("Purples") + cmap_dep = plt.get_cmap("Reds") + + # Assign colors based on type and normalized size + colors = [] + max_area = max(norm_sizes) or 1 + for mod, area in zip(modules, norm_sizes, strict=False): + intensity = scale_colors_treemap(area, max_area) + if mod["Type"] == "Integration": + colors.append(cmap_int(intensity)) + elif mod["Type"] == "Dependency": + colors.append(cmap_dep(intensity)) + else: + colors.append("#999999") + # Define the legend + legend_handles = [ + Patch(color=plt.get_cmap("Purples")(0.6), label="Integration"), + Patch(color=plt.get_cmap("Reds")(0.6), label="Dependency"), + ] + return rects, colors, legend_handles + + +def plot_diff_treemap( + 
modules: list[FileDataEntry] | list[FileDataEntryPlatformVersion], +) -> tuple[list[dict[str, float]], list[tuple[float, float, float, float]], list[Patch]]: + # Define the colors for each type + cmap_pos = plt.get_cmap("Oranges") + cmap_neg = plt.get_cmap("Blues") + + # Separate in negative and positive differences + positives = [mod for mod in modules if mod["Size_Bytes"] > 0] + negatives = [mod for mod in modules if mod["Size_Bytes"] < 0] + + sizes_pos = [mod["Size_Bytes"] for mod in positives] + sizes_neg = [abs(mod["Size_Bytes"]) for mod in negatives] + + sum_pos = sum(sizes_pos) + sum_neg = sum(sizes_neg) + + canvas_area = 50 * 100 + + # Determine dominant side and scale layout accordingly + if sum_pos >= sum_neg: + norm_sizes_pos = [s / sum_pos * canvas_area for s in sizes_pos] + norm_sizes_neg = [s / sum_pos * canvas_area for s in sizes_neg] + rects_neg = squarify.squarify(norm_sizes_neg, 0, 0, 50, 100) + rects_pos = squarify.squarify(norm_sizes_pos, 50, 0, 50, 100) + + else: + norm_sizes_neg = [s / sum_neg * canvas_area for s in sizes_neg] + norm_sizes_pos = [s / sum_neg * canvas_area for s in sizes_pos] + rects_neg = squarify.squarify(norm_sizes_neg, 0, 0, 50, 100) + rects_pos = squarify.squarify(norm_sizes_pos, 50, 0, 50, 100) + + # Merge layout and module lists + rects = rects_neg + rects_pos + modules = negatives + positives + + # Assign colors based on type and normalized size + colors = [] + max_area = max(norm_sizes_pos + norm_sizes_neg) or 1 + + for area in norm_sizes_neg: + intensity = scale_colors_treemap(area, max_area) + colors.append(cmap_neg(intensity)) + + for area in norm_sizes_pos: + intensity = scale_colors_treemap(area, max_area) + colors.append(cmap_pos(intensity)) + + legend_handles = [ + Patch(color=plt.get_cmap("Oranges")(0.7), label="Increase"), + Patch(color=plt.get_cmap("Blues")(0.7), label="Decrease"), + ] + + return rects, colors, legend_handles + + +def scale_colors_treemap(area: float, max_area: float) -> float: + vmin = 0.3 + vmax = 0.65 + return vmin + (area / max_area) * (vmax - vmin) + + +def draw_treemap_rects_with_labels( + ax: plt.Axes, + rects: list[dict], + modules: list[FileDataEntry] | list[FileDataEntryPlatformVersion], + colors: list[tuple[float, float, float, float]], +) -> None: + """ + Draw treemap rectangles with their assigned colors and optional text labels. + + Args: + ax: Matplotlib Axes to draw on. + rects: List of rectangle dicts from squarify, each with 'x', 'y', 'dx', 'dy'. + modules: List of modules associated with each rectangle (same order). + colors: List of colors for each module (same order). 
+ """ + for rect, mod, color in zip(rects, modules, colors, strict=False): + x, y, dx, dy = rect["x"], rect["y"], rect["dx"], rect["dy"] + + # Draw the rectangle with a white border + ax.add_patch(plt.Rectangle((x, y), dx, dy, color=color, ec="white")) + + # Determine font size based on rectangle area + MIN_FONT_SIZE = 6 + MAX_FONT_SIZE = 12 + FONT_SIZE_SCALE = 0.4 + AVG_SIDE = (dx * dy) ** 0.5 # Geometric mean + font_size = max(MIN_FONT_SIZE, min(MAX_FONT_SIZE, AVG_SIDE * FONT_SIZE_SCALE)) + + # Determine the info for the labels + name = mod["Name"] + size_str = f"({mod['Size']})" + + # Estimate if there's enough space for text + CHAR_WIDTH_FACTOR = 0.1 # Width of each character relative to font size + CHAR_HEIGHT_FACTOR = 0.5 # Minimum height for readable text + + name_fits = (len(name) + 2) * font_size * CHAR_WIDTH_FACTOR < dx and dy > font_size * CHAR_HEIGHT_FACTOR + size_fits = (len(size_str) + 2) * font_size * CHAR_WIDTH_FACTOR < dx + both_fit = dy > font_size * CHAR_HEIGHT_FACTOR * 2 # Enough room for two lines + + # If the rectangle is too small, skip the label + if dx < 5 or dy < 5: + label = None + + # If the name doesn't fit, truncate it with "..." + elif not name_fits and dx > 5: + max_chars = int(dx / (font_size * CHAR_WIDTH_FACTOR)) - 2 + if max_chars >= 4: + name = name[: max_chars - 3] + "..." + name_fits = True + + # Build the label based on available space + if name_fits and size_fits and both_fit: + label = f"{name}\n{size_str}" # Two-line label + elif name_fits: + label = name + else: + label = None + + # Draw label centered inside the rectangle + if label: + ax.text( + x + dx / 2, + y + dy / 2, + label, + va="center", + ha="center", + fontsize=font_size, + color="black", + ) + + +class WrongDependencyFormat(Exception): + def __init__(self, mensaje: str) -> None: + super().__init__(mensaje) + + +class GitRepo: + """ + Clones the repo to a temp folder and deletes the folder on exit. + """ + + def __init__(self, url: Path | str) -> None: + self.url = url + self.repo_dir: str + + def __enter__(self): + self.repo_dir = tempfile.mkdtemp() + try: + self._run("git status") + except Exception: + # If it is not already a repo + self._run(f"git clone --quiet {self.url} {self.repo_dir}") + return self + + def _run(self, command: str) -> list[str]: + result = subprocess.run(command, shell=True, capture_output=True, text=True, check=True, cwd=self.repo_dir) + return result.stdout.strip().split("\n") + + def get_module_commits( + self, module_path: str, initial: Optional[str], final: Optional[str], time: Optional[str] + ) -> list[str]: + """ + Returns the list of commits (SHA) that modified a given module, filtered by time or commit range. + + Args: + module_path: Integration name or path to the .deps/resolved file (for dependencies). + initial: Optional initial commit hash. + final: Optional final commit hash. + time: Optional time filter (e.g. '2 weeks ago'). 
+
+        Returns:
+            List of commit SHAs (oldest to newest)
+        """
+        self._run("git fetch origin --quiet")
+        self._run("git checkout origin/HEAD")
+        try:
+            if time:
+                return self._run(f'git log --since="{time}" --reverse --pretty=format:%H -- {module_path}')
+            elif not initial and not final:
+                return self._run(f"git log --reverse --pretty=format:%H -- {module_path}")
+            elif not final:
+                return self._run(f"git log --reverse --pretty=format:%H {initial}..HEAD -- {module_path}")
+            else:
+                try:
+                    self._run(f"git merge-base --is-ancestor {initial} {final}")
+                except subprocess.CalledProcessError:
+                    raise ValueError(f"Commit {initial} does not come before {final}")
+                return self._run(f"git log --reverse --pretty=format:%H {initial}..{final} -- {module_path}")
+        except subprocess.CalledProcessError as e:
+            raise ValueError(
+                "Failed to retrieve commit history.\n"
+                "Make sure that the provided commits are correct and that your "
+                "local repository is up to date with the remote"
+            ) from e
+
+    def checkout_commit(self, commit: str) -> None:
+        try:
+            self._run(f"git fetch --quiet --depth 1 origin {commit}")
+        except subprocess.CalledProcessError as e:
+            if e.returncode == 128:
+                raise ValueError(
+                    f"Failed to fetch commit '{commit}'.\n"
+                    f"Make sure the provided commit hash is correct and that your local repository "
+                    "is up to date with the remote\n"
+                ) from e
+            # Propagate unexpected git failures instead of silently continuing
+            raise
+        self._run(f"git checkout --quiet {commit}")
+
+    def sparse_checkout_commit(self, commit_sha: str, module: str) -> None:
+        self._run("git sparse-checkout init --cone")
+        self._run(f"git sparse-checkout set {module}")
+        self._run(f"git checkout {commit_sha}")
+
+    def get_commit_metadata(self, commit: str) -> tuple[str, str, str]:
+        result = self._run(f'git log -1 --date=format:"%b %d %Y" --pretty=format:"%ad\n%an\n%s" {commit}')
+        date, author, message = result
+        return date, author, message
+
+    def get_creation_commit_module(self, integration: str) -> str:
+        """
+        Returns the first commit (SHA) where the given integration was introduced.
+        """
+        return self._run(f'git log --reverse --format="%H" -- {integration}')[0]
+
+    def __exit__(
+        self,
+        exception_type: Optional[Type[BaseException]],
+        exception_value: Optional[BaseException],
+        exception_traceback: Optional[TracebackType],
+    ) -> None:
+        if self.repo_dir and os.path.exists(self.repo_dir):
+            shutil.rmtree(self.repo_dir)
diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py
new file mode 100644
index 0000000000000..479e89e78742a
--- /dev/null
+++ b/ddev/src/ddev/cli/size/diff.py
@@ -0,0 +1,352 @@
+# (C) Datadog, Inc. 2022-present
+# All rights reserved
+# Licensed under a 3-clause BSD style license (see LICENSE)
+
+import os
+from datetime import datetime
+from typing import Literal, Optional, overload
+
+import click
+from rich.console import Console
+from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
+
+from ddev.cli.application import Application
+
+from .common import (
+    CLIParameters,
+    FileDataEntry,
+    FileDataEntryPlatformVersion,
+    GitRepo,
+    convert_to_human_readable_size,
+    format_modules,
+    get_dependencies,
+    get_files,
+    get_valid_platforms,
+    get_valid_versions,
+    plot_treemap,
+    print_csv,
+    print_json,
+    print_markdown,
+    print_table,
+)
+
+console = Console(stderr=True)
+MINIMUM_DATE = datetime.strptime("Sep 17 2024", "%b %d %Y").date()
+MINIMUM_LENGTH_COMMIT = 7
+
+
+@click.command()
+@click.argument("first_commit")
+@click.argument("second_commit")
+@click.option(
+    "--platform", help="Target platform (e.g. linux-aarch64). If not specified, all platforms will be analyzed"
+)
+@click.option("--python", "version", help="Python version (e.g. 3.12). If not specified, all versions will be analyzed")
+@click.option("--compressed", is_flag=True, help="Measure compressed size")
+@click.option("--csv", is_flag=True, help="Output in CSV format")
+@click.option("--markdown", is_flag=True, help="Output in Markdown format")
+@click.option("--json", is_flag=True, help="Output in JSON format")
+@click.option("--save_to_png_path", help="Path to save the treemap as PNG")
+@click.option(
+    "--show_gui",
+    is_flag=True,
+    help="Display a pop-up window with a treemap showing size differences between the two commits.",
+)
+@click.pass_obj
+def diff(
+    app: Application,
+    first_commit: str,
+    second_commit: str,
+    platform: Optional[str],
+    version: Optional[str],
+    compressed: bool,
+    csv: bool,
+    markdown: bool,
+    json: bool,
+    save_to_png_path: Optional[str],
+    show_gui: bool,
+) -> None:
+    """
+    Compare the size of integrations and dependencies between two commits.
+ """ + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + TimeElapsedColumn(), + transient=True, + console=console, + ) as progress: + task = progress.add_task("[cyan]Calculating differences...", total=None) + if sum([csv, markdown, json]) > 1: + raise click.BadParameter("Only one output format can be selected: --csv, --markdown, or --json") + if len(first_commit) < MINIMUM_LENGTH_COMMIT and len(second_commit) < MINIMUM_LENGTH_COMMIT: + raise click.BadParameter(f"Commit hashes must be at least {MINIMUM_LENGTH_COMMIT} characters long") + elif len(first_commit) < MINIMUM_LENGTH_COMMIT: + raise click.BadParameter( + f"First commit hash must be at least {MINIMUM_LENGTH_COMMIT} characters long.", + param_hint="first_commit", + ) + elif len(second_commit) < MINIMUM_LENGTH_COMMIT: + raise click.BadParameter( + f"Second commit hash must be at least {MINIMUM_LENGTH_COMMIT} characters long.", + param_hint="second_commit", + ) + if first_commit == second_commit: + raise click.BadParameter("Commit hashes must be different") + + repo_url = app.repo.path + + with GitRepo(repo_url) as gitRepo: + try: + date_str, _, _ = gitRepo.get_commit_metadata(first_commit) + date = datetime.strptime(date_str, "%b %d %Y").date() + if date < MINIMUM_DATE: + raise ValueError(f"First commit must be after {MINIMUM_DATE.strftime('%b %d %Y')} ") + valid_platforms = get_valid_platforms(gitRepo.repo_dir) + valid_versions = get_valid_versions(gitRepo.repo_dir) + if platform and platform not in valid_platforms: + raise ValueError(f"Invalid platform: {platform}") + elif version and version not in valid_versions: + raise ValueError(f"Invalid version: {version}") + if platform is None or version is None: + modules_plat_ver: list[FileDataEntryPlatformVersion] = [] + platforms = valid_platforms if platform is None else [platform] + versions = valid_versions if version is None else [version] + progress.remove_task(task) + combinations = [(p, v) for p in platforms for v in versions] + for plat, ver in combinations: + path = None + if save_to_png_path: + base, ext = os.path.splitext(save_to_png_path) + path = f"{base}_{plat}_{ver}{ext}" + parameters: CLIParameters = { + "app": app, + "platform": plat, + "version": ver, + "compressed": compressed, + "csv": csv, + "markdown": markdown, + "json": json, + "save_to_png_path": path, + "show_gui": show_gui, + } + multiple_plats_and_vers: Literal[True] = True + modules_plat_ver.extend( + diff_mode( + gitRepo, + first_commit, + second_commit, + parameters, + progress, + multiple_plats_and_vers, + ) + ) + if csv: + print_csv(app, modules_plat_ver) + elif json: + print_json(app, modules_plat_ver) + else: + progress.remove_task(task) + modules: list[FileDataEntry] = [] + multiple_plat_and_ver: Literal[False] = False + base_parameters: CLIParameters = { + "app": app, + "platform": platform, + "version": version, + "compressed": compressed, + "csv": csv, + "markdown": markdown, + "json": json, + "save_to_png_path": save_to_png_path, + "show_gui": show_gui, + } + modules.extend( + diff_mode( + gitRepo, + first_commit, + second_commit, + base_parameters, + progress, + multiple_plat_and_ver, + ) + ) + if csv: + print_csv(app, modules) + elif json: + print_json(app, modules) + except Exception as e: + progress.stop() + app.abort(str(e)) + return None + + +@overload +def diff_mode( + gitRepo: GitRepo, + first_commit: str, + second_commit: str, + params: CLIParameters, + progress: Progress, + multiple_plats_and_vers: Literal[True], +) -> 
list[FileDataEntryPlatformVersion]: ... +@overload +def diff_mode( + gitRepo: GitRepo, + first_commit: str, + second_commit: str, + params: CLIParameters, + progress: Progress, + multiple_plats_and_vers: Literal[False], +) -> list[FileDataEntry]: ... +def diff_mode( + gitRepo: GitRepo, + first_commit: str, + second_commit: str, + params: CLIParameters, + progress: Progress, + multiple_plats_and_vers: bool, +) -> list[FileDataEntryPlatformVersion] | list[FileDataEntry]: + files_b, dependencies_b, files_a, dependencies_a = get_repo_info( + gitRepo, params["platform"], params["version"], first_commit, second_commit, params["compressed"], progress + ) + + integrations = get_diff(files_b, files_a, "Integration") + dependencies = get_diff(dependencies_b, dependencies_a, "Dependency") + + if integrations + dependencies == []: + params["app"].display_error( + f"No size differences were detected between the selected commits for {params['platform']}" + ) + formatted_modules = format_modules( + integrations + dependencies, params["platform"], params["version"], multiple_plats_and_vers + ) + else: + formatted_modules = format_modules( + integrations + dependencies, params["platform"], params["version"], multiple_plats_and_vers + ) + formatted_modules.sort(key=lambda x: x["Size_Bytes"], reverse=True) + for module in formatted_modules: + if module["Size_Bytes"] > 0: + module["Size"] = f"+{module['Size']}" + + if params["markdown"]: + print_markdown(params["app"], "Differences between selected commits", formatted_modules) + elif not params["csv"] and not params["json"]: + print_table(params["app"], "Differences between selected commits", formatted_modules) + + if params["show_gui"] or params["save_to_png_path"]: + plot_treemap( + formatted_modules, + f"Disk Usage Differences for {params['platform']} and Python version {params['version']}", + params["show_gui"], + "diff", + params["save_to_png_path"], + ) + + return formatted_modules + + +def get_repo_info( + gitRepo: GitRepo, + platform: str, + version: str, + first_commit: str, + second_commit: str, + compressed: bool, + progress: Progress, +) -> tuple[list[FileDataEntry], list[FileDataEntry], list[FileDataEntry], list[FileDataEntry]]: + with progress: + """ + Retrieves integration and dependency sizes for two commits in the repo. + + Args: + gitRepo: An instance of GitRepo for accessing the repository. + platform: Target platform for dependency resolution. + version: Python version for dependency resolution. + first_commit: The earlier commit SHA to compare. + second_commit: The later commit SHA to compare. + compressed: Whether to measure compressed sizes. + progress: Rich Progress bar. 
+ + Returns: + A tuple of four lists: + - files_b: Integration sizes at first_commit + - dependencies_b: Dependency sizes at first_commit + - files_a: Integration sizes at second_commit + - dependencies_a: Dependency sizes at second_commit + """ + + repo = gitRepo.repo_dir + task = progress.add_task("[cyan]Calculating sizes for the first commit...", total=None) + gitRepo.checkout_commit(first_commit) + files_b = get_files(repo, compressed) + dependencies_b = get_dependencies(repo, platform, version, compressed) + progress.remove_task(task) + + task = progress.add_task("[cyan]Calculating sizes for the second commit...", total=None) + gitRepo.checkout_commit(second_commit) + files_a = get_files(repo, compressed) + dependencies_a = get_dependencies(repo, platform, version, compressed) + progress.remove_task(task) + + return files_b, dependencies_b, files_a, dependencies_a + + +def get_diff( + size_first_commit: list[FileDataEntry], size_second_commit: list[FileDataEntry], type: str +) -> list[FileDataEntry]: + """ + Computes size differences between two sets of integrations or dependencies. + + Args: + size_first_commit: Entries from the first (earlier) commit. + size_second_commit: Entries from the second (later) commit. + type: Integration/Dependency + + Returns: + A list of FileDataEntry items representing only the entries with a size difference. + Entries include new, deleted, or changed modules, with delta size in bytes and human-readable format. + """ + + first_commit = {entry["Name"]: entry for entry in size_first_commit} + second_commit = {entry["Name"]: entry for entry in size_second_commit} + + all_names = set(first_commit) | set(second_commit) + diffs: list[FileDataEntry] = [] + + for name in all_names: + b = first_commit.get(name) + a = second_commit.get(name) + + size_b = b["Size_Bytes"] if b else 0 + size_a = a["Size_Bytes"] if a else 0 + delta = size_a - size_b + + if delta == 0: + continue + + ver_b = b["Version"] if b else "" + ver_a = a["Version"] if a else "" + + if size_b == 0: + name_str = f"{name} (NEW)" + version_str = ver_a + elif size_a == 0: + name_str = f"{name} (DELETED)" + version_str = ver_b + else: + name_str = name + version_str = f"{ver_b} -> {ver_a}" if ver_a != ver_b else ver_a + + diffs.append( + { + "Name": name_str, + "Version": version_str, + "Type": type, + "Size_Bytes": delta, + "Size": convert_to_human_readable_size(delta), + } + ) + + return diffs diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py new file mode 100644 index 0000000000000..ad09b45e89ed3 --- /dev/null +++ b/ddev/src/ddev/cli/size/status.py @@ -0,0 +1,176 @@ +# (C) Datadog, Inc. 2022-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +import os # noqa: I001 +from pathlib import Path +from typing import Optional, Literal, overload + +import click +from rich.console import Console + +from ddev.cli.application import Application + +from .common import ( + FileDataEntry, + FileDataEntryPlatformVersion, + CLIParameters, + format_modules, + get_dependencies, + get_files, + get_valid_platforms, + get_valid_versions, + plot_treemap, + print_csv, + print_json, + print_markdown, + print_table, +) + +console = Console(stderr=True) + + +@click.command() +@click.option( + "--platform", help="Target platform (e.g. linux-aarch64). If not specified, all platforms will be analyzed" +) +@click.option("--python", "version", help="Python version (e.g 3.12). 
If not specified, all versions will be analyzed") +@click.option("--compressed", is_flag=True, help="Measure compressed size") +@click.option("--csv", is_flag=True, help="Output in CSV format") +@click.option("--markdown", is_flag=True, help="Output in Markdown format") +@click.option("--json", is_flag=True, help="Output in JSON format") +@click.option("--save_to_png_path", help="Path to save the treemap as PNG") +@click.option( + "--show_gui", + is_flag=True, + help="Display a pop-up window with a treemap showing the current size distribution of modules.", +) +@click.pass_obj +def status( + app: Application, + platform: Optional[str], + version: Optional[str], + compressed: bool, + csv: bool, + markdown: bool, + json: bool, + save_to_png_path: Optional[str], + show_gui: bool, +) -> None: + """ + Show the current size of all integrations and dependencies. + """ + try: + if sum([csv, markdown, json]) > 1: + raise click.BadParameter("Only one output format can be selected: --csv, --markdown, or --json") + repo_path = app.repo.path + valid_platforms = get_valid_platforms(repo_path) + valid_versions = get_valid_versions(repo_path) + if platform and platform not in valid_platforms: + raise ValueError(f"Invalid platform: {platform}") + elif version and version not in valid_versions: + raise ValueError(f"Invalid version: {version}") + + if platform is None or version is None: + modules_plat_ver: list[FileDataEntryPlatformVersion] = [] + platforms = valid_platforms if platform is None else [platform] + versions = valid_versions if version is None else [version] + combinations = [(p, v) for p in platforms for v in versions] + for plat, ver in combinations: + multiple_plats_and_vers: Literal[True] = True + path = None + if save_to_png_path: + base, ext = os.path.splitext(save_to_png_path) + path = f"{base}_{plat}_{ver}{ext}" + parameters: CLIParameters = { + "app": app, + "platform": plat, + "version": ver, + "compressed": compressed, + "csv": csv, + "markdown": markdown, + "json": json, + "save_to_png_path": path, + "show_gui": show_gui, + } + modules_plat_ver.extend( + status_mode( + repo_path, + parameters, + multiple_plats_and_vers, + ) + ) + if csv: + print_csv(app, modules_plat_ver) + elif json: + print_json(app, modules_plat_ver) + else: + modules: list[FileDataEntry] = [] + multiple_plat_and_ver: Literal[False] = False + base_parameters: CLIParameters = { + "app": app, + "platform": platform, + "version": version, + "compressed": compressed, + "csv": csv, + "markdown": markdown, + "json": json, + "save_to_png_path": save_to_png_path, + "show_gui": show_gui, + } + modules.extend( + status_mode( + repo_path, + base_parameters, + multiple_plat_and_ver, + ) + ) + if csv: + print_csv(app, modules) + elif json: + print_json(app, modules) + + except Exception as e: + app.abort(str(e)) + + +@overload +def status_mode( + repo_path: Path, + params: CLIParameters, + multiple_plats_and_vers: Literal[True], +) -> list[FileDataEntryPlatformVersion]: ... +@overload +def status_mode( + repo_path: Path, + params: CLIParameters, + multiple_plats_and_vers: Literal[False], +) -> list[FileDataEntry]: ... 
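+# A minimal sketch of how the @overload pair above narrows the return type for
+# callers (illustrative only; `params` stands for an already-built CLIParameters):
+#
+#     per_plat = status_mode(repo_path, params, True)   # -> list[FileDataEntryPlatformVersion]
+#     single = status_mode(repo_path, params, False)    # -> list[FileDataEntry]
+#
+# A plain bool would not match either Literal overload, which is why the command
+# body annotates the flag as Literal[True] / Literal[False] before calling.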
+def status_mode( + repo_path: Path, + params: CLIParameters, + multiple_plats_and_vers: bool, +) -> list[FileDataEntryPlatformVersion] | list[FileDataEntry]: + with console.status("[cyan]Calculating sizes...", spinner="dots"): + modules = get_files(repo_path, params["compressed"]) + get_dependencies( + repo_path, params["platform"], params["version"], params["compressed"] + ) + + formatted_modules = format_modules(modules, params["platform"], params["version"], multiple_plats_and_vers) + formatted_modules.sort(key=lambda x: x["Size_Bytes"], reverse=True) + + if params["markdown"]: + print_markdown(params["app"], "Status", formatted_modules) + elif not params["csv"] and not params["json"]: + print_table(params["app"], "Status", formatted_modules) + + if params["show_gui"] or params["save_to_png_path"]: + plot_treemap( + formatted_modules, + f"Disk Usage Status for {params['platform']} and Python version {params['version']}", + params["show_gui"], + "status", + params["save_to_png_path"], + ) + + return formatted_modules diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py new file mode 100644 index 0000000000000..caec7c3efd992 --- /dev/null +++ b/ddev/src/ddev/cli/size/timeline.py @@ -0,0 +1,888 @@ +import os +import re +import tempfile +import zipfile +from datetime import date, datetime +from pathlib import Path +from typing import Literal, Optional, overload + +import click +import matplotlib.pyplot as plt +import requests +from rich.console import Console +from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn + +from ddev.cli.application import Application + +from .common import ( + CommitEntry, + CommitEntryPlatformWithDelta, + CommitEntryWithDelta, + GitRepo, + InitialParametersTimelineDependency, + InitialParametersTimelineIntegration, + WrongDependencyFormat, + compress, + convert_to_human_readable_size, + extract_version_from_about_py, + get_gitignore_files, + get_valid_platforms, + is_correct_dependency, + is_valid_integration, + print_csv, + print_json, + print_markdown, + print_table, +) + +MINIMUM_DATE_DEPENDENCIES = datetime.strptime( + "Apr 3 2024", "%b %d %Y" +).date() # Dependencies not available before this date due to a storage change +MINIMUM_LENGTH_COMMIT = 7 +console = Console(stderr=True) + + +@click.command() +@click.argument("type", type=click.Choice(["integration", "dependency"])) +@click.argument("name") +@click.argument("initial_commit", required=False) +@click.argument("final_commit", required=False) +@click.option( + "--time", + help="Filter commits starting from a specific date. Accepts both absolute and relative formats, " + "such as '2025-03-01', '2 weeks ago', or 'yesterday'", +) +@click.option( + "--threshold", + type=click.IntRange(min=0), + help="Only show modules with size differences greater than a threshold in bytes", +) +@click.option( + "--platform", + help="Target platform to analyze. Only required for dependencies. 
If not specified, all platforms will be analyzed",
+)
+@click.option("--compressed", is_flag=True, help="Measure compressed size")
+@click.option("--csv", is_flag=True, help="Output results in CSV format")
+@click.option("--markdown", is_flag=True, help="Output in Markdown format")
+@click.option("--json", is_flag=True, help="Output in JSON format")
+@click.option("--save_to_png_path", help="Path to save the line chart as PNG")
+@click.option(
+    "--show_gui",
+    is_flag=True,
+    help="Display a pop-up window with a line chart showing the size evolution of the selected module over time.",
+)
+@click.pass_obj
+def timeline(
+    app: Application,
+    type: str,
+    name: str,
+    initial_commit: Optional[str],
+    final_commit: Optional[str],
+    time: Optional[str],
+    threshold: Optional[int],
+    platform: Optional[str],
+    compressed: bool,
+    csv: bool,
+    markdown: bool,
+    json: bool,
+    save_to_png_path: Optional[str],
+    show_gui: bool,
+) -> None:
+    """
+    Show the size evolution of a module (integration or dependency) over time.
+    """
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        TimeElapsedColumn(),
+        transient=True,
+        console=console,
+    ) as progress:
+        module = name  # module is the name of the integration or the dependency
+        if sum([csv, markdown, json]) > 1:
+            raise click.BadParameter("Only one output format can be selected: --csv, --markdown, or --json")
+        elif (
+            initial_commit
+            and final_commit
+            and len(initial_commit) < MINIMUM_LENGTH_COMMIT
+            and len(final_commit) < MINIMUM_LENGTH_COMMIT
+        ):
+            raise click.BadParameter(f"Commit hashes must be at least {MINIMUM_LENGTH_COMMIT} characters long")
+        elif initial_commit and len(initial_commit) < MINIMUM_LENGTH_COMMIT:
+            raise click.BadParameter(
+                f"Initial commit hash must be at least {MINIMUM_LENGTH_COMMIT} characters long.",
+                param_hint="initial_commit",
+            )
+        elif final_commit and len(final_commit) < MINIMUM_LENGTH_COMMIT:
+            raise click.BadParameter(
+                f"Final commit hash must be at least {MINIMUM_LENGTH_COMMIT} characters long.",
+                param_hint="final_commit",
+            )
+        elif final_commit and initial_commit and final_commit == initial_commit:
+            raise click.BadParameter("Commit hashes must be different")
+        task = progress.add_task("[cyan]Calculating timeline...", total=None)
+        url = app.repo.path
+
+        with GitRepo(url) as gitRepo:
+            try:
+                if final_commit and type == "dependency":
+                    date_str, _, _ = gitRepo.get_commit_metadata(final_commit)
+                    date = datetime.strptime(date_str, "%b %d %Y").date()
+                    if date < MINIMUM_DATE_DEPENDENCIES:
+                        raise ValueError(
+                            f"Final commit must be after {MINIMUM_DATE_DEPENDENCIES.strftime('%b %d %Y')}"
+                            " for dependencies"
+                        )
+                folder = module if type == "integration" else ".deps/resolved"
+                commits = gitRepo.get_module_commits(folder, initial_commit, final_commit, time)
+                first_commit = gitRepo.get_creation_commit_module(module)
+                if final_commit and commits == []:
+                    gitRepo.checkout_commit(final_commit)
+                elif commits != []:
+                    gitRepo.checkout_commit(commits[-1])
+                if type == "dependency":
+                    valid_platforms = get_valid_platforms(gitRepo.repo_dir)
+                    if platform and platform not in valid_platforms:
+                        raise ValueError(f"Invalid platform: {platform}")
+                if commits == [""] and type == "integration" and module_exists(gitRepo.repo_dir, module):
+                    progress.remove_task(task)
+                    progress.stop()
+                    app.display_error(f"No changes found for {type}: {module}")
+                    return
+                elif commits == [""] and type == "integration" and not module_exists(gitRepo.repo_dir, module):
+                    raise
ValueError(f"Integration {module} not found in latest commit, is the name correct?") + elif ( + type == "dependency" + and platform + and module not in get_dependency_list(gitRepo.repo_dir, {platform}) + ): + raise ValueError( + f"Dependency {module} not found in latest commit for the platform {platform}, " + "is the name correct?" + ) + elif ( + type == "dependency" + and not platform + and module not in get_dependency_list(gitRepo.repo_dir, valid_platforms) + ): + raise ValueError(f"Dependency {module} not found in latest commit, is the name correct?") + elif type == "dependency" and commits == [""]: + progress.remove_task(task) + progress.stop() + app.display_error(f"No changes found for {type}: {module}") + return + if type == "dependency": + modules_plat: list[CommitEntryPlatformWithDelta] = [] + multiple_plats_and_vers: Literal[True] = True + progress.remove_task(task) + dep_parameters: InitialParametersTimelineDependency + if not platform: + for plat in valid_platforms: + path = None + if save_to_png_path: + base, ext = os.path.splitext(save_to_png_path) + path = f"{base}_{plat}{ext}" + dep_parameters = { + "app": app, + "type": "dependency", + "module": module, + "threshold": threshold, + "platform": plat, + "compressed": compressed, + "csv": csv, + "markdown": markdown, + "json": json, + "save_to_png_path": path, + "show_gui": show_gui, + "first_commit": None, + } + + modules_plat.extend( + timeline_mode( + gitRepo, + commits, + dep_parameters, + multiple_plats_and_vers, + progress, + ) + ) + + else: + dep_parameters = { + "app": app, + "type": "dependency", + "module": module, + "threshold": threshold, + "platform": platform, + "compressed": compressed, + "csv": csv, + "markdown": markdown, + "json": json, + "save_to_png_path": save_to_png_path, + "show_gui": show_gui, + "first_commit": None, + } + modules_plat.extend( + timeline_mode( + gitRepo, + commits, + dep_parameters, + multiple_plats_and_vers, + progress, + ) + ) + + if csv: + print_csv(app, modules_plat) + elif json: + print_json(app, modules_plat) + else: + modules: list[CommitEntryWithDelta] = [] + multiple_plat_and_ver: Literal[False] = False + int_parameters: InitialParametersTimelineIntegration = { + "app": app, + "type": "integration", + "module": module, + "threshold": threshold, + "platform": None, + "compressed": compressed, + "csv": csv, + "markdown": markdown, + "json": json, + "save_to_png_path": save_to_png_path, + "show_gui": show_gui, + "first_commit": first_commit, + } + progress.remove_task(task) + modules.extend( + timeline_mode( + gitRepo, + commits, + int_parameters, + multiple_plat_and_ver, + progress, + ) + ) + if csv: + print_csv(app, modules) + elif json: + print_json(app, modules) + + except Exception as e: + progress.stop() + app.abort(str(e)) + + +@overload +def timeline_mode( + gitRepo: GitRepo, + commits: list[str], + params: InitialParametersTimelineDependency, + multiple_plats_and_vers: Literal[True], + progress: Progress, +) -> list[CommitEntryPlatformWithDelta]: ... + + +@overload +def timeline_mode( + gitRepo: GitRepo, + commits: list[str], + params: InitialParametersTimelineIntegration, + multiple_plats_and_vers: Literal[False], + progress: Progress, +) -> list[CommitEntryWithDelta]: ... + + +@overload +def timeline_mode( + gitRepo: GitRepo, + commits: list[str], + params: InitialParametersTimelineDependency, + multiple_plats_and_vers: Literal[False], + progress: Progress, +) -> list[CommitEntryWithDelta]: ... 
+def timeline_mode(
+    gitRepo: GitRepo,
+    commits: list[str],
+    params: InitialParametersTimelineIntegration | InitialParametersTimelineDependency,
+    multiple_plats_and_vers: bool,
+    progress: Progress,
+) -> list[CommitEntryWithDelta] | list[CommitEntryPlatformWithDelta]:
+    # Both branches make the same call; branching on params["type"] lets the
+    # type checker narrow the params union and pick the matching overload.
+    if params["type"] == "integration":
+        modules = get_repo_info(
+            gitRepo,
+            params,
+            commits,
+            progress,
+        )
+    else:
+        modules = get_repo_info(
+            gitRepo,
+            params,
+            commits,
+            progress,
+        )
+    trimmed_modules = trim_modules(modules, params["threshold"])
+    formatted_modules = format_modules(trimmed_modules, params["platform"], multiple_plats_and_vers)
+
+    if params["markdown"]:
+        print_markdown(params["app"], "Timeline for " + params["module"], formatted_modules)
+    elif not params["csv"] and not params["json"]:
+        print_table(params["app"], "Timeline for " + params["module"], formatted_modules)
+
+    if params["show_gui"] or params["save_to_png_path"]:
+        plot_linegraph(
+            formatted_modules, params["module"], params["platform"], params["show_gui"], params["save_to_png_path"]
+        )
+
+    return formatted_modules
+
+
+@overload
+def get_repo_info(
+    gitRepo: GitRepo,
+    params: InitialParametersTimelineIntegration,
+    commits: list[str],
+    progress: Progress,
+) -> list[CommitEntry]: ...
+
+
+@overload
+def get_repo_info(
+    gitRepo: GitRepo,
+    params: InitialParametersTimelineDependency,
+    commits: list[str],
+    progress: Progress,
+) -> list[CommitEntry]: ...
+
+
+def get_repo_info(
+    gitRepo: GitRepo,
+    params: InitialParametersTimelineIntegration | InitialParametersTimelineDependency,
+    commits: list[str],
+    progress: Progress,
+) -> list[CommitEntry]:
+    """
+    Retrieves size and metadata info for a module across multiple commits.
+
+    Args:
+        gitRepo: Active GitRepo instance.
+        params: Typed dictionary containing the module name, type, platform, and other configuration options
+            (including the first commit hash for integrations).
+        commits: List of commits to process.
+        progress: Progress bar instance.
+
+    Returns:
+        A list of CommitEntry objects with size, version, date, author, commit message and commit hash.
+    """
+    with progress:
+        # Same call in both branches; narrowing on params["type"] selects the
+        # matching process_commits overload for the type checker.
+        if params["type"] == "integration":
+            file_data = process_commits(commits, params, gitRepo, progress, params["first_commit"])
+        else:
+            file_data = process_commits(commits, params, gitRepo, progress, params["first_commit"])
+    return file_data
+
+
+@overload
+def process_commits(
+    commits: list[str],
+    params: InitialParametersTimelineIntegration,
+    gitRepo: GitRepo,
+    progress: Progress,
+    first_commit: str,
+) -> list[CommitEntry]: ...
+
+
+@overload
+def process_commits(
+    commits: list[str],
+    params: InitialParametersTimelineDependency,
+    gitRepo: GitRepo,
+    progress: Progress,
+    first_commit: None,
+) -> list[CommitEntry]: ...
+
+
+def process_commits(
+    commits: list[str],
+    params: InitialParametersTimelineIntegration | InitialParametersTimelineDependency,
+    gitRepo: GitRepo,
+    progress: Progress,
+    first_commit: Optional[str],
+) -> list[CommitEntry]:
+    """
+    Processes a list of commits for a given integration or dependency.
+
+    For each commit, it checks out the corresponding version of the module,
+    retrieves its metadata, and calculates its size.
+
+    Args:
+        commits: List of commit SHAs to process.
+        params: InitialParametersTimelineIntegration or InitialParametersTimelineDependency dict containing module name,
+            type, platform, and other configuration options.
+        gitRepo: GitRepo instance managing the repository.
+        progress: Progress bar instance.
+        first_commit: First commit hash where the given integration was introduced (only for integrations).
+
+    Returns:
+        A list of CommitEntry objects with commit metadata and size information.
+    """
+    file_data: list[CommitEntry] = []
+    task = progress.add_task("[cyan]Processing commits...", total=len(commits))
+    repo = gitRepo.repo_dir
+
+    # Only the folder relevant to this module is checked out at each commit.
+    folder = params["module"] if params["type"] == "integration" else ".deps/resolved"
+
+    for commit in commits:
+        gitRepo.sparse_checkout_commit(commit, folder)
+        date_str, author, message = gitRepo.get_commit_metadata(commit)
+        date, message, commit = format_commit_data(date_str, message, commit, first_commit)
+        # Dependency sizes are only tracked for commits newer than
+        # MINIMUM_DATE_DEPENDENCIES; older commits are skipped.
+        if params["type"] == "dependency" and date > MINIMUM_DATE_DEPENDENCIES:
+            assert params["platform"] is not None
+            result = get_dependencies(
+                repo,
+                params["module"],
+                params["platform"],
+                commit,
+                date,
+                author,
+                message,
+                params["compressed"],
+            )
+            if result:
+                file_data.append(result)
+        elif params["type"] == "integration":
+            file_data = get_files(
+                repo,
+                params["module"],
+                commit,
+                date,
+                author,
+                message,
+                file_data,
+                params["compressed"],
+            )
+
+        progress.advance(task)
+
+    progress.remove_task(task)
+    return file_data
+
+
+def get_files(
+    repo_path: str,
+    module: str,
+    commit: str,
+    date: date,
+    author: str,
+    message: str,
+    file_data: list[CommitEntry],
+    compressed: bool,
+) -> list[CommitEntry]:
+    """
+    Calculates integration file sizes and versions from a repository.
+
+    If the integration folder no longer exists, a 'Deleted' entry is added. Otherwise,
+    it walks the module directory, sums file sizes, extracts the version, and appends a CommitEntry.
+
+    Args:
+        repo_path: Path to the local Git repository.
+        module: Name of the integration.
+        commit: Commit SHA being analyzed.
+        date: Commit date.
+        author: Commit author.
+        message: Commit message.
+        file_data: List to append the result to.
+        compressed: Whether to use compressed file sizes.
+
+    Returns:
+        The updated file_data list with one new CommitEntry appended.
+    """
+    module_path = os.path.join(repo_path, module)
+
+    if not module_exists(repo_path, module):
+        file_data.append(
+            {
+                "Size_Bytes": 0,
+                "Version": "Deleted",
+                "Date": date,
+                "Author": author,
+                "Commit_Message": f"(DELETED) {message}",
+                "Commit_SHA": commit,
+            }
+        )
+        return file_data
+
+    ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"}
+    git_ignore = get_gitignore_files(repo_path)
+    included_folder = "datadog_checks" + os.sep
+
+    total_size = 0
+    version = ""
+
+    for root, _, files in os.walk(module_path):
+        for file in files:
+            file_path = os.path.join(root, file)
+            relative_path = os.path.relpath(file_path, repo_path)
+
+            if not is_valid_integration(relative_path, included_folder, ignored_files, git_ignore):
+                continue
+
+            if file == "__about__.py" and "datadog_checks" in relative_path:
+                version = extract_version_from_about_py(file_path)
+
+            size = compress(file_path) if compressed else os.path.getsize(file_path)
+            total_size += size
+
+    file_data.append(
+        {
+            "Size_Bytes": total_size,
+            "Version": version,
+            "Date": date,
+            "Author": author,
+            "Commit_Message": message,
+            "Commit_SHA": commit,
+        }
+    )
+    return file_data
+
+
+def get_dependencies(
+    repo_path: str,
+    module: str,
+    platform: str,
+    commit: str,
+    date: date,
+    author: str,
+    message: str,
+    compressed: bool,
+) -> Optional[CommitEntry]:
+    """
+    Returns the size and metadata of a dependency for a given commit and platform.
+
+    Args:
+        repo_path: Path to the repository.
+        module: Dependency name to look for.
+        platform: Target platform to match (e.g., 'linux-x86_64').
+        commit: Commit SHA being analyzed.
+        date: Commit date.
+        author: Commit author.
+        message: Commit message.
+        compressed: Whether to calculate compressed size or uncompressed.
+
+    Returns:
+        A CommitEntry with size and metadata if the dependency is found, else None.
+    """
+    resolved_path = os.path.join(repo_path, ".deps/resolved")
+    paths = os.listdir(resolved_path)
+    version = get_version(paths, platform)
+    for filename in paths:
+        file_path = os.path.join(resolved_path, filename)
+        if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename):
+            download_url, dep_version = get_dependency_data(file_path, module)
+            return (
+                get_dependency_size(download_url, dep_version, commit, date, author, message, compressed)
+                if download_url and dep_version is not None
+                else None
+            )
+    return None
+
+
+def get_dependency_data(file_path: str, module: str) -> tuple[Optional[str], Optional[str]]:
+    """
+    Parses a resolved dependency file and extracts the download URL and version for the given dependency.
+
+    Args:
+        file_path: Path to the file containing the dependencies.
+        module: Name of the dependency.
+
+    Returns:
+        A (download URL, version) tuple, or (None, None) if the dependency is not listed in the file.
+    """
+    with open(file_path, "r", encoding="utf-8") as file:
+        file_content = file.read()
+        for line in file_content.splitlines():
+            match = re.search(r"([\w\-\d\.]+) @ (https?://[^\s#]+)", line)
+            if not match:
+                raise WrongDependencyFormat("Expected dependency lines in the format 'name @ link'.")
+            name, url = match.groups()
+            if name == module:
+                version_match = re.search(rf"{re.escape(name)}/[^/]+?-([0-9]+(?:\.[0-9]+)*)-", url)
+                version = version_match.group(1) if version_match else ""
+                return url, version
+    return None, None
+
+
+def get_dependency_size(
+    download_url: str, version: str, commit: str, date: date, author: str, message: str, compressed: bool
+) -> CommitEntry:
+    """
+    Calculates the size of a dependency wheel at a given commit.
+
+    Args:
+        download_url: URL to download the wheel file.
+        version: Dependency version.
+        commit: Commit SHA being analyzed.
+        date: Commit date.
+        author: Commit author.
+        message: Commit message.
+        compressed: If True, use Content-Length. If False, download and decompress to calculate size.
+
+    Returns:
+        A CommitEntry with size and metadata for the given dependency and commit.
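+
+    Example (illustrative values only):
+        get_dependency_size(
+            "https://example.com/dep1/dep1-1.2.3-py3-none-any.whl",
+            "1.2.3", "abc1234", date(2025, 1, 1), "Jane Doe", "Bump dep1 (#1)", compressed=True,
+        )
+        # With compressed=True only a HEAD request is made and Size_Bytes comes
+        # from the Content-Length header; no wheel is downloaded.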
+ """ + if compressed: + response = requests.head(download_url) + response.raise_for_status() + size_str = response.headers.get("Content-Length") + if size_str is None: + raise ValueError(f"Missing size for commit {commit}") + size = int(size_str) + else: + with requests.get(download_url, stream=True) as response: + response.raise_for_status() + wheel_data = response.content + + with tempfile.TemporaryDirectory() as tmpdir: + wheel_path = Path(tmpdir) / "package.whl" + with open(wheel_path, "wb") as f: + f.write(wheel_data) + extract_path = Path(tmpdir) / "extracted" + with zipfile.ZipFile(wheel_path, "r") as zip_ref: + zip_ref.extractall(extract_path) + + size = 0 + for dirpath, _, filenames in os.walk(extract_path): + for name in filenames: + file_path = os.path.join(dirpath, name) + size += os.path.getsize(file_path) + + commit_entry: CommitEntry = { + "Size_Bytes": size, + "Version": version, + "Date": date, + "Author": author, + "Commit_Message": message, + "Commit_SHA": commit, + } + return commit_entry + + +def get_version(files: list[str], platform: str) -> str: + """ + Returns the latest Python version for the given target platform based on .deps/resolved filenames. + + Args: + files: List of filenames from the .deps/resolved folder. + platform: Target platform. + + Returns: + If the version is a single digit (e.g., '3'), returns 'py3'; + otherwise (e.g., '3.12'), returns it as-is. + """ + final_version = "" + for file in files: + if platform in file: + curr_version = file.split("_")[-1] + match = re.search(r"\d+(?:\.\d+)?", curr_version) + version = match.group(0) if match else None + if version and version > final_version: + final_version = version + return final_version if len(final_version) != 1 else "py" + final_version + + +def format_modules( + modules: list[CommitEntryWithDelta], + platform: Optional[str], + multiple_plats_and_vers: bool, +) -> list[CommitEntryWithDelta] | list[CommitEntryPlatformWithDelta]: + """ + Formats the modules list, adding platform and Python version information if needed. + + If the modules list is empty, returns a default empty entry (with or without platform information). + """ + if modules == [] and multiple_plats_and_vers and platform: + empty_module_platform: CommitEntryPlatformWithDelta = { + "Size_Bytes": 0, + "Version": "", + "Date": datetime.min.date(), + "Author": "", + "Commit_Message": "", + "Commit_SHA": "", + "Delta_Bytes": 0, + "Delta": " ", + "Platform": "", + } + return [empty_module_platform] + elif modules == []: + empty_module: CommitEntryWithDelta = { + "Size_Bytes": 0, + "Version": "", + "Date": datetime.min.date(), + "Author": "", + "Commit_Message": "", + "Commit_SHA": "", + "Delta_Bytes": 0, + "Delta": " ", + } + return [empty_module] + elif multiple_plats_and_vers and platform: + new_modules: list[CommitEntryPlatformWithDelta] = [{**entry, "Platform": platform} for entry in modules] + return new_modules + else: + return modules + + +def trim_modules( + modules: list[CommitEntry], + threshold: Optional[int] = None, +) -> list[CommitEntryWithDelta]: + """ + Filters a list of commit entries, keeping only those with significant size changes. + + Args: + modules: List of CommitEntry items ordered by commit date. + threshold: Minimum size change (in bytes) required to keep an entry. Defaults to 0. + + Returns: + A list of CommitEntryWithDelta objects: + - Always includes the first and last entry. + - Includes intermediate entries where size difference exceeds the threshold. 
+ - Adds Delta_Bytes and human-readable Delta for each included entry. + - Marks version transitions as 'X -> Y' when the version changes. + """ + if modules == []: + empty_modules: list[CommitEntryWithDelta] = [] + return empty_modules + + threshold = threshold or 0 + + trimmed_modules: list[CommitEntryWithDelta] = [] + + first: CommitEntryWithDelta = { + **modules[0], + "Delta_Bytes": 0, + "Delta": " ", + } + trimmed_modules.append(first) + + last_version = modules[0]["Version"] + + for j in range(1, len(modules)): + prev = modules[j - 1] + curr = modules[j] + delta = curr["Size_Bytes"] - prev["Size_Bytes"] + + if abs(delta) > threshold or j == len(modules) - 1: + new_entry: CommitEntryWithDelta = { + **curr, + "Delta_Bytes": delta, + "Delta": convert_to_human_readable_size(delta), + } + + curr_version = curr["Version"] + if curr_version != "" and curr_version != last_version: + new_entry["Version"] = f"{last_version} -> {curr_version}" + last_version = curr_version + + trimmed_modules.append(new_entry) + + return trimmed_modules + + +def format_commit_data(date_str: str, message: str, commit: str, first_commit: Optional[str]) -> tuple[date, str, str]: + """ + Formats commit metadata by shortening the message, marking the first commit, and parsing the date. + Args: + date_str: Commit date as a string (e.g., 'Apr 3 2024'). + message: Original commit message. + commit: commit SHA. + first_commit: First commit hash where the given integration was introduced (only for integrations). + + Returns: + A tuple containing: + - Parsed date object, + - Shortened and possibly annotated message, + - Shortened commit SHA . + """ + if commit == first_commit: + message = "(NEW) " + message + # Truncates the commit message if it's too long, keeping the first words and the PR number within the allowed length + MAX_LENGTH_COMMIT = 45 + PR_NUMBER_LENGTH = 8 + message = ( + message + if len(message) <= MAX_LENGTH_COMMIT + else message[: MAX_LENGTH_COMMIT - PR_NUMBER_LENGTH - 3].rsplit(" ", 1)[0] + "..." + message.split()[-1] + ) + date = datetime.strptime(date_str, "%b %d %Y").date() + return date, message, commit[:MINIMUM_LENGTH_COMMIT] + + +def module_exists(path: str, module: str) -> bool: + """ + Checks if the given module exists at the specified path + """ + return os.path.exists(os.path.join(path, module)) + + +def get_dependency_list(path: str, platforms: set[str]) -> set[str]: + """ + Returns the set of dependencies from the .deps/resolved folder for the latest version of the given platform. + """ + resolved_path = os.path.join(path, ".deps/resolved") + all_files = os.listdir(resolved_path) + dependencies = set() + + for platform in platforms: + version = get_version(all_files, platform) + for filename in all_files: + file_path = os.path.join(resolved_path, filename) + if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): + with open(file_path, "r", encoding="utf-8") as file: + matches = re.findall(r"([\w\-\d\.]+) @ https?://[^\s#]+", file.read()) + dependencies.update(matches) + return dependencies + + +def plot_linegraph( + modules: list[CommitEntryWithDelta] | list[CommitEntryPlatformWithDelta], + module: str, + platform: Optional[str], + show: bool, + path: Optional[str], +) -> None: + """ + Plots the disk usage evolution over time for a given module. + + Args: + modules: List of commit entries with size and date information. + module: Name of the module to display in the title. + platform: Target platform (used in the title if provided). 
+ show: If True, displays the plot interactively. + path: If provided, saves the plot to this file path. + """ + if not any(str(value).strip() not in ("", "0", "0001-01-01") for value in modules[0].values()): # table is empty + return + + dates = [entry["Date"] for entry in modules] + sizes = [entry["Size_Bytes"] for entry in modules] + title = f"Disk Usage Evolution of {module} for {platform}" if platform else f"Disk Usage Evolution of {module}" + + plt.figure(figsize=(10, 6)) + plt.plot(dates, sizes, linestyle="-") + plt.title(title) + plt.xlabel("Date") + plt.ylabel("Size_Bytes") + plt.grid(True) + plt.xticks(rotation=45) + plt.tight_layout() + + if path: + plt.savefig(path) + if show: + plt.show() + plt.close() diff --git a/ddev/tests/cli/size/__init__.py b/ddev/tests/cli/size/__init__.py new file mode 100644 index 0000000000000..3eff9712cbcf5 --- /dev/null +++ b/ddev/tests/cli/size/__init__.py @@ -0,0 +1,3 @@ +# (C) Datadog, Inc. 2022-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) diff --git a/ddev/tests/cli/size/test_diff.py b/ddev/tests/cli/size/test_diff.py new file mode 100644 index 0000000000000..bd5db6def4c54 --- /dev/null +++ b/ddev/tests/cli/size/test_diff.py @@ -0,0 +1,250 @@ +# (C) Datadog, Inc. 2022-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +import os +from unittest.mock import MagicMock, patch + +import pytest + + +def to_native_path(path: str) -> str: + return path.replace("/", os.sep) + + +@pytest.fixture +def mock_size_diff_dependencies(): + mock_git_repo = MagicMock() + mock_git_repo.repo_dir = "fake_repo" + mock_git_repo.get_commit_metadata.return_value = ("Feb 1 2025", "", "") + + def get_compressed_files_side_effect(_, __): + get_compressed_files_side_effect.counter += 1 + if get_compressed_files_side_effect.counter % 2 == 1: + return [{"Name": "path1.py", "Version": "1.1.1", "Size_Bytes": 1000, "Type": "Integration"}] # before + else: + return [ + {"Name": "path1.py", "Version": "1.1.2", "Size_Bytes": 1200, "Type": "Integration"}, + {"Name": "path2.py", "Version": "1.1.1", "Size_Bytes": 500, "Type": "Integration"}, + ] # after + + get_compressed_files_side_effect.counter = 0 + + def get_compressed_dependencies_side_effect(_, __, ___, ____): + get_compressed_dependencies_side_effect.counter += 1 + if get_compressed_dependencies_side_effect.counter % 2 == 1: + return [{"Name": "dep1", "Version": "1.0.0", "Size_Bytes": 2000, "Type": "Dependency"}] # before + else: + return [ + {"Name": "dep1", "Version": "1.1.0", "Size_Bytes": 2500, "Type": "Dependency"}, + {"Name": "dep2", "Version": "1.0.0", "Size_Bytes": 1000, "Type": "Dependency"}, + ] # after + + get_compressed_dependencies_side_effect.counter = 0 + + with ( + patch( + "ddev.cli.size.diff.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), + patch( + "ddev.cli.size.diff.get_valid_versions", + return_value=({'3.12'}), + ), + patch("ddev.cli.size.diff.GitRepo.__enter__", return_value=mock_git_repo), + patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None), + patch("ddev.cli.size.diff.GitRepo.checkout_commit"), + patch("tempfile.mkdtemp", return_value="fake_repo"), + patch("ddev.cli.size.diff.get_files", side_effect=get_compressed_files_side_effect), + patch("ddev.cli.size.diff.get_dependencies", side_effect=get_compressed_dependencies_side_effect), + patch("ddev.cli.size.diff.format_modules", side_effect=lambda m, *_: m), + 
patch("matplotlib.pyplot.show"), + patch("matplotlib.pyplot.savefig"), + ): + yield + + +def test_diff_no_args(ddev, mock_size_diff_dependencies): + assert ddev("size", "diff", "commit1", "commit2").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--compressed").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--csv").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--markdown").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--json").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--save_to_png_path", "out.png").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--show_gui").exit_code == 0 + + +def test_diff_with_platform_and_version(ddev, mock_size_diff_dependencies): + assert ddev("size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12").exit_code == 0 + assert ( + ddev( + "size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--compressed" + ).exit_code + == 0 + ) + assert ( + ddev("size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--csv").exit_code + == 0 + ) + assert ( + ddev( + "size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--markdown" + ).exit_code + == 0 + ) + assert ( + ddev( + "size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--json" + ).exit_code + == 0 + ) + assert ( + ddev( + "size", + "diff", + "commit1", + "commit2", + "--platform", + "linux-aarch64", + "--python", + "3.12", + "--save_to_png_path", + "out.png", + ).exit_code + == 0 + ) + assert ( + ddev( + "size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--show_gui" + ).exit_code + == 0 + ) + + +def test_diff_no_differences(ddev): + fake_repo = MagicMock() + fake_repo.repo_dir = "fake_repo" + fake_repo.get_commit_metadata.return_value = ("Feb 1 2025", "", "") + + with ( + patch("ddev.cli.size.diff.GitRepo.__enter__", return_value=fake_repo), + patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None), + patch( + "ddev.cli.size.diff.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), + patch( + "ddev.cli.size.diff.get_valid_versions", + return_value=({'3.12'}), + ), + patch.object(fake_repo, "checkout_commit"), + patch("tempfile.mkdtemp", return_value="fake_repo"), + patch("os.path.exists", return_value=True), + patch("os.path.isdir", return_value=True), + patch("os.path.isfile", return_value=True), + patch("os.listdir", return_value=["linux-aarch64_3.12"]), + patch( + "ddev.cli.size.diff.get_files", + return_value=[ + {"Name": "path1.py", "Version": "1.0.0", "Size_Bytes": 1000}, + {"Name": "path2.py", "Version": "1.0.0", "Size_Bytes": 500}, + ], + ), + patch( + "ddev.cli.size.diff.get_dependencies", + return_value=[ + {"Name": "dep1.whl", "Version": "2.0.0", "Size_Bytes": 2000}, + {"Name": "dep2.whl", "Version": "2.0.0", "Size_Bytes": 1000}, + ], + ), + patch("matplotlib.pyplot.show"), + patch("matplotlib.pyplot.savefig"), + ): + result = ddev( + "size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--compressed" + ) + + assert result.exit_code == 0, result.output + assert "No size differences were detected" in result.output + + assert ddev("size", "diff", "commit1", "commit2").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--compressed").exit_code == 0 + assert 
ddev("size", "diff", "commit1", "commit2", "--csv").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--markdown").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--json").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--save_to_png_path", "out.png").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--show_gui").exit_code == 0 + + +def test_diff_invalid_platform(ddev): + mock_git_repo = MagicMock() + mock_git_repo.repo_dir = "fake_repo" + mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] + mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) + mock_git_repo.__enter__.return_value = mock_git_repo + with ( + patch("ddev.cli.size.diff.GitRepo", return_value=mock_git_repo), + patch( + "ddev.cli.size.status.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), + patch( + "ddev.cli.size.status.get_valid_versions", + return_value=({'3.12'}), + ), + ): + result = ddev("size", "diff", "commit1", "commit2", "--platform", "linux", "--python", "3.12", "--compressed") + assert result.exit_code != 0 + + +def test_diff_invalid_version(ddev): + mock_git_repo = MagicMock() + mock_git_repo.repo_dir = "fake_repo" + mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] + mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) + mock_git_repo.__enter__.return_value = mock_git_repo + + with ( + patch("ddev.cli.size.diff.GitRepo", return_value=mock_git_repo), + patch( + "ddev.cli.size.status.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), + patch( + "ddev.cli.size.status.get_valid_versions", + return_value=({'3.12'}), + ), + ): + result = ddev( + "size", + "diff", + "commit1", + "commit2", + "--platform", + "linux-aarch64", + "--python", + "2.10", # invalid + "--compressed", + ) + assert result.exit_code != 0 + + +def test_diff_invalid_platform_and_version(ddev): + mock_git_repo = MagicMock() + mock_git_repo.repo_dir = "fake_repo" + mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] + mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) + mock_git_repo.__enter__.return_value = mock_git_repo + with ( + patch("ddev.cli.size.diff.GitRepo", return_value=mock_git_repo), + patch( + "ddev.cli.size.status.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), + patch( + "ddev.cli.size.status.get_valid_versions", + return_value=({'3.12'}), + ), + ): + result = ddev("size", "diff", "commit1", "commit2", "--platform", "linux", "--python", "2.10", "--compressed") + assert result.exit_code != 0 diff --git a/ddev/tests/cli/size/test_status.py b/ddev/tests/cli/size/test_status.py new file mode 100644 index 0000000000000..d60b09170bbef --- /dev/null +++ b/ddev/tests/cli/size/test_status.py @@ -0,0 +1,135 @@ +# (C) Datadog, Inc. 
2022-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +import os +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + + +def to_native_path(path: str) -> str: + return path.replace("/", os.sep) + + +@pytest.fixture() +def mock_size_status(): + fake_repo_path = Path(os.path.join("fake_root")).resolve() + + mock_walk = [(os.path.join(str(fake_repo_path), "datadog_checks", "my_check"), [], ["__init__.py"])] + + mock_app = MagicMock() + mock_app.repo.path = fake_repo_path + + fake_files = [ + { + "Name": "int1", + "Version": "1.1.1", + "Size_Bytes": 1234, + "Size": 100, + "Type": "Integration", + } + ] + + fake_deps = [ + { + "Name": "dep1", + "Version": "1.1.1", + "Size_Bytes": 5678, + "Size": 123, + "Type": "Dependency", + } + ] + + with ( + patch("ddev.cli.size.common.get_gitignore_files", return_value=set()), + patch( + "ddev.cli.size.status.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), + patch( + "ddev.cli.size.status.get_valid_versions", + return_value=({'3.12'}), + ), + patch("ddev.cli.size.status.get_files", return_value=fake_files), + patch("ddev.cli.size.status.get_dependencies", return_value=fake_deps), + patch("os.path.relpath", side_effect=lambda path, _: path.replace(f"fake_root{os.sep}", "")), + patch("ddev.cli.size.status.print_csv"), + patch("ddev.cli.size.common.compress", return_value=1234), + patch("ddev.cli.size.status.print_table"), + patch("ddev.cli.size.status.plot_treemap"), + patch("os.walk", return_value=mock_walk), + patch("os.listdir", return_value=["fake_dep.whl"]), + patch("os.path.isfile", return_value=True), + patch("matplotlib.pyplot.show"), + patch("matplotlib.pyplot.savefig"), + ): + yield mock_app + + +def test_status_no_args(ddev, mock_size_status): + assert ddev("size", "status").exit_code == 0 + assert ddev("size", "status", "--compressed").exit_code == 0 + assert ddev("size", "status", "--csv").exit_code == 0 + assert ddev("size", "status", "--markdown").exit_code == 0 + assert ddev("size", "status", "--json").exit_code == 0 + assert ddev("size", "status", "--save_to_png_path", "out.png").exit_code == 0 + assert ddev("size", "status", "--show_gui").exit_code == 0 + + +def test_status(ddev, mock_size_status): + assert (ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12")).exit_code == 0 + assert (ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12", "--compressed")).exit_code == 0 + assert (ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12", "--csv")).exit_code == 0 + assert (ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12", "--markdown")).exit_code == 0 + assert (ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12", "--json")).exit_code == 0 + assert ( + ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12", "--save_to_png_path", "out.png") + ).exit_code == 0 + assert (ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12", "--show_gui")).exit_code == 0 + + +def test_status_wrong_platform(ddev): + with ( + patch( + "ddev.cli.size.status.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), + patch( + "ddev.cli.size.status.get_valid_versions", + return_value=({'3.12'}), + ), + ): + result = ddev("size", "status", "--platform", "linux", "--python", "3.12", "--compressed") + assert result.exit_code != 0 + + +def 
test_status_wrong_version(ddev): + with ( + patch( + "ddev.cli.size.status.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), + patch( + "ddev.cli.size.status.get_valid_versions", + return_value=({'3.12'}), + ), + ): + result = ddev("size", "status", "--platform", "linux-aarch64", "--python", "2.10", "--compressed") + assert result.exit_code != 0 + + +def test_status_wrong_plat_and_version(ddev): + with ( + patch( + "ddev.cli.size.status.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), + patch( + "ddev.cli.size.status.get_valid_versions", + return_value=({'3.12'}), + ), + ): + result = ddev("size", "status", "--platform", "linux", "--python", "2.10", "--compressed") + assert result.exit_code != 0 diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py new file mode 100644 index 0000000000000..a07e72a9a0b4d --- /dev/null +++ b/ddev/tests/cli/size/test_timeline.py @@ -0,0 +1,431 @@ +from datetime import date +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + + +@pytest.fixture +def mock_timeline(): + mock_git_repo = MagicMock() + mock_git_repo.repo_dir = "fake_repo" + mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] + mock_git_repo.get_creation_commit_module.return_value = "commit1" + mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Initial commit", c) + + with ( + patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_git_repo), + patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), + patch("ddev.cli.size.timeline.GitRepo.sparse_checkout_commit"), + patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), + patch("ddev.cli.size.timeline.compress", return_value=1234), + patch("os.walk", return_value=[(Path("/tmp") / "fake_repo" / "int", [], ["file1.py"])]), + patch("os.path.exists", return_value=True), + patch("ddev.cli.size.timeline.format_modules", side_effect=lambda m, *_: m), + patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), + patch( + "ddev.cli.size.timeline.get_valid_platforms", + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}), + ), + patch("ddev.cli.size.timeline.plt.show"), + patch("ddev.cli.size.timeline.plt.savefig"), + patch("ddev.cli.size.timeline.plt.figure"), + ): + yield + + +@pytest.fixture +def app(): + mock_app = MagicMock() + mock_app.repo.path = "fake_repo" + return mock_app + + +def test_timeline_integration(ddev, mock_timeline, app): + assert ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--compressed", obj=app).exit_code == 0 + assert ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--csv", obj=app).exit_code == 0 + assert ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--markdown", obj=app).exit_code == 0 + assert ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--json", obj=app).exit_code == 0 + assert ( + ddev( + "size", + "timeline", + "integration", + "int1", + "commit1", + "commit2", + "--save_to_png_path", + "out_int.png", + obj=app, + ).exit_code + == 0 + ) + assert ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--show_gui", obj=app).exit_code == 0 + assert ( + ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--threshold", "1000", obj=app).exit_code + == 0 + ) + + +@pytest.fixture 
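+# Git metadata, dependency resolution, and matplotlib output are all mocked in
+# the fixture below, so these tests exercise only the CLI wiring for
+# dependency timelines.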
+def mock_timeline_dependencies():
+    mock_git_repo = MagicMock()
+    mock_git_repo.repo_dir = "fake_repo"
+    mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"]
+    mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c)
+    mock_git_repo.get_creation_commit_module.return_value = "initial_commit"
+    with (
+        patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_git_repo),
+        patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None),
+        patch("ddev.cli.size.timeline.GitRepo.sparse_checkout_commit"),
+        patch(
+            "ddev.cli.size.timeline.get_valid_platforms",
+            return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}),
+        ),
+        patch("ddev.cli.size.timeline.get_dependency_list", return_value={"dep1"}),
+        patch("os.path.exists", return_value=True),
+        patch("os.path.isdir", return_value=True),
+        patch("os.listdir", return_value=["linux-x86_64-3.12"]),
+        patch("os.path.isfile", return_value=True),
+        patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()),
+        patch(
+            "ddev.cli.size.timeline.get_dependencies",
+            return_value={
+                "Size_Bytes": 12345,
+                "Version": "1.2.3",
+                "Date": date(2025, 4, 4),
+                "Author": "Mock User",
+                "Commit_Message": "Mock commit message",
+                "Commit_SHA": "abcdef123456",
+            },
+        ),
+        patch("ddev.cli.size.timeline.format_modules", side_effect=lambda m, *_: m),
+        patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m),
+        patch("ddev.cli.size.timeline.plt.show"),
+        patch("ddev.cli.size.timeline.plt.savefig"),
+        patch("ddev.cli.size.timeline.plt.figure"),
+    ):
+        yield
+
+
+def test_timeline_dependency(ddev, mock_timeline_dependencies, app):
+    assert (
+        ddev(
+            "size", "timeline", "dependency", "dep1", "commit1", "commit2", "--platform", "linux-x86_64", obj=app
+        ).exit_code
+        == 0
+    )
+    assert ddev("size", "timeline", "dependency", "dep1", "commit1", "commit2", obj=app).exit_code == 0
+    assert (
+        ddev(
+            "size",
+            "timeline",
+            "dependency",
+            "dep1",
+            "commit1",
+            "commit2",
+            "--platform",
+            "linux-x86_64",
+            "--compressed",
+            obj=app,
+        ).exit_code
+        == 0
+    )
+    assert (
+        ddev(
+            "size",
+            "timeline",
+            "dependency",
+            "dep1",
+            "commit1",
+            "commit2",
+            "--platform",
+            "linux-x86_64",
+            "--csv",
+            obj=app,
+        ).exit_code
+        == 0
+    )
+    assert (
+        ddev(
+            "size",
+            "timeline",
+            "dependency",
+            "dep1",
+            "commit1",
+            "commit2",
+            "--platform",
+            "linux-x86_64",
+            "--markdown",
+            obj=app,
+        ).exit_code
+        == 0
+    )
+    assert (
+        ddev(
+            "size",
+            "timeline",
+            "dependency",
+            "dep1",
+            "commit1",
+            "commit2",
+            "--platform",
+            "linux-x86_64",
+            "--json",
+            obj=app,
+        ).exit_code
+        == 0
+    )
+    assert (
+        ddev(
+            "size",
+            "timeline",
+            "dependency",
+            "dep1",
+            "commit1",
+            "commit2",
+            "--platform",
+            "linux-x86_64",
+            "--save_to_png_path",
+            "out2.png",
+            obj=app,
+        ).exit_code
+        == 0
+    )
+
+    assert (
+        ddev(
+            "size",
+            "timeline",
+            "dependency",
+            "dep1",
+            "commit1",
+            "commit2",
+            "--platform",
+            "linux-x86_64",
+            "--show_gui",
+            obj=app,
+        ).exit_code
+        == 0
+    )
+    assert (
+        ddev(
+            "size",
+            "timeline",
+            "dependency",
+            "dep1",
+            "commit1",
+            "commit2",
+            "--platform",
+            "linux-x86_64",
+            "--threshold",
+            "1000",
+            obj=app,
+        ).exit_code
+        == 0
+    )
+
+
+def test_timeline_invalid_platform(ddev):
+    mock_git_repo = MagicMock()
+    mock_git_repo.repo_dir = "fake_repo"
+    mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"]
+    mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c)
+    mock_git_repo.__enter__.return_value = mock_git_repo
+
+    with (
patch("ddev.cli.size.timeline.GitRepo", return_value=mock_git_repo), + patch( + "ddev.cli.size.timeline.get_valid_platforms", + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}), + ), + ): + result = ddev( + "size", + "timeline", + "dependency", + "dep1", + "commit1", + "commit2", + "--compressed", + "--platform", + "invalid-platform", + ) + + assert result.exit_code != 0 + + +def test_timeline_integration_no_changes(ddev): + mock_git_repo = MagicMock() + mock_git_repo.repo_dir = "fake_repo" + mock_git_repo.get_module_commits.return_value = [""] + mock_git_repo.get_commit_metadata.return_value = ("Feb 1 2025", "", "") + + with ( + patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_git_repo), + patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), + patch("os.path.exists", return_value=True), + patch("os.path.isdir", return_value=True), + patch("os.listdir", return_value=[]), + patch( + "ddev.cli.size.timeline.get_valid_platforms", + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}), + ), + ): + assert ( + "No changes found" + in (result := ddev("size", "timeline", "integration", "int1", "commit1", "commit2")).output + and result.exit_code == 0 + ) + assert ( + "No changes found" + in (result := ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--compressed")).output + and result.exit_code == 0 + ) + assert ( + "No changes found" + in (result := ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--csv")).output + and result.exit_code == 0 + ) + assert ( + "No changes found" + in (result := ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--markdown")).output + and result.exit_code == 0 + ) + assert ( + "No changes found" + in (result := ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--json")).output + and result.exit_code == 0 + ) + assert ( + "No changes found" + in ( + result := ddev( + "size", "timeline", "integration", "int1", "commit1", "commit2", "--save_to_png_path", "out.png" + ) + ).output + and result.exit_code == 0 + ) + assert ( + "No changes found" + in (result := ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--show_gui")).output + and result.exit_code == 0 + ) + assert ( + "No changes found" + in ( + result := ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--time", "2025-04-01") + ).output + and result.exit_code == 0 + ) + assert ( + "No changes found" + in ( + result := ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--threshold", "1000") + ).output + and result.exit_code == 0 + ) + + +def test_timeline_integration_not_found(ddev): + mock_repo = MagicMock() + mock_repo.repo_dir = "fake" + mock_repo.get_module_commits.return_value = [""] + mock_repo.get_creation_commit_module.return_value = "c1" + mock_repo.checkout_commit.return_value = None + mock_repo.get_commit_metadata.return_value = ("Feb 1 2025", "", "") + + with ( + patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_repo), + patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), + patch( + "ddev.cli.size.timeline.get_valid_platforms", + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}), + ), + patch("ddev.cli.size.timeline.module_exists", return_value=False), + patch("matplotlib.pyplot.show"), + patch("matplotlib.pyplot.savefig"), + ): + result = ddev("size", "timeline", "integration", "missing_module", 
"c123456", "c2345667") + assert result.exit_code != 0 + assert "not found" in result.output + + +def test_timeline_dependency_missing_no_platform(ddev): + mock_repo = MagicMock() + mock_repo.repo_dir = "fake" + mock_repo.get_module_commits.return_value = ["c1"] + mock_repo.get_creation_commit_module.return_value = "c1" + mock_repo.checkout_commit.return_value = None + mock_repo.get_commit_metadata.return_value = ("Feb 1 2025", "", "") + + with ( + patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_repo), + patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), + patch( + "ddev.cli.size.timeline.get_valid_platforms", + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}), + ), + patch("ddev.cli.size.timeline.get_dependency_list", return_value=set()), + ): + result = ddev("size", "timeline", "dependency", "missing_module", "c123456", "c2345667") + assert result.exit_code != 0 + assert "Dependency missing_module not found in latest commit" in result.output + + +def test_timeline_dependency_missing_for_platform(ddev, app): + mock_repo = MagicMock() + mock_repo.repo_dir = "fake" + mock_repo.get_module_commits.return_value = ["c1"] + mock_repo.get_creation_commit_module.return_value = "c1" + mock_repo.checkout_commit.return_value = None + mock_repo.get_commit_metadata.return_value = ("Feb 1 2025", "", "") + + with ( + patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_repo), + patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), + patch( + "ddev.cli.size.timeline.get_valid_platforms", + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}), + ), + patch("ddev.cli.size.timeline.get_dependency_list", return_value=set()), + ): + result = ddev( + "size", + "timeline", + "dependency", + "missing_module", + "c123456", + "c2345667", + "--platform", + "linux-x86_64", + ) + + assert result.exit_code != 0 + assert ( + "Dependency missing_module not found in latest commit for the platform linux-x86_64, is the name correct?" + in result.output + ) + + +def test_timeline_dependency_no_changes(ddev, app): + mock_repo = MagicMock() + mock_repo.repo_dir = "fake" + mock_repo.get_module_commits.return_value = [""] + mock_repo.get_creation_commit_module.return_value = "c1" + mock_repo.checkout_commit.return_value = None + mock_repo.get_commit_metadata.return_value = ("Feb 1 2025", "", "") + + with ( + patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_repo), + patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), + patch( + "ddev.cli.size.timeline.get_valid_platforms", + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}), + ), + patch("ddev.cli.size.timeline.get_dependency_list", return_value={"dep1"}), + ): + result = ddev( + "size", + "timeline", + "dependency", + "dep1", + "c123456", + "c2345667", + "--platform", + "linux-x86_64", + obj=app, + ) + + assert result.exit_code == 0 + assert "no changes found" in result.output.lower() diff --git a/ddev/tests/size/__init__.py b/ddev/tests/size/__init__.py new file mode 100644 index 0000000000000..3eff9712cbcf5 --- /dev/null +++ b/ddev/tests/size/__init__.py @@ -0,0 +1,3 @@ +# (C) Datadog, Inc. 
2022-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) diff --git a/ddev/tests/size/test_common.py b/ddev/tests/size/test_common.py new file mode 100644 index 0000000000000..00469e80f2ec8 --- /dev/null +++ b/ddev/tests/size/test_common.py @@ -0,0 +1,322 @@ +import json +import os +from pathlib import Path +from unittest.mock import MagicMock, mock_open, patch + +from ddev.cli.size.common import ( + compress, + convert_to_human_readable_size, + extract_version_from_about_py, + format_modules, + get_dependencies_list, + get_dependencies_sizes, + get_files, + get_gitignore_files, + get_valid_platforms, + get_valid_versions, + is_correct_dependency, + is_valid_integration, + print_csv, + print_json, +) + + +def to_native_path(path: str) -> str: + return path.replace("/", os.sep) + + +def test_get_valid_platforms(): + filenames = [ + "linux-aarch64_3.12.txt", + "linux-aarch64_py2.txt", + "linux-aarch64_py3.txt", + "linux-x86_64_3.12.txt", + "linux-x86_64_py2.txt", + "linux-x86_64_py3.txt", + "macos-x86_64_3.12.txt", + "macos-x86_64_py2.txt", + "macos-x86_64_py3.txt", + "windows-x86_64_3.12.txt", + "windows-x86_64_py2.txt", + "windows-x86_64_py3.txt", + ] + + expected_platforms = {"linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"} + with patch("os.listdir", return_value=filenames): + platforms = get_valid_platforms("fake_repo") + assert platforms == expected_platforms + + +def test_get_valid_versions(): + filenames = [ + "linux-aarch64_3.12.txt", + "linux-aarch64_py2.txt", + "linux-aarch64_py3.txt", + "linux-x86_64_3.12.txt", + "linux-x86_64_py2.txt", + "linux-x86_64_py3.txt", + "macos-x86_64_3.12.txt", + "macos-x86_64_py2.txt", + "macos-x86_64_py3.txt", + "windows-x86_64_3.12.txt", + "windows-x86_64_py2.txt", + "windows-x86_64_py3.txt", + ] + + expected_versions = {"3.12"} + with patch("os.listdir", return_value=filenames): + versions = get_valid_versions("fake_repo") + assert versions == expected_versions + + +def test_is_correct_dependency(): + assert is_correct_dependency("windows-x86_64", "3.12", "windows-x86_64-3.12") + assert not is_correct_dependency("windows-x86_64", "3.12", "linux-x86_64-3.12") + assert not is_correct_dependency("windows-x86_64", "3.13", "windows-x86_64-3.12") + + +def test_convert_to_human_readable_size(): + assert convert_to_human_readable_size(500) == "500 B" + assert convert_to_human_readable_size(1024) == "1.0 KB" + assert convert_to_human_readable_size(1048576) == "1.0 MB" + assert convert_to_human_readable_size(1073741824) == "1.0 GB" + + +def test_is_valid_integration(): + included_folder = "datadog_checks" + os.sep + ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} + git_ignore = [".git", "__pycache__"] + + assert is_valid_integration(to_native_path("datadog_checks/example.py"), included_folder, ignored_files, git_ignore) + assert not is_valid_integration(to_native_path("__pycache__/file.py"), included_folder, ignored_files, git_ignore) + assert not is_valid_integration( + to_native_path("datadog_checks_dev/example.py"), included_folder, ignored_files, git_ignore + ) + assert not is_valid_integration(to_native_path(".git/config"), included_folder, ignored_files, git_ignore) + + +def test_get_dependencies_list(): + file_content = "dependency1 @ https://example.com/dependency1/dependency1-1.1.1-.whl\ndependency2 @ https://example.com/dependency2/dependency2-1.1.1-.whl" + mock_open_obj = mock_open(read_data=file_content) + with patch("builtins.open", mock_open_obj): + deps, urls, 
versions = get_dependencies_list("fake_path") + assert deps == ["dependency1", "dependency2"] + assert urls == [ + "https://example.com/dependency1/dependency1-1.1.1-.whl", + "https://example.com/dependency2/dependency2-1.1.1-.whl", + ] + assert versions == ["1.1.1", "1.1.1"] + + +def test_get_dependencies_sizes(): + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"Content-Length": "12345"} + with patch("requests.head", return_value=mock_response): + file_data = get_dependencies_sizes(["dependency1"], ["https://example.com/dependency1.whl"], ["1.1.1"], True) + assert file_data == [ + { + "Name": "dependency1", + "Version": "1.1.1", + "Size_Bytes": 12345, + "Size": convert_to_human_readable_size(12345), + "Type": "Dependency", + } + ] + + +def test_format_modules_multiple_platform(): + modules = [ + {"Name": "module1", "Type": "A", "Size_Bytes": 1500}, + {"Name": "module2", "Type": "B", "Size_Bytes": 3000}, + ] + platform = "linux-aarch64" + version = "3.12" + + expected_output = [ + { + "Name": "module1", + "Type": "A", + "Size_Bytes": 1500, + "Platform": "linux-aarch64", + "Python_Version": "3.12", + }, + { + "Name": "module2", + "Type": "B", + "Size_Bytes": 3000, + "Platform": "linux-aarch64", + "Python_Version": "3.12", + }, + ] + + assert format_modules(modules, platform, version, True) == expected_output + + +def test_format_modules_one_plat(): + modules = [ + {"Name": "module1", "Type": "A", "Size_Bytes": 1500}, + {"Name": "module2", "Type": "B", "Size_Bytes": 3000}, + ] + platform = "linux-aarch64" + version = "3.12" + + expected_output = [ + { + "Name": "module1", + "Type": "A", + "Size_Bytes": 1500, + }, + { + "Name": "module2", + "Type": "B", + "Size_Bytes": 3000, + }, + ] + + assert format_modules(modules, platform, version, False) == expected_output + + +def test_get_files_grouped_and_with_versions(): + repo_path = Path("fake_repo") + + os_walk_output = [ + (repo_path / "integration1" / "datadog_checks", [], ["__about__.py", "file2.py"]), + (repo_path / "integration2" / "datadog_checks", [], ["__about__.py"]), + ] + + def mock_is_valid_integration(path, included_folder, ignored, ignored_files): + return True + + def mock_getsize(path): + file_sizes = { + repo_path / "integration1" / "datadog_checks" / "file2.py": 2000, + repo_path / "integration1" / "datadog_checks" / "__about__.py": 1000, + repo_path / "integration2" / "datadog_checks" / "__about__.py": 3000, + } + return file_sizes[Path(path)] + + with ( + patch("os.walk", return_value=[(str(p), dirs, files) for p, dirs, files in os_walk_output]), + patch("os.path.getsize", side_effect=mock_getsize), + patch("ddev.cli.size.common.get_gitignore_files", return_value=set()), + patch("ddev.cli.size.common.is_valid_integration", side_effect=mock_is_valid_integration), + patch("ddev.cli.size.common.extract_version_from_about_py", return_value="1.2.3"), + patch("ddev.cli.size.common.convert_to_human_readable_size", side_effect=lambda s: f"{s / 1024:.2f} KB"), + ): + + result = get_files(repo_path, compressed=False) + + expected = [ + { + "Name": "integration1", + "Version": "1.2.3", + "Size_Bytes": 3000, + "Size": "2.93 KB", + "Type": "Integration", + }, + { + "Name": "integration2", + "Version": "1.2.3", + "Size_Bytes": 3000, + "Size": "2.93 KB", + "Type": "Integration", + }, + ] + + assert result == expected + + +def test_get_gitignore_files(): + mock_gitignore = f"__pycache__{os.sep}\n*.log\n" # Sample .gitignore file + repo_path = "fake_repo" + with patch("builtins.open", 
mock_open(read_data=mock_gitignore)):
+        with patch("os.path.exists", return_value=True):
+            ignored_patterns = get_gitignore_files(repo_path)
+            assert ignored_patterns == ["__pycache__" + os.sep, "*.log"]
+
+
+def test_compress():
+    fake_content = b'a' * 16384
+    original_size = len(fake_content)
+
+    m = mock_open(read_data=fake_content)
+    with patch("builtins.open", m):
+        compressed_size = compress(to_native_path("fake/path/file.py"))
+
+    assert isinstance(compressed_size, int)
+    assert compressed_size > 0
+    assert compressed_size < original_size
+
+
+def test_print_csv():
+    mock_app = MagicMock()
+    modules = [
+        {"Name": "module1", "Size B": 123, "Size": "2 B"},
+        {"Name": "module,with,comma", "Size B": 456, "Size": "2 B"},
+    ]
+
+    print_csv(mock_app, modules=modules)
+
+    expected_calls = [
+        (("Name,Size B",),),
+        (('module1,123',),),
+        (('"module,with,comma",456',),),
+    ]
+
+    actual_calls = mock_app.display.call_args_list
+    assert actual_calls == expected_calls
+
+
+def test_print_json():
+    mock_app = MagicMock()
+
+    modules = [
+        {"name": "mod1", "size": "100"},
+        {"name": "mod2", "size": "200"},
+        {"name": "mod3", "size": "300"},
+    ]
+    print_json(mock_app, modules)
+
+    expected_calls = [
+        (("[",),),
+        (('{"name": "mod1", "size": "100"}',),),
+        ((",",),),
+        (('{"name": "mod2", "size": "200"}',),),
+        ((",",),),
+        (('{"name": "mod3", "size": "300"}',),),
+        (("]",),),
+    ]
+
+    actual_calls = mock_app.display.call_args_list
+    assert actual_calls == expected_calls
+
+    result = "".join(call[0][0] for call in actual_calls)
+    parsed = json.loads(result)
+    assert parsed == [
+        {"name": "mod1", "size": "100"},
+        {"name": "mod2", "size": "200"},
+        {"name": "mod3", "size": "300"},
+    ]
+
+
+def test_extract_version_from_about_py_pathlib():
+    # Use Path for cross-platform compatibility
+    fake_path = Path("some") / "module" / "__about__.py"
+    fake_content = "__version__ = '1.2.3'\n"
+
+    with patch("builtins.open", mock_open(read_data=fake_content)):
+        version = extract_version_from_about_py(str(fake_path))
+
+    assert version == "1.2.3"
+
+
+def test_extract_version_from_about_py_no_version_pathlib():
+    fake_path = Path("another") / "module" / "__about__.py"
+    fake_content = "version = 'not_defined'\n"
+
+    with patch("builtins.open", mock_open(read_data=fake_content)):
+        version = extract_version_from_about_py(str(fake_path))
+
+    assert version == ""
diff --git a/ddev/tests/size/test_diff.py b/ddev/tests/size/test_diff.py
new file mode 100644
index 0000000000000..f5ff3fc5000c4
--- /dev/null
+++ b/ddev/tests/size/test_diff.py
@@ -0,0 +1,54 @@
+# (C) Datadog, Inc.
2022-present
+# All rights reserved
+# Licensed under a 3-clause BSD style license (see LICENSE)
+
+import os
+
+from ddev.cli.size.common import convert_to_human_readable_size
+from ddev.cli.size.diff import get_diff
+
+
+def to_native_path(path: str) -> str:
+    return path.replace("/", os.sep)
+
+
+def test_get_diff():
+    size_before = [
+        {"Name": "foo", "Version": "1.0.0", "Size_Bytes": 1000, "Type": "Integration"},
+        {"Name": "bar", "Version": "2.0.0", "Size_Bytes": 2000, "Type": "Integration"},
+        {"Name": "deleted", "Version": "3.0.0", "Size_Bytes": 1500, "Type": "Integration"},
+    ]
+
+    size_after = [
+        {"Name": "foo", "Version": "1.1.0", "Size_Bytes": 1200, "Type": "Integration"},
+        {"Name": "bar", "Version": "2.0.0", "Size_Bytes": 2000, "Type": "Integration"},
+        {"Name": "new", "Version": "0.1.0", "Size_Bytes": 800, "Type": "Integration"},
+    ]
+
+    result = get_diff(size_before, size_after, "Integration")
+
+    expected = [
+        {
+            "Name": "deleted (DELETED)",
+            "Version": "3.0.0",
+            "Type": "Integration",
+            "Size_Bytes": -1500,
+            "Size": convert_to_human_readable_size(-1500),
+        },
+        {
+            "Name": "foo",
+            "Version": "1.0.0 -> 1.1.0",
+            "Type": "Integration",
+            "Size_Bytes": 200,
+            "Size": convert_to_human_readable_size(200),
+        },
+        {
+            "Name": "new (NEW)",
+            "Version": "0.1.0",
+            "Type": "Integration",
+            "Size_Bytes": 800,
+            "Size": convert_to_human_readable_size(800),
+        },
+    ]
+
+    assert sorted(result, key=lambda x: x["Name"]) == expected
diff --git a/ddev/tests/size/test_timeline.py b/ddev/tests/size/test_timeline.py
new file mode 100644
index 0000000000000..331a9c62a0c04
--- /dev/null
+++ b/ddev/tests/size/test_timeline.py
@@ -0,0 +1,158 @@
+import os
+from datetime import datetime
+from pathlib import Path
+from unittest.mock import MagicMock, mock_open, patch
+
+from ddev.cli.size.timeline import (
+    format_commit_data,
+    get_dependencies,
+    get_dependency_data,
+    get_dependency_size,
+    get_files,
+    get_version,
+    trim_modules,
+)
+
+
+def test_get_compressed_files():
+    with (
+        patch("os.walk", return_value=[(os.path.join("fake_repo", "datadog_checks"), [], ["__about__.py"])]),
+        patch("os.path.relpath", return_value=os.path.join("datadog_checks", "__about__.py")),
+        patch("os.path.exists", return_value=True),
+        patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()),
+        patch("ddev.cli.size.timeline.is_valid_integration", return_value=True),
+        patch("ddev.cli.size.timeline.compress", return_value=1234),
+        patch("ddev.cli.size.timeline.extract_version_from_about_py", return_value='1.1.1'),
+    ):
+        result = get_files("fake_repo", "int1", "abc1234", datetime(2025, 4, 4).date(), "auth", "Added int1", [], True)
+        assert result == [
+            {
+                "Size_Bytes": 1234,
+                "Version": '1.1.1',
+                "Date": datetime(2025, 4, 4).date(),
+                "Author": "auth",
+                "Commit_Message": "Added int1",
+                "Commit_SHA": "abc1234",
+            }
+        ]
+
+
+def test_get_compressed_files_deleted_only():
+    repo_path = "fake_repo"
+    module = "foo"
+    commit = "abc1234"
+    date = datetime.strptime("Apr 5 2025", "%b %d %Y").date()
+    author = "Author"
+    message = "deleted module"
+
+    with (
+        patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()),
+        patch("os.walk", return_value=[]),
+        patch("os.path.relpath", side_effect=lambda path, _: path.replace(f"{repo_path}{os.sep}", "")),
+        patch("os.path.exists", return_value=False),
+    ):
+        file_data = get_files(repo_path, module, commit, date, author, message, [], True)
+
+    assert file_data == [
+        {
+            "Size_Bytes": 0,
+            "Version": "Deleted",
+            "Date":
date, + "Author": author, + "Commit_Message": "(DELETED) " + message, + "Commit_SHA": commit, + } + ] + + +def test_get_version(): + files = ["linux-x86_64_3.12.txt", "linux-x86_64_3.10.txt"] + version = get_version(files, "linux-x86_64") + assert version == "3.12" + + +def test_format_commit_data(): + date, message, commit = format_commit_data( + "Apr 4 2025", "this is a very long commit message that should be trimmed (#1234)", "abc1234def", "abc1234def" + ) + expected_date = datetime.strptime("Apr 4 2025", "%b %d %Y").date() + expected_message = "(NEW) this is a very long commit...(#1234)" + expected_commit = "abc1234" + assert date == expected_date + assert message == expected_message + assert commit == expected_commit + + +def test_trim_modules_keep_some_remove_some(): + modules = [ + {"Size_Bytes": 1000, "Version": "1.0.0"}, + {"Size_Bytes": 1100, "Version": "1.0.0"}, + {"Size_Bytes": 1400, "Version": "1.1.0"}, + ] + expected = [ + {"Size_Bytes": 1000, "Delta_Bytes": 0, "Delta": " ", "Version": "1.0.0"}, + {"Size_Bytes": 1400, "Delta_Bytes": 300, "Delta": "300 B", "Version": "1.0.0 -> 1.1.0"}, + ] + trimmed = trim_modules(modules, threshold=200) + assert trimmed == expected + + +def test_get_dependency(): + content = """dep1 @ https://example.com/dep1/dep1-1.1.1-.whl +dep2 @ https://example.com/dep2/dep2-1.1.2-.whl""" + with patch("builtins.open", mock_open(read_data=content)): + url, version = get_dependency_data(Path("some") / "path" / "file.txt", "dep2") + assert (url, version) == ("https://example.com/dep2/dep2-1.1.2-.whl", "1.1.2") + + +def make_mock_response(size): + mock_response = MagicMock() + mock_response.__enter__.return_value = mock_response + mock_response.headers = {"Content-Length": size} + mock_response.raise_for_status = lambda: None + return mock_response + + +def test_get_dependency_size(): + mock_response = make_mock_response("45678") + with patch("requests.head", return_value=mock_response): + info = get_dependency_size( + "https://example.com/file-1.1.1-.whl", + "1.1.1", + "abc1234", + datetime(2025, 4, 4).date(), + "auth", + "Fixed bug", + True, + ) + assert info == { + "Size_Bytes": 45678, + "Version": "1.1.1", + "Date": datetime(2025, 4, 4).date(), + "Author": "auth", + "Commit_Message": "Fixed bug", + "Commit_SHA": "abc1234", + } + + +def test_get_compressed_dependencies(): + with ( + patch("os.path.exists", return_value=True), + patch("os.path.isdir", return_value=True), + patch("os.path.isfile", return_value=True), + patch("os.listdir", return_value=["linux-x86_64_3.12.txt"]), + patch("ddev.cli.size.timeline.get_dependency_data", return_value=("https://example.com/dep1.whl", '1.1.1')), + patch("ddev.cli.size.timeline.requests.head", return_value=make_mock_response("12345")), + ): + result = get_dependencies( + "fake_repo", "dep1", "linux-x86_64", "abc1234", datetime(2025, 4, 4).date(), "auth", "Added dep1", True + ) + assert result == { + "Size_Bytes": 12345, + "Version": '1.1.1', + "Date": datetime(2025, 4, 4).date(), + "Author": "auth", + "Commit_Message": "Added dep1", + "Commit_SHA": "abc1234", + }
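+
+
+def test_get_version_orders_numerically():
+    # Sanity sketch: get_version must compare versions numerically, so 3.12
+    # ranks above 3.9 (a plain string comparison would invert this).
+    files = ["linux-x86_64_3.9.txt", "linux-x86_64_3.12.txt"]
+    assert get_version(files, "linux-x86_64") == "3.12"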