From c39ed1a134132c3792a801349edb73d9753e7d66 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Wed, 12 Mar 2025 12:52:12 +0100 Subject: [PATCH 01/70] basic status reporting --- package_size_analyzer/cli.py | 38 +++++++ package_size_analyzer/modes.py | 184 +++++++++++++++++++++++++++++++++ package_size_analyzer/test.py | 58 +++++++++++ 3 files changed, 280 insertions(+) create mode 100644 package_size_analyzer/cli.py create mode 100644 package_size_analyzer/modes.py create mode 100644 package_size_analyzer/test.py diff --git a/package_size_analyzer/cli.py b/package_size_analyzer/cli.py new file mode 100644 index 0000000000000..23c9a15d0bc0c --- /dev/null +++ b/package_size_analyzer/cli.py @@ -0,0 +1,38 @@ +import argparse +from modes import status_mode + + +def main(): + parser = argparse.ArgumentParser(description="Package Size Analyzer CLI") + + # Define allowed choices + valid_modes = ["status", "diff", "timeline"] + valid_platforms = ["linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"] + valid_python_versions = ["3.12"] + + # Arguments + parser.add_argument("mode", choices=valid_modes, help="Mode of operation") + parser.add_argument("--platform", choices=valid_platforms, required=False, help="Target platform") + parser.add_argument("--python", choices=valid_python_versions, required=False, help="Python version (MAJOR.MINOR)") + parser.add_argument("--compressed", action="store_true", help="Measure compressed size") + + args = parser.parse_args() + + # Execute the corresponding function based on the selected mode + if args.mode == "status": + # if an argument is not specified, all possibilities are executed + if args.platform is None and args.python is None: + for platform in valid_platforms: + for version in valid_python_versions: + status_mode(platform, version, args.compressed) + elif args.platform is None: + for platform in valid_platforms: + status_mode(platform, args.python, args.compressed) + elif args.python is None: + for version 
in valid_python_versions: + status_mode(args.platform, version, args.compressed) + else: + status_mode(args.platform, args.python, args.compressed) + +if __name__ == "__main__": + main() diff --git a/package_size_analyzer/modes.py b/package_size_analyzer/modes.py new file mode 100644 index 0000000000000..b9e0f2f05b4ab --- /dev/null +++ b/package_size_analyzer/modes.py @@ -0,0 +1,184 @@ +import requests +import pandas as pd +import re +import os +from tabulate import tabulate +import zlib +import io + + +def status_mode(platform, version, compressed): + if compressed: + df1 = pd.DataFrame(get_compressed_files()) + print("Compressed integrations done") + + df2 = pd.DataFrame(get_compressed_dependencies(platform,version)) + print("Compressed dependencies done") + + + df = pd.concat([df1, df2], ignore_index=True) + + # Calculate the size for the whole module + df_grouped = df.groupby(["Name", 'Type'], as_index=False).agg({"Size (Bytes)": "sum"}) + df_grouped = df_grouped.sort_values(by="Size (Bytes)", ascending=False).reset_index(drop=True) + + + df_grouped["Size"] = df_grouped["Size (Bytes)"].apply(convert_size) + df_grouped.to_csv("compressed_status_" + platform + "_" + version + ".csv", index=False) + df.to_csv("compressed_status_all_" + platform + "_" + version + ".csv", index=False) + df_grouped = df_grouped.drop(columns=['Size (Bytes)']) + print('--------------', platform,version,'--------------') + print(tabulate(df_grouped, headers='keys', tablefmt='grid')) + print("CSV exported") + + + + +def get_compressed_files(): + print("Getting compressed integrations") + + ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} + git_ignore = get_gitignore_files() + included_folder = "datadog_checks/" + + script_path = os.path.abspath(__file__) + parent_dir = os.path.dirname(script_path) + repo_path = os.path.dirname(parent_dir) + + file_data = [] + for root, _, files in os.walk(repo_path): + for file in files: + file_path = os.path.join(root, file) + + # 
Convert the path to a relative format within the repo + relative_path = os.path.relpath(file_path, repo_path) + + # Filter files + if is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): + try: + # Compress the file + compressor = zlib.compressobj() + compressed_size = 0 + + # original_size = os.path.getsize(file_path) + with open(file_path, "rb") as f: + while chunk := f.read(8192): # Read in 8KB chunks + compressed_chunk = compressor.compress(chunk) + compressed_size += len(compressed_chunk) + + compressed_size += len(compressor.flush()) # Flush the buffer + integration = relative_path.split("/")[0] + file_data.append({ + "File Path": relative_path, + "Type": "Integration", + "Name": integration, + "Size (Bytes)": compressed_size + }) + + except Exception as e: + print(f"Error processing {relative_path}: {e}") + + return file_data + + +def get_compressed_dependencies(platform=None, version=None): + print("Getting compressed dependencies") + + script_path = os.path.abspath(__file__) + parent_dir = os.path.dirname(script_path) + repo_path = os.path.dirname(parent_dir) + resolved_path = os.path.join(repo_path, ".deps/resolved") + + if not os.path.exists(resolved_path) or not os.path.isdir(resolved_path): + print(f"Error: Directory not found {resolved_path}") + return [] + + file_data = [] + + for filename in os.listdir(resolved_path): + file_path = os.path.join(resolved_path, filename) + + if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): + deps, download_urls = get_dependencies(file_path) + return get_dependencies_sizes(deps, download_urls) + + + + + +def is_correct_dependency(platform, version, name): + return platform in name and version in name + +def get_dependencies_sizes(deps, download_urls): + file_data = [] + for dep, url in zip(deps, download_urls): + dep_response = requests.head(url) + if dep_response.status_code != 200: + print(f"Error {response.status_code}: Unable to fetch the dependencies 
file") + else: + size = dep_response.headers.get("Content-Length", None) + file_data.append({"File Path": dep, "Type": "Dependency", "Name": dep, "Size (Bytes)": int(size)}) + + return file_data + + +def get_dependencies(file_path): + download_urls = [] + deps = [] + try: + with open(file_path, "r", encoding="utf-8") as file: + file_content = file.read() + for line in file_content.splitlines(): + match = re.search(r"([\w\-\d\.]+) @ (https?://[^\s#]+)", line) + if match: + deps.append(match.group(1)) + download_urls.append(match.group(2)) + except Exception as e: + print(f"Error reading file {file_path}: {e}") + + return deps, download_urls + +def is_valid_integration(path, included_folder, ignored_files, git_ignore): + # It is not an integration + if path.startswith('.'): + return False + # It is part of an integration and it is not in the datadog_checks folder + elif not (included_folder in path): + return False + # It is an irrelevant file + elif any(ignore in path for ignore in ignored_files): + return False + # This file is contained in .gitignore + elif any(ignore in path for ignore in git_ignore): + return False + else: + return True + + +def get_gitignore_files(): + script_path = os.path.abspath(__file__) + parent_dir = os.path.dirname(script_path) + repo_path = os.path.dirname(parent_dir) + gitignore_path = os.path.join(repo_path, ".gitignore") + if not os.path.exists(gitignore_path): + print(f"Error: .gitignore file not found at {gitignore_path}") + return [] + + try: + with open(gitignore_path, "r", encoding="utf-8") as file: + gitignore_content = file.read() + ignored_patterns = [line.strip() for line in gitignore_content.splitlines() if line.strip() and not line.startswith("#")] + return ignored_patterns + except Exception as e: + print(f"Error reading .gitignore file: {e}") + return [] + +def convert_size(size_bytes): + """Transforms bytes into a human-friendly format (KB, MB, GB) with 3 decimal places.""" + for unit in ['B', 'KB', 'MB', 'GB']: + if 
size_bytes < 1024: + return (str(round(size_bytes, 2)) + unit) + size_bytes /= 1024 + return (str(round(size_bytes, 2)) + "TB") + + diff --git a/package_size_analyzer/test.py b/package_size_analyzer/test.py new file mode 100644 index 0000000000000..89bc7c64a3c11 --- /dev/null +++ b/package_size_analyzer/test.py @@ -0,0 +1,58 @@ + +import pytest +import requests +from unittest.mock import patch, mock_open, MagicMock +from modes import ( + get_compressed_dependencies, + get_gitignore_files, + convert_size, + is_valid_integration, + is_correct_dependency, + get_dependencies, + get_dependencies_sizes +) + +def test_is_correct_dependency(): + assert is_correct_dependency("windows-x86_64", "3.12", "windows-x86_64-3.12") == True + assert is_correct_dependency("windows-x86_64", "3.12", "linux-x86_64-3.12") == False + assert is_correct_dependency("windows-x86_64", "3.13", "windows-x86_64-3.12") == False + + +def test_convert_size(): + assert convert_size(500) == "500B" + assert convert_size(1024) == "1.0KB" + assert convert_size(1048576) == "1.0MB" + assert convert_size(1073741824) == "1.0GB" + +def test_is_valid_integration(): + included_folder = "datadog_checks/" + ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} + git_ignore = [".git", "__pycache__"] + + assert is_valid_integration("datadog_checks/example.py", included_folder, ignored_files, git_ignore) == True + assert is_valid_integration("__pycache__/file.py", included_folder, ignored_files, git_ignore) == False + assert is_valid_integration("datadog_checks_dev/example.py", included_folder, ignored_files, git_ignore) == False + assert is_valid_integration(".git/config", included_folder, ignored_files, git_ignore) == False + +def test_get_dependencies(): + file_content = "dependency1 @ https://example.com/dependency1.whl\ndependency2 @ https://example.com/dependency2.whl" + mock_open_obj = mock_open(read_data=file_content) + with patch("builtins.open", mock_open_obj): + deps, urls = 
get_dependencies("fake_path") + assert deps == ["dependency1", "dependency2"] + assert urls == ["https://example.com/dependency1.whl", "https://example.com/dependency2.whl"] + +def test_get_gitignore_files(): + mock_gitignore = "__pycache__/\n*.log\n" # Sample .gitignore file + with patch("builtins.open", mock_open(read_data=mock_gitignore)): + with patch("os.path.exists", return_value=True): + ignored_patterns = get_gitignore_files() + assert ignored_patterns == ["__pycache__/", "*.log"] + +def test_get_dependencies_sizes(): + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"Content-Length": "12345"} + with patch("requests.head", return_value=mock_response): + file_data = get_dependencies_sizes(["dependency1"], ["https://example.com/dependency1.whl"]) + assert file_data == [{"File Path": "dependency1", "Type": "Dependency", "Name": "dependency1", "Size (Bytes)": 12345}] From a9c824dd7996328c8fece9553f4033a5d5d7d46a Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Thu, 20 Mar 2025 12:51:05 +0100 Subject: [PATCH 02/70] integration into ddev --- ddev/pyproject.toml | 1 + ddev/src/ddev/cli/__init__.py | 2 + ddev/src/ddev/cli/size/__init__.py | 19 +++ ddev/src/ddev/cli/size/status.py | 216 +++++++++++++++++++++++++++++ ddev/tests/cli/size/__init__.py | 3 + ddev/tests/cli/size/test_status.py | 130 +++++++++++++++++ 6 files changed, 371 insertions(+) create mode 100644 ddev/src/ddev/cli/size/__init__.py create mode 100644 ddev/src/ddev/cli/size/status.py create mode 100644 ddev/tests/cli/size/__init__.py create mode 100644 ddev/tests/cli/size/test_status.py diff --git a/ddev/pyproject.toml b/ddev/pyproject.toml index 7218fa7a26a98..8a1ab15edbb2c 100644 --- a/ddev/pyproject.toml +++ b/ddev/pyproject.toml @@ -40,6 +40,7 @@ dependencies = [ "tomli-w", "tomlkit", "tqdm", + "requests" ] dynamic = ["version"] diff --git a/ddev/src/ddev/cli/__init__.py b/ddev/src/ddev/cli/__init__.py index 302f859cd9f54..a5924607a880c 100644 --- 
a/ddev/src/ddev/cli/__init__.py +++ b/ddev/src/ddev/cli/__init__.py @@ -25,6 +25,7 @@ from ddev.plugin import specs from ddev.utils.ci import running_in_ci from ddev.utils.fs import Path +from ddev.cli.size import size @click.group(context_settings={'help_option_names': ['-h', '--help']}, invoke_without_command=True) @@ -149,6 +150,7 @@ def ddev( ddev.add_command(status) ddev.add_command(test) ddev.add_command(validate) +ddev.add_command(size) __management_command = os.environ.get('PYAPP_COMMAND_NAME', '') if __management_command: diff --git a/ddev/src/ddev/cli/size/__init__.py b/ddev/src/ddev/cli/size/__init__.py new file mode 100644 index 0000000000000..25863ae1dbc5e --- /dev/null +++ b/ddev/src/ddev/cli/size/__init__.py @@ -0,0 +1,19 @@ +# (C) Datadog, Inc. 2022-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +import click + +from ddev.cli.size.status import status + + +@click.group(short_help='Get the size of integrations and dependencies by platform and python version') +def size(): + """Package Size Analyzer""" + pass + + +size.add_command(status) + +if __name__ == "__main__": + size() \ No newline at end of file diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py new file mode 100644 index 0000000000000..147e1dd34f216 --- /dev/null +++ b/ddev/src/ddev/cli/size/status.py @@ -0,0 +1,216 @@ +# (C) Datadog, Inc. 
2022-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +import click +import requests +import re +import os +import zlib +import io +from pathlib import Path +import sys +import csv as csv_lib + + +VALID_PLATFORMS = ["linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"] +VALID_PYTHON_VERSIONS = ["3.12"] +REPO_PATH = Path(__file__).resolve().parents[5] + + + +@click.command() +@click.option('--platform', type=click.Choice(VALID_PLATFORMS), help="Target platform") +@click.option('--python', 'version', type=click.Choice(VALID_PYTHON_VERSIONS), help="Python version (MAJOR.MINOR)") +@click.option('--compressed', is_flag=True, help="Measure compressed size") +@click.option('--csv', is_flag=True, help="Output in CSV format") +@click.pass_obj +def status(app, platform, version, compressed, csv): + platforms = VALID_PLATFORMS if platform is None else [platform] + versions = VALID_PYTHON_VERSIONS if version is None else [version] + + for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): + status_mode(app, plat, ver, compressed, csv, i) + + + +def status_mode(app,platform, version, compressed,csv,i): + if compressed: + modules = get_compressed_files(app) + get_compressed_dependencies(app, platform,version) + + grouped_modules = group_modules(modules,platform, version) + grouped_modules.sort(key=lambda x: x['Size (Bytes)'], reverse=True) + + if csv: + headers = grouped_modules[0].keys() + if i == 0: + app.display(",".join(headers)) # comas alrededor + + for row in grouped_modules: + app.display(",".join(str(row[h]) for h in headers)) + else: + modules_table = {col: {} for col in grouped_modules[0].keys()} + for i,row in enumerate(grouped_modules): + for key,value in row.items(): + modules_table[key][i] = str(value) + app.display_table(platform + " " + version, modules_table) + + + +def group_modules(modules, platform, version): + grouped_aux = {} + + for file in modules: + key = (file['Name'], 
file['Type']) + grouped_aux[key] = grouped_aux.get(key, 0) + file["Size (Bytes)"] + + return [{'Name': name, 'Type': type, 'Size (Bytes)': size, 'Size': convert_size(size), 'Platform': platform, 'Version': version} for (name,type), size in grouped_aux.items()] + + +def get_compressed_files(app): + #print("Getting compressed integrations") + + ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} + git_ignore = get_gitignore_files(app) + included_folder = "datadog_checks/" + + # script_path = + #REPO_PATH = os.path.abspath(os.path.join(os.path.abspath(__file__), "../../../../../../")) + + file_data = [] + for root, _, files in os.walk(REPO_PATH): + for file in files: + file_path = os.path.join(root, file) + + # Convert the path to a relative format within the repo + relative_path = os.path.relpath(file_path, REPO_PATH) + + # Filter files + if is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): + try: + # Compress the file + compressor = zlib.compressobj() + compressed_size = 0 + + # original_size = os.path.getsize(file_path) + with open(file_path, "rb") as f: + while chunk := f.read(8192): # Read in 8KB chunks + compressed_chunk = compressor.compress(chunk) + compressed_size += len(compressed_chunk) + + compressed_size += len(compressor.flush()) # Flush the buffer + integration = relative_path.split("/")[0] + file_data.append({ + "File Path": relative_path, + "Type": "Integration", + "Name": integration, + "Size (Bytes)": compressed_size + }) + + except Exception as e: + app.display_error(f"Error processing {relative_path}: {e}") + sys.exit(1) + + return file_data + + +def get_compressed_dependencies(app,platform, version): + #print("Getting compressed dependencies") + + #script_path = os.path.abspath(__file__) + #REPO_PATH = os.path.abspath(os.path.join(script_path, "../../../../../../")) + resolved_path = os.path.join(REPO_PATH, ".deps/resolved") + + if not os.path.exists(resolved_path) or not 
os.path.isdir(resolved_path): + app.display_error(f"Error: Directory not found {resolved_path}") + sys.exit(1) + + + for filename in os.listdir(resolved_path): + file_path = os.path.join(resolved_path, filename) + + if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): + deps, download_urls = get_dependencies(app, file_path) + return get_dependencies_sizes(app, deps, download_urls) + + + + + +def is_correct_dependency(platform, version, name): + return platform in name and version in name + +def get_dependencies_sizes(app, deps, download_urls): + file_data = [] + for dep, url in zip(deps, download_urls): + dep_response = requests.head(url) + if dep_response.status_code != 200: + app.display_error(f"Error {dep_response.status_code}: Unable to fetch the dependencies file") + sys.exit(1) + else: + size = dep_response.headers.get("Content-Length", None) + file_data.append({"File Path": dep, "Type": "Dependency", "Name": dep, "Size (Bytes)": int(size)}) + + return file_data + + +def get_dependencies(app,file_path): + download_urls = [] + deps = [] + try: + with open(file_path, "r", encoding="utf-8") as file: + file_content = file.read() + for line in file_content.splitlines(): + match = re.search(r"([\w\-\d\.]+) @ (https?://[^\s#]+)", line) + if match: + deps.append(match.group(1)) + download_urls.append(match.group(2)) + except Exception as e: + app.display_error(f"Error reading file {file_path}: {e}") + sys.exit(1) + + return deps, download_urls + +def is_valid_integration(path, included_folder, ignored_files, git_ignore): + # It is not an integration + if path.startswith('.'): + return False + # It is part of an integration and it is not in the datadog_checks folder + elif not (included_folder in path): + return False + # It is an irrelevant file + elif any(ignore in path for ignore in ignored_files): + return False + # This file is contained in .gitignore + elif any(ignore in path for ignore in git_ignore): + return False + else: + 
return True + + +def get_gitignore_files(app): + #script_path = os.path.abspath(__file__) + #repo_root = os.path.abspath(os.path.join(script_path, "../../../../../../")) + gitignore_path = os.path.join(REPO_PATH, ".gitignore") + if not os.path.exists(gitignore_path): + app.display_error(f"Error: .gitignore file not found at {gitignore_path}") + sys.exit(1) + + try: + with open(gitignore_path, "r", encoding="utf-8") as file: + gitignore_content = file.read() + ignored_patterns = [line.strip() for line in gitignore_content.splitlines() if line.strip() and not line.startswith("#")] + return ignored_patterns + except Exception as e: + app.display_error(f"Error reading .gitignore file: {e}") + sys.exit(1) + +def convert_size(size_bytes): + #Transforms bytes into a human-friendly format (KB, MB, GB) + for unit in [' B', ' KB', ' MB', ' GB']: + if size_bytes < 1024: + return (str(round(size_bytes, 2)) + unit) + size_bytes /= 1024 + return (str(round(size_bytes, 2)) + " TB") + + diff --git a/ddev/tests/cli/size/__init__.py b/ddev/tests/cli/size/__init__.py new file mode 100644 index 0000000000000..3eff9712cbcf5 --- /dev/null +++ b/ddev/tests/cli/size/__init__.py @@ -0,0 +1,3 @@ +# (C) Datadog, Inc. 2022-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) diff --git a/ddev/tests/cli/size/test_status.py b/ddev/tests/cli/size/test_status.py new file mode 100644 index 0000000000000..a7625797ef763 --- /dev/null +++ b/ddev/tests/cli/size/test_status.py @@ -0,0 +1,130 @@ +# (C) Datadog, Inc. 
2022-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from unittest.mock import patch, mock_open, MagicMock +import os +from ddev.cli.size.status import ( + get_compressed_dependencies, + get_gitignore_files, + convert_size, + is_valid_integration, + is_correct_dependency, + get_dependencies, + get_dependencies_sizes, + group_modules +) +from ddev.cli.application import Application + + +def test_is_correct_dependency(): + assert is_correct_dependency("windows-x86_64", "3.12", "windows-x86_64-3.12") + assert not is_correct_dependency("windows-x86_64", "3.12", "linux-x86_64-3.12") + assert not is_correct_dependency("windows-x86_64", "3.13", "windows-x86_64-3.12") + + +def test_convert_size(): + assert convert_size(500) == "500 B" + assert convert_size(1024) == "1.0 KB" + assert convert_size(1048576) == "1.0 MB" + assert convert_size(1073741824) == "1.0 GB" + +def test_is_valid_integration(): + included_folder = "datadog_checks/" + ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} + git_ignore = [".git", "__pycache__"] + + assert is_valid_integration("datadog_checks/example.py", included_folder, ignored_files, git_ignore) + assert not is_valid_integration("__pycache__/file.py", included_folder, ignored_files, git_ignore) + assert not is_valid_integration("datadog_checks_dev/example.py", included_folder, ignored_files, git_ignore) + assert not is_valid_integration(".git/config", included_folder, ignored_files, git_ignore) + +def test_get_dependencies(terminal): + file_content = "dependency1 @ https://example.com/dependency1.whl\ndependency2 @ https://example.com/dependency2.whl" + mock_open_obj = mock_open(read_data=file_content) + with patch("builtins.open", mock_open_obj): + deps, urls = get_dependencies(terminal, "fake_path") + assert deps == ["dependency1", "dependency2"] + assert urls == ["https://example.com/dependency1.whl", "https://example.com/dependency2.whl"] + +def 
test_get_gitignore_files(terminal): + mock_gitignore = "__pycache__/\n*.log\n" # Sample .gitignore file + with patch("builtins.open", mock_open(read_data=mock_gitignore)): + with patch("os.path.exists", return_value=True): + ignored_patterns = get_gitignore_files(terminal) + assert ignored_patterns == ["__pycache__/", "*.log"] + +def test_get_dependencies_sizes(terminal): + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"Content-Length": "12345"} + with patch("requests.head", return_value=mock_response): + file_data = get_dependencies_sizes(terminal, ["dependency1"], ["https://example.com/dependency1.whl"]) + assert file_data == [{"File Path": "dependency1", "Type": "Dependency", "Name": "dependency1", "Size (Bytes)": 12345}] + +def test_get_compressed_dependencies(terminal): + platform = "windows-x86_64" + version = "3.12" + + fake_file_content = "dependency1 @ https://example.com/dependency1.whl\ndependency2 @ https://example.com/dependency2.whl" + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"Content-Length": "12345"} + + with patch("os.path.exists", return_value=True), \ + patch("os.path.isdir", return_value=True), \ + patch("os.listdir", return_value=[f"{platform}-{version}"]), \ + patch("os.path.isfile", return_value=True), \ + patch("builtins.open", mock_open(read_data=fake_file_content)), \ + patch("requests.head", return_value=mock_response): + + file_data = get_compressed_dependencies(terminal, platform, version) + + assert file_data == [ + {"File Path": "dependency1", "Type": "Dependency", "Name": "dependency1", "Size (Bytes)": 12345}, + {"File Path": "dependency2", "Type": "Dependency", "Name": "dependency2", "Size (Bytes)": 12345}, + ] +def test_group_modules(): + modules = [ + {"Name": "module1", "Type": "A", "Size (Bytes)": 1500}, + {"Name": "module2", "Type": "B", "Size (Bytes)": 3000}, + {"Name": "module1", "Type": "A", "Size (Bytes)": 2500}, + {"Name": 
"module3", "Type": "A", "Size (Bytes)": 4000}, + ] + + platform = "linux-aarch64" + version = "3.12" + + expected_output = [ + {"Name": "module1", "Type": "A", "Size (Bytes)": 4000, "Size": "3.91 KB", "Platform": "linux-aarch64", "Version": "3.12"}, + {"Name": "module2", "Type": "B", "Size (Bytes)": 3000, "Size": "2.93 KB", "Platform": "linux-aarch64", "Version": "3.12"}, + {"Name": "module3", "Type": "A", "Size (Bytes)": 4000, "Size": "3.91 KB", "Platform": "linux-aarch64", "Version": "3.12"}, + ] + + assert group_modules(modules, platform, version) == expected_output + +def test_statu_no_args(ddev): + result = ddev('size', 'status', '--compressed') + assert result.exit_code == 0 + +def test_status(ddev): + result = ddev('size', 'status', '--platform', 'linux-aarch64', '--python', '3.12', '--compressed') + assert result.exit_code == 0 + +def test_status_csv(ddev): + result = ddev('size', 'status', '--platform', 'linux-aarch64', '--python', '3.12', '--compressed', '--csv') + assert result.exit_code == 0 + +def test_status_fail(ddev): + result = ddev('size', 'status', '--platform', 'linux', '--python', '3.12', '--compressed') + assert result.exit_code != 0 + +def test_status_fail2(ddev): + result = ddev('size', 'status', '--platform', 'linux-aarch64', '--python', '2.10', '--compressed') + assert result.exit_code != 0 + +def test_status_fail2(ddev): + result = ddev('size', 'status', '--platform', 'linux', '--python' ,'2.10', '--compressed') + assert result.exit_code != 0 + From b4d0f5fb4c015f6970d80e66d462e8b19c20fa55 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Thu, 20 Mar 2025 12:54:36 +0100 Subject: [PATCH 03/70] clean commented code --- ddev/src/ddev/cli/size/status.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index 147e1dd34f216..97f52300ae562 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -68,15 +68,11 
@@ def group_modules(modules, platform, version): def get_compressed_files(app): - #print("Getting compressed integrations") ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} git_ignore = get_gitignore_files(app) included_folder = "datadog_checks/" - # script_path = - #REPO_PATH = os.path.abspath(os.path.join(os.path.abspath(__file__), "../../../../../../")) - file_data = [] for root, _, files in os.walk(REPO_PATH): for file in files: @@ -88,7 +84,6 @@ def get_compressed_files(app): # Filter files if is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): try: - # Compress the file compressor = zlib.compressobj() compressed_size = 0 @@ -115,10 +110,7 @@ def get_compressed_files(app): def get_compressed_dependencies(app,platform, version): - #print("Getting compressed dependencies") - - #script_path = os.path.abspath(__file__) - #REPO_PATH = os.path.abspath(os.path.join(script_path, "../../../../../../")) + resolved_path = os.path.join(REPO_PATH, ".deps/resolved") if not os.path.exists(resolved_path) or not os.path.isdir(resolved_path): @@ -188,9 +180,7 @@ def is_valid_integration(path, included_folder, ignored_files, git_ignore): return True -def get_gitignore_files(app): - #script_path = os.path.abspath(__file__) - #repo_root = os.path.abspath(os.path.join(script_path, "../../../../../../")) +def get_gitignore_files(app): gitignore_path = os.path.join(REPO_PATH, ".gitignore") if not os.path.exists(gitignore_path): app.display_error(f"Error: .gitignore file not found at {gitignore_path}") From 79b0fa80f22f7031df9adc1a52ecf013220e462b Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Tue, 25 Mar 2025 12:47:43 +0100 Subject: [PATCH 04/70] add diff mode --- ddev/src/ddev/cli/__init__.py | 2 +- ddev/src/ddev/cli/size/GitRepo.py | 27 +++++ ddev/src/ddev/cli/size/__init__.py | 5 +- ddev/src/ddev/cli/size/common.py | 144 ++++++++++++++++++++++++++ ddev/src/ddev/cli/size/diff.py | 151 +++++++++++++++++++++++++++ 
ddev/src/ddev/cli/size/status.py | 159 +++++----------------------- ddev/tests/cli/size/test_common.py | 141 +++++++++++++++++++++++++ ddev/tests/cli/size/test_diff.py | 112 ++++++++++++++++++++ ddev/tests/cli/size/test_status.py | 160 +++++++++++++---------------- 9 files changed, 676 insertions(+), 225 deletions(-) create mode 100644 ddev/src/ddev/cli/size/GitRepo.py create mode 100644 ddev/src/ddev/cli/size/common.py create mode 100644 ddev/src/ddev/cli/size/diff.py create mode 100644 ddev/tests/cli/size/test_common.py create mode 100644 ddev/tests/cli/size/test_diff.py diff --git a/ddev/src/ddev/cli/__init__.py b/ddev/src/ddev/cli/__init__.py index a5924607a880c..e16dc80db4146 100644 --- a/ddev/src/ddev/cli/__init__.py +++ b/ddev/src/ddev/cli/__init__.py @@ -18,6 +18,7 @@ from ddev.cli.env import env from ddev.cli.meta import meta from ddev.cli.release import release +from ddev.cli.size import size from ddev.cli.status import status from ddev.cli.test import test from ddev.cli.validate import validate @@ -25,7 +26,6 @@ from ddev.plugin import specs from ddev.utils.ci import running_in_ci from ddev.utils.fs import Path -from ddev.cli.size import size @click.group(context_settings={'help_option_names': ['-h', '--help']}, invoke_without_command=True) diff --git a/ddev/src/ddev/cli/size/GitRepo.py b/ddev/src/ddev/cli/size/GitRepo.py new file mode 100644 index 0000000000000..04e736b1058b5 --- /dev/null +++ b/ddev/src/ddev/cli/size/GitRepo.py @@ -0,0 +1,27 @@ +import os +import shutil +import tempfile +import subprocess + +class GitRepo: + def __init__(self, url): + self.url = url + self.repo_dir = None + + def __enter__(self): + self.repo_dir = tempfile.mkdtemp() + self._run("git init") + self._run(f"git remote add origin {self.url}") + return self + + def _run(self, cmd): + subprocess.run(cmd, shell=True, cwd=self.repo_dir, check=True) + + def checkout_commit(self, commit): + self._run(f"git fetch --depth 1 origin {commit}") + self._run(f"git checkout {commit}") + 
+ + def __exit__(self, exception_type, exception_value, exception_traceback): + if self.repo_dir and os.path.exists(self.repo_dir): + shutil.rmtree(self.repo_dir) \ No newline at end of file diff --git a/ddev/src/ddev/cli/size/__init__.py b/ddev/src/ddev/cli/size/__init__.py index 25863ae1dbc5e..1cb5140b75d23 100644 --- a/ddev/src/ddev/cli/size/__init__.py +++ b/ddev/src/ddev/cli/size/__init__.py @@ -5,7 +5,7 @@ import click from ddev.cli.size.status import status - +from ddev.cli.size.diff import diff @click.group(short_help='Get the size of integrations and dependencies by platform and python version') def size(): @@ -14,6 +14,7 @@ def size(): size.add_command(status) +size.add_command(diff) if __name__ == "__main__": - size() \ No newline at end of file + size() diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py new file mode 100644 index 0000000000000..922b5ca748a4f --- /dev/null +++ b/ddev/src/ddev/cli/size/common.py @@ -0,0 +1,144 @@ +# (C) Datadog, Inc. 
2022-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +import sys +import re +import requests +import os +import zlib + +#utilities + +#mirar si existe +def convert_size(size_bytes): + # Transforms bytes into a human-friendly format (KB, MB, GB) + for unit in [' B', ' KB', ' MB', ' GB']: + if size_bytes < 1024: + return str(round(size_bytes, 2)) + unit + size_bytes /= 1024 + return str(round(size_bytes, 2)) + " TB" + + +def is_valid_integration(path, included_folder, ignored_files, git_ignore): + # It is not an integration + if path.startswith('.'): + return False + # It is part of an integration and it is not in the datadog_checks folder + elif included_folder not in path: + return False + # It is an irrelevant file + elif any(ignore in path for ignore in ignored_files): + return False + # This file is contained in .gitignore + elif any(ignore in path for ignore in git_ignore): + return False + else: + return True + + +def is_correct_dependency(platform, version, name): + return platform in name and version in name + +def print_csv(app, i, modules): + headers = modules[0].keys() + if i == 0: + app.display(",".join(headers)) + + for row in modules: + app.display(",".join(format(str(row[h])) for h in headers)) + +def format(s): + if "," in s: + return '"' + s + '"' + else: + return s + +def print_table(app, modules, platform, version): + modules_table = {col: {} for col in modules[0].keys()} + for i, row in enumerate(modules): + for key, value in row.items(): + modules_table[key][i] = str(value) + app.display_table(platform + " " + version, modules_table) + +def get_dependencies_sizes(app, deps, download_urls): + file_data = [] + for dep, url in zip(deps, download_urls, strict=False): + dep_response = requests.head(url) + if dep_response.status_code != 200: + app.display_error(f"Error {dep_response.status_code}: Unable to fetch the dependencies file") + sys.exit(1) + else: + size = dep_response.headers.get("Content-Length", 
None) + file_data.append({"File Path": dep, "Type": "Dependency", "Name": dep, "Size (Bytes)": int(size)}) + + return file_data + +def get_dependencies(app, file_path): + download_urls = [] + deps = [] + try: + with open(file_path, "r", encoding="utf-8") as file: + file_content = file.read() + for line in file_content.splitlines(): + match = re.search(r"([\w\-\d\.]+) @ (https?://[^\s#]+)", line) + if match: + deps.append(match.group(1)) + download_urls.append(match.group(2)) + except Exception as e: + app.display_error(f"Error reading file {file_path}: {e}") + sys.exit(1) + + return deps, download_urls + + +def group_modules(modules, platform, version): + grouped_aux = {} + + for file in modules: + key = (file['Name'], file['Type']) + grouped_aux[key] = grouped_aux.get(key, 0) + file["Size (Bytes)"] + + return [ + { + 'Name': name , + 'Type': type , + 'Size (Bytes)': size , + 'Size': convert_size(size), + 'Platform': platform , + 'Version': version , + } + for (name, type), size in grouped_aux.items() + ] + +def get_gitignore_files(app, repo_path): + gitignore_path = os.path.join(repo_path, ".gitignore") + if not os.path.exists(gitignore_path): + app.display_error(f"Error: .gitignore file not found at {gitignore_path}") + sys.exit(1) + + try: + with open(gitignore_path, "r", encoding="utf-8") as file: + gitignore_content = file.read() + ignored_patterns = [ + line.strip() for line in gitignore_content.splitlines() if line.strip() and not line.startswith("#") + ] + return ignored_patterns + except Exception as e: + app.display_error(f"Error reading .gitignore file: {e}") + sys.exit(1) + +def compress(app, file_path, relative_path): + compressor = zlib.compressobj() + compressed_size = 0 + try: + # original_size = os.path.getsize(file_path) + with open(file_path, "rb") as f: + while chunk := f.read(8192): # Read in 8KB chunks + compressed_chunk = compressor.compress(chunk) + compressed_size += len(compressed_chunk) + + compressed_size += len(compressor.flush()) + 
return compressed_size + except Exception as e: + app.display_error(f"Error processing {relative_path}: {e}") + sys.exit(1) \ No newline at end of file diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py new file mode 100644 index 0000000000000..0739dbe4dd482 --- /dev/null +++ b/ddev/src/ddev/cli/size/diff.py @@ -0,0 +1,151 @@ +# (C) Datadog, Inc. 2022-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +import os +import sys +from pathlib import Path +import click +import requests +from .common import is_correct_dependency, is_valid_integration, group_modules, print_csv, print_table, get_gitignore_files, get_dependencies, compress +from .GitRepo import GitRepo + +VALID_PLATFORMS = ["linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"] +VALID_PYTHON_VERSIONS = ["3.12"] + + +@click.command() +@click.argument("before") +@click.argument("after") +@click.option('--platform', type=click.Choice(VALID_PLATFORMS), help="Target platform") +@click.option('--python', 'version', type=click.Choice(VALID_PYTHON_VERSIONS), help="Python version (MAJOR.MINOR)") +@click.option('--compressed', is_flag=True, help="Measure compressed size") +@click.option('--csv', is_flag=True, help="Output in CSV format") +@click.pass_obj +def diff(app, before, after, platform, version, compressed, csv): + platforms = VALID_PLATFORMS if platform is None else [platform] + versions = VALID_PYTHON_VERSIONS if version is None else [version] + + for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): + diff_mode(app, before, after, plat, ver, compressed, csv, i) + + +def diff_mode(app, before, after, platform, version, compressed, csv, i): + if compressed: + with GitRepo("https://github.com/DataDog/integrations-core.git") as gitRepo: + repo = gitRepo.repo_dir + gitRepo.checkout_commit(before) + files_b = get_compressed_files(app, repo) + dependencies_b = get_compressed_dependencies(app, repo, platform, 
version) + gitRepo.checkout_commit(after) + files_a = get_compressed_files(app, repo) + dependencies_a = get_compressed_dependencies(app, repo, platform, version) + + integrations = get_diff(files_b, files_a, 'Integration') + dependencies = get_diff(dependencies_b, dependencies_a, 'Dependency') + + grouped_modules = group_modules(integrations + dependencies, platform, version) + grouped_modules.sort(key=lambda x: x['Size (Bytes)'], reverse=True) + for module in grouped_modules: + if module['Size (Bytes)'] > 0: + module['Size'] = f"+{module['Size']}" + + if csv: + print_csv(app, i, grouped_modules) + else: + print_table(app, grouped_modules, platform, version) + + + + +def get_diff(size_before, size_after, type): + all_paths = set(size_before.keys()) | set(size_after.keys()) + diff_files = [] + + for path in all_paths: + size_b = size_before.get(path, 0) + size_a = size_after.get(path, 0) + size_delta = size_a - size_b + module = Path(path).parts[0] + if size_delta != 0: + if size_b == 0: + diff_files.append( + { + 'File Path': path, + 'Type': type, + 'Name': module + " (NEW)", + 'Size (Bytes)': size_delta, + } + ) + elif size_a == 0: + diff_files.append( + { + 'File Path': path, + 'Type': type, + 'Name': module + " (DELETED)", + 'Size (Bytes)': size_delta, + } + ) + else: + diff_files.append( + { + 'File Path': path, + 'Type': type, + 'Name': module, + 'Size (Bytes)': size_delta, + } + ) + + + return diff_files + +def get_compressed_files(app, repo_path): + + ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} + git_ignore = get_gitignore_files(app, repo_path) + included_folder = "datadog_checks/" + + file_data = {} + for root, _, files in os.walk(repo_path): + for file in files: + file_path = os.path.join(root, file) + + # Convert the path to a relative format within the repo + relative_path = os.path.relpath(file_path, repo_path) + + # Filter files + if is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): + 
compressed_size = compress(app, file_path, relative_path) + file_data[relative_path] = compressed_size + return file_data + +def get_compressed_dependencies(app, repo_path, platform, version): + + resolved_path = os.path.join(repo_path, ".deps/resolved") + + if not os.path.exists(resolved_path) or not os.path.isdir(resolved_path): + app.display_error(f"Error: Directory not found {resolved_path}") + sys.exit(1) + + for filename in os.listdir(resolved_path): + file_path = os.path.join(resolved_path, filename) + + if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): + deps, download_urls = get_dependencies(app, file_path) + return get_dependencies_sizes(app, deps, download_urls) + return {} + + +def get_dependencies_sizes(app, deps, download_urls): + file_data = {} + for dep, url in zip(deps, download_urls, strict=False): + dep_response = requests.head(url) + if dep_response.status_code != 200: + app.display_error(f"Error {dep_response.status_code}: Unable to fetch the dependencies file") + sys.exit(1) + else: + size = dep_response.headers.get("Content-Length", None) + file_data[dep] = int(size) + + return file_data + diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index 97f52300ae562..f52f41015a109 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -2,23 +2,21 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import click -import requests -import re import os +import re +import sys import zlib -import io from pathlib import Path -import sys -import csv as csv_lib +import click + +from .common import convert_size, get_dependencies_sizes, is_correct_dependency, is_valid_integration, group_modules, print_csv, print_table, get_gitignore_files, get_dependencies, compress VALID_PLATFORMS = ["linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"] VALID_PYTHON_VERSIONS = ["3.12"] REPO_PATH = Path(__file__).resolve().parents[5] - 
@click.command() @click.option('--platform', type=click.Choice(VALID_PLATFORMS), help="Target platform") @click.option('--python', 'version', type=click.Choice(VALID_PYTHON_VERSIONS), help="Python version (MAJOR.MINOR)") @@ -33,44 +31,25 @@ def status(app, platform, version, compressed, csv): status_mode(app, plat, ver, compressed, csv, i) - -def status_mode(app,platform, version, compressed,csv,i): +def status_mode(app, platform, version, compressed, csv, i): if compressed: - modules = get_compressed_files(app) + get_compressed_dependencies(app, platform,version) - - grouped_modules = group_modules(modules,platform, version) + modules = get_compressed_files(app) + get_compressed_dependencies(app, platform, version) + + grouped_modules = group_modules(modules, platform, version) grouped_modules.sort(key=lambda x: x['Size (Bytes)'], reverse=True) - - if csv: - headers = grouped_modules[0].keys() - if i == 0: - app.display(",".join(headers)) # comas alrededor - for row in grouped_modules: - app.display(",".join(str(row[h]) for h in headers)) + if csv: + print_csv(app, i, grouped_modules) else: - modules_table = {col: {} for col in grouped_modules[0].keys()} - for i,row in enumerate(grouped_modules): - for key,value in row.items(): - modules_table[key][i] = str(value) - app.display_table(platform + " " + version, modules_table) + print_table(app, grouped_modules, platform, version) - -def group_modules(modules, platform, version): - grouped_aux = {} - - for file in modules: - key = (file['Name'], file['Type']) - grouped_aux[key] = grouped_aux.get(key, 0) + file["Size (Bytes)"] - - return [{'Name': name, 'Type': type, 'Size (Bytes)': size, 'Size': convert_size(size), 'Platform': platform, 'Version': version} for (name,type), size in grouped_aux.items()] def get_compressed_files(app): ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} - git_ignore = get_gitignore_files(app) + git_ignore = get_gitignore_files(app, REPO_PATH) included_folder = 
"datadog_checks/" file_data = [] @@ -81,126 +60,38 @@ def get_compressed_files(app): # Convert the path to a relative format within the repo relative_path = os.path.relpath(file_path, REPO_PATH) - # Filter files + # Filter files if is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): - try: - compressor = zlib.compressobj() - compressed_size = 0 - - # original_size = os.path.getsize(file_path) - with open(file_path, "rb") as f: - while chunk := f.read(8192): # Read in 8KB chunks - compressed_chunk = compressor.compress(chunk) - compressed_size += len(compressed_chunk) - - compressed_size += len(compressor.flush()) # Flush the buffer - integration = relative_path.split("/")[0] - file_data.append({ + compressed_size = compress(app, file_path, relative_path) + integration = relative_path.split(os.sep)[0] + file_data.append( + { "File Path": relative_path, "Type": "Integration", "Name": integration, - "Size (Bytes)": compressed_size - }) + "Size (Bytes)": compressed_size, + } + ) + return file_data - except Exception as e: - app.display_error(f"Error processing {relative_path}: {e}") - sys.exit(1) - return file_data - -def get_compressed_dependencies(app,platform, version): - +def get_compressed_dependencies(app, platform, version): + resolved_path = os.path.join(REPO_PATH, ".deps/resolved") if not os.path.exists(resolved_path) or not os.path.isdir(resolved_path): app.display_error(f"Error: Directory not found {resolved_path}") sys.exit(1) - for filename in os.listdir(resolved_path): file_path = os.path.join(resolved_path, filename) - + if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): deps, download_urls = get_dependencies(app, file_path) return get_dependencies_sizes(app, deps, download_urls) - - - -def is_correct_dependency(platform, version, name): - return platform in name and version in name - -def get_dependencies_sizes(app, deps, download_urls): - file_data = [] - for dep, url in zip(deps, 
download_urls): - dep_response = requests.head(url) - if dep_response.status_code != 200: - app.display_error(f"Error {dep_response.status_code}: Unable to fetch the dependencies file") - sys.exit(1) - else: - size = dep_response.headers.get("Content-Length", None) - file_data.append({"File Path": dep, "Type": "Dependency", "Name": dep, "Size (Bytes)": int(size)}) - - return file_data - - -def get_dependencies(app,file_path): - download_urls = [] - deps = [] - try: - with open(file_path, "r", encoding="utf-8") as file: - file_content = file.read() - for line in file_content.splitlines(): - match = re.search(r"([\w\-\d\.]+) @ (https?://[^\s#]+)", line) - if match: - deps.append(match.group(1)) - download_urls.append(match.group(2)) - except Exception as e: - app.display_error(f"Error reading file {file_path}: {e}") - sys.exit(1) - - return deps, download_urls - -def is_valid_integration(path, included_folder, ignored_files, git_ignore): - # It is not an integration - if path.startswith('.'): - return False - # It is part of an integration and it is not in the datadog_checks folder - elif not (included_folder in path): - return False - # It is an irrelevant file - elif any(ignore in path for ignore in ignored_files): - return False - # This file is contained in .gitignore - elif any(ignore in path for ignore in git_ignore): - return False - else: - return True - - -def get_gitignore_files(app): - gitignore_path = os.path.join(REPO_PATH, ".gitignore") - if not os.path.exists(gitignore_path): - app.display_error(f"Error: .gitignore file not found at {gitignore_path}") - sys.exit(1) - - try: - with open(gitignore_path, "r", encoding="utf-8") as file: - gitignore_content = file.read() - ignored_patterns = [line.strip() for line in gitignore_content.splitlines() if line.strip() and not line.startswith("#")] - return ignored_patterns - except Exception as e: - app.display_error(f"Error reading .gitignore file: {e}") - sys.exit(1) -def convert_size(size_bytes): - 
#Transforms bytes into a human-friendly format (KB, MB, GB) - for unit in [' B', ' KB', ' MB', ' GB']: - if size_bytes < 1024: - return (str(round(size_bytes, 2)) + unit) - size_bytes /= 1024 - return (str(round(size_bytes, 2)) + " TB") diff --git a/ddev/tests/cli/size/test_common.py b/ddev/tests/cli/size/test_common.py new file mode 100644 index 0000000000000..e7dbd96d0672d --- /dev/null +++ b/ddev/tests/cli/size/test_common.py @@ -0,0 +1,141 @@ +import os +from unittest.mock import MagicMock, mock_open, patch + +from ddev.cli.application import Application +from ddev.cli.size.status import ( + convert_size, + print_csv, + compress, + get_dependencies, + get_dependencies_sizes, + get_gitignore_files, + group_modules, + is_correct_dependency, + is_valid_integration, +) + + +def test_is_correct_dependency(): + assert is_correct_dependency("windows-x86_64", "3.12", "windows-x86_64-3.12") + assert not is_correct_dependency("windows-x86_64", "3.12", "linux-x86_64-3.12") + assert not is_correct_dependency("windows-x86_64", "3.13", "windows-x86_64-3.12") + + +def test_convert_size(): + assert convert_size(500) == "500 B" + assert convert_size(1024) == "1.0 KB" + assert convert_size(1048576) == "1.0 MB" + assert convert_size(1073741824) == "1.0 GB" + + +def test_is_valid_integration(): + included_folder = "datadog_checks/" + ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} + git_ignore = [".git", "__pycache__"] + + assert is_valid_integration("datadog_checks/example.py", included_folder, ignored_files, git_ignore) + assert not is_valid_integration("__pycache__/file.py", included_folder, ignored_files, git_ignore) + assert not is_valid_integration("datadog_checks_dev/example.py", included_folder, ignored_files, git_ignore) + assert not is_valid_integration(".git/config", included_folder, ignored_files, git_ignore) + + +def test_get_dependencies(terminal): + file_content = ( + "dependency1 @ https://example.com/dependency1.whl\ndependency2 @ 
https://example.com/dependency2.whl" + ) + mock_open_obj = mock_open(read_data=file_content) + with patch("builtins.open", mock_open_obj): + deps, urls = get_dependencies(terminal, "fake_path") + assert deps == ["dependency1", "dependency2"] + assert urls == ["https://example.com/dependency1.whl", "https://example.com/dependency2.whl"] + + +def test_get_dependencies_sizes(terminal): + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"Content-Length": "12345"} + with patch("requests.head", return_value=mock_response): + file_data = get_dependencies_sizes(terminal, ["dependency1"], ["https://example.com/dependency1.whl"]) + assert file_data == [ + {"File Path": "dependency1", "Type": "Dependency", "Name": "dependency1", "Size (Bytes)": 12345} + ] + + +def test_group_modules(): + modules = [ + {"Name": "module1", "Type": "A", "Size (Bytes)": 1500}, + {"Name": "module2", "Type": "B", "Size (Bytes)": 3000}, + {"Name": "module1", "Type": "A", "Size (Bytes)": 2500}, + {"Name": "module3", "Type": "A", "Size (Bytes)": 4000}, + ] + + platform = "linux-aarch64" + version = "3.12" + + expected_output = [ + { + "Name": "module1", + "Type": "A", + "Size (Bytes)": 4000, + "Size": "3.91 KB", + "Platform": "linux-aarch64", + "Version": "3.12", + }, + { + "Name": "module2", + "Type": "B", + "Size (Bytes)": 3000, + "Size": "2.93 KB", + "Platform": "linux-aarch64", + "Version": "3.12", + }, + { + "Name": "module3", + "Type": "A", + "Size (Bytes)": 4000, + "Size": "3.91 KB", + "Platform": "linux-aarch64", + "Version": "3.12", + }, + ] + + assert group_modules(modules, platform, version) == expected_output + +def test_get_gitignore_files(terminal): + mock_gitignore = "__pycache__/\n*.log\n" # Sample .gitignore file + repo_path = "/fake/repo" + with patch("builtins.open", mock_open(read_data=mock_gitignore)): + with patch("os.path.exists", return_value=True): + ignored_patterns = get_gitignore_files(terminal, repo_path) + assert ignored_patterns == 
["__pycache__/", "*.log"] + +def test_compress(): + mock_app = MagicMock() + fake_content = b'a' * 16384 + original_size = len(fake_content) + + m = mock_open(read_data=fake_content) + with patch("builtins.open", m): + compressed_size = compress(mock_app, "fake/path/file.py", "relative/path/file.py") + + assert isinstance(compressed_size, int) + assert compressed_size > 0 + assert compressed_size < original_size + +def test_print_csv(): + mock_app = MagicMock() + modules = [ + {"Name": "module1", "Size": 123}, + {"Name": "module,with,comma", "Size": 456}, + ] + + print_csv(mock_app, i=0, modules=modules) + + expected_calls = [ + (("Name,Size",),), + (('module1,123',),), + (('"module,with,comma",456',),), + ] + + actual_calls = mock_app.display.call_args_list + assert actual_calls == expected_calls diff --git a/ddev/tests/cli/size/test_diff.py b/ddev/tests/cli/size/test_diff.py new file mode 100644 index 0000000000000..288067aa37bfc --- /dev/null +++ b/ddev/tests/cli/size/test_diff.py @@ -0,0 +1,112 @@ +# (C) Datadog, Inc. 
2022-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from unittest.mock import MagicMock, mock_open, patch + +from ddev.cli.size.diff import ( + get_compressed_files, + get_compressed_dependencies, + get_diff + +) + +def test_get_compressed_files(): + mock_app = MagicMock() + mock_repo_path = "root" + + mock_files = [ + ("root/integration/datadog_checks", [], ["file1.py", "file2.py"]), + ("root/integration_b/datadog_checks", [], ["file3.py"]), + ("root", [], ["ignored.py"]), + ] + + def fake_compress(app, file_path, relative_path): + return 1000 + + fake_gitignore = {"ignored.py"} + + with patch("os.walk", return_value=mock_files), \ + patch("os.path.relpath", side_effect=lambda path, _: path.replace("root/", "")), \ + patch("os.path.exists", return_value=True), \ + patch("builtins.open", mock_open(read_data="__pycache__/\n*.log\n")),\ + patch("ddev.cli.size.diff.get_gitignore_files", return_value=fake_gitignore), \ + patch("ddev.cli.size.diff.is_valid_integration", side_effect=lambda path, folder, ignored, git_ignore: path.startswith("integration")), \ + patch("ddev.cli.size.diff.compress", side_effect=fake_compress): + + result = get_compressed_files(mock_app, mock_repo_path) + + expected = { + "integration/datadog_checks/file1.py": 1000, + "integration/datadog_checks/file2.py": 1000, + "integration_b/datadog_checks/file3.py": 1000 + } + + + assert result == expected + +def test_get_compressed_dependencies(terminal): + platform = "windows-x86_64" + version = "3.12" + + fake_file_content = ( + "dependency1 @ https://example.com/dependency1.whl\ndependency2 @ https://example.com/dependency2.whl" + ) + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"Content-Length": "12345"} + mock_repo_path = "root" + + with ( + patch("os.path.exists", return_value=True), + patch("os.path.isdir", return_value=True), + patch("os.listdir", return_value=[f"{platform}-{version}"]), + 
patch("os.path.isfile", return_value=True), + patch("builtins.open", mock_open(read_data=fake_file_content)), + patch("requests.head", return_value=mock_response), + ): + + file_data = get_compressed_dependencies(terminal,mock_repo_path, platform, version) + + assert file_data == { + "dependency1": 12345, + "dependency2": 12345, + } + + def test_get_diff(): + size_before = { + "integration/foo.py": 1000, + "integration/bar.py": 2000, + "integration/deleted.py": 1500, + } + size_after = { + "integration/foo.py": 1200, # modified + "integration/bar.py": 2000, # unchanged + "integration/new.py": 800, # new + } + + expected = [ + { + "File Path": "integration/foo.py", + "Type": "Integration", + "Name": "integration", + "Size (Bytes)": 200, + }, + { + "File Path": "integration/deleted.py", + "Type": "Integration", + "Name": "integration (DELETED)", + "Size (Bytes)": -1500, + }, + { + "File Path": "integration/new.py", + "Type": "Integration", + "Name": "integration (NEW)", + "Size (Bytes)": 800, + } + ] + + result = get_diff(size_before, size_after, "Integration") + assert sorted(result, key=lambda x: x["File Path"]) == sorted(expected, key=lambda x: x["File Path"]) + diff --git a/ddev/tests/cli/size/test_status.py b/ddev/tests/cli/size/test_status.py index a7625797ef763..1792802b44bca 100644 --- a/ddev/tests/cli/size/test_status.py +++ b/ddev/tests/cli/size/test_status.py @@ -2,129 +2,113 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from unittest.mock import patch, mock_open, MagicMock -import os +from unittest.mock import MagicMock, mock_open, patch + from ddev.cli.size.status import ( + get_compressed_files, get_compressed_dependencies, - get_gitignore_files, - convert_size, - is_valid_integration, - is_correct_dependency, - get_dependencies, - get_dependencies_sizes, - group_modules + ) -from ddev.cli.application import Application +def test_get_compressed_files(): + mock_app = MagicMock() -def test_is_correct_dependency(): - 
assert is_correct_dependency("windows-x86_64", "3.12", "windows-x86_64-3.12") - assert not is_correct_dependency("windows-x86_64", "3.12", "linux-x86_64-3.12") - assert not is_correct_dependency("windows-x86_64", "3.13", "windows-x86_64-3.12") + mock_files = [ + ("root/integration/datadog_checks", [], ["file1.py", "file2.py"]), + ("root/integration_b/datadog_checks", [], ["file3.py"]), + ("root", [], ["ignored.py"]), + ] + + def fake_compress(app, file_path, relative_path): + return 1000 + + fake_gitignore = {"ignored.py"} + + with patch("os.walk", return_value=mock_files), \ + patch("os.path.relpath", side_effect=lambda path, _: path.replace("root/", "")), \ + patch("ddev.cli.size.status.get_gitignore_files", return_value=fake_gitignore), \ + patch("ddev.cli.size.status.is_valid_integration", side_effect=lambda path, folder, ignored, git_ignore: path.startswith("integration")), \ + patch("ddev.cli.size.status.compress", side_effect=fake_compress): + + result = get_compressed_files(mock_app) + + expected = [ + { + "File Path": "integration/datadog_checks/file1.py", + "Type": "Integration", + "Name": "integration", + "Size (Bytes)": 1000, + }, + { + "File Path": "integration/datadog_checks/file2.py", + "Type": "Integration", + "Name": "integration", + "Size (Bytes)": 1000, + }, + { + "File Path": "integration_b/datadog_checks/file3.py", + "Type": "Integration", + "Name": "integration_b", + "Size (Bytes)": 1000, + }, + ] + + assert result == expected - -def test_convert_size(): - assert convert_size(500) == "500 B" - assert convert_size(1024) == "1.0 KB" - assert convert_size(1048576) == "1.0 MB" - assert convert_size(1073741824) == "1.0 GB" - -def test_is_valid_integration(): - included_folder = "datadog_checks/" - ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} - git_ignore = [".git", "__pycache__"] - - assert is_valid_integration("datadog_checks/example.py", included_folder, ignored_files, git_ignore) - assert not 
is_valid_integration("__pycache__/file.py", included_folder, ignored_files, git_ignore) - assert not is_valid_integration("datadog_checks_dev/example.py", included_folder, ignored_files, git_ignore) - assert not is_valid_integration(".git/config", included_folder, ignored_files, git_ignore) - -def test_get_dependencies(terminal): - file_content = "dependency1 @ https://example.com/dependency1.whl\ndependency2 @ https://example.com/dependency2.whl" - mock_open_obj = mock_open(read_data=file_content) - with patch("builtins.open", mock_open_obj): - deps, urls = get_dependencies(terminal, "fake_path") - assert deps == ["dependency1", "dependency2"] - assert urls == ["https://example.com/dependency1.whl", "https://example.com/dependency2.whl"] - -def test_get_gitignore_files(terminal): - mock_gitignore = "__pycache__/\n*.log\n" # Sample .gitignore file - with patch("builtins.open", mock_open(read_data=mock_gitignore)): - with patch("os.path.exists", return_value=True): - ignored_patterns = get_gitignore_files(terminal) - assert ignored_patterns == ["__pycache__/", "*.log"] - -def test_get_dependencies_sizes(terminal): - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.headers = {"Content-Length": "12345"} - with patch("requests.head", return_value=mock_response): - file_data = get_dependencies_sizes(terminal, ["dependency1"], ["https://example.com/dependency1.whl"]) - assert file_data == [{"File Path": "dependency1", "Type": "Dependency", "Name": "dependency1", "Size (Bytes)": 12345}] def test_get_compressed_dependencies(terminal): platform = "windows-x86_64" version = "3.12" - - fake_file_content = "dependency1 @ https://example.com/dependency1.whl\ndependency2 @ https://example.com/dependency2.whl" - + + fake_file_content = ( + "dependency1 @ https://example.com/dependency1.whl\ndependency2 @ https://example.com/dependency2.whl" + ) + mock_response = MagicMock() mock_response.status_code = 200 mock_response.headers = {"Content-Length": 
"12345"} - - with patch("os.path.exists", return_value=True), \ - patch("os.path.isdir", return_value=True), \ - patch("os.listdir", return_value=[f"{platform}-{version}"]), \ - patch("os.path.isfile", return_value=True), \ - patch("builtins.open", mock_open(read_data=fake_file_content)), \ - patch("requests.head", return_value=mock_response): - + + with ( + patch("os.path.exists", return_value=True), + patch("os.path.isdir", return_value=True), + patch("os.listdir", return_value=[f"{platform}-{version}"]), + patch("os.path.isfile", return_value=True), + patch("builtins.open", mock_open(read_data=fake_file_content)), + patch("requests.head", return_value=mock_response), + ): + file_data = get_compressed_dependencies(terminal, platform, version) - + assert file_data == [ {"File Path": "dependency1", "Type": "Dependency", "Name": "dependency1", "Size (Bytes)": 12345}, {"File Path": "dependency2", "Type": "Dependency", "Name": "dependency2", "Size (Bytes)": 12345}, ] -def test_group_modules(): - modules = [ - {"Name": "module1", "Type": "A", "Size (Bytes)": 1500}, - {"Name": "module2", "Type": "B", "Size (Bytes)": 3000}, - {"Name": "module1", "Type": "A", "Size (Bytes)": 2500}, - {"Name": "module3", "Type": "A", "Size (Bytes)": 4000}, - ] - - platform = "linux-aarch64" - version = "3.12" - - expected_output = [ - {"Name": "module1", "Type": "A", "Size (Bytes)": 4000, "Size": "3.91 KB", "Platform": "linux-aarch64", "Version": "3.12"}, - {"Name": "module2", "Type": "B", "Size (Bytes)": 3000, "Size": "2.93 KB", "Platform": "linux-aarch64", "Version": "3.12"}, - {"Name": "module3", "Type": "A", "Size (Bytes)": 4000, "Size": "3.91 KB", "Platform": "linux-aarch64", "Version": "3.12"}, - ] - assert group_modules(modules, platform, version) == expected_output -def test_statu_no_args(ddev): +def test_status_no_args(ddev): result = ddev('size', 'status', '--compressed') assert result.exit_code == 0 + def test_status(ddev): result = ddev('size', 'status', '--platform', 
'linux-aarch64', '--python', '3.12', '--compressed') assert result.exit_code == 0 + def test_status_csv(ddev): result = ddev('size', 'status', '--platform', 'linux-aarch64', '--python', '3.12', '--compressed', '--csv') assert result.exit_code == 0 -def test_status_fail(ddev): + +def test_status_wrong_platform(ddev): result = ddev('size', 'status', '--platform', 'linux', '--python', '3.12', '--compressed') assert result.exit_code != 0 -def test_status_fail2(ddev): + +def test_status_wrong_version(ddev): result = ddev('size', 'status', '--platform', 'linux-aarch64', '--python', '2.10', '--compressed') assert result.exit_code != 0 -def test_status_fail2(ddev): - result = ddev('size', 'status', '--platform', 'linux', '--python' ,'2.10', '--compressed') +def test_status_wrong_plat_and_version(ddev): + result = ddev('size', 'status', '--platform', 'linux', '--python', '2.10', '--compressed') assert result.exit_code != 0 - From fefd6a3f7d9911815741f7e1d169b309d3b692fe Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Tue, 25 Mar 2025 12:58:45 +0100 Subject: [PATCH 05/70] add diff mode --- ddev/src/ddev/cli/size/GitRepo.py | 6 ++--- ddev/src/ddev/cli/size/__init__.py | 3 ++- ddev/src/ddev/cli/size/common.py | 39 ++++++++++++++++++----------- ddev/src/ddev/cli/size/diff.py | 35 ++++++++++++++++---------- ddev/src/ddev/cli/size/status.py | 22 ++++++++-------- ddev/tests/cli/size/test_common.py | 9 ++++--- ddev/tests/cli/size/test_diff.py | 40 +++++++++++++++--------------- ddev/tests/cli/size/test_status.py | 22 ++++++++++------ 8 files changed, 101 insertions(+), 75 deletions(-) diff --git a/ddev/src/ddev/cli/size/GitRepo.py b/ddev/src/ddev/cli/size/GitRepo.py index 04e736b1058b5..112ee0c15d418 100644 --- a/ddev/src/ddev/cli/size/GitRepo.py +++ b/ddev/src/ddev/cli/size/GitRepo.py @@ -1,7 +1,8 @@ import os import shutil -import tempfile import subprocess +import tempfile + class GitRepo: def __init__(self, url): @@ -21,7 +22,6 @@ def checkout_commit(self, commit): 
self._run(f"git fetch --depth 1 origin {commit}") self._run(f"git checkout {commit}") - def __exit__(self, exception_type, exception_value, exception_traceback): if self.repo_dir and os.path.exists(self.repo_dir): - shutil.rmtree(self.repo_dir) \ No newline at end of file + shutil.rmtree(self.repo_dir) diff --git a/ddev/src/ddev/cli/size/__init__.py b/ddev/src/ddev/cli/size/__init__.py index 1cb5140b75d23..6f3878b2fda35 100644 --- a/ddev/src/ddev/cli/size/__init__.py +++ b/ddev/src/ddev/cli/size/__init__.py @@ -4,8 +4,9 @@ import click -from ddev.cli.size.status import status from ddev.cli.size.diff import diff +from ddev.cli.size.status import status + @click.group(short_help='Get the size of integrations and dependencies by platform and python version') def size(): diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 922b5ca748a4f..6a2c83f7a7c38 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -1,15 +1,17 @@ # (C) Datadog, Inc. 
2022-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import sys -import re -import requests import os +import re +import sys import zlib -#utilities +import requests + +# utilities -#mirar si existe + +# mirar si existe def convert_size(size_bytes): # Transforms bytes into a human-friendly format (KB, MB, GB) for unit in [' B', ' KB', ' MB', ' GB']: @@ -39,27 +41,31 @@ def is_valid_integration(path, included_folder, ignored_files, git_ignore): def is_correct_dependency(platform, version, name): return platform in name and version in name + def print_csv(app, i, modules): headers = modules[0].keys() if i == 0: - app.display(",".join(headers)) + app.display(",".join(headers)) for row in modules: app.display(",".join(format(str(row[h])) for h in headers)) + def format(s): if "," in s: return '"' + s + '"' else: return s - + + def print_table(app, modules, platform, version): modules_table = {col: {} for col in modules[0].keys()} for i, row in enumerate(modules): for key, value in row.items(): modules_table[key][i] = str(value) app.display_table(platform + " " + version, modules_table) - + + def get_dependencies_sizes(app, deps, download_urls): file_data = [] for dep, url in zip(deps, download_urls, strict=False): @@ -73,6 +79,7 @@ def get_dependencies_sizes(app, deps, download_urls): return file_data + def get_dependencies(app, file_path): download_urls = [] deps = [] @@ -100,16 +107,17 @@ def group_modules(modules, platform, version): return [ { - 'Name': name , - 'Type': type , - 'Size (Bytes)': size , + 'Name': name, + 'Type': type, + 'Size (Bytes)': size, 'Size': convert_size(size), - 'Platform': platform , - 'Version': version , + 'Platform': platform, + 'Version': version, } for (name, type), size in grouped_aux.items() ] + def get_gitignore_files(app, repo_path): gitignore_path = os.path.join(repo_path, ".gitignore") if not os.path.exists(gitignore_path): @@ -127,6 +135,7 @@ def get_gitignore_files(app, repo_path): 
app.display_error(f"Error reading .gitignore file: {e}") sys.exit(1) + def compress(app, file_path, relative_path): compressor = zlib.compressobj() compressed_size = 0 @@ -137,8 +146,8 @@ def compress(app, file_path, relative_path): compressed_chunk = compressor.compress(chunk) compressed_size += len(compressed_chunk) - compressed_size += len(compressor.flush()) + compressed_size += len(compressor.flush()) return compressed_size except Exception as e: app.display_error(f"Error processing {relative_path}: {e}") - sys.exit(1) \ No newline at end of file + sys.exit(1) diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index 0739dbe4dd482..a18dc313ec531 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -5,9 +5,20 @@ import os import sys from pathlib import Path + import click import requests -from .common import is_correct_dependency, is_valid_integration, group_modules, print_csv, print_table, get_gitignore_files, get_dependencies, compress + +from .common import ( + compress, + get_dependencies, + get_gitignore_files, + group_modules, + is_correct_dependency, + is_valid_integration, + print_csv, + print_table, +) from .GitRepo import GitRepo VALID_PLATFORMS = ["linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"] @@ -40,10 +51,10 @@ def diff_mode(app, before, after, platform, version, compressed, csv, i): gitRepo.checkout_commit(after) files_a = get_compressed_files(app, repo) dependencies_a = get_compressed_dependencies(app, repo, platform, version) - - integrations = get_diff(files_b, files_a, 'Integration') - dependencies = get_diff(dependencies_b, dependencies_a, 'Dependency') - + + integrations = get_diff(files_b, files_a, 'Integration') + dependencies = get_diff(dependencies_b, dependencies_a, 'Dependency') + grouped_modules = group_modules(integrations + dependencies, platform, version) grouped_modules.sort(key=lambda x: x['Size (Bytes)'], reverse=True) for module in grouped_modules: @@ 
-56,8 +67,6 @@ def diff_mode(app, before, after, platform, version, compressed, csv, i): print_table(app, grouped_modules, platform, version) - - def get_diff(size_before, size_after, type): all_paths = set(size_before.keys()) | set(size_after.keys()) diff_files = [] @@ -66,7 +75,7 @@ def get_diff(size_before, size_after, type): size_b = size_before.get(path, 0) size_a = size_after.get(path, 0) size_delta = size_a - size_b - module = Path(path).parts[0] + module = Path(path).parts[0] if size_delta != 0: if size_b == 0: diff_files.append( @@ -74,16 +83,16 @@ def get_diff(size_before, size_after, type): 'File Path': path, 'Type': type, 'Name': module + " (NEW)", - 'Size (Bytes)': size_delta, + 'Size (Bytes)': size_delta, } - ) + ) elif size_a == 0: diff_files.append( { 'File Path': path, 'Type': type, 'Name': module + " (DELETED)", - 'Size (Bytes)': size_delta, + 'Size (Bytes)': size_delta, } ) else: @@ -96,9 +105,9 @@ def get_diff(size_before, size_after, type): } ) - return diff_files + def get_compressed_files(app, repo_path): ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} @@ -119,6 +128,7 @@ def get_compressed_files(app, repo_path): file_data[relative_path] = compressed_size return file_data + def get_compressed_dependencies(app, repo_path, platform, version): resolved_path = os.path.join(repo_path, ".deps/resolved") @@ -148,4 +158,3 @@ def get_dependencies_sizes(app, deps, download_urls): file_data[dep] = int(size) return file_data - diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index f52f41015a109..f804233f4a406 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -3,14 +3,22 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import os -import re import sys -import zlib from pathlib import Path import click -from .common import convert_size, get_dependencies_sizes, is_correct_dependency, is_valid_integration, group_modules, print_csv, print_table, 
get_gitignore_files, get_dependencies, compress +from .common import ( + compress, + get_dependencies, + get_dependencies_sizes, + get_gitignore_files, + group_modules, + is_correct_dependency, + is_valid_integration, + print_csv, + print_table, +) VALID_PLATFORMS = ["linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"] VALID_PYTHON_VERSIONS = ["3.12"] @@ -44,8 +52,6 @@ def status_mode(app, platform, version, compressed, csv, i): print_table(app, grouped_modules, platform, version) - - def get_compressed_files(app): ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} @@ -75,7 +81,6 @@ def get_compressed_files(app): return file_data - def get_compressed_dependencies(app, platform, version): resolved_path = os.path.join(REPO_PATH, ".deps/resolved") @@ -90,8 +95,3 @@ def get_compressed_dependencies(app, platform, version): if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): deps, download_urls = get_dependencies(app, file_path) return get_dependencies_sizes(app, deps, download_urls) - - - - - diff --git a/ddev/tests/cli/size/test_common.py b/ddev/tests/cli/size/test_common.py index e7dbd96d0672d..8d218fab237e1 100644 --- a/ddev/tests/cli/size/test_common.py +++ b/ddev/tests/cli/size/test_common.py @@ -1,17 +1,15 @@ -import os from unittest.mock import MagicMock, mock_open, patch -from ddev.cli.application import Application from ddev.cli.size.status import ( - convert_size, - print_csv, compress, + convert_size, get_dependencies, get_dependencies_sizes, get_gitignore_files, group_modules, is_correct_dependency, is_valid_integration, + print_csv, ) @@ -101,6 +99,7 @@ def test_group_modules(): assert group_modules(modules, platform, version) == expected_output + def test_get_gitignore_files(terminal): mock_gitignore = "__pycache__/\n*.log\n" # Sample .gitignore file repo_path = "/fake/repo" @@ -109,6 +108,7 @@ def test_get_gitignore_files(terminal): ignored_patterns = get_gitignore_files(terminal, 
repo_path) assert ignored_patterns == ["__pycache__/", "*.log"] + def test_compress(): mock_app = MagicMock() fake_content = b'a' * 16384 @@ -122,6 +122,7 @@ def test_compress(): assert compressed_size > 0 assert compressed_size < original_size + def test_print_csv(): mock_app = MagicMock() modules = [ diff --git a/ddev/tests/cli/size/test_diff.py b/ddev/tests/cli/size/test_diff.py index 288067aa37bfc..4db01ab8ed3d5 100644 --- a/ddev/tests/cli/size/test_diff.py +++ b/ddev/tests/cli/size/test_diff.py @@ -4,12 +4,8 @@ from unittest.mock import MagicMock, mock_open, patch -from ddev.cli.size.diff import ( - get_compressed_files, - get_compressed_dependencies, - get_diff - -) +from ddev.cli.size.diff import get_compressed_dependencies, get_compressed_files, get_diff + def test_get_compressed_files(): mock_app = MagicMock() @@ -26,25 +22,30 @@ def fake_compress(app, file_path, relative_path): fake_gitignore = {"ignored.py"} - with patch("os.walk", return_value=mock_files), \ - patch("os.path.relpath", side_effect=lambda path, _: path.replace("root/", "")), \ - patch("os.path.exists", return_value=True), \ - patch("builtins.open", mock_open(read_data="__pycache__/\n*.log\n")),\ - patch("ddev.cli.size.diff.get_gitignore_files", return_value=fake_gitignore), \ - patch("ddev.cli.size.diff.is_valid_integration", side_effect=lambda path, folder, ignored, git_ignore: path.startswith("integration")), \ - patch("ddev.cli.size.diff.compress", side_effect=fake_compress): + with ( + patch("os.walk", return_value=mock_files), + patch("os.path.relpath", side_effect=lambda path, _: path.replace("root/", "")), + patch("os.path.exists", return_value=True), + patch("builtins.open", mock_open(read_data="__pycache__/\n*.log\n")), + patch("ddev.cli.size.diff.get_gitignore_files", return_value=fake_gitignore), + patch( + "ddev.cli.size.diff.is_valid_integration", + side_effect=lambda path, folder, ignored, git_ignore: path.startswith("integration"), + ), + 
patch("ddev.cli.size.diff.compress", side_effect=fake_compress), + ): result = get_compressed_files(mock_app, mock_repo_path) expected = { "integration/datadog_checks/file1.py": 1000, "integration/datadog_checks/file2.py": 1000, - "integration_b/datadog_checks/file3.py": 1000 + "integration_b/datadog_checks/file3.py": 1000, } - assert result == expected + def test_get_compressed_dependencies(terminal): platform = "windows-x86_64" version = "3.12" @@ -67,12 +68,12 @@ def test_get_compressed_dependencies(terminal): patch("requests.head", return_value=mock_response), ): - file_data = get_compressed_dependencies(terminal,mock_repo_path, platform, version) + file_data = get_compressed_dependencies(terminal, mock_repo_path, platform, version) assert file_data == { "dependency1": 12345, "dependency2": 12345, - } + } def test_get_diff(): size_before = { @@ -83,7 +84,7 @@ def test_get_diff(): size_after = { "integration/foo.py": 1200, # modified "integration/bar.py": 2000, # unchanged - "integration/new.py": 800, # new + "integration/new.py": 800, # new } expected = [ @@ -104,9 +105,8 @@ def test_get_diff(): "Type": "Integration", "Name": "integration (NEW)", "Size (Bytes)": 800, - } + }, ] result = get_diff(size_before, size_after, "Integration") assert sorted(result, key=lambda x: x["File Path"]) == sorted(expected, key=lambda x: x["File Path"]) - diff --git a/ddev/tests/cli/size/test_status.py b/ddev/tests/cli/size/test_status.py index 1792802b44bca..fb2436725705d 100644 --- a/ddev/tests/cli/size/test_status.py +++ b/ddev/tests/cli/size/test_status.py @@ -5,11 +5,11 @@ from unittest.mock import MagicMock, mock_open, patch from ddev.cli.size.status import ( - get_compressed_files, get_compressed_dependencies, - + get_compressed_files, ) + def test_get_compressed_files(): mock_app = MagicMock() @@ -20,15 +20,20 @@ def test_get_compressed_files(): ] def fake_compress(app, file_path, relative_path): - return 1000 + return 1000 fake_gitignore = {"ignored.py"} - with 
patch("os.walk", return_value=mock_files), \ - patch("os.path.relpath", side_effect=lambda path, _: path.replace("root/", "")), \ - patch("ddev.cli.size.status.get_gitignore_files", return_value=fake_gitignore), \ - patch("ddev.cli.size.status.is_valid_integration", side_effect=lambda path, folder, ignored, git_ignore: path.startswith("integration")), \ - patch("ddev.cli.size.status.compress", side_effect=fake_compress): + with ( + patch("os.walk", return_value=mock_files), + patch("os.path.relpath", side_effect=lambda path, _: path.replace("root/", "")), + patch("ddev.cli.size.status.get_gitignore_files", return_value=fake_gitignore), + patch( + "ddev.cli.size.status.is_valid_integration", + side_effect=lambda path, folder, ignored, git_ignore: path.startswith("integration"), + ), + patch("ddev.cli.size.status.compress", side_effect=fake_compress), + ): result = get_compressed_files(mock_app) @@ -109,6 +114,7 @@ def test_status_wrong_version(ddev): result = ddev('size', 'status', '--platform', 'linux-aarch64', '--python', '2.10', '--compressed') assert result.exit_code != 0 + def test_status_wrong_plat_and_version(ddev): result = ddev('size', 'status', '--platform', 'linux', '--python', '2.10', '--compressed') assert result.exit_code != 0 From 76c1f09cc1bd4e5904c721bdc763b293a0d4c56a Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Thu, 27 Mar 2025 12:55:33 +0100 Subject: [PATCH 06/70] final diff version --- ddev/src/ddev/cli/size/GitRepo.py | 27 ---- ddev/src/ddev/cli/size/common.py | 95 ++++++-------- ddev/src/ddev/cli/size/diff.py | 109 ++++++++++------ ddev/src/ddev/cli/size/status.py | 32 ++--- ddev/tests/cli/size/test_common.py | 23 ++-- ddev/tests/cli/size/test_diff.py | 200 +++++++++++++++++++++++------ ddev/tests/cli/size/test_status.py | 46 +++++-- 7 files changed, 336 insertions(+), 196 deletions(-) delete mode 100644 ddev/src/ddev/cli/size/GitRepo.py diff --git a/ddev/src/ddev/cli/size/GitRepo.py b/ddev/src/ddev/cli/size/GitRepo.py deleted file 
mode 100644 index 112ee0c15d418..0000000000000 --- a/ddev/src/ddev/cli/size/GitRepo.py +++ /dev/null @@ -1,27 +0,0 @@ -import os -import shutil -import subprocess -import tempfile - - -class GitRepo: - def __init__(self, url): - self.url = url - self.repo_dir = None - - def __enter__(self): - self.repo_dir = tempfile.mkdtemp() - self._run("git init") - self._run(f"git remote add origin {self.url}") - return self - - def _run(self, cmd): - subprocess.run(cmd, shell=True, cwd=self.repo_dir, check=True) - - def checkout_commit(self, commit): - self._run(f"git fetch --depth 1 origin {commit}") - self._run(f"git checkout {commit}") - - def __exit__(self, exception_type, exception_value, exception_traceback): - if self.repo_dir and os.path.exists(self.repo_dir): - shutil.rmtree(self.repo_dir) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 6a2c83f7a7c38..983bd443ea016 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -3,13 +3,10 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import os import re -import sys import zlib import requests -# utilities - # mirar si existe def convert_size(size_bytes): @@ -43,7 +40,7 @@ def is_correct_dependency(platform, version, name): def print_csv(app, i, modules): - headers = modules[0].keys() + headers = [k for k in modules[0].keys() if k != 'Size'] if i == 0: app.display(",".join(headers)) @@ -59,41 +56,37 @@ def format(s): def print_table(app, modules, platform, version): - modules_table = {col: {} for col in modules[0].keys()} + modules_table = {col: {} for col in modules[0].keys() if col != 'Size (Bytes)'} for i, row in enumerate(modules): for key, value in row.items(): - modules_table[key][i] = str(value) + if key in modules_table: + modules_table[key][i] = str(value) app.display_table(platform + " " + version, modules_table) -def get_dependencies_sizes(app, deps, download_urls): +def get_dependencies_sizes(deps, download_urls): file_data = 
[] for dep, url in zip(deps, download_urls, strict=False): dep_response = requests.head(url) - if dep_response.status_code != 200: - app.display_error(f"Error {dep_response.status_code}: Unable to fetch the dependencies file") - sys.exit(1) - else: - size = dep_response.headers.get("Content-Length", None) - file_data.append({"File Path": dep, "Type": "Dependency", "Name": dep, "Size (Bytes)": int(size)}) + dep_response.raise_for_status() + size = dep_response.headers.get("Content-Length", None) + file_data.append({"File Path": dep, "Type": "Dependency", "Name": dep, "Size (Bytes)": int(size)}) return file_data -def get_dependencies(app, file_path): +def get_dependencies(file_path): download_urls = [] deps = [] - try: - with open(file_path, "r", encoding="utf-8") as file: - file_content = file.read() - for line in file_content.splitlines(): - match = re.search(r"([\w\-\d\.]+) @ (https?://[^\s#]+)", line) - if match: - deps.append(match.group(1)) - download_urls.append(match.group(2)) - except Exception as e: - app.display_error(f"Error reading file {file_path}: {e}") - sys.exit(1) + with open(file_path, "r", encoding="utf-8") as file: + file_content = file.read() + for line in file_content.splitlines(): + match = re.search(r"([\w\-\d\.]+) @ (https?://[^\s#]+)", line) + if match: + deps.append(match.group(1)) + download_urls.append(match.group(2)) + else: + raise WrongDependencyFormat("The dependency format 'name @ link' is no longer supported.") return deps, download_urls @@ -118,36 +111,28 @@ def group_modules(modules, platform, version): ] -def get_gitignore_files(app, repo_path): +def get_gitignore_files(repo_path): gitignore_path = os.path.join(repo_path, ".gitignore") - if not os.path.exists(gitignore_path): - app.display_error(f"Error: .gitignore file not found at {gitignore_path}") - sys.exit(1) - - try: - with open(gitignore_path, "r", encoding="utf-8") as file: - gitignore_content = file.read() - ignored_patterns = [ - line.strip() for line in 
gitignore_content.splitlines() if line.strip() and not line.startswith("#") - ] - return ignored_patterns - except Exception as e: - app.display_error(f"Error reading .gitignore file: {e}") - sys.exit(1) - - -def compress(app, file_path, relative_path): + with open(gitignore_path, "r", encoding="utf-8") as file: + gitignore_content = file.read() + ignored_patterns = [ + line.strip() for line in gitignore_content.splitlines() if line.strip() and not line.startswith("#") + ] + return ignored_patterns + + +def compress(file_path): compressor = zlib.compressobj() compressed_size = 0 - try: - # original_size = os.path.getsize(file_path) - with open(file_path, "rb") as f: - while chunk := f.read(8192): # Read in 8KB chunks - compressed_chunk = compressor.compress(chunk) - compressed_size += len(compressed_chunk) - - compressed_size += len(compressor.flush()) - return compressed_size - except Exception as e: - app.display_error(f"Error processing {relative_path}: {e}") - sys.exit(1) + # original_size = os.path.getsize(file_path) + with open(file_path, "rb") as f: + while chunk := f.read(8192): # Read in 8KB chunks + compressed_chunk = compressor.compress(chunk) + compressed_size += len(compressed_chunk) + compressed_size += len(compressor.flush()) + return compressed_size + + +class WrongDependencyFormat(Exception): + def __init__(self, mensaje): + super().__init__(mensaje) diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index a18dc313ec531..8ce12e8ea8ae2 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -3,7 +3,9 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import os -import sys +import shutil +import subprocess +import tempfile from pathlib import Path import click @@ -19,7 +21,6 @@ print_csv, print_table, ) -from .GitRepo import GitRepo VALID_PLATFORMS = ["linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"] VALID_PYTHON_VERSIONS = ["3.12"] @@ -34,37 +35,50 @@ 
@click.option('--csv', is_flag=True, help="Output in CSV format") @click.pass_obj def diff(app, before, after, platform, version, compressed, csv): - platforms = VALID_PLATFORMS if platform is None else [platform] - versions = VALID_PYTHON_VERSIONS if version is None else [version] + try: + platforms = VALID_PLATFORMS if platform is None else [platform] + versions = VALID_PYTHON_VERSIONS if version is None else [version] - for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): - diff_mode(app, before, after, plat, ver, compressed, csv, i) + for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): + diff_mode(app, before, after, plat, ver, compressed, csv, i) + except Exception as e: + app.abort(str(e)) def diff_mode(app, before, after, platform, version, compressed, csv, i): + url = "https://github.com/DataDog/integrations-core.git" if compressed: - with GitRepo("https://github.com/DataDog/integrations-core.git") as gitRepo: - repo = gitRepo.repo_dir - gitRepo.checkout_commit(before) - files_b = get_compressed_files(app, repo) - dependencies_b = get_compressed_dependencies(app, repo, platform, version) - gitRepo.checkout_commit(after) - files_a = get_compressed_files(app, repo) - dependencies_a = get_compressed_dependencies(app, repo, platform, version) + files_b, dependencies_b, files_a, dependencies_a = get_repo_info(url, platform, version, before, after) integrations = get_diff(files_b, files_a, 'Integration') dependencies = get_diff(dependencies_b, dependencies_a, 'Dependency') - grouped_modules = group_modules(integrations + dependencies, platform, version) - grouped_modules.sort(key=lambda x: x['Size (Bytes)'], reverse=True) + grouped_modules.sort(key=lambda x: abs(x['Size (Bytes)']), reverse=True) for module in grouped_modules: if module['Size (Bytes)'] > 0: module['Size'] = f"+{module['Size']}" - - if csv: - print_csv(app, i, grouped_modules) + if grouped_modules == []: + app.display("No size differences were 
detected between the selected commits.") else: - print_table(app, grouped_modules, platform, version) + if csv: + print_csv(app, i, grouped_modules) + else: + print_table(app, grouped_modules, platform, version) + + +def get_repo_info(repo_url, platform, version, before, after): + with GitRepo(repo_url) as gitRepo: + repo = gitRepo.repo_dir + + gitRepo.checkout_commit(before) + files_b = get_compressed_files(repo) + dependencies_b = get_compressed_dependencies(repo, platform, version) + + gitRepo.checkout_commit(after) + files_a = get_compressed_files(repo) + dependencies_a = get_compressed_dependencies(repo, platform, version) + + return files_b, dependencies_b, files_a, dependencies_a def get_diff(size_before, size_after, type): @@ -108,10 +122,10 @@ def get_diff(size_before, size_after, type): return diff_files -def get_compressed_files(app, repo_path): +def get_compressed_files(repo_path): ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} - git_ignore = get_gitignore_files(app, repo_path) + git_ignore = get_gitignore_files(repo_path) included_folder = "datadog_checks/" file_data = {} @@ -124,37 +138,58 @@ def get_compressed_files(app, repo_path): # Filter files if is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): - compressed_size = compress(app, file_path, relative_path) + compressed_size = compress(file_path) file_data[relative_path] = compressed_size return file_data -def get_compressed_dependencies(app, repo_path, platform, version): +def get_compressed_dependencies(repo_path, platform, version): resolved_path = os.path.join(repo_path, ".deps/resolved") - if not os.path.exists(resolved_path) or not os.path.isdir(resolved_path): - app.display_error(f"Error: Directory not found {resolved_path}") - sys.exit(1) - for filename in os.listdir(resolved_path): file_path = os.path.join(resolved_path, filename) if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): - deps, download_urls = 
get_dependencies(app, file_path) - return get_dependencies_sizes(app, deps, download_urls) + deps, download_urls = get_dependencies(file_path) + return get_dependencies_sizes(deps, download_urls) return {} -def get_dependencies_sizes(app, deps, download_urls): +def get_dependencies_sizes(deps, download_urls): file_data = {} for dep, url in zip(deps, download_urls, strict=False): dep_response = requests.head(url) - if dep_response.status_code != 200: - app.display_error(f"Error {dep_response.status_code}: Unable to fetch the dependencies file") - sys.exit(1) - else: - size = dep_response.headers.get("Content-Length", None) - file_data[dep] = int(size) + dep_response.raise_for_status() + size = dep_response.headers.get("Content-Length", None) + file_data[dep] = int(size) return file_data + + +class GitRepo: + def __init__(self, url): + self.url = url + self.repo_dir = None + + def __enter__(self): + self.repo_dir = tempfile.mkdtemp() + self._run("git init --quiet") + self._run(f"git remote add origin {self.url}") + return self + + def _run(self, cmd): + subprocess.run( + cmd, + shell=True, + cwd=self.repo_dir, + check=True, + ) + + def checkout_commit(self, commit): + self._run(f"git fetch --quiet --depth 1 origin {commit}") + self._run(f"git checkout --quiet {commit}") + + def __exit__(self, exception_type, exception_value, exception_traceback): + if self.repo_dir and os.path.exists(self.repo_dir): + shutil.rmtree(self.repo_dir) diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index f804233f4a406..f6d44eb5068bd 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -3,7 +3,6 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import os -import sys from pathlib import Path import click @@ -32,16 +31,19 @@ @click.option('--csv', is_flag=True, help="Output in CSV format") @click.pass_obj def status(app, platform, version, compressed, csv): - platforms = VALID_PLATFORMS if platform is None else 
[platform] - versions = VALID_PYTHON_VERSIONS if version is None else [version] + try: + platforms = VALID_PLATFORMS if platform is None else [platform] + versions = VALID_PYTHON_VERSIONS if version is None else [version] - for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): - status_mode(app, plat, ver, compressed, csv, i) + for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): + status_mode(app, plat, ver, compressed, csv, i) + except Exception as e: + app.abort(str(e)) def status_mode(app, platform, version, compressed, csv, i): if compressed: - modules = get_compressed_files(app) + get_compressed_dependencies(app, platform, version) + modules = get_compressed_files() + get_compressed_dependencies(platform, version) grouped_modules = group_modules(modules, platform, version) grouped_modules.sort(key=lambda x: x['Size (Bytes)'], reverse=True) @@ -52,10 +54,10 @@ def status_mode(app, platform, version, compressed, csv, i): print_table(app, grouped_modules, platform, version) -def get_compressed_files(app): +def get_compressed_files(): ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} - git_ignore = get_gitignore_files(app, REPO_PATH) + git_ignore = get_gitignore_files(REPO_PATH) included_folder = "datadog_checks/" file_data = [] @@ -68,7 +70,7 @@ def get_compressed_files(app): # Filter files if is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): - compressed_size = compress(app, file_path, relative_path) + compressed_size = compress(file_path) integration = relative_path.split(os.sep)[0] file_data.append( { @@ -81,17 +83,11 @@ def get_compressed_files(app): return file_data -def get_compressed_dependencies(app, platform, version): +def get_compressed_dependencies(platform, version): resolved_path = os.path.join(REPO_PATH, ".deps/resolved") - - if not os.path.exists(resolved_path) or not os.path.isdir(resolved_path): - app.display_error(f"Error: Directory not 
found {resolved_path}") - sys.exit(1) - for filename in os.listdir(resolved_path): file_path = os.path.join(resolved_path, filename) - if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): - deps, download_urls = get_dependencies(app, file_path) - return get_dependencies_sizes(app, deps, download_urls) + deps, download_urls = get_dependencies(file_path) + return get_dependencies_sizes(deps, download_urls) diff --git a/ddev/tests/cli/size/test_common.py b/ddev/tests/cli/size/test_common.py index 8d218fab237e1..839ef2f9b7627 100644 --- a/ddev/tests/cli/size/test_common.py +++ b/ddev/tests/cli/size/test_common.py @@ -1,6 +1,6 @@ from unittest.mock import MagicMock, mock_open, patch -from ddev.cli.size.status import ( +from ddev.cli.size.common import ( compress, convert_size, get_dependencies, @@ -37,23 +37,23 @@ def test_is_valid_integration(): assert not is_valid_integration(".git/config", included_folder, ignored_files, git_ignore) -def test_get_dependencies(terminal): +def test_get_dependencies(): file_content = ( "dependency1 @ https://example.com/dependency1.whl\ndependency2 @ https://example.com/dependency2.whl" ) mock_open_obj = mock_open(read_data=file_content) with patch("builtins.open", mock_open_obj): - deps, urls = get_dependencies(terminal, "fake_path") + deps, urls = get_dependencies("fake_path") assert deps == ["dependency1", "dependency2"] assert urls == ["https://example.com/dependency1.whl", "https://example.com/dependency2.whl"] -def test_get_dependencies_sizes(terminal): +def test_get_dependencies_sizes(): mock_response = MagicMock() mock_response.status_code = 200 mock_response.headers = {"Content-Length": "12345"} with patch("requests.head", return_value=mock_response): - file_data = get_dependencies_sizes(terminal, ["dependency1"], ["https://example.com/dependency1.whl"]) + file_data = get_dependencies_sizes(["dependency1"], ["https://example.com/dependency1.whl"]) assert file_data == [ {"File Path": 
"dependency1", "Type": "Dependency", "Name": "dependency1", "Size (Bytes)": 12345} ] @@ -100,23 +100,22 @@ def test_group_modules(): assert group_modules(modules, platform, version) == expected_output -def test_get_gitignore_files(terminal): +def test_get_gitignore_files(): mock_gitignore = "__pycache__/\n*.log\n" # Sample .gitignore file repo_path = "/fake/repo" with patch("builtins.open", mock_open(read_data=mock_gitignore)): with patch("os.path.exists", return_value=True): - ignored_patterns = get_gitignore_files(terminal, repo_path) + ignored_patterns = get_gitignore_files(repo_path) assert ignored_patterns == ["__pycache__/", "*.log"] def test_compress(): - mock_app = MagicMock() fake_content = b'a' * 16384 original_size = len(fake_content) m = mock_open(read_data=fake_content) with patch("builtins.open", m): - compressed_size = compress(mock_app, "fake/path/file.py", "relative/path/file.py") + compressed_size = compress("fake/path/file.py") assert isinstance(compressed_size, int) assert compressed_size > 0 @@ -126,14 +125,14 @@ def test_compress(): def test_print_csv(): mock_app = MagicMock() modules = [ - {"Name": "module1", "Size": 123}, - {"Name": "module,with,comma", "Size": 456}, + {"Name": "module1", "Size B": 123, "Size": "2 B"}, + {"Name": "module,with,comma", "Size B": 456, "Size": "2 B"}, ] print_csv(mock_app, i=0, modules=modules) expected_calls = [ - (("Name,Size",),), + (("Name,Size B",),), (('module1,123',),), (('"module,with,comma",456',),), ] diff --git a/ddev/tests/cli/size/test_diff.py b/ddev/tests/cli/size/test_diff.py index 4db01ab8ed3d5..1de5cb1987056 100644 --- a/ddev/tests/cli/size/test_diff.py +++ b/ddev/tests/cli/size/test_diff.py @@ -2,13 +2,12 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from unittest.mock import MagicMock, mock_open, patch - +from unittest.mock import MagicMock, mock_open, patch, Mock +import pytest from ddev.cli.size.diff import get_compressed_dependencies, 
get_compressed_files, get_diff def test_get_compressed_files(): - mock_app = MagicMock() mock_repo_path = "root" mock_files = [ @@ -17,7 +16,7 @@ def test_get_compressed_files(): ("root", [], ["ignored.py"]), ] - def fake_compress(app, file_path, relative_path): + def fake_compress(file_path): return 1000 fake_gitignore = {"ignored.py"} @@ -35,7 +34,7 @@ def fake_compress(app, file_path, relative_path): patch("ddev.cli.size.diff.compress", side_effect=fake_compress), ): - result = get_compressed_files(mock_app, mock_repo_path) + result = get_compressed_files(mock_repo_path) expected = { "integration/datadog_checks/file1.py": 1000, @@ -68,45 +67,170 @@ def test_get_compressed_dependencies(terminal): patch("requests.head", return_value=mock_response), ): - file_data = get_compressed_dependencies(terminal, mock_repo_path, platform, version) + file_data = get_compressed_dependencies(mock_repo_path, platform, version) assert file_data == { "dependency1": 12345, "dependency2": 12345, } - def test_get_diff(): - size_before = { - "integration/foo.py": 1000, - "integration/bar.py": 2000, - "integration/deleted.py": 1500, - } - size_after = { - "integration/foo.py": 1200, # modified - "integration/bar.py": 2000, # unchanged - "integration/new.py": 800, # new - } - - expected = [ - { - "File Path": "integration/foo.py", - "Type": "Integration", - "Name": "integration", - "Size (Bytes)": 200, - }, - { - "File Path": "integration/deleted.py", - "Type": "Integration", - "Name": "integration (DELETED)", - "Size (Bytes)": -1500, + +def test_get_diff(): + size_before = { + "integration/foo.py": 1000, + "integration/bar.py": 2000, + "integration/deleted.py": 1500, + } + size_after = { + "integration/foo.py": 1200, # modified + "integration/bar.py": 2000, # unchanged + "integration/new.py": 800, # new + } + + expected = [ + { + "File Path": "integration/foo.py", + "Type": "Integration", + "Name": "integration", + "Size (Bytes)": 200, + }, + { + "File Path": "integration/deleted.py", 
+ "Type": "Integration", + "Name": "integration (DELETED)", + "Size (Bytes)": -1500, + }, + { + "File Path": "integration/new.py", + "Type": "Integration", + "Name": "integration (NEW)", + "Size (Bytes)": 800, + }, + ] + + result = get_diff(size_before, size_after, "Integration") + assert sorted(result, key=lambda x: x["File Path"]) == sorted(expected, key=lambda x: x["File Path"]) + + +@pytest.fixture +def mock_size_diff_dependencies(): + mock_git_repo = MagicMock() + mock_git_repo.repo_dir = "/tmp/fake_repo" + + def get_compressed_files_side_effect(_): + get_compressed_files_side_effect.counter += 1 + if get_compressed_files_side_effect.counter % 2 == 1: + return {"path1.py": 1000} # before + else: + return {"path1.py": 1200, "path2.py": 500} # after + + get_compressed_files_side_effect.counter = 0 + + def get_compressed_dependencies_side_effect(_, __, ___): + get_compressed_dependencies_side_effect.counter += 1 + if get_compressed_dependencies_side_effect.counter % 2 == 1: + return {"dep1.whl": 2000} # before + else: + return {"dep1.whl": 2500, "dep2.whl": 1000} # after + + get_compressed_dependencies_side_effect.counter = 0 + + with ( + patch("ddev.cli.size.diff.GitRepo.__enter__", return_value=mock_git_repo), + patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None), + patch("ddev.cli.size.diff.GitRepo.checkout_commit"), + patch("tempfile.mkdtemp", return_value="/tmp/fake_repo"), + patch("ddev.cli.size.diff.get_compressed_files", side_effect=get_compressed_files_side_effect), + patch("ddev.cli.size.diff.get_compressed_dependencies", side_effect=get_compressed_dependencies_side_effect), + patch("ddev.cli.size.common.group_modules", side_effect=lambda m, *_: m), + patch("ddev.cli.size.common.print_csv"), + patch("ddev.cli.size.common.print_table"), + ): + yield + + +def test_diff_no_args(ddev, mock_size_diff_dependencies): + result = ddev('size', 'diff', 'commit1', 'commit2', '--compressed') + print("Exit code:", result.exit_code) + print("Output:\n", 
result.output) + print("Exception:", result.exception) + assert result.exit_code == 0 + + +def test_diff_with_platform_and_version(ddev, mock_size_diff_dependencies): + result = ddev( + 'size', 'diff', 'commit1', 'commit2', '--platform', 'linux-aarch64', '--python', '3.12', '--compressed' + ) + assert result.exit_code == 0 + + +def test_diff_csv(ddev, mock_size_diff_dependencies): + result = ddev( + 'size', 'diff', 'commit1', 'commit2', '--platform', 'linux-aarch64', '--python', '3.12', '--compressed', '--csv' + ) + assert result.exit_code == 0 + + + + +from unittest.mock import patch, MagicMock + +def test_diff_no_differences(ddev): + fake_repo = MagicMock() + + with ( + patch("ddev.cli.size.diff.GitRepo.__enter__", return_value=fake_repo), + patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None), + patch.object(fake_repo, "checkout_commit"), + patch("tempfile.mkdtemp", return_value="/tmp/fake_repo"), + patch( + "ddev.cli.size.diff.get_compressed_files", + return_value={ + "path1.py": 1000, + "path2.py": 500, }, - { - "File Path": "integration/new.py", - "Type": "Integration", - "Name": "integration (NEW)", - "Size (Bytes)": 800, + ), + patch( + "ddev.cli.size.diff.get_compressed_dependencies", + return_value={ + "dep1.whl": 2000, + "dep2.whl": 1000, }, - ] + ), + patch("ddev.cli.size.common.group_modules", side_effect=lambda m, *_: m), + ): + result = ddev( + 'size', 'diff', 'commit1', 'commit2', '--platform', 'linux-aarch64', '--python', '3.12', '--compressed' + ) + print(result.output) + print(result.exit_code) + + assert result.exit_code == 0 + + + +def test_diff_invalid_platform(ddev): + result = ddev( + 'size', 'diff', 'commit1', 'commit2', '--platform', 'linux', '--python', '3.12', '--compressed' # inválido + ) + assert result.exit_code != 0 + + +def test_diff_invalid_version(ddev): + result = ddev( + 'size', + 'diff', + 'commit1', + 'commit2', + '--platform', + 'linux-aarch64', + '--python', + '2.10', # inválido + '--compressed', + ) + assert 
result.exit_code != 0 + - result = get_diff(size_before, size_after, "Integration") - assert sorted(result, key=lambda x: x["File Path"]) == sorted(expected, key=lambda x: x["File Path"]) +def test_diff_invalid_platform_and_version(ddev): + result = ddev('size', 'diff', 'commit1', 'commit2', '--platform', 'linux', '--python', '2.10', '--compressed') + assert result.exit_code != 0 diff --git a/ddev/tests/cli/size/test_status.py b/ddev/tests/cli/size/test_status.py index fb2436725705d..22031ad0d5e52 100644 --- a/ddev/tests/cli/size/test_status.py +++ b/ddev/tests/cli/size/test_status.py @@ -4,6 +4,8 @@ from unittest.mock import MagicMock, mock_open, patch +import pytest + from ddev.cli.size.status import ( get_compressed_dependencies, get_compressed_files, @@ -11,15 +13,13 @@ def test_get_compressed_files(): - mock_app = MagicMock() - mock_files = [ ("root/integration/datadog_checks", [], ["file1.py", "file2.py"]), ("root/integration_b/datadog_checks", [], ["file3.py"]), ("root", [], ["ignored.py"]), ] - def fake_compress(app, file_path, relative_path): + def fake_compress(file_path): return 1000 fake_gitignore = {"ignored.py"} @@ -35,7 +35,7 @@ def fake_compress(app, file_path, relative_path): patch("ddev.cli.size.status.compress", side_effect=fake_compress), ): - result = get_compressed_files(mock_app) + result = get_compressed_files() expected = [ { @@ -61,7 +61,7 @@ def fake_compress(app, file_path, relative_path): assert result == expected -def test_get_compressed_dependencies(terminal): +def test_get_compressed_dependencies(): platform = "windows-x86_64" version = "3.12" @@ -82,7 +82,7 @@ def test_get_compressed_dependencies(terminal): patch("requests.head", return_value=mock_response), ): - file_data = get_compressed_dependencies(terminal, platform, version) + file_data = get_compressed_dependencies(platform, version) assert file_data == [ {"File Path": "dependency1", "Type": "Dependency", "Name": "dependency1", "Size (Bytes)": 12345}, @@ -90,17 +90,45 @@ def 
test_get_compressed_dependencies(terminal): ] -def test_status_no_args(ddev): +@pytest.fixture() +def mock_size_status(): + with ( + patch("ddev.cli.size.status.get_gitignore_files", return_value=set()), + patch("ddev.cli.size.status.compress", return_value=1234), + patch("ddev.cli.size.status.get_dependencies", return_value=(["dep1"], {"dep1": "https://example.com/dep1"})), + patch( + "ddev.cli.size.status.get_dependencies_sizes", + return_value=[ + {"File Path": "dep1.whl", "Type": "Dependency", "Name": "dep1", "Size (Bytes)": 5678}, + ], + ), + patch("ddev.cli.size.status.is_valid_integration", return_value=True), + patch("ddev.cli.size.status.is_correct_dependency", return_value=True), + patch("ddev.cli.size.status.print_csv"), + patch("ddev.cli.size.status.print_table"), + patch( + "os.walk", + return_value=[ + ("datadog_checks/my_check", [], ["__init__.py"]), + ], + ), + patch("os.listdir", return_value=["fake_dep.whl"]), + patch("os.path.isfile", return_value=True), + ): + yield + + +def test_status_no_args(ddev, mock_size_status): result = ddev('size', 'status', '--compressed') assert result.exit_code == 0 -def test_status(ddev): +def test_status(ddev, mock_size_status): result = ddev('size', 'status', '--platform', 'linux-aarch64', '--python', '3.12', '--compressed') assert result.exit_code == 0 -def test_status_csv(ddev): +def test_status_csv(ddev, mock_size_status): result = ddev('size', 'status', '--platform', 'linux-aarch64', '--python', '3.12', '--compressed', '--csv') assert result.exit_code == 0 From 5aa1f49c185a126dfd4029a1374e785fdba61cb7 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Thu, 27 Mar 2025 12:59:40 +0100 Subject: [PATCH 07/70] final diff version --- ddev/tests/cli/size/test_diff.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/ddev/tests/cli/size/test_diff.py b/ddev/tests/cli/size/test_diff.py index 1de5cb1987056..1ba94bfdc6693 100644 --- a/ddev/tests/cli/size/test_diff.py +++ 
b/ddev/tests/cli/size/test_diff.py @@ -2,8 +2,10 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from unittest.mock import MagicMock, mock_open, patch, Mock +from unittest.mock import MagicMock, mock_open, patch + import pytest + from ddev.cli.size.diff import get_compressed_dependencies, get_compressed_files, get_diff @@ -173,15 +175,13 @@ def test_diff_csv(ddev, mock_size_diff_dependencies): -from unittest.mock import patch, MagicMock - def test_diff_no_differences(ddev): fake_repo = MagicMock() - + with ( patch("ddev.cli.size.diff.GitRepo.__enter__", return_value=fake_repo), patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None), - patch.object(fake_repo, "checkout_commit"), + patch.object(fake_repo, "checkout_commit"), patch("tempfile.mkdtemp", return_value="/tmp/fake_repo"), patch( "ddev.cli.size.diff.get_compressed_files", @@ -208,7 +208,6 @@ def test_diff_no_differences(ddev): assert result.exit_code == 0 - def test_diff_invalid_platform(ddev): result = ddev( 'size', 'diff', 'commit1', 'commit2', '--platform', 'linux', '--python', '3.12', '--compressed' # inválido From c64d2f9745300de9f4b80caf55132ecafbefa364 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Tue, 15 Apr 2025 09:47:32 +0200 Subject: [PATCH 08/70] Added timeline mode and uncompressed sizes (#5025) --- ddev/src/ddev/cli/size/__init__.py | 3 +- ddev/src/ddev/cli/size/common.py | 142 +++++++++++-- ddev/src/ddev/cli/size/diff.py | 171 +++++++-------- ddev/src/ddev/cli/size/status.py | 65 +++--- ddev/tests/cli/size/test_common.py | 32 ++- ddev/tests/cli/size/test_diff.py | 8 +- ddev/tests/cli/size/test_status.py | 5 +- ddev/tests/cli/size/test_timeline.py | 306 +++++++++++++++++++++++++++ 8 files changed, 595 insertions(+), 137 deletions(-) create mode 100644 ddev/tests/cli/size/test_timeline.py diff --git a/ddev/src/ddev/cli/size/__init__.py b/ddev/src/ddev/cli/size/__init__.py index 6f3878b2fda35..dc0a07beb809b 100644 --- 
a/ddev/src/ddev/cli/size/__init__.py +++ b/ddev/src/ddev/cli/size/__init__.py @@ -6,7 +6,7 @@ from ddev.cli.size.diff import diff from ddev.cli.size.status import status - +from ddev.cli.size.timeline import timeline @click.group(short_help='Get the size of integrations and dependencies by platform and python version') def size(): @@ -16,6 +16,7 @@ def size(): size.add_command(status) size.add_command(diff) +size.add_command(timeline) if __name__ == "__main__": size() diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 983bd443ea016..4c2ad720edb7b 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -4,13 +4,28 @@ import os import re import zlib - +import shutil +import subprocess +import tempfile import requests - +from pathlib import Path +import zipfile + + +def valid_platforms_versions(repo_path): + resolved_path = os.path.join(repo_path, ".deps/resolved") + platforms = [] + versions = [] + for file in os.listdir(resolved_path): + platforms.append("_".join(file.split('_')[:-1])) + match = re.search(r"\d+\.\d+", file) + if match: + versions.append(match.group()) + return set(platforms), set(versions) + # mirar si existe def convert_size(size_bytes): - # Transforms bytes into a human-friendly format (KB, MB, GB) for unit in [' B', ' KB', ' MB', ' GB']: if size_bytes < 1024: return str(round(size_bytes, 2)) + unit @@ -40,8 +55,8 @@ def is_correct_dependency(platform, version, name): def print_csv(app, i, modules): - headers = [k for k in modules[0].keys() if k != 'Size'] - if i == 0: + headers = [k for k in modules[0].keys() if k not in ['Size', 'Delta']] + if not i: app.display(",".join(headers)) for row in modules: @@ -55,27 +70,45 @@ def format(s): return s -def print_table(app, modules, platform, version): - modules_table = {col: {} for col in modules[0].keys() if col != 'Size (Bytes)'} +def print_table(app, mode, modules): + modules_table = {col: {} for col in modules[0].keys() if '(Bytes)' 
not in col} for i, row in enumerate(modules): for key, value in row.items(): if key in modules_table: modules_table[key][i] = str(value) - app.display_table(platform + " " + version, modules_table) + app.display_table(mode, modules_table) -def get_dependencies_sizes(deps, download_urls): +def get_dependencies_sizes(deps, download_urls, compressed): file_data = [] for dep, url in zip(deps, download_urls, strict=False): - dep_response = requests.head(url) - dep_response.raise_for_status() - size = dep_response.headers.get("Content-Length", None) + if compressed: + with requests.get(url, stream=True) as response: + response.raise_for_status() + size = int(response.headers.get("Content-Length")) + else: + with requests.get(url, stream=True) as response: + response.raise_for_status() + wheel_data = response.content + + with tempfile.TemporaryDirectory() as tmpdir: + wheel_path = Path(tmpdir) / "package.whl" + with open(wheel_path, "wb") as f: + f.write(wheel_data) + extract_path = Path(tmpdir) / "extracted" + with zipfile.ZipFile(wheel_path, 'r') as zip_ref: + zip_ref.extractall(extract_path) + + size = 0 + for dirpath, _, filenames in os.walk(extract_path): + for name in filenames: + file_path = os.path.join(dirpath, name) + size += os.path.getsize(file_path) file_data.append({"File Path": dep, "Type": "Dependency", "Name": dep, "Size (Bytes)": int(size)}) - return file_data -def get_dependencies(file_path): +def get_dependencies_list(file_path): download_urls = [] deps = [] with open(file_path, "r", encoding="utf-8") as file: @@ -91,24 +124,34 @@ def get_dependencies(file_path): return deps, download_urls -def group_modules(modules, platform, version): +def group_modules(modules, platform, version, i): grouped_aux = {} for file in modules: key = (file['Name'], file['Type']) grouped_aux[key] = grouped_aux.get(key, 0) + file["Size (Bytes)"] - - return [ + if i is None: + return [ { 'Name': name, 'Type': type, 'Size (Bytes)': size, - 'Size': convert_size(size), - 
'Platform': platform, - 'Version': version, + 'Size': convert_size(size) } for (name, type), size in grouped_aux.items() - ] + ] + else: + return [ + { + 'Name': name, + 'Type': type, + 'Size (Bytes)': size, + 'Size': convert_size(size), + 'Platform': platform, + 'Version': version, + } + for (name, type), size in grouped_aux.items() + ] def get_gitignore_files(repo_path): @@ -132,7 +175,62 @@ def compress(file_path): compressed_size += len(compressor.flush()) return compressed_size - class WrongDependencyFormat(Exception): def __init__(self, mensaje): super().__init__(mensaje) + +class GitRepo: + def __init__(self, url): + self.url = url + self.repo_dir = None + + def __enter__(self): + self.repo_dir = tempfile.mkdtemp() + try: + self._run("git status") + except Exception: + # If it is not already a repo + self._run(f"git clone --quiet {self.url} {self.repo_dir}") + return self + + def _run(self, command): + result = subprocess.run(command, shell=True, capture_output=True, text=True, check=True, cwd=self.repo_dir) + return result.stdout.strip().split('\n') + + def get_module_commits(self, module_path, initial, final, time): + self._run("git fetch origin --quiet") # 1 min no coger todo solo el module + self._run("git checkout origin/HEAD") + if time: + return self._run(f'git log --since="{time}" --reverse --pretty=format:%H -- {module_path}') + elif not initial and not final: + return self._run(f"git log --reverse --pretty=format:%H -- {module_path}") + elif not final: + return self._run(f"git log --reverse --pretty=format:%H {initial}..HEAD -- {module_path}") + else: + try: + self._run(f"git merge-base --is-ancestor {initial} {final}") + except subprocess.CalledProcessError: + raise ValueError(f"Commit {initial} does not come before {final}") + return self._run(f"git log --reverse --pretty=format:%H {initial}..{final} -- {module_path}") + + + def checkout_commit(self, commit): + self._run(f"git fetch --quiet --depth 1 origin {commit}") + self._run(f"git checkout 
--quiet {commit}") + + def sparse_checkout_commit(self, commit_sha, module): + self._run("git sparse-checkout init --cone") + self._run(f"git sparse-checkout set {module}") + self._run(f"git checkout {commit_sha}") + + def get_commit_metadata(self,commit): + result = self._run(f'git log -1 --date=format:"%b %d %Y" --pretty=format:"%ad\n%an\n%s" {commit}') + date, author, message = result + return date, author, message + + def get_creation_commit_module(self, module): + return self._run(f'git log --reverse --format="%H" -- {module}')[0] + + def __exit__(self, exception_type, exception_value, exception_traceback): + if self.repo_dir and os.path.exists(self.repo_dir): + shutil.rmtree(self.repo_dir) diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index 8ce12e8ea8ae2..dd3ce4ab624df 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -3,80 +3,89 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import os -import shutil -import subprocess -import tempfile from pathlib import Path - +from rich.console import Console import click import requests - +import tempfile +import zipfile from .common import ( compress, - get_dependencies, + valid_platforms_versions, + get_dependencies_list, get_gitignore_files, group_modules, is_correct_dependency, is_valid_integration, print_csv, print_table, + GitRepo ) -VALID_PLATFORMS = ["linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"] -VALID_PYTHON_VERSIONS = ["3.12"] +# VALID_PLATFORMS, VALID_PYTHON_VERSIONS = valid_platforms_versions() +console = Console() @click.command() @click.argument("before") @click.argument("after") -@click.option('--platform', type=click.Choice(VALID_PLATFORMS), help="Target platform") -@click.option('--python', 'version', type=click.Choice(VALID_PYTHON_VERSIONS), help="Python version (MAJOR.MINOR)") +@click.option('--platform', help="Target platform") +@click.option('--python', 'version', help="Python version (MAJOR.MINOR)") 
@click.option('--compressed', is_flag=True, help="Measure compressed size") @click.option('--csv', is_flag=True, help="Output in CSV format") @click.pass_obj def diff(app, before, after, platform, version, compressed, csv): - try: - platforms = VALID_PLATFORMS if platform is None else [platform] - versions = VALID_PYTHON_VERSIONS if version is None else [version] - - for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): - diff_mode(app, before, after, plat, ver, compressed, csv, i) - except Exception as e: - app.abort(str(e)) - - -def diff_mode(app, before, after, platform, version, compressed, csv, i): - url = "https://github.com/DataDog/integrations-core.git" - if compressed: - files_b, dependencies_b, files_a, dependencies_a = get_repo_info(url, platform, version, before, after) - - integrations = get_diff(files_b, files_a, 'Integration') - dependencies = get_diff(dependencies_b, dependencies_a, 'Dependency') - grouped_modules = group_modules(integrations + dependencies, platform, version) - grouped_modules.sort(key=lambda x: abs(x['Size (Bytes)']), reverse=True) - for module in grouped_modules: - if module['Size (Bytes)'] > 0: - module['Size'] = f"+{module['Size']}" - if grouped_modules == []: - app.display("No size differences were detected between the selected commits.") - else: - if csv: - print_csv(app, i, grouped_modules) + repo_url = app.repo.path + with GitRepo(repo_url) as gitRepo: + try: + valid_platforms,valid_versions = valid_platforms_versions(gitRepo.repo_dir) + if platform and platform not in valid_platforms: + raise ValueError(f"Invalid platform: {platform}") + elif version and version not in valid_versions: + raise ValueError(f"Invalid version: {version}") + if platform is None or version is None: + platforms = valid_platforms if platform is None else [platform] + versions = valid_versions if version is None else [version] + + for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): + 
diff_mode(app, gitRepo, before, after, plat, ver, compressed, csv, i) else: - print_table(app, grouped_modules, platform, version) - + diff_mode(app, gitRepo, before, after, platform, version, compressed, csv, None) + + except Exception as e: + app.abort(str(e)) + + +def diff_mode(app, gitRepo, before, after, platform, version, compressed, csv, i): + files_b, dependencies_b, files_a, dependencies_a = get_repo_info(gitRepo, platform, version, before, after, compressed) + + integrations = get_diff(files_b, files_a, 'Integration') + dependencies = get_diff(dependencies_b, dependencies_a, 'Dependency') + grouped_modules = group_modules(integrations + dependencies, platform, version, i) + grouped_modules.sort(key=lambda x: abs(x['Size (Bytes)']), reverse=True) + for module in grouped_modules: + if module['Size (Bytes)'] > 0: + module['Size'] = f"+{module['Size']}" + if grouped_modules == []: + app.display("No size differences were detected between the selected commits.") + else: + if csv: + print_csv(app, i, grouped_modules) + else: + print_table(app, "Diff", grouped_modules) -def get_repo_info(repo_url, platform, version, before, after): - with GitRepo(repo_url) as gitRepo: - repo = gitRepo.repo_dir +def get_repo_info(gitRepo, platform, version, before, after, compressed): + repo = gitRepo.repo_dir + with console.status("[cyan]Calculating compressed sizes for the first commit...", spinner="dots"): gitRepo.checkout_commit(before) - files_b = get_compressed_files(repo) - dependencies_b = get_compressed_dependencies(repo, platform, version) + files_b = get_files(repo, compressed) + dependencies_b = get_dependencies(repo, platform, version, compressed) + with console.status("[cyan]Calculating compressed sizes for the second commit...", spinner="dots"): gitRepo.checkout_commit(after) - files_a = get_compressed_files(repo) - dependencies_a = get_compressed_dependencies(repo, platform, version) + files_a = get_files(repo, compressed) + dependencies_a = get_dependencies(repo, 
platform, version, compressed) return files_b, dependencies_b, files_a, dependencies_a @@ -122,7 +131,7 @@ def get_diff(size_before, size_after, type): return diff_files -def get_compressed_files(repo_path): +def get_files(repo_path, compressed): ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} git_ignore = get_gitignore_files(repo_path) @@ -138,12 +147,12 @@ def get_compressed_files(repo_path): # Filter files if is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): - compressed_size = compress(file_path) - file_data[relative_path] = compressed_size + size = compress(file_path) if compressed else os.path.getsize(file_path) + file_data[relative_path] = size return file_data -def get_compressed_dependencies(repo_path, platform, version): +def get_dependencies(repo_path, platform, version, compressed): resolved_path = os.path.join(repo_path, ".deps/resolved") @@ -151,45 +160,37 @@ def get_compressed_dependencies(repo_path, platform, version): file_path = os.path.join(resolved_path, filename) if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): - deps, download_urls = get_dependencies(file_path) - return get_dependencies_sizes(deps, download_urls) + deps, download_urls = get_dependencies_list(file_path) + return get_dependencies_sizes(deps, download_urls, compressed) return {} -def get_dependencies_sizes(deps, download_urls): +def get_dependencies_sizes(deps, download_urls, compressed): file_data = {} - for dep, url in zip(deps, download_urls, strict=False): - dep_response = requests.head(url) - dep_response.raise_for_status() - size = dep_response.headers.get("Content-Length", None) - file_data[dep] = int(size) - + for dep, url in zip(deps, download_urls): + if compressed: + with requests.get(url, stream=True) as response: + response.raise_for_status() + size = int(response.headers.get("Content-Length")) + else: + with requests.get(url, stream=True) as response: + 
response.raise_for_status() + wheel_data = response.content + + with tempfile.TemporaryDirectory() as tmpdir: + wheel_path = Path(tmpdir) / "package.whl" + with open(wheel_path, "wb") as f: + f.write(wheel_data) + extract_path = Path(tmpdir) / "extracted" + with zipfile.ZipFile(wheel_path, 'r') as zip_ref: + zip_ref.extractall(extract_path) + + size = 0 + for dirpath, _, filenames in os.walk(extract_path): + for name in filenames: + file_path = os.path.join(dirpath, name) + size += os.path.getsize(file_path) + file_data[dep] = size return file_data -class GitRepo: - def __init__(self, url): - self.url = url - self.repo_dir = None - - def __enter__(self): - self.repo_dir = tempfile.mkdtemp() - self._run("git init --quiet") - self._run(f"git remote add origin {self.url}") - return self - - def _run(self, cmd): - subprocess.run( - cmd, - shell=True, - cwd=self.repo_dir, - check=True, - ) - - def checkout_commit(self, commit): - self._run(f"git fetch --quiet --depth 1 origin {commit}") - self._run(f"git checkout --quiet {commit}") - - def __exit__(self, exception_type, exception_value, exception_traceback): - if self.repo_dir and os.path.exists(self.repo_dir): - shutil.rmtree(self.repo_dir) diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index f6d44eb5068bd..252e682210e6c 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -4,12 +4,12 @@ import os from pathlib import Path - +from rich.console import Console import click from .common import ( compress, - get_dependencies, + get_dependencies_list, get_dependencies_sizes, get_gitignore_files, group_modules, @@ -17,44 +17,56 @@ is_valid_integration, print_csv, print_table, + valid_platforms_versions ) -VALID_PLATFORMS = ["linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"] -VALID_PYTHON_VERSIONS = ["3.12"] +#VALID_PLATFORMS = ["linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"] + + REPO_PATH = 
Path(__file__).resolve().parents[5] +# VALID_PLATFORMS, VALID_PYTHON_VERSIONS = valid_platforms_versions() +console = Console() @click.command() -@click.option('--platform', type=click.Choice(VALID_PLATFORMS), help="Target platform") -@click.option('--python', 'version', type=click.Choice(VALID_PYTHON_VERSIONS), help="Python version (MAJOR.MINOR)") +@click.option('--platform', help="Target platform") +@click.option('--python', 'version', help="Python version (MAJOR.MINOR)") @click.option('--compressed', is_flag=True, help="Measure compressed size") @click.option('--csv', is_flag=True, help="Output in CSV format") @click.pass_obj def status(app, platform, version, compressed, csv): try: - platforms = VALID_PLATFORMS if platform is None else [platform] - versions = VALID_PYTHON_VERSIONS if version is None else [version] - - for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): - status_mode(app, plat, ver, compressed, csv, i) + repo_path = app.repo.path + valid_platforms,valid_versions = valid_platforms_versions(repo_path) + if platform and platform not in valid_platforms: + raise ValueError(f"Invalid platform: {platform}") + elif version and version not in valid_versions: + raise ValueError(f"Invalid version: {version}") + if platform is None or version is None: + platforms = valid_platforms if platform is None else [platform] + versions = valid_versions if version is None else [version] + for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): + status_mode(app, plat, ver, compressed, csv, i) + else: + status_mode(app, platform, version, compressed, csv, None) + except Exception as e: app.abort(str(e)) def status_mode(app, platform, version, compressed, csv, i): - if compressed: - modules = get_compressed_files() + get_compressed_dependencies(platform, version) + with console.status("[cyan]Calculating sizes...", spinner="dots"): + modules = get_files(compressed) + get_dependencies(platform, version,compressed) + 
grouped_modules = group_modules(modules, platform, version, i) + grouped_modules.sort(key=lambda x: x['Size (Bytes)'], reverse=True) - grouped_modules = group_modules(modules, platform, version) - grouped_modules.sort(key=lambda x: x['Size (Bytes)'], reverse=True) - - if csv: - print_csv(app, i, grouped_modules) - else: - print_table(app, grouped_modules, platform, version) + if csv: + print_csv(app, i, grouped_modules) + else: + print_table(app, "STATUS", grouped_modules) -def get_compressed_files(): +def get_files(compressed): ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} git_ignore = get_gitignore_files(REPO_PATH) @@ -70,24 +82,27 @@ def get_compressed_files(): # Filter files if is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): - compressed_size = compress(file_path) + size = compress(file_path) if compressed else os.path.getsize(file_path) integration = relative_path.split(os.sep)[0] file_data.append( { "File Path": relative_path, "Type": "Integration", "Name": integration, - "Size (Bytes)": compressed_size, + "Size (Bytes)": size, } ) return file_data -def get_compressed_dependencies(platform, version): +def get_dependencies(platform, version): resolved_path = os.path.join(REPO_PATH, ".deps/resolved") for filename in os.listdir(resolved_path): file_path = os.path.join(resolved_path, filename) if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): - deps, download_urls = get_dependencies(file_path) + deps, download_urls = get_dependencies_list(file_path) return get_dependencies_sizes(deps, download_urls) + + + diff --git a/ddev/tests/cli/size/test_common.py b/ddev/tests/cli/size/test_common.py index 839ef2f9b7627..f3c8565ac5e07 100644 --- a/ddev/tests/cli/size/test_common.py +++ b/ddev/tests/cli/size/test_common.py @@ -10,9 +10,39 @@ is_correct_dependency, is_valid_integration, print_csv, + valid_platforms_versions ) +def test_valid_platforms_versions(): + filenames = [ + 
"linux-aarch64_3.12.txt", + "linux-aarch64_py2.txt", + "linux-aarch64_py3.txt", + "linux-x86_64_3.12.txt", + "linux-x86_64_py2.txt", + "linux-x86_64_py3.txt", + "macos-x86_64_3.12.txt", + "macos-x86_64_py2.txt", + "macos-x86_64_py3.txt", + "windows-x86_64_3.12.txt", + "windows-x86_64_py2.txt", + "windows-x86_64_py3.txt" + ] + + expected_platforms = { + "linux-aarch64", + "linux-x86_64", + "macos-x86_64", + "windows-x86_64" + } + expected_versions = {"3.12"} + with patch("os.listdir", return_value=filenames): + platforms, versions = valid_platforms_versions("/tmp/fake_repo") + assert platforms == expected_platforms + assert versions == expected_versions + + def test_is_correct_dependency(): assert is_correct_dependency("windows-x86_64", "3.12", "windows-x86_64-3.12") assert not is_correct_dependency("windows-x86_64", "3.12", "linux-x86_64-3.12") @@ -97,7 +127,7 @@ def test_group_modules(): }, ] - assert group_modules(modules, platform, version) == expected_output + assert group_modules(modules, platform, version,0) == expected_output def test_get_gitignore_files(): diff --git a/ddev/tests/cli/size/test_diff.py b/ddev/tests/cli/size/test_diff.py index 1ba94bfdc6693..cd6027bc6914a 100644 --- a/ddev/tests/cli/size/test_diff.py +++ b/ddev/tests/cli/size/test_diff.py @@ -138,6 +138,7 @@ def get_compressed_dependencies_side_effect(_, __, ___): get_compressed_dependencies_side_effect.counter = 0 with ( + patch("ddev.cli.size.diff.valid_platforms_versions", return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'})), patch("ddev.cli.size.diff.GitRepo.__enter__", return_value=mock_git_repo), patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None), patch("ddev.cli.size.diff.GitRepo.checkout_commit"), @@ -174,15 +175,20 @@ def test_diff_csv(ddev, mock_size_diff_dependencies): - def test_diff_no_differences(ddev): fake_repo = MagicMock() + fake_repo.repo_dir = "/tmp/fake_repo" with ( patch("ddev.cli.size.diff.GitRepo.__enter__", 
return_value=fake_repo), + patch("ddev.cli.size.diff.valid_platforms_versions", return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'})), patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None), patch.object(fake_repo, "checkout_commit"), patch("tempfile.mkdtemp", return_value="/tmp/fake_repo"), + patch("os.path.exists", return_value=True), + patch("os.path.isdir", return_value=True), + patch("os.path.isfile", return_value=True), + patch("os.listdir", return_value=["linux-aarch64_3.12"]), patch( "ddev.cli.size.diff.get_compressed_files", return_value={ diff --git a/ddev/tests/cli/size/test_status.py b/ddev/tests/cli/size/test_status.py index 22031ad0d5e52..f4500c228600a 100644 --- a/ddev/tests/cli/size/test_status.py +++ b/ddev/tests/cli/size/test_status.py @@ -12,7 +12,7 @@ ) -def test_get_compressed_files(): +def test_get_files_compressed(): mock_files = [ ("root/integration/datadog_checks", [], ["file1.py", "file2.py"]), ("root/integration_b/datadog_checks", [], ["file3.py"]), @@ -35,7 +35,7 @@ def fake_compress(file_path): patch("ddev.cli.size.status.compress", side_effect=fake_compress), ): - result = get_compressed_files() + result = get_files(True) expected = [ { @@ -93,6 +93,7 @@ def test_get_compressed_dependencies(): @pytest.fixture() def mock_size_status(): with ( + patch("ddev.cli.size.status.valid_platforms_versions", return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'})), patch("ddev.cli.size.status.get_gitignore_files", return_value=set()), patch("ddev.cli.size.status.compress", return_value=1234), patch("ddev.cli.size.status.get_dependencies", return_value=(["dep1"], {"dep1": "https://example.com/dep1"})), diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py new file mode 100644 index 0000000000000..60e92b4f6c96a --- /dev/null +++ b/ddev/tests/cli/size/test_timeline.py @@ -0,0 +1,306 @@ +import pytest +from unittest.mock import 
MagicMock, patch, mock_open +from ddev.cli.size.timeline import ( + get_version, + format_commit_data, + trim_modules, + group_modules, + get_dependency_size, + get_dependency, + get_compressed_dependencies, + get_compressed_files, + module_exists +) +from datetime import datetime + + +def test_get_compressed_files(): + with ( + patch("os.walk", return_value=[("/tmp/fake_repo/int1", [], ["int1.py"])]), + patch("os.path.relpath", return_value="int1/int1.py"), + patch("os.path.exists", return_value=True), + patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), + patch("ddev.cli.size.timeline.is_valid_integration", return_value=True), + patch("ddev.cli.size.timeline.compress", return_value=1234), + ): + result = get_compressed_files( + "/tmp/fake_repo", + "int1", + "abc1234", + datetime(2025, 4, 4).date(), + "auth", + "Added int1", + [] + ) + assert result == [ + { + "Size (Bytes)": 1234, + "Date": datetime(2025, 4, 4).date(), + "Author": "auth", + "Commit Message": "Added int1", + "Commit SHA": "abc1234" + } + ] + +def test_get_compressed_files_deleted_only(): + repo_path = "/tmp/fake_repo" + module = "foo" + commit = "abc1234" + date = datetime.strptime("Apr 5 2025", "%b %d %Y").date() + author = "Author" + message = "deleted module" + + with ( + patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), + patch("os.walk", return_value=[]), + patch("os.path.relpath", side_effect=lambda path, _: path.replace(f"{repo_path}/", "")), + patch("os.path.exists", return_value=False), + ): + file_data = get_compressed_files(repo_path, module, commit, date, author, message, []) + + assert file_data == [ + { + "Size (Bytes)": 0, + "Date": date, + "Author": author, + "Commit Message": "(DELETED) " + message, + "Commit SHA": commit, + } + ] + + +def test_get_version(): + files = ["linux-x86_64_3.12.txt", "linux-x86_64_3.10.txt"] + version = get_version(files, "linux-x86_64") + assert version == "3.12" + +def test_format_commit_data(): + date, 
message, commit = format_commit_data("Apr 4 2025", "this is a very long commit message that should be trimmed (#1234)", "abc1234def", "abc1234def") + expected_date = datetime.strptime("Apr 4 2025", "%b %d %Y").date() + expected_message = "(NEW) this is a very long...(#1234)" + expected_commit = "abc1234" + assert date == expected_date + assert message == expected_message + assert commit == expected_commit + +def test_trim_modules_keep_some_remove_some(): + modules = [ + {"Size (Bytes)": 1000}, + {"Size (Bytes)": 1100}, # diff = 100 -> should be removed if threshold = 200 + {"Size (Bytes)": 1400}, # diff = 300 -> should be kept + ] + expected = [ + {"Size (Bytes)": 1000, "Delta (Bytes)": 0, "Delta": " "}, + {"Size (Bytes)": 1400, "Delta (Bytes)": 300, "Delta": "300 B"}, + ] + trimmed = trim_modules(modules, threshold=200) + assert trimmed == expected + + +def test_group_modules(): + modules = [ + {"Size (Bytes)": 1000, "Date": datetime(2025, 4, 4).date(), "Author": "A", "Commit Message": "msg", "Commit SHA": "c1"}, + {"Size (Bytes)": 500, "Date": datetime(2025, 4, 4).date(), "Author": "A", "Commit Message": "msg", "Commit SHA": "c1"}, + {"Size (Bytes)": 1500, "Date": datetime(2025, 4, 5).date(), "Author": "A", "Commit Message": "msg2", "Commit SHA": "c2"}, + ] + expected = [ + { + "Commit SHA": "c1", + "Size (Bytes)": 1500, + "Size": "1.46 KB", + "Delta (Bytes)": "N/A", + "Delta": "N/A", + "Date": datetime(2025, 4, 4).date(), + "Author": "A", + "Commit Message": "msg", + "Platform": "linux-x86_64", + }, + { + "Commit SHA": "c2", + "Size (Bytes)": 1500, + "Size": "1.46 KB", + "Delta (Bytes)": "N/A", + "Delta": "N/A", + "Date": datetime(2025, 4, 5).date(), + "Author": "A", + "Commit Message": "msg2", + "Platform": "linux-x86_64", + }, + ] + grouped = group_modules(modules, "linux-x86_64", 0) + assert grouped == expected + + +def test_get_dependency(): + content = """dep1 @ https://example.com/dep1.whl +dep2 @ https://example.com/dep2.whl""" + with 
patch("builtins.open", mock_open(read_data=content)): + url = get_dependency("some/path/file.txt", "dep2") + assert url == "https://example.com/dep2.whl" + +def make_mock_response(size): + mock_response = MagicMock() + mock_response.__enter__.return_value = mock_response + mock_response.headers = {"Content-Length": size} + mock_response.raise_for_status = lambda: None + return mock_response + +def test_get_dependency_size(): + mock_response = make_mock_response("45678") + with patch("requests.get", return_value=mock_response): + info = get_dependency_size("https://example.com/file.whl", "abc1234", datetime(2025, 4, 4).date(), "auth", "Fixed bug") + assert info == { + "Size (Bytes)": 45678, + "Date": datetime(2025, 4, 4).date(), + "Author": "auth", + "Commit Message": "Fixed bug", + "Commit SHA": "abc1234", + } + +def test_get_compressed_dependencies(): + with ( + patch("os.path.exists", return_value=True), + patch("os.path.isdir", return_value=True), + patch("os.path.isfile", return_value=True), + patch("os.listdir", return_value=["linux-x86_64_3.12.txt"]), + patch("ddev.cli.size.timeline.get_dependency", return_value="https://example.com/dep1.whl"), + patch("ddev.cli.size.timeline.requests.get", return_value=make_mock_response("12345")), + ): + result = get_compressed_dependencies( + "/tmp/fake_repo", + "dep1", + "linux-x86_64", + "abc1234", + datetime(2025, 4, 4).date(), + "auth", + "Added dep1" + ) + assert result == { + "Size (Bytes)": 12345, + "Date": datetime(2025, 4, 4).date(), + "Author": "auth", + "Commit Message": "Added dep1", + "Commit SHA": "abc1234" + } + +def test_get_dependency_size(): + with patch("requests.get", return_value=make_mock_response("45678")): + result = get_dependency_size( + "https://example.com/dep1.whl", + "abc1234", + datetime(2025, 4, 4).date(), + "auth", + "Fixed bug" + ) + assert result == { + "Size (Bytes)": 45678, + "Date": datetime(2025, 4, 4).date(), + "Author": "auth", + "Commit Message": "Fixed bug", + "Commit SHA": 
"abc1234" + } + + +@pytest.fixture +def mock_timeline_gitrepo(): + mock_git_repo = MagicMock() + mock_git_repo.repo_dir = "/tmp/fake_repo" + mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] + mock_git_repo.get_creation_commit_module.return_value = "commit1" + mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Initial commit", c) + + with ( + patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_git_repo), + patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), + patch("ddev.cli.size.timeline.GitRepo.sparse_checkout_commit"), + patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), + patch("ddev.cli.size.timeline.compress", return_value=1234), + patch("os.walk", return_value=[("/tmp/fake_repo/int", [], ["file1.py"])]), + patch("os.path.exists", return_value=True), + patch("ddev.cli.size.timeline.group_modules", side_effect=lambda m, *_: m), + patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), + patch("ddev.cli.size.timeline.print_table"), + patch("ddev.cli.size.timeline.print_csv"), + patch("os.path.exists", return_value=True), + patch("os.path.isdir", return_value=True), + patch("os.path.isfile", return_value=True), + patch("os.listdir", return_value=[ + "linux-x86_64_3.12_dep1.whl", + "linux-x86_64_3.12_dep2.whl" + ]), + ): + yield + +@pytest.fixture +def app(): + mock_app = MagicMock() + mock_app.repo.path = "/tmp/fake_repo" + return mock_app + +def test_timeline_integration_compressed(ddev, mock_timeline_gitrepo, app): + result = ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--compressed", obj=app) + assert result.exit_code == 0 + +@pytest.fixture +def mock_timeline_dependencies(): + mock_git_repo = MagicMock() + mock_git_repo.repo_dir = "/tmp/fake_repo" + mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] + mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) + + with ( 
+ patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_git_repo), + patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), + patch("ddev.cli.size.timeline.GitRepo.sparse_checkout_commit"), + patch("ddev.cli.size.timeline.valid_platforms_versions", return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'})), + patch("os.path.exists", return_value=True), + patch("os.path.isdir", return_value=True), + patch("os.listdir", return_value=["linux-x86_64-3.12"]), + patch("os.path.isfile", return_value=True), + patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), + patch("ddev.cli.size.timeline.get_dependency", return_value="https://example.com/dep1.whl"), + patch("ddev.cli.size.timeline.requests.get") as mock_get, + patch("ddev.cli.size.timeline.group_modules", side_effect=lambda m, *_: m), + patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), + patch("ddev.cli.size.timeline.print_table"), + ): + mock_response = MagicMock() + mock_response.__enter__.return_value = mock_response + mock_response.headers = {"Content-Length": "1024"} + mock_response.raise_for_status = lambda: None + mock_get.return_value = mock_response + + yield + +def test_timeline_dependency_compressed(ddev, mock_timeline_dependencies, app): + result = ddev( + "size", "timeline", "dependency", "dep1", "commit1", "commit2", + "--compressed", "--platform", "linux-x86_64", + obj=app, + ) + + assert result.exit_code == 0 + + +def test_timeline_invalid_platform(ddev): + result = ddev( + "size", "timeline", "dependency", "dep1", "commit1", "commit2", + "--compressed", "--platform", "invalid-platform" + ) + assert result.exit_code != 0 + + + + +def test_timeline_no_changes_in_integration(ddev): + mock_git_repo = MagicMock() + mock_git_repo.repo_dir = "/tmp/fake_repo" + mock_git_repo.get_module_commits.return_value = [""] + + with ( + patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_git_repo), 
+ patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), + ): + result = ddev("size", "timeline", "integration", "integration/foo", "commit1", "commit2", "--compressed") + assert result.exit_code != 0 + assert "No changes found" in result.output From 846886dc0028f9790b02f13b8f369a0a4107f543 Mon Sep 17 00:00:00 2001 From: Enrico Donnici Date: Tue, 15 Apr 2025 11:55:45 +0200 Subject: [PATCH 09/70] Test ddev size status in GHA (by hijacking the slapr workflow) --- .github/workflows/slapr.yml | 73 +++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 24 deletions(-) diff --git a/.github/workflows/slapr.yml b/.github/workflows/slapr.yml index 98ee0448e5614..3fbefcc73aa2e 100644 --- a/.github/workflows/slapr.yml +++ b/.github/workflows/slapr.yml @@ -1,30 +1,55 @@ -# https://github.com/DataDog/slapr +name: Measure Disk Usage -name: Slack emoji PR updates on: - pull_request_review: - types: [submitted] pull_request: - types: [closed] + branches: + - master +env: + PYTHON_VERSION: "3.12" jobs: - run_slapr_agent_integrations: - runs-on: ubuntu-latest - strategy: - matrix: - slack_channel_variable: - - SLACK_CHANNEL_ID - - SLACK_CHANNEL_ID_AGENT_INTEGRATIONS_REVIEWS - - SLACK_CHANNEL_ID_INFRA_INTEGRATIONS + measure-disk-usage: + runs-on: ubuntu-22.04 steps: - - uses: DataDog/slapr@master - env: - GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" - SLACK_CHANNEL_ID: "${{ secrets[matrix.slack_channel_variable] }}" - SLACK_API_TOKEN: "${{ secrets.SLACK_API_TOKEN }}" - SLAPR_BOT_USER_ID: "${{ secrets.SLAPR_BOT_USER_ID }}" - SLAPR_EMOJI_REVIEW_STARTED: "review_started" - SLAPR_EMOJI_APPROVED: "approved2" - SLAPR_EMOJI_CHANGES_REQUESTED: "changes_requested" - SLAPR_EMOJI_MERGED: "merged" - SLAPR_EMOJI_CLOSED: "closed" + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Set up Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + - name: Install ddev + run: | + pip install -e 
./datadog_checks_dev[cli] + pip install -e ./ddev + + - name: Configure ddev + run: | + ddev config set repos.core . + ddev config set repo core + - name: Measure disk usage Uncompressed + run: | + ddev size status --csv > size-uncompressed.csv + ddev size status + echo "```" >> $GITHUB_STEP_SUMMARY + ddev size status >> $GITHUB_STEP_SUMMARY + echo "```" >> $GITHUB_STEP_SUMMARY + - name: Measure disk usage Compressed + run: | + ddev size status --csv --compressed > size-compressed.csv + ddev size status --compressed + echo "```" >> $GITHUB_STEP_SUMMARY + ddev size status --compressed >> $GITHUB_STEP_SUMMARY + echo "```" >> $GITHUB_STEP_SUMMARY + - name: Upload file sizes (uncompressed) + uses: actions/upload-artifact@v4 + with: + name: size-uncompressed.csv + path: size-uncompressed.csv + if-no-files-found: error + - name: Upload file sizes (compressed) + uses: actions/upload-artifact@v4 + with: + name: size-compressed.csv + path: size-compressed.csv + if-no-files-found: error From a3aafc5540d4c27a4f9a184abb9d5337c9902a44 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Tue, 15 Apr 2025 12:03:34 +0200 Subject: [PATCH 10/70] Timeline mode --- ddev/src/ddev/cli/size/common.py | 6 +- ddev/src/ddev/cli/size/diff.py | 6 +- ddev/src/ddev/cli/size/status.py | 4 +- ddev/src/ddev/cli/size/timeline.py | 314 +++++++++++++++++++++++++++ ddev/tests/cli/size/test_common.py | 8 +- ddev/tests/cli/size/test_diff.py | 36 +-- ddev/tests/cli/size/test_status.py | 8 +- ddev/tests/cli/size/test_timeline.py | 29 ++- 8 files changed, 368 insertions(+), 43 deletions(-) create mode 100644 ddev/src/ddev/cli/size/timeline.py diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 4c2ad720edb7b..0f480bbe5ab85 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -83,9 +83,9 @@ def get_dependencies_sizes(deps, download_urls, compressed): file_data = [] for dep, url in zip(deps, download_urls, strict=False): if compressed: 
- with requests.get(url, stream=True) as response: - response.raise_for_status() - size = int(response.headers.get("Content-Length")) + response = requests.head(url) + response.raise_for_status() + size = int(response.headers.get("Content-Length")) else: with requests.get(url, stream=True) as response: response.raise_for_status() diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index dd3ce4ab624df..1700eda378686 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -169,9 +169,9 @@ def get_dependencies_sizes(deps, download_urls, compressed): file_data = {} for dep, url in zip(deps, download_urls): if compressed: - with requests.get(url, stream=True) as response: - response.raise_for_status() - size = int(response.headers.get("Content-Length")) + response = requests.head(url) + response.raise_for_status() + size = int(response.headers.get("Content-Length")) else: with requests.get(url, stream=True) as response: response.raise_for_status() diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index 252e682210e6c..8e1db4e78b840 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -95,14 +95,14 @@ def get_files(compressed): return file_data -def get_dependencies(platform, version): +def get_dependencies(platform, version, compressed): resolved_path = os.path.join(REPO_PATH, ".deps/resolved") for filename in os.listdir(resolved_path): file_path = os.path.join(resolved_path, filename) if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): deps, download_urls = get_dependencies_list(file_path) - return get_dependencies_sizes(deps, download_urls) + return get_dependencies_sizes(deps, download_urls, compressed) diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py new file mode 100644 index 0000000000000..aee4256084ed9 --- /dev/null +++ b/ddev/src/ddev/cli/size/timeline.py @@ -0,0 +1,314 @@ + 
+import click +import requests +import os +import re +from datetime import datetime +from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, TimeElapsedColumn +from rich.console import Console +import tempfile +from pathlib import Path +import zipfile +from .common import ( + compress, + get_gitignore_files, + convert_size, + is_correct_dependency, + is_valid_integration, + print_csv, + print_table, + GitRepo, + WrongDependencyFormat, + valid_platforms_versions +) + +#VALID_PLATFORMS = ["linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"] +#VALID_PYTHON_VERSIONS = ["3.12"] + +# VALID_PLATFORMS, _ = valid_platforms_versions() +DEPENDENCY_FILE_CHANGE = datetime.strptime("Sep 17 2024","%b %d %Y").date() +MINIMUM_DATE = datetime.strptime("Apr 3 2024","%b %d %Y").date() +console = Console() + +@click.command() +@click.argument('type', type=click.Choice(['integration', 'dependency'])) +@click.argument('module') +@click.argument('initial', required=False) +@click.argument('final', required=False) +@click.option('--time', help="Filter commits starting from a specific date. Accepts both absolute and relative formats, " + "such as '2025-03-01', '2 weeks ago', or 'yesterday'") +@click.option('--threshold', help="Only show modules with size differences greater than a threshold in bytes") +@click.option('--platform', help="Target platform to analyze. Only required for dependencies. 
If not specified, all platforms will be analyzed") +#@click.option('--python', 'version', type=click.Choice(VALID_PYTHON_VERSIONS), help="Python version (MAJOR.MINOR)") +@click.option('--compressed', is_flag=True, help="Measure compressed size") +@click.option('--csv', is_flag=True, help="Output results in CSV format") +@click.pass_obj +def timeline(app, type, module, initial, final, time, threshold, platform, compressed, csv): + url = app.repo.path + with GitRepo(url) as gitRepo: + try: + with console.status("[cyan]Fetching commits...", spinner="dots"): + folder = module if type == 'integration' else '.deps/resolved' + commits = gitRepo.get_module_commits(folder, initial, final, time) + first_commit = gitRepo.get_creation_commit_module(module) + gitRepo.checkout_commit(commits[-1]) + valid_platforms, _ = valid_platforms_versions(gitRepo.repo_dir) + n_platforms = len(valid_platforms) + if platform and platform not in valid_platforms: + raise ValueError(f"Invalid platform: {platform}") + elif commits == [''] and type == "integration" and module_exists(gitRepo.repo_dir, module): + raise ValueError(f"No changes found: {module}") + elif commits == [''] and type == "integration" and not module_exists(gitRepo.repo_dir, module): + raise ValueError(f"Integration {module} not found in latest commit, is the name correct?") + elif type == 'dependency' and platform and module not in get_dependency_list(gitRepo.repo_dir, [platform]): + raise ValueError(f"Dependency {module} not found in latest commit for the platform {platform}, is the name correct?") + elif type == 'dependency' and not platform and module not in get_dependency_list(gitRepo.repo_dir, valid_platforms): + raise ValueError(f"Dependency {module} not found in latest commit, is the name correct?") + elif type == 'dependency' and commits == ['']: + raise ValueError(f"No changes found: {module}") + if type == "dependency" and platform is None: + for i, plat in enumerate(valid_platforms): + timeline_mode(app, gitRepo, 
type, module, commits, threshold, plat, compressed, csv, i, True, n_platforms, None) + else: + timeline_mode(app, gitRepo, type, module, commits, threshold, platform, compressed, csv, None, False, n_platforms, first_commit) + except Exception as e: + app.abort(str(e)) + + +def timeline_mode(app, gitRepo, type, module, commits, threshold, platform, compressed, csv, i, maybe_mod_missing, n_platforms,first_commit): + modules = get_repo_info(gitRepo, type, platform, module, commits, i, maybe_mod_missing,n_platforms, compressed, first_commit) + if modules != []: + with console.status("[cyan]Exporting data...", spinner="dots"): + grouped_modules = group_modules(modules, platform, i) + trimmed_modules = trim_modules(grouped_modules, threshold) + maybe_mod_missing = False + if csv: + print_csv(app, i, trimmed_modules) + else: + print_table(app, "Timeline for " + module, trimmed_modules) + +def get_repo_info(gitRepo, type, platform, module, commits, i, maybe_mod_missing, n_platforms, compressed, first_commit): + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + TimeElapsedColumn(), + transient=True, + ) as progress: + if type == "integration": + file_data = process_commits(commits, module, gitRepo, progress, platform, type, compressed, first_commit) + else: + file_data = process_commits(commits, module, gitRepo, progress, platform, type, compressed, None) + return file_data + +def process_commits(commits, module, gitRepo, progress, platform, type, compressed, first_commit=None): + file_data=[] + task = progress.add_task("[cyan]Processing commits...", total=len(commits)) + repo = gitRepo.repo_dir + + folder = module if type == 'integration' else '.deps/resolved' + for commit in commits: + gitRepo.sparse_checkout_commit(commit, folder) + date, author, message = gitRepo.get_commit_metadata(commit) + date, message, commit = format_commit_data(date, message, commit, first_commit) + if type == 'dependency' and date < 
MINIMUM_DATE: + continue + elif type == 'dependency': + result = get_dependencies(repo, module, platform, commit, date, author, message, compressed) + if result: + file_data.append(result) + elif type == 'integration': + file_data = get_files(repo, module, commit, date, author, message, file_data, compressed) + progress.advance(task) + return file_data + +def get_files(repo_path, module, commit, date, author, message, file_data, compressed): + + if not module_exists(repo_path, module): + file_data.append( + { + "Size (Bytes)": 0, + "Date": date, + "Author": author, + "Commit Message": "(DELETED) " + message, + "Commit SHA": commit + } + ) + return file_data + + ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} + # resolved_path = os.path.join(repo_path, module) + + git_ignore = get_gitignore_files(repo_path) + included_folder = "datadog_checks/" + for root, _, files in os.walk(repo_path): + for file in files: + file_path = os.path.join(root, file) + # Convert the path to a relative format within the repo + relative_path = os.path.relpath(file_path, repo_path) + + # Filter files + if is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): + size = compress(file_path) if compressed else os.path.getsize(file_path) + file_data.append( + { + "Size (Bytes)": size, + "Date": date, + "Author": author, + "Commit Message": message, + "Commit SHA": commit + } + ) + return file_data + +def get_dependencies(repo_path, module, platform, commit, date, author, message, compressed): + resolved_path = os.path.join(repo_path, ".deps/resolved") + paths = os.listdir(resolved_path) + version = get_version(paths, platform) + for filename in paths: + file_path = os.path.join(resolved_path, filename) + if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): + download_url = get_dependency(file_path, module) + return get_dependency_size(download_url, commit, date, author, message, compressed) if download_url else None 
+ +def get_dependency(file_path, module): + with open(file_path, "r", encoding="utf-8") as file: + file_content = file.read() + for line in file_content.splitlines(): + match = re.search(r"([\w\-\d\.]+) @ (https?://[^\s#]+)", line) + if not match: + raise WrongDependencyFormat("The dependency format 'name @ link' is no longer supported.") + name, url = match.groups() + if name == module: + return url + return None + +def get_dependency_size(download_url, commit, date, author, message, compressed): + if compressed: + response = requests.head(download_url) + response.raise_for_status() + size = int(response.headers.get("Content-Length")) + else: + with requests.get(download_url, stream=True) as response: + response.raise_for_status() + wheel_data = response.content + + with tempfile.TemporaryDirectory() as tmpdir: + wheel_path = Path(tmpdir) / "package.whl" + with open(wheel_path, "wb") as f: + f.write(wheel_data) + extract_path = Path(tmpdir) / "extracted" + with zipfile.ZipFile(wheel_path, 'r') as zip_ref: + zip_ref.extractall(extract_path) + + size = 0 + for dirpath, _, filenames in os.walk(extract_path): + for name in filenames: + file_path = os.path.join(dirpath, name) + size += os.path.getsize(file_path) + + return { + "Size (Bytes)": size, + "Date": date, + "Author": author, + "Commit Message": message, + "Commit SHA": commit + } + +def get_version(files, platform): + final_version = '' + for file in files: + if platform in file: + version = file.split('_')[-1] + match = re.search(r"\d+(?:\.\d+)?", version) + version = match.group(0) if match else None + if version > final_version: + final_version = version + return final_version if len(final_version) != 1 else 'py'+ final_version + + +def is_correct_dependency(platform, version, name): + return platform in name and version in name + + + + + +def group_modules(modules, platform, i): + grouped_aux = {} + + for file in modules: + key = (file['Date'], file['Author'], file['Commit Message'], file['Commit SHA']) + 
grouped_aux[key] = grouped_aux.get(key, 0) + file["Size (Bytes)"] + if i is None: + return [ + { + "Commit SHA": commit, + "Size (Bytes)": size, + 'Size': convert_size(size), + 'Delta (Bytes)': 'N/A', + 'Delta': 'N/A', + "Date": date, + "Author": author, + "Commit Message": message, + + } + for (date, author, message, commit), size in grouped_aux.items() + ] + else: + return [ + { + "Commit SHA": commit, + "Size (Bytes)": size, + 'Size': convert_size(size), + 'Delta (Bytes)': 'N/A', + 'Delta': 'N/A', + "Date": date, + "Author": author, + "Commit Message": message, + 'Platform': platform, + } + for (date, author, message, commit), size in grouped_aux.items() + ] + +def trim_modules(modules, threshold=0): + modules[0]['Delta (Bytes)'] = 0 + modules[0]['Delta'] = ' ' + trimmed_modules = [modules[0]] + for i in range(1, len(modules)-1): + delta = modules[i]['Size (Bytes)']-modules[i-1]['Size (Bytes)'] + if abs(delta) > int(threshold): + modules[i]['Delta (Bytes)'] = delta + modules[i]['Delta'] = convert_size(delta) + trimmed_modules.append(modules[i]) + if len(modules) > 1: + delta = modules[-1]['Size (Bytes)']-modules[-2]['Size (Bytes)'] + modules[-1]['Delta (Bytes)'] = delta + modules[-1]['Delta'] = convert_size(delta) + trimmed_modules.append(modules[-1]) + return trimmed_modules + +def format_commit_data(date, message, commit, first_commit): + if commit == first_commit: + message = "(NEW) " + message + message = message if len(message) <= 35 else message[:30].rsplit(" ", 1)[0] + "..." 
+ message.split()[-1] + date = datetime.strptime(date, "%b %d %Y").date() + return date, message, commit[:7] + +def module_exists(path, module): + return os.path.exists(os.path.join(path, module)) + +def get_dependency_list(path, platforms): + resolved_path = os.path.join(path, ".deps/resolved") + all_files = os.listdir(resolved_path) + dependencies = set() + + for platform in platforms: + version = get_version(all_files, platform) + for filename in all_files: + file_path = os.path.join(resolved_path, filename) + if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): + with open(file_path, "r", encoding="utf-8") as file: + matches = re.findall(r"([\w\-\d\.]+) @ https?://[^\s#]+", file.read()) + dependencies.update(matches) + return dependencies diff --git a/ddev/tests/cli/size/test_common.py b/ddev/tests/cli/size/test_common.py index f3c8565ac5e07..a3f29bc17134e 100644 --- a/ddev/tests/cli/size/test_common.py +++ b/ddev/tests/cli/size/test_common.py @@ -3,7 +3,7 @@ from ddev.cli.size.common import ( compress, convert_size, - get_dependencies, + get_dependencies_list, get_dependencies_sizes, get_gitignore_files, group_modules, @@ -67,13 +67,13 @@ def test_is_valid_integration(): assert not is_valid_integration(".git/config", included_folder, ignored_files, git_ignore) -def test_get_dependencies(): +def test_get_dependencies_list(): file_content = ( "dependency1 @ https://example.com/dependency1.whl\ndependency2 @ https://example.com/dependency2.whl" ) mock_open_obj = mock_open(read_data=file_content) with patch("builtins.open", mock_open_obj): - deps, urls = get_dependencies("fake_path") + deps, urls = get_dependencies_list("fake_path") assert deps == ["dependency1", "dependency2"] assert urls == ["https://example.com/dependency1.whl", "https://example.com/dependency2.whl"] @@ -83,7 +83,7 @@ def test_get_dependencies_sizes(): mock_response.status_code = 200 mock_response.headers = {"Content-Length": "12345"} with patch("requests.head", 
return_value=mock_response): - file_data = get_dependencies_sizes(["dependency1"], ["https://example.com/dependency1.whl"]) + file_data = get_dependencies_sizes(["dependency1"], ["https://example.com/dependency1.whl"], True) assert file_data == [ {"File Path": "dependency1", "Type": "Dependency", "Name": "dependency1", "Size (Bytes)": 12345} ] diff --git a/ddev/tests/cli/size/test_diff.py b/ddev/tests/cli/size/test_diff.py index cd6027bc6914a..56759b92edb68 100644 --- a/ddev/tests/cli/size/test_diff.py +++ b/ddev/tests/cli/size/test_diff.py @@ -6,7 +6,7 @@ import pytest -from ddev.cli.size.diff import get_compressed_dependencies, get_compressed_files, get_diff +from ddev.cli.size.diff import get_dependencies, get_files, get_diff def test_get_compressed_files(): @@ -36,7 +36,7 @@ def fake_compress(file_path): patch("ddev.cli.size.diff.compress", side_effect=fake_compress), ): - result = get_compressed_files(mock_repo_path) + result = get_files(mock_repo_path, True) expected = { "integration/datadog_checks/file1.py": 1000, @@ -55,9 +55,16 @@ def test_get_compressed_dependencies(terminal): "dependency1 @ https://example.com/dependency1.whl\ndependency2 @ https://example.com/dependency2.whl" ) - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.headers = {"Content-Length": "12345"} + mock_head_response = MagicMock() + mock_head_response.status_code = 200 + mock_head_response.headers = {"Content-Length": "12345"} + + mock_get_response = MagicMock() + mock_get_response.__enter__.return_value = mock_get_response # for use in `with` block + mock_get_response.status_code = 200 + mock_get_response.headers = {"Content-Length": "12345"} + mock_get_response.content = b"Fake wheel file content" + mock_repo_path = "root" with ( @@ -66,17 +73,16 @@ def test_get_compressed_dependencies(terminal): patch("os.listdir", return_value=[f"{platform}-{version}"]), patch("os.path.isfile", return_value=True), patch("builtins.open", 
mock_open(read_data=fake_file_content)), - patch("requests.head", return_value=mock_response), + patch("requests.head", return_value=mock_head_response), + patch("requests.get", return_value=mock_get_response), ): - - file_data = get_compressed_dependencies(mock_repo_path, platform, version) + file_data = get_dependencies(mock_repo_path, platform, version, True) assert file_data == { "dependency1": 12345, "dependency2": 12345, } - def test_get_diff(): size_before = { "integration/foo.py": 1000, @@ -119,7 +125,7 @@ def mock_size_diff_dependencies(): mock_git_repo = MagicMock() mock_git_repo.repo_dir = "/tmp/fake_repo" - def get_compressed_files_side_effect(_): + def get_compressed_files_side_effect(_, __): get_compressed_files_side_effect.counter += 1 if get_compressed_files_side_effect.counter % 2 == 1: return {"path1.py": 1000} # before @@ -128,7 +134,7 @@ def get_compressed_files_side_effect(_): get_compressed_files_side_effect.counter = 0 - def get_compressed_dependencies_side_effect(_, __, ___): + def get_compressed_dependencies_side_effect(_, __, ___, ____): get_compressed_dependencies_side_effect.counter += 1 if get_compressed_dependencies_side_effect.counter % 2 == 1: return {"dep1.whl": 2000} # before @@ -143,8 +149,8 @@ def get_compressed_dependencies_side_effect(_, __, ___): patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None), patch("ddev.cli.size.diff.GitRepo.checkout_commit"), patch("tempfile.mkdtemp", return_value="/tmp/fake_repo"), - patch("ddev.cli.size.diff.get_compressed_files", side_effect=get_compressed_files_side_effect), - patch("ddev.cli.size.diff.get_compressed_dependencies", side_effect=get_compressed_dependencies_side_effect), + patch("ddev.cli.size.diff.get_files", side_effect=get_compressed_files_side_effect), + patch("ddev.cli.size.diff.get_dependencies", side_effect=get_compressed_dependencies_side_effect), patch("ddev.cli.size.common.group_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.common.print_csv"), 
patch("ddev.cli.size.common.print_table"), @@ -190,14 +196,14 @@ def test_diff_no_differences(ddev): patch("os.path.isfile", return_value=True), patch("os.listdir", return_value=["linux-aarch64_3.12"]), patch( - "ddev.cli.size.diff.get_compressed_files", + "ddev.cli.size.diff.get_files", return_value={ "path1.py": 1000, "path2.py": 500, }, ), patch( - "ddev.cli.size.diff.get_compressed_dependencies", + "ddev.cli.size.diff.get_dependencies", return_value={ "dep1.whl": 2000, "dep2.whl": 1000, diff --git a/ddev/tests/cli/size/test_status.py b/ddev/tests/cli/size/test_status.py index f4500c228600a..13657e33b93a2 100644 --- a/ddev/tests/cli/size/test_status.py +++ b/ddev/tests/cli/size/test_status.py @@ -7,8 +7,8 @@ import pytest from ddev.cli.size.status import ( - get_compressed_dependencies, - get_compressed_files, + get_dependencies, + get_files, ) @@ -82,7 +82,7 @@ def test_get_compressed_dependencies(): patch("requests.head", return_value=mock_response), ): - file_data = get_compressed_dependencies(platform, version) + file_data = get_dependencies(platform, version, True) assert file_data == [ {"File Path": "dependency1", "Type": "Dependency", "Name": "dependency1", "Size (Bytes)": 12345}, @@ -96,7 +96,7 @@ def mock_size_status(): patch("ddev.cli.size.status.valid_platforms_versions", return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'})), patch("ddev.cli.size.status.get_gitignore_files", return_value=set()), patch("ddev.cli.size.status.compress", return_value=1234), - patch("ddev.cli.size.status.get_dependencies", return_value=(["dep1"], {"dep1": "https://example.com/dep1"})), + patch("ddev.cli.size.status.get_dependencies_list", return_value=(["dep1"], {"dep1": "https://example.com/dep1"})), patch( "ddev.cli.size.status.get_dependencies_sizes", return_value=[ diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 60e92b4f6c96a..0cb5d49a4b062 100644 --- 
a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -7,9 +7,8 @@ group_modules, get_dependency_size, get_dependency, - get_compressed_dependencies, - get_compressed_files, - module_exists + get_dependencies, + get_files, ) from datetime import datetime @@ -23,14 +22,15 @@ def test_get_compressed_files(): patch("ddev.cli.size.timeline.is_valid_integration", return_value=True), patch("ddev.cli.size.timeline.compress", return_value=1234), ): - result = get_compressed_files( + result = get_files( "/tmp/fake_repo", "int1", "abc1234", datetime(2025, 4, 4).date(), "auth", "Added int1", - [] + [], + True ) assert result == [ { @@ -56,7 +56,7 @@ def test_get_compressed_files_deleted_only(): patch("os.path.relpath", side_effect=lambda path, _: path.replace(f"{repo_path}/", "")), patch("os.path.exists", return_value=False), ): - file_data = get_compressed_files(repo_path, module, commit, date, author, message, []) + file_data = get_files(repo_path, module, commit, date, author, message, [], True) assert file_data == [ { @@ -166,14 +166,15 @@ def test_get_compressed_dependencies(): patch("ddev.cli.size.timeline.get_dependency", return_value="https://example.com/dep1.whl"), patch("ddev.cli.size.timeline.requests.get", return_value=make_mock_response("12345")), ): - result = get_compressed_dependencies( + result = get_dependencies( "/tmp/fake_repo", "dep1", "linux-x86_64", "abc1234", datetime(2025, 4, 4).date(), "auth", - "Added dep1" + "Added dep1", + True ) assert result == { "Size (Bytes)": 12345, @@ -190,7 +191,8 @@ def test_get_dependency_size(): "abc1234", datetime(2025, 4, 4).date(), "auth", - "Fixed bug" + "Fixed bug", + True ) assert result == { "Size (Bytes)": 45678, @@ -259,16 +261,15 @@ def mock_timeline_dependencies(): patch("os.path.isfile", return_value=True), patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), patch("ddev.cli.size.timeline.get_dependency", return_value="https://example.com/dep1.whl"), - 
patch("ddev.cli.size.timeline.requests.get") as mock_get, + patch("ddev.cli.size.timeline.requests.head") as mock_head, patch("ddev.cli.size.timeline.group_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.print_table"), ): mock_response = MagicMock() - mock_response.__enter__.return_value = mock_response mock_response.headers = {"Content-Length": "1024"} mock_response.raise_for_status = lambda: None - mock_get.return_value = mock_response + mock_head.return_value = mock_response yield @@ -300,6 +301,10 @@ def test_timeline_no_changes_in_integration(ddev): with ( patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_git_repo), patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), + patch("os.path.exists", return_value=True), + patch("os.path.isdir", return_value=True), + patch("os.listdir", return_value=[]), + ): result = ddev("size", "timeline", "integration", "integration/foo", "commit1", "commit2", "--compressed") assert result.exit_code != 0 From 5a4e5d363a1eee259c8e230ffa160a03cf56edb1 Mon Sep 17 00:00:00 2001 From: Enrico Donnici Date: Tue, 15 Apr 2025 12:17:06 +0200 Subject: [PATCH 11/70] Try to fix job summaries --- .github/workflows/slapr.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/slapr.yml b/.github/workflows/slapr.yml index 3fbefcc73aa2e..8d0845e7d54c1 100644 --- a/.github/workflows/slapr.yml +++ b/.github/workflows/slapr.yml @@ -27,17 +27,19 @@ jobs: run: | ddev config set repos.core . 
ddev config set repo core - - name: Measure disk usage Uncompressed + - name: Measure disk usage (uncompressed) run: | ddev size status --csv > size-uncompressed.csv ddev size status + echo "# Size (uncompressed)" >> $GITHUB_STEP_SUMMARY echo "```" >> $GITHUB_STEP_SUMMARY ddev size status >> $GITHUB_STEP_SUMMARY echo "```" >> $GITHUB_STEP_SUMMARY - - name: Measure disk usage Compressed + - name: Measure disk usage (compressed) run: | ddev size status --csv --compressed > size-compressed.csv ddev size status --compressed + echo "# Size (compressed)" >> $GITHUB_STEP_SUMMARY echo "```" >> $GITHUB_STEP_SUMMARY ddev size status --compressed >> $GITHUB_STEP_SUMMARY echo "```" >> $GITHUB_STEP_SUMMARY From a4a00dd081ee0001f734b8c6a42871436be29f4a Mon Sep 17 00:00:00 2001 From: Enrico Donnici Date: Tue, 15 Apr 2025 12:27:18 +0200 Subject: [PATCH 12/70] Try fixing the job summaries again --- .github/workflows/slapr.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/slapr.yml b/.github/workflows/slapr.yml index 8d0845e7d54c1..1ea4befc58971 100644 --- a/.github/workflows/slapr.yml +++ b/.github/workflows/slapr.yml @@ -32,17 +32,17 @@ jobs: ddev size status --csv > size-uncompressed.csv ddev size status echo "# Size (uncompressed)" >> $GITHUB_STEP_SUMMARY - echo "```" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY ddev size status >> $GITHUB_STEP_SUMMARY - echo "```" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY - name: Measure disk usage (compressed) run: | ddev size status --csv --compressed > size-compressed.csv ddev size status --compressed echo "# Size (compressed)" >> $GITHUB_STEP_SUMMARY - echo "```" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY ddev size status --compressed >> $GITHUB_STEP_SUMMARY - echo "```" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY - name: Upload file sizes (uncompressed) uses: actions/upload-artifact@v4 with: From 
ed1e372faccabe565d7b0f7af125d2c20e6d98c0 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Wed, 16 Apr 2025 09:55:22 +0200 Subject: [PATCH 13/70] CI size status integration on master --- .github/workflows/measure-disk-usage.yml | 59 ++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 .github/workflows/measure-disk-usage.yml diff --git a/.github/workflows/measure-disk-usage.yml b/.github/workflows/measure-disk-usage.yml new file mode 100644 index 0000000000000..31f78b1316415 --- /dev/null +++ b/.github/workflows/measure-disk-usage.yml @@ -0,0 +1,59 @@ +name: Measure Disk Usage + +on: + push: + branches: + - master +env: + PYTHON_VERSION: "3.12" + +jobs: + measure-disk-usage: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Set up Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + - name: Install ddev + run: | + pip install -e ./datadog_checks_dev[cli] + pip install -e ./ddev + + - name: Configure ddev + run: | + ddev config set repos.core . 
+ ddev config set repo core + - name: Measure disk usage (uncompressed) + run: | + ddev size status --csv > size-uncompressed.csv + ddev size status > size-uncompressed.txt + cat size-uncompressed.txt + echo "# Size (uncompressed)" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + cat size-uncompressed.txt >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + - name: Measure disk usage (compressed) + run: | + ddev size status --csv --compressed > size-compressed.csv + ddev size status --compressed > size-compressed.txt + cat size-compressed.txt + echo "# Size (compressed)" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + cat size-compressed.txt >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + - name: Upload file sizes (uncompressed) + uses: actions/upload-artifact@v4 + with: + name: size-uncompressed.csv + path: size-uncompressed.csv + if-no-files-found: error + - name: Upload file sizes (compressed) + uses: actions/upload-artifact@v4 + with: + name: size-compressed.csv + path: size-compressed.csv + if-no-files-found: error From 9cc7e5b2f0928536f2de4ea439e7ffbb166629c6 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 21 Apr 2025 10:08:24 +0200 Subject: [PATCH 14/70] types added --- ddev/src/ddev/cli/size/__init__.py | 18 +- ddev/src/ddev/cli/size/common.py | 97 +++---- ddev/src/ddev/cli/size/diff.py | 134 ++++++---- ddev/src/ddev/cli/size/status.py | 43 ++-- ddev/src/ddev/cli/size/timeline.py | 362 +++++++++++++++++---------- ddev/tests/cli/size/test_common.py | 13 +- ddev/tests/cli/size/test_diff.py | 38 ++- ddev/tests/cli/size/test_status.py | 21 +- ddev/tests/cli/size/test_timeline.py | 245 +++++++++++++----- 9 files changed, 647 insertions(+), 324 deletions(-) diff --git a/ddev/src/ddev/cli/size/__init__.py b/ddev/src/ddev/cli/size/__init__.py index dc0a07beb809b..df20d8628ba2f 100644 --- a/ddev/src/ddev/cli/size/__init__.py +++ b/ddev/src/ddev/cli/size/__init__.py @@ -8,9 +8,23 @@ from 
ddev.cli.size.status import status from ddev.cli.size.timeline import timeline -@click.group(short_help='Get the size of integrations and dependencies by platform and python version') + +@click.group() def size(): - """Package Size Analyzer""" + """ + Analyze the download size of integrations and dependencies in various modes. + + This command provides tools to inspect, compare, and monitor size changes of modules + across different commits, platforms, and Python versions. + + Available subcommands: + • status Show the current sizes of all modules + • diff Compare sizes between two commits + • timeline Show the size evolution of a module over time + + Use `ddev size --help` for more details on each mode. + """ + pass diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 0f480bbe5ab85..491ff024d4d48 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -3,16 +3,21 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import os import re -import zlib import shutil import subprocess import tempfile -import requests -from pathlib import Path import zipfile +import zlib +from pathlib import Path +from types import TracebackType +from typing import Dict, List, Optional, Set, Tuple, Type, Union +import requests + +from ddev.cli.application import Application -def valid_platforms_versions(repo_path): + +def valid_platforms_versions(repo_path: str) -> Tuple[Set[str], Set[str]]: resolved_path = os.path.join(repo_path, ".deps/resolved") platforms = [] versions = [] @@ -22,18 +27,17 @@ def valid_platforms_versions(repo_path): if match: versions.append(match.group()) return set(platforms), set(versions) - -# mirar si existe -def convert_size(size_bytes): + +def convert_size(size_bytes: int) -> str: for unit in [' B', ' KB', ' MB', ' GB']: - if size_bytes < 1024: + if abs(size_bytes) < 1024: return str(round(size_bytes, 2)) + unit size_bytes /= 1024 return str(round(size_bytes, 2)) + " TB" -def 
is_valid_integration(path, included_folder, ignored_files, git_ignore): +def is_valid_integration(path: str, included_folder: str, ignored_files: Set[str], git_ignore: List[str]) -> bool: # It is not an integration if path.startswith('.'): return False @@ -50,11 +54,11 @@ def is_valid_integration(path, included_folder, ignored_files, git_ignore): return True -def is_correct_dependency(platform, version, name): +def is_correct_dependency(platform: str, version: str, name: str) -> bool: return platform in name and version in name -def print_csv(app, i, modules): +def print_csv(app: Application, i: Optional[int], modules: List[Dict[str, Union[str, int]]]) -> None: headers = [k for k in modules[0].keys() if k not in ['Size', 'Delta']] if not i: app.display(",".join(headers)) @@ -63,14 +67,11 @@ def print_csv(app, i, modules): app.display(",".join(format(str(row[h])) for h in headers)) -def format(s): - if "," in s: - return '"' + s + '"' - else: - return s +def format(s: str) -> str: + return f'"{s}"' if "," in s else s -def print_table(app, mode, modules): +def print_table(app: Application, mode: str, modules: List[Dict[str, Union[str, int]]]) -> None: modules_table = {col: {} for col in modules[0].keys() if '(Bytes)' not in col} for i, row in enumerate(modules): for key, value in row.items(): @@ -79,7 +80,9 @@ def print_table(app, mode, modules): app.display_table(mode, modules_table) -def get_dependencies_sizes(deps, download_urls, compressed): +def get_dependencies_sizes( + deps: List[str], download_urls: List[str], compressed: bool +) -> List[Dict[str, Union[str, int]]]: file_data = [] for dep, url in zip(deps, download_urls, strict=False): if compressed: @@ -108,7 +111,7 @@ def get_dependencies_sizes(deps, download_urls, compressed): return file_data -def get_dependencies_list(file_path): +def get_dependencies_list(file_path: str) -> Tuple[List[str], List[str]]: download_urls = [] deps = [] with open(file_path, "r", encoding="utf-8") as file: @@ -124,7 +127,9 @@ 
def get_dependencies_list(file_path): return deps, download_urls -def group_modules(modules, platform, version, i): +def group_modules( + modules: List[Dict[str, Union[str, int]]], platform: str, version: str, i: Optional[int] +) -> List[Dict[str, Union[str, int]]]: grouped_aux = {} for file in modules: @@ -132,15 +137,10 @@ def group_modules(modules, platform, version, i): grouped_aux[key] = grouped_aux.get(key, 0) + file["Size (Bytes)"] if i is None: return [ - { - 'Name': name, - 'Type': type, - 'Size (Bytes)': size, - 'Size': convert_size(size) - } - for (name, type), size in grouped_aux.items() + {'Name': name, 'Type': type, 'Size (Bytes)': size, 'Size': convert_size(size)} + for (name, type), size in grouped_aux.items() ] - else: + else: return [ { 'Name': name, @@ -154,7 +154,7 @@ def group_modules(modules, platform, version, i): ] -def get_gitignore_files(repo_path): +def get_gitignore_files(repo_path: str) -> List[str]: gitignore_path = os.path.join(repo_path, ".gitignore") with open(gitignore_path, "r", encoding="utf-8") as file: gitignore_content = file.read() @@ -164,7 +164,7 @@ def get_gitignore_files(repo_path): return ignored_patterns -def compress(file_path): +def compress(file_path: str) -> int: compressor = zlib.compressobj() compressed_size = 0 # original_size = os.path.getsize(file_path) @@ -175,12 +175,14 @@ def compress(file_path): compressed_size += len(compressor.flush()) return compressed_size + class WrongDependencyFormat(Exception): - def __init__(self, mensaje): + def __init__(self, mensaje: str) -> None: super().__init__(mensaje) + class GitRepo: - def __init__(self, url): + def __init__(self, url: str) -> None: self.url = url self.repo_dir = None @@ -193,12 +195,14 @@ def __enter__(self): self._run(f"git clone --quiet {self.url} {self.repo_dir}") return self - def _run(self, command): + def _run(self, command: str) -> List[str]: result = subprocess.run(command, shell=True, capture_output=True, text=True, check=True, cwd=self.repo_dir) 
return result.stdout.strip().split('\n') - def get_module_commits(self, module_path, initial, final, time): - self._run("git fetch origin --quiet") # 1 min no coger todo solo el module + def get_module_commits( + self, module_path: str, initial: Optional[str], final: Optional[str], time: Optional[str] + ) -> List[str]: + self._run("git fetch origin --quiet") self._run("git checkout origin/HEAD") if time: return self._run(f'git log --since="{time}" --reverse --pretty=format:%H -- {module_path}') @@ -212,25 +216,30 @@ def get_module_commits(self, module_path, initial, final, time): except subprocess.CalledProcessError: raise ValueError(f"Commit {initial} does not come before {final}") return self._run(f"git log --reverse --pretty=format:%H {initial}..{final} -- {module_path}") - - def checkout_commit(self, commit): + def checkout_commit(self, commit: str) -> None: self._run(f"git fetch --quiet --depth 1 origin {commit}") self._run(f"git checkout --quiet {commit}") - def sparse_checkout_commit(self, commit_sha, module): - self._run("git sparse-checkout init --cone") + def sparse_checkout_commit(self, commit_sha: str, module: str) -> None: + self._run("git sparse-checkout init --cone") self._run(f"git sparse-checkout set {module}") self._run(f"git checkout {commit_sha}") - - def get_commit_metadata(self,commit): + + def get_commit_metadata(self, commit: str) -> Tuple[str, str, str]: result = self._run(f'git log -1 --date=format:"%b %d %Y" --pretty=format:"%ad\n%an\n%s" {commit}') date, author, message = result return date, author, message - - def get_creation_commit_module(self, module): + + def get_creation_commit_module(self, module: str) -> str: return self._run(f'git log --reverse --format="%H" -- {module}')[0] - def __exit__(self, exception_type, exception_value, exception_traceback): + + def __exit__( + self, + exception_type: Optional[Type[BaseException]], + exception_value: Optional[BaseException], + exception_traceback: Optional[TracebackType], + ) -> None: if 
self.repo_dir and os.path.exists(self.repo_dir): shutil.rmtree(self.repo_dir) diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index 1700eda378686..bfad6bf61442a 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -3,15 +3,21 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import os +import tempfile +import zipfile from pathlib import Path -from rich.console import Console +from typing import Dict, List, Optional, Tuple + import click import requests -import tempfile -import zipfile +from rich.console import Console +from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn + +from ddev.cli.application import Application + from .common import ( + GitRepo, compress, - valid_platforms_versions, get_dependencies_list, get_gitignore_files, group_modules, @@ -19,45 +25,74 @@ is_valid_integration, print_csv, print_table, - GitRepo + valid_platforms_versions, ) -# VALID_PLATFORMS, VALID_PYTHON_VERSIONS = valid_platforms_versions() console = Console() @click.command() @click.argument("before") @click.argument("after") -@click.option('--platform', help="Target platform") -@click.option('--python', 'version', help="Python version (MAJOR.MINOR)") +@click.option( + '--platform', help="Target platform (e.g. linux-aarch64). If not specified, all platforms will be analyzed" +) +@click.option('--python', 'version', help="Python version (e.g 3.12). 
If not specified, all versions will be analyzed") @click.option('--compressed', is_flag=True, help="Measure compressed size") @click.option('--csv', is_flag=True, help="Output in CSV format") @click.pass_obj -def diff(app, before, after, platform, version, compressed, csv): - repo_url = app.repo.path - with GitRepo(repo_url) as gitRepo: - try: - valid_platforms,valid_versions = valid_platforms_versions(gitRepo.repo_dir) - if platform and platform not in valid_platforms: - raise ValueError(f"Invalid platform: {platform}") - elif version and version not in valid_versions: - raise ValueError(f"Invalid version: {version}") - if platform is None or version is None: - platforms = valid_platforms if platform is None else [platform] - versions = valid_versions if version is None else [version] - - for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): - diff_mode(app, gitRepo, before, after, plat, ver, compressed, csv, i) - else: - diff_mode(app, gitRepo, before, after, platform, version, compressed, csv, None) - - except Exception as e: - app.abort(str(e)) - - -def diff_mode(app, gitRepo, before, after, platform, version, compressed, csv, i): - files_b, dependencies_b, files_a, dependencies_a = get_repo_info(gitRepo, platform, version, before, after, compressed) +def diff( + app: str, before: str, after: str, platform: Optional[str], version: Optional[str], compressed: bool, csv: bool +) -> None: + """ + Compare the size of integrations and dependencies between two commits. 
+ """ + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + TimeElapsedColumn(), + transient=True, + ) as progress: + task = progress.add_task("[cyan]Calculating differences...", total=None) + repo_url = app.repo.path + with GitRepo(repo_url) as gitRepo: + try: + valid_platforms, valid_versions = valid_platforms_versions(gitRepo.repo_dir) + if platform and platform not in valid_platforms: + raise ValueError(f"Invalid platform: {platform}") + elif version and version not in valid_versions: + raise ValueError(f"Invalid version: {version}") + if platform is None or version is None: + platforms = valid_platforms if platform is None else [platform] + versions = valid_versions if version is None else [version] + progress.remove_task(task) + + for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): + diff_mode(app, gitRepo, before, after, plat, ver, compressed, csv, i,progress) + else: + progress.remove_task(task) + diff_mode(app, gitRepo, before, after, platform, version, compressed, csv, None, progress) + + except Exception as e: + app.abort(str(e)) + + +def diff_mode( + app: Application, + gitRepo: GitRepo, + before: str, + after: str, + platform: str, + version: str, + compressed: bool, + csv: bool, + i: Optional[int], + progress: Progress, +) -> None: + files_b, dependencies_b, files_a, dependencies_a = get_repo_info( + gitRepo, platform, version, before, after, compressed,progress + ) integrations = get_diff(files_b, files_a, 'Integration') dependencies = get_diff(dependencies_b, dependencies_a, 'Dependency') @@ -75,22 +110,27 @@ def diff_mode(app, gitRepo, before, after, platform, version, compressed, csv, i print_table(app, "Diff", grouped_modules) -def get_repo_info(gitRepo, platform, version, before, after, compressed): +def get_repo_info( + gitRepo: GitRepo, platform: str, version: str, before: str, after: str, compressed: bool,progress: Progress, +) -> Tuple[Dict[str, int], 
Dict[str, int], Dict[str, int], Dict[str, int]]: repo = gitRepo.repo_dir - with console.status("[cyan]Calculating compressed sizes for the first commit...", spinner="dots"): - gitRepo.checkout_commit(before) - files_b = get_files(repo, compressed) - dependencies_b = get_dependencies(repo, platform, version, compressed) + task = progress.add_task("[cyan]Calculating sizes for the first commit...", total=None) + gitRepo.checkout_commit(before) + files_b = get_files(repo, compressed) + dependencies_b = get_dependencies(repo, platform, version, compressed) + progress.remove_task(task) + + task = progress.add_task("[cyan]Calculating sizes for the second commit...", total=None) + gitRepo.checkout_commit(after) + files_a = get_files(repo, compressed) + dependencies_a = get_dependencies(repo, platform, version, compressed) + progress.remove_task(task) - with console.status("[cyan]Calculating compressed sizes for the second commit...", spinner="dots"): - gitRepo.checkout_commit(after) - files_a = get_files(repo, compressed) - dependencies_a = get_dependencies(repo, platform, version, compressed) return files_b, dependencies_b, files_a, dependencies_a -def get_diff(size_before, size_after, type): +def get_diff(size_before: Dict[str, int], size_after: Dict[str, int], type: str) -> List[Dict[str, str | int]]: all_paths = set(size_before.keys()) | set(size_after.keys()) diff_files = [] @@ -131,7 +171,7 @@ def get_diff(size_before, size_after, type): return diff_files -def get_files(repo_path, compressed): +def get_files(repo_path: str, compressed: bool) -> Dict[str, int]: ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} git_ignore = get_gitignore_files(repo_path) @@ -152,7 +192,7 @@ def get_files(repo_path, compressed): return file_data -def get_dependencies(repo_path, platform, version, compressed): +def get_dependencies(repo_path: str, platform: str, version: str, compressed: bool) -> Dict[str, int]: resolved_path = os.path.join(repo_path, 
".deps/resolved") @@ -165,9 +205,9 @@ def get_dependencies(repo_path, platform, version, compressed): return {} -def get_dependencies_sizes(deps, download_urls, compressed): +def get_dependencies_sizes(deps: List[str], download_urls: List[str], compressed: bool) -> Dict[str, int]: file_data = {} - for dep, url in zip(deps, download_urls): + for dep, url in zip(deps, download_urls, strict=False): if compressed: response = requests.head(url) response.raise_for_status() @@ -192,5 +232,3 @@ def get_dependencies_sizes(deps, download_urls, compressed): size += os.path.getsize(file_path) file_data[dep] = size return file_data - - diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index 8e1db4e78b840..66efd243b317b 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -4,8 +4,12 @@ import os from pathlib import Path -from rich.console import Console +from typing import Dict, List, Optional, Union + import click +from rich.console import Console + +from ddev.cli.application import Application from .common import ( compress, @@ -17,46 +21,48 @@ is_valid_integration, print_csv, print_table, - valid_platforms_versions + valid_platforms_versions, ) -#VALID_PLATFORMS = ["linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"] - - REPO_PATH = Path(__file__).resolve().parents[5] -# VALID_PLATFORMS, VALID_PYTHON_VERSIONS = valid_platforms_versions() console = Console() + @click.command() -@click.option('--platform', help="Target platform") -@click.option('--python', 'version', help="Python version (MAJOR.MINOR)") +@click.option( + '--platform', help="Target platform (e.g. linux-aarch64). If not specified, all platforms will be analyzed" +) +@click.option('--python', 'version', help="Python version (e.g 3.12). 
If not specified, all versions will be analyzed") @click.option('--compressed', is_flag=True, help="Measure compressed size") @click.option('--csv', is_flag=True, help="Output in CSV format") @click.pass_obj -def status(app, platform, version, compressed, csv): +def status(app: Application, platform: Optional[str], version: Optional[str], compressed: bool, csv: bool) -> None: + """ + Show the current size of all integrations and dependencies. + """ try: repo_path = app.repo.path - valid_platforms,valid_versions = valid_platforms_versions(repo_path) + valid_platforms, valid_versions = valid_platforms_versions(repo_path) if platform and platform not in valid_platforms: raise ValueError(f"Invalid platform: {platform}") elif version and version not in valid_versions: raise ValueError(f"Invalid version: {version}") if platform is None or version is None: - platforms = valid_platforms if platform is None else [platform] + platforms = valid_platforms if platform is None else [platform] versions = valid_versions if version is None else [version] for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): status_mode(app, plat, ver, compressed, csv, i) else: - status_mode(app, platform, version, compressed, csv, None) - + status_mode(app, platform, version, compressed, csv, None) + except Exception as e: app.abort(str(e)) -def status_mode(app, platform, version, compressed, csv, i): +def status_mode(app: Application, platform: str, version: str, compressed: bool, csv: bool, i: Optional[int]) -> None: with console.status("[cyan]Calculating sizes...", spinner="dots"): - modules = get_files(compressed) + get_dependencies(platform, version,compressed) + modules = get_files(compressed) + get_dependencies(platform, version, compressed) grouped_modules = group_modules(modules, platform, version, i) grouped_modules.sort(key=lambda x: x['Size (Bytes)'], reverse=True) @@ -66,7 +72,7 @@ def status_mode(app, platform, version, compressed, csv, i): print_table(app, 
"STATUS", grouped_modules) -def get_files(compressed): +def get_files(compressed: bool) -> List[Dict[str, Union[str, int]]]: ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} git_ignore = get_gitignore_files(REPO_PATH) @@ -95,7 +101,7 @@ def get_files(compressed): return file_data -def get_dependencies(platform, version, compressed): +def get_dependencies(platform: str, version: str, compressed: bool) -> List[Dict[str, Union[str, int]]]: resolved_path = os.path.join(REPO_PATH, ".deps/resolved") for filename in os.listdir(resolved_path): @@ -103,6 +109,3 @@ def get_dependencies(platform, version, compressed): if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): deps, download_urls = get_dependencies_list(file_path) return get_dependencies_sizes(deps, download_urls, compressed) - - - diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index aee4256084ed9..6cb1b5c3a8775 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -1,111 +1,194 @@ - -import click -import requests import os import re -from datetime import datetime -from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, TimeElapsedColumn -from rich.console import Console import tempfile -from pathlib import Path import zipfile +from datetime import date, datetime +from pathlib import Path +from typing import Dict, List, Optional, Set, Tuple, Union + +import click +import requests +from rich.console import Console +from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn + +from ddev.cli.application import Application + from .common import ( + GitRepo, + WrongDependencyFormat, compress, - get_gitignore_files, convert_size, + get_gitignore_files, is_correct_dependency, is_valid_integration, print_csv, print_table, - GitRepo, - WrongDependencyFormat, - valid_platforms_versions + valid_platforms_versions, ) -#VALID_PLATFORMS = 
["linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"] -#VALID_PYTHON_VERSIONS = ["3.12"] - -# VALID_PLATFORMS, _ = valid_platforms_versions() -DEPENDENCY_FILE_CHANGE = datetime.strptime("Sep 17 2024","%b %d %Y").date() -MINIMUM_DATE = datetime.strptime("Apr 3 2024","%b %d %Y").date() +DEPENDENCY_FILE_CHANGE = datetime.strptime("Sep 17 2024", "%b %d %Y").date() +MINIMUM_DATE = datetime.strptime("Apr 3 2024", "%b %d %Y").date() console = Console() + @click.command() @click.argument('type', type=click.Choice(['integration', 'dependency'])) @click.argument('module') @click.argument('initial', required=False) @click.argument('final', required=False) -@click.option('--time', help="Filter commits starting from a specific date. Accepts both absolute and relative formats, " - "such as '2025-03-01', '2 weeks ago', or 'yesterday'") +@click.option( + '--time', + help="Filter commits starting from a specific date. Accepts both absolute and relative formats, " + "such as '2025-03-01', '2 weeks ago', or 'yesterday'", +) @click.option('--threshold', help="Only show modules with size differences greater than a threshold in bytes") -@click.option('--platform', help="Target platform to analyze. Only required for dependencies. If not specified, all platforms will be analyzed") -#@click.option('--python', 'version', type=click.Choice(VALID_PYTHON_VERSIONS), help="Python version (MAJOR.MINOR)") +@click.option( + '--platform', + help="Target platform to analyze. Only required for dependencies. 
If not specified, all platforms will be analyzed", +) @click.option('--compressed', is_flag=True, help="Measure compressed size") @click.option('--csv', is_flag=True, help="Output results in CSV format") @click.pass_obj -def timeline(app, type, module, initial, final, time, threshold, platform, compressed, csv): - url = app.repo.path - with GitRepo(url) as gitRepo: - try: - with console.status("[cyan]Fetching commits...", spinner="dots"): - folder = module if type == 'integration' else '.deps/resolved' - commits = gitRepo.get_module_commits(folder, initial, final, time) - first_commit = gitRepo.get_creation_commit_module(module) - gitRepo.checkout_commit(commits[-1]) - valid_platforms, _ = valid_platforms_versions(gitRepo.repo_dir) - n_platforms = len(valid_platforms) - if platform and platform not in valid_platforms: - raise ValueError(f"Invalid platform: {platform}") - elif commits == [''] and type == "integration" and module_exists(gitRepo.repo_dir, module): - raise ValueError(f"No changes found: {module}") - elif commits == [''] and type == "integration" and not module_exists(gitRepo.repo_dir, module): - raise ValueError(f"Integration {module} not found in latest commit, is the name correct?") - elif type == 'dependency' and platform and module not in get_dependency_list(gitRepo.repo_dir, [platform]): - raise ValueError(f"Dependency {module} not found in latest commit for the platform {platform}, is the name correct?") - elif type == 'dependency' and not platform and module not in get_dependency_list(gitRepo.repo_dir, valid_platforms): - raise ValueError(f"Dependency {module} not found in latest commit, is the name correct?") - elif type == 'dependency' and commits == ['']: - raise ValueError(f"No changes found: {module}") - if type == "dependency" and platform is None: - for i, plat in enumerate(valid_platforms): - timeline_mode(app, gitRepo, type, module, commits, threshold, plat, compressed, csv, i, True, n_platforms, None) - else: - timeline_mode(app, 
gitRepo, type, module, commits, threshold, platform, compressed, csv, None, False, n_platforms, first_commit) - except Exception as e: - app.abort(str(e)) - - -def timeline_mode(app, gitRepo, type, module, commits, threshold, platform, compressed, csv, i, maybe_mod_missing, n_platforms,first_commit): - modules = get_repo_info(gitRepo, type, platform, module, commits, i, maybe_mod_missing,n_platforms, compressed, first_commit) - if modules != []: - with console.status("[cyan]Exporting data...", spinner="dots"): - grouped_modules = group_modules(modules, platform, i) - trimmed_modules = trim_modules(grouped_modules, threshold) - maybe_mod_missing = False - if csv: - print_csv(app, i, trimmed_modules) - else: - print_table(app, "Timeline for " + module, trimmed_modules) - -def get_repo_info(gitRepo, type, platform, module, commits, i, maybe_mod_missing, n_platforms, compressed, first_commit): +def timeline( + app: Application, + type: str, + module: str, + initial: Optional[str], + final: Optional[str], + time: Optional[str], + threshold: Optional[str], + platform: Optional[str], + compressed: bool, + csv: bool, +) -> None: + """ + Show the size evolution of a module (integration or dependency) over time. 
+ """ with Progress( SpinnerColumn(), TextColumn("[progress.description]{task.description}"), BarColumn(), TimeElapsedColumn(), - transient=True, + transient=True, ) as progress: + task = progress.add_task("[cyan]Calculating timeline...", total=None) + url = app.repo.path + with GitRepo(url) as gitRepo: + try: + # with console.status("[cyan]Fetching commits...", spinner="dots"): + folder = module if type == 'integration' else '.deps/resolved' + commits = gitRepo.get_module_commits(folder, initial, final, time) + first_commit = gitRepo.get_creation_commit_module(module) + gitRepo.checkout_commit(commits[-1]) + valid_platforms, _ = valid_platforms_versions(gitRepo.repo_dir) + if platform and platform not in valid_platforms: + raise ValueError(f"Invalid platform: {platform}") + elif commits == [''] and type == "integration" and module_exists(gitRepo.repo_dir, module): + raise ValueError(f"No changes found: {module}") + elif commits == [''] and type == "integration" and not module_exists(gitRepo.repo_dir, module): + raise ValueError(f"Integration {module} not found in latest commit, is the name correct?") + elif ( + type == 'dependency' + and platform + and module not in get_dependency_list(gitRepo.repo_dir, [platform]) + ): + raise ValueError( + f"Dependency {module} not found in latest commit for the platform {platform}, " + "is the name correct?" 
+ ) + elif ( + type == 'dependency' + and not platform + and module not in get_dependency_list(gitRepo.repo_dir, valid_platforms) + ): + raise ValueError(f"Dependency {module} not found in latest commit, is the name correct?") + elif type == 'dependency' and commits == ['']: + raise ValueError(f"No changes found: {module}") + if type == "dependency" and platform is None: + progress.remove_task(task) + for i, plat in enumerate(valid_platforms): + timeline_mode( + app, gitRepo, type, module, commits, threshold, plat, compressed, csv, i, None, progress + ) + else: + progress.remove_task(task) + + timeline_mode( + app, + gitRepo, + type, + module, + commits, + threshold, + platform, + compressed, + csv, + None, + first_commit, + progress, + ) + + except Exception as e: + progress.remove_task(task) + app.abort(str(e)) + + +def timeline_mode( + app: Application, + gitRepo: GitRepo, + type: str, + module: str, + commits: List[str], + threshold: Optional[str], + platform: Optional[str], + compressed: bool, + csv: bool, + i: Optional[int], + first_commit: Optional[str], + progress: Progress, +) -> None: + modules = get_repo_info(gitRepo, type, platform, module, commits, compressed, first_commit, progress) + if modules != []: + grouped_modules = group_modules(modules, platform, i) + trimmed_modules = trim_modules(grouped_modules, threshold) + if csv: + print_csv(app, i, trimmed_modules) + else: + print_table(app, "Timeline for " + module, trimmed_modules) + + +def get_repo_info( + gitRepo: GitRepo, + type: str, + platform: Optional[str], + module: str, + commits: List[str], + compressed: bool, + first_commit: Optional[str], + progress: Progress, +) -> List[Dict[str, Union[str, int, date]]]: + with progress: if type == "integration": file_data = process_commits(commits, module, gitRepo, progress, platform, type, compressed, first_commit) - else: + else: file_data = process_commits(commits, module, gitRepo, progress, platform, type, compressed, None) - return file_data - -def 
process_commits(commits, module, gitRepo, progress, platform, type, compressed, first_commit=None): - file_data=[] + return file_data + + +def process_commits( + commits: List[str], + module: str, + gitRepo: GitRepo, + progress: Progress, + platform: Optional[str], + type: str, + compressed: bool, + first_commit: Optional[str], +) -> List[Dict[str, Union[str, int, date]]]: + file_data = [] task = progress.add_task("[cyan]Processing commits...", total=len(commits)) repo = gitRepo.repo_dir - + folder = module if type == 'integration' else '.deps/resolved' for commit in commits: gitRepo.sparse_checkout_commit(commit, folder) @@ -120,34 +203,42 @@ def process_commits(commits, module, gitRepo, progress, platform, type, compress elif type == 'integration': file_data = get_files(repo, module, commit, date, author, message, file_data, compressed) progress.advance(task) + progress.remove_task(task) + return file_data -def get_files(repo_path, module, commit, date, author, message, file_data, compressed): - + +def get_files( + repo_path: str, + module: str, + commit: str, + date: date, + author: str, + message: str, + file_data: List[Dict[str, Union[str, int, date]]], + compressed: bool, +) -> List[Dict[str, Union[str, int, date]]]: if not module_exists(repo_path, module): file_data.append( - { - "Size (Bytes)": 0, - "Date": date, - "Author": author, - "Commit Message": "(DELETED) " + message, - "Commit SHA": commit - } - ) - return file_data - + { + "Size (Bytes)": 0, + "Date": date, + "Author": author, + "Commit Message": "(DELETED) " + message, + "Commit SHA": commit, + } + ) + return file_data + ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} - # resolved_path = os.path.join(repo_path, module) git_ignore = get_gitignore_files(repo_path) included_folder = "datadog_checks/" for root, _, files in os.walk(repo_path): for file in files: file_path = os.path.join(root, file) - # Convert the path to a relative format within the repo relative_path = 
os.path.relpath(file_path, repo_path) - # Filter files if is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): size = compress(file_path) if compressed else os.path.getsize(file_path) file_data.append( @@ -156,12 +247,22 @@ def get_files(repo_path, module, commit, date, author, message, file_data, compr "Date": date, "Author": author, "Commit Message": message, - "Commit SHA": commit + "Commit SHA": commit, } ) return file_data -def get_dependencies(repo_path, module, platform, commit, date, author, message, compressed): + +def get_dependencies( + repo_path: str, + module: str, + platform: Optional[str], + commit: str, + date: date, + author: str, + message: str, + compressed: bool, +) -> Optional[Dict[str, Union[str, int, date]]]: resolved_path = os.path.join(repo_path, ".deps/resolved") paths = os.listdir(resolved_path) version = get_version(paths, platform) @@ -169,9 +270,12 @@ def get_dependencies(repo_path, module, platform, commit, date, author, message, file_path = os.path.join(resolved_path, filename) if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): download_url = get_dependency(file_path, module) - return get_dependency_size(download_url, commit, date, author, message, compressed) if download_url else None + return ( + get_dependency_size(download_url, commit, date, author, message, compressed) if download_url else None + ) + -def get_dependency(file_path, module): +def get_dependency(file_path: str, module: str) -> Optional[str]: with open(file_path, "r", encoding="utf-8") as file: file_content = file.read() for line in file_content.splitlines(): @@ -181,9 +285,12 @@ def get_dependency(file_path, module): name, url = match.groups() if name == module: return url - return None + return None -def get_dependency_size(download_url, commit, date, author, message, compressed): + +def get_dependency_size( + download_url: str, commit: str, date: date, author: str, message: str, compressed: bool +) -> 
Dict[str, Union[str, int, date]]: if compressed: response = requests.head(download_url) response.raise_for_status() @@ -207,15 +314,10 @@ def get_dependency_size(download_url, commit, date, author, message, compressed) file_path = os.path.join(dirpath, name) size += os.path.getsize(file_path) - return { - "Size (Bytes)": size, - "Date": date, - "Author": author, - "Commit Message": message, - "Commit SHA": commit - } + return {"Size (Bytes)": size, "Date": date, "Author": author, "Commit Message": message, "Commit SHA": commit} + -def get_version(files, platform): +def get_version(files: List[str], platform: Optional[str]) -> str: final_version = '' for file in files: if platform in file: @@ -224,23 +326,18 @@ def get_version(files, platform): version = match.group(0) if match else None if version > final_version: final_version = version - return final_version if len(final_version) != 1 else 'py'+ final_version - - -def is_correct_dependency(platform, version, name): - return platform in name and version in name + return final_version if len(final_version) != 1 else 'py' + final_version - - - -def group_modules(modules, platform, i): +def group_modules( + modules: List[Dict[str, Union[str, int, date]]], platform: Optional[str], i: Optional[int] +) -> List[Dict[str, Union[str, int, date]]]: grouped_aux = {} for file in modules: key = (file['Date'], file['Author'], file['Commit Message'], file['Commit SHA']) grouped_aux[key] = grouped_aux.get(key, 0) + file["Size (Bytes)"] - if i is None: + if i is None: return [ { "Commit SHA": commit, @@ -251,11 +348,10 @@ def group_modules(modules, platform, i): "Date": date, "Author": author, "Commit Message": message, - } for (date, author, message, commit), size in grouped_aux.items() ] - else: + else: return [ { "Commit SHA": commit, @@ -271,34 +367,40 @@ def group_modules(modules, platform, i): for (date, author, message, commit), size in grouped_aux.items() ] -def trim_modules(modules, threshold=0): + +def trim_modules( + 
modules: List[Dict[str, Union[str, int, date]]], threshold: Optional[str] = None +) -> List[Dict[str, Union[str, int, date]]]: modules[0]['Delta (Bytes)'] = 0 modules[0]['Delta'] = ' ' trimmed_modules = [modules[0]] - for i in range(1, len(modules)-1): - delta = modules[i]['Size (Bytes)']-modules[i-1]['Size (Bytes)'] - if abs(delta) > int(threshold): - modules[i]['Delta (Bytes)'] = delta - modules[i]['Delta'] = convert_size(delta) - trimmed_modules.append(modules[i]) - if len(modules) > 1: - delta = modules[-1]['Size (Bytes)']-modules[-2]['Size (Bytes)'] - modules[-1]['Delta (Bytes)'] = delta - modules[-1]['Delta'] = convert_size(delta) - trimmed_modules.append(modules[-1]) + threshold_value = int(threshold) if threshold else 0 + + for i in range(1, len(modules)): + prev = modules[i - 1] + curr = modules[i] + delta = curr['Size (Bytes)'] - prev['Size (Bytes)'] + if abs(delta) > threshold_value or i == len(modules) - 1: + curr['Delta (Bytes)'] = delta + curr['Delta'] = convert_size(delta) + trimmed_modules.append(curr) + return trimmed_modules -def format_commit_data(date, message, commit, first_commit): + +def format_commit_data(date_str: str, message: str, commit: str, first_commit: Optional[str]) -> Tuple[date, str, str]: if commit == first_commit: message = "(NEW) " + message message = message if len(message) <= 35 else message[:30].rsplit(" ", 1)[0] + "..." 
+ message.split()[-1] - date = datetime.strptime(date, "%b %d %Y").date() + date = datetime.strptime(date_str, "%b %d %Y").date() return date, message, commit[:7] -def module_exists(path, module): + +def module_exists(path: str, module: str) -> bool: return os.path.exists(os.path.join(path, module)) -def get_dependency_list(path, platforms): + +def get_dependency_list(path: str, platforms: List[str]) -> Set[str]: resolved_path = os.path.join(path, ".deps/resolved") all_files = os.listdir(resolved_path) dependencies = set() diff --git a/ddev/tests/cli/size/test_common.py b/ddev/tests/cli/size/test_common.py index a3f29bc17134e..cd11c1ba8c41f 100644 --- a/ddev/tests/cli/size/test_common.py +++ b/ddev/tests/cli/size/test_common.py @@ -10,7 +10,7 @@ is_correct_dependency, is_valid_integration, print_csv, - valid_platforms_versions + valid_platforms_versions, ) @@ -27,15 +27,10 @@ def test_valid_platforms_versions(): "macos-x86_64_py3.txt", "windows-x86_64_3.12.txt", "windows-x86_64_py2.txt", - "windows-x86_64_py3.txt" + "windows-x86_64_py3.txt", ] - expected_platforms = { - "linux-aarch64", - "linux-x86_64", - "macos-x86_64", - "windows-x86_64" - } + expected_platforms = {"linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"} expected_versions = {"3.12"} with patch("os.listdir", return_value=filenames): platforms, versions = valid_platforms_versions("/tmp/fake_repo") @@ -127,7 +122,7 @@ def test_group_modules(): }, ] - assert group_modules(modules, platform, version,0) == expected_output + assert group_modules(modules, platform, version, 0) == expected_output def test_get_gitignore_files(): diff --git a/ddev/tests/cli/size/test_diff.py b/ddev/tests/cli/size/test_diff.py index 56759b92edb68..ba1d0a826ad63 100644 --- a/ddev/tests/cli/size/test_diff.py +++ b/ddev/tests/cli/size/test_diff.py @@ -6,7 +6,7 @@ import pytest -from ddev.cli.size.diff import get_dependencies, get_files, get_diff +from ddev.cli.size.diff import get_dependencies, get_diff, get_files 
def test_get_compressed_files(): @@ -83,6 +83,7 @@ def test_get_compressed_dependencies(terminal): "dependency2": 12345, } + def test_get_diff(): size_before = { "integration/foo.py": 1000, @@ -144,7 +145,10 @@ def get_compressed_dependencies_side_effect(_, __, ___, ____): get_compressed_dependencies_side_effect.counter = 0 with ( - patch("ddev.cli.size.diff.valid_platforms_versions", return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'})), + patch( + "ddev.cli.size.diff.valid_platforms_versions", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + ), patch("ddev.cli.size.diff.GitRepo.__enter__", return_value=mock_git_repo), patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None), patch("ddev.cli.size.diff.GitRepo.checkout_commit"), @@ -180,14 +184,16 @@ def test_diff_csv(ddev, mock_size_diff_dependencies): assert result.exit_code == 0 - def test_diff_no_differences(ddev): fake_repo = MagicMock() fake_repo.repo_dir = "/tmp/fake_repo" with ( patch("ddev.cli.size.diff.GitRepo.__enter__", return_value=fake_repo), - patch("ddev.cli.size.diff.valid_platforms_versions", return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'})), + patch( + "ddev.cli.size.diff.valid_platforms_versions", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + ), patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None), patch.object(fake_repo, "checkout_commit"), patch("tempfile.mkdtemp", return_value="/tmp/fake_repo"), @@ -221,6 +227,14 @@ def test_diff_no_differences(ddev): def test_diff_invalid_platform(ddev): + mock_git_repo = MagicMock() + mock_git_repo.repo_dir = "/tmp/fake_repo" + mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] + mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) + patch( + "ddev.cli.size.timeline.valid_platforms_versions", + 
return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + ), result = ddev( 'size', 'diff', 'commit1', 'commit2', '--platform', 'linux', '--python', '3.12', '--compressed' # inválido ) @@ -228,6 +242,14 @@ def test_diff_invalid_platform(ddev): def test_diff_invalid_version(ddev): + mock_git_repo = MagicMock() + mock_git_repo.repo_dir = "/tmp/fake_repo" + mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] + mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) + patch( + "ddev.cli.size.timeline.valid_platforms_versions", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + ), result = ddev( 'size', 'diff', @@ -243,5 +265,13 @@ def test_diff_invalid_version(ddev): def test_diff_invalid_platform_and_version(ddev): + mock_git_repo = MagicMock() + mock_git_repo.repo_dir = "/tmp/fake_repo" + mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] + mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) + patch( + "ddev.cli.size.timeline.valid_platforms_versions", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + ), result = ddev('size', 'diff', 'commit1', 'commit2', '--platform', 'linux', '--python', '2.10', '--compressed') assert result.exit_code != 0 diff --git a/ddev/tests/cli/size/test_status.py b/ddev/tests/cli/size/test_status.py index 13657e33b93a2..cce9193345de8 100644 --- a/ddev/tests/cli/size/test_status.py +++ b/ddev/tests/cli/size/test_status.py @@ -93,10 +93,15 @@ def test_get_compressed_dependencies(): @pytest.fixture() def mock_size_status(): with ( - patch("ddev.cli.size.status.valid_platforms_versions", return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'})), + patch( + "ddev.cli.size.status.valid_platforms_versions", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 
'windows-x86_64'}, {'3.12'}), + ), patch("ddev.cli.size.status.get_gitignore_files", return_value=set()), patch("ddev.cli.size.status.compress", return_value=1234), - patch("ddev.cli.size.status.get_dependencies_list", return_value=(["dep1"], {"dep1": "https://example.com/dep1"})), + patch( + "ddev.cli.size.status.get_dependencies_list", return_value=(["dep1"], {"dep1": "https://example.com/dep1"}) + ), patch( "ddev.cli.size.status.get_dependencies_sizes", return_value=[ @@ -135,15 +140,27 @@ def test_status_csv(ddev, mock_size_status): def test_status_wrong_platform(ddev): + patch( + "ddev.cli.size.timeline.valid_platforms_versions", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + ), result = ddev('size', 'status', '--platform', 'linux', '--python', '3.12', '--compressed') assert result.exit_code != 0 def test_status_wrong_version(ddev): + patch( + "ddev.cli.size.timeline.valid_platforms_versions", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + ), result = ddev('size', 'status', '--platform', 'linux-aarch64', '--python', '2.10', '--compressed') assert result.exit_code != 0 def test_status_wrong_plat_and_version(ddev): + patch( + "ddev.cli.size.timeline.valid_platforms_versions", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + ), result = ddev('size', 'status', '--platform', 'linux', '--python', '2.10', '--compressed') assert result.exit_code != 0 diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 0cb5d49a4b062..6042f92f85d5a 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -1,16 +1,18 @@ +from datetime import datetime +from unittest.mock import MagicMock, mock_open, patch + import pytest -from unittest.mock import MagicMock, patch, mock_open + from ddev.cli.size.timeline import ( - get_version, format_commit_data, - 
trim_modules, - group_modules, - get_dependency_size, - get_dependency, get_dependencies, + get_dependency, + get_dependency_size, get_files, + get_version, + group_modules, + trim_modules, ) -from datetime import datetime def test_get_compressed_files(): @@ -23,14 +25,7 @@ def test_get_compressed_files(): patch("ddev.cli.size.timeline.compress", return_value=1234), ): result = get_files( - "/tmp/fake_repo", - "int1", - "abc1234", - datetime(2025, 4, 4).date(), - "auth", - "Added int1", - [], - True + "/tmp/fake_repo", "int1", "abc1234", datetime(2025, 4, 4).date(), "auth", "Added int1", [], True ) assert result == [ { @@ -38,10 +33,11 @@ def test_get_compressed_files(): "Date": datetime(2025, 4, 4).date(), "Author": "auth", "Commit Message": "Added int1", - "Commit SHA": "abc1234" + "Commit SHA": "abc1234", } ] + def test_get_compressed_files_deleted_only(): repo_path = "/tmp/fake_repo" module = "foo" @@ -54,7 +50,7 @@ def test_get_compressed_files_deleted_only(): patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), patch("os.walk", return_value=[]), patch("os.path.relpath", side_effect=lambda path, _: path.replace(f"{repo_path}/", "")), - patch("os.path.exists", return_value=False), + patch("os.path.exists", return_value=False), ): file_data = get_files(repo_path, module, commit, date, author, message, [], True) @@ -74,8 +70,11 @@ def test_get_version(): version = get_version(files, "linux-x86_64") assert version == "3.12" + def test_format_commit_data(): - date, message, commit = format_commit_data("Apr 4 2025", "this is a very long commit message that should be trimmed (#1234)", "abc1234def", "abc1234def") + date, message, commit = format_commit_data( + "Apr 4 2025", "this is a very long commit message that should be trimmed (#1234)", "abc1234def", "abc1234def" + ) expected_date = datetime.strptime("Apr 4 2025", "%b %d %Y").date() expected_message = "(NEW) this is a very long...(#1234)" expected_commit = "abc1234" @@ -83,6 +82,7 @@ def 
test_format_commit_data(): assert message == expected_message assert commit == expected_commit + def test_trim_modules_keep_some_remove_some(): modules = [ {"Size (Bytes)": 1000}, @@ -99,9 +99,27 @@ def test_trim_modules_keep_some_remove_some(): def test_group_modules(): modules = [ - {"Size (Bytes)": 1000, "Date": datetime(2025, 4, 4).date(), "Author": "A", "Commit Message": "msg", "Commit SHA": "c1"}, - {"Size (Bytes)": 500, "Date": datetime(2025, 4, 4).date(), "Author": "A", "Commit Message": "msg", "Commit SHA": "c1"}, - {"Size (Bytes)": 1500, "Date": datetime(2025, 4, 5).date(), "Author": "A", "Commit Message": "msg2", "Commit SHA": "c2"}, + { + "Size (Bytes)": 1000, + "Date": datetime(2025, 4, 4).date(), + "Author": "A", + "Commit Message": "msg", + "Commit SHA": "c1", + }, + { + "Size (Bytes)": 500, + "Date": datetime(2025, 4, 4).date(), + "Author": "A", + "Commit Message": "msg", + "Commit SHA": "c1", + }, + { + "Size (Bytes)": 1500, + "Date": datetime(2025, 4, 5).date(), + "Author": "A", + "Commit Message": "msg2", + "Commit SHA": "c2", + }, ] expected = [ { @@ -138,6 +156,7 @@ def test_get_dependency(): url = get_dependency("some/path/file.txt", "dep2") assert url == "https://example.com/dep2.whl" + def make_mock_response(size): mock_response = MagicMock() mock_response.__enter__.return_value = mock_response @@ -145,10 +164,13 @@ def make_mock_response(size): mock_response.raise_for_status = lambda: None return mock_response + def test_get_dependency_size(): mock_response = make_mock_response("45678") - with patch("requests.get", return_value=mock_response): - info = get_dependency_size("https://example.com/file.whl", "abc1234", datetime(2025, 4, 4).date(), "auth", "Fixed bug") + with patch("requests.head", return_value=mock_response): + info = get_dependency_size( + "https://example.com/file.whl", "abc1234", datetime(2025, 4, 4).date(), "auth", "Fixed bug", True + ) assert info == { "Size (Bytes)": 45678, "Date": datetime(2025, 4, 4).date(), @@ -157,6 
+179,7 @@ def test_get_dependency_size(): "Commit SHA": "abc1234", } + def test_get_compressed_dependencies(): with ( patch("os.path.exists", return_value=True), @@ -164,42 +187,17 @@ def test_get_compressed_dependencies(): patch("os.path.isfile", return_value=True), patch("os.listdir", return_value=["linux-x86_64_3.12.txt"]), patch("ddev.cli.size.timeline.get_dependency", return_value="https://example.com/dep1.whl"), - patch("ddev.cli.size.timeline.requests.get", return_value=make_mock_response("12345")), + patch("ddev.cli.size.timeline.requests.head", return_value=make_mock_response("12345")), ): result = get_dependencies( - "/tmp/fake_repo", - "dep1", - "linux-x86_64", - "abc1234", - datetime(2025, 4, 4).date(), - "auth", - "Added dep1", - True + "/tmp/fake_repo", "dep1", "linux-x86_64", "abc1234", datetime(2025, 4, 4).date(), "auth", "Added dep1", True ) assert result == { "Size (Bytes)": 12345, "Date": datetime(2025, 4, 4).date(), "Author": "auth", "Commit Message": "Added dep1", - "Commit SHA": "abc1234" - } - -def test_get_dependency_size(): - with patch("requests.get", return_value=make_mock_response("45678")): - result = get_dependency_size( - "https://example.com/dep1.whl", - "abc1234", - datetime(2025, 4, 4).date(), - "auth", - "Fixed bug", - True - ) - assert result == { - "Size (Bytes)": 45678, - "Date": datetime(2025, 4, 4).date(), - "Author": "auth", - "Commit Message": "Fixed bug", - "Commit SHA": "abc1234" + "Commit SHA": "abc1234", } @@ -210,7 +208,7 @@ def mock_timeline_gitrepo(): mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] mock_git_repo.get_creation_commit_module.return_value = "commit1" mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Initial commit", c) - + with ( patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_git_repo), patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), @@ -226,23 +224,23 @@ def mock_timeline_gitrepo(): patch("os.path.exists", 
return_value=True), patch("os.path.isdir", return_value=True), patch("os.path.isfile", return_value=True), - patch("os.listdir", return_value=[ - "linux-x86_64_3.12_dep1.whl", - "linux-x86_64_3.12_dep2.whl" - ]), + patch("os.listdir", return_value=["linux-x86_64_3.12_dep1.whl", "linux-x86_64_3.12_dep2.whl"]), ): yield + @pytest.fixture def app(): mock_app = MagicMock() mock_app.repo.path = "/tmp/fake_repo" return mock_app + def test_timeline_integration_compressed(ddev, mock_timeline_gitrepo, app): result = ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--compressed", obj=app) assert result.exit_code == 0 + @pytest.fixture def mock_timeline_dependencies(): mock_git_repo = MagicMock() @@ -254,7 +252,11 @@ def mock_timeline_dependencies(): patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_git_repo), patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), patch("ddev.cli.size.timeline.GitRepo.sparse_checkout_commit"), - patch("ddev.cli.size.timeline.valid_platforms_versions", return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'})), + patch( + "ddev.cli.size.timeline.valid_platforms_versions", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + ), + patch("ddev.cli.size.timeline.get_dependency_list", return_value={"dep1"}), patch("os.path.exists", return_value=True), patch("os.path.isdir", return_value=True), patch("os.listdir", return_value=["linux-x86_64-3.12"]), @@ -273,10 +275,18 @@ def mock_timeline_dependencies(): yield + def test_timeline_dependency_compressed(ddev, mock_timeline_dependencies, app): result = ddev( - "size", "timeline", "dependency", "dep1", "commit1", "commit2", - "--compressed", "--platform", "linux-x86_64", + "size", + "timeline", + "dependency", + "dep1", + "commit1", + "commit2", + "--compressed", + "--platform", + "linux-x86_64", obj=app, ) @@ -284,28 +294,133 @@ def 
test_timeline_dependency_compressed(ddev, mock_timeline_dependencies, app): def test_timeline_invalid_platform(ddev): + mock_git_repo = MagicMock() + mock_git_repo.repo_dir = "/tmp/fake_repo" + mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] + mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) + patch( + "ddev.cli.size.timeline.valid_platforms_versions", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + ), result = ddev( - "size", "timeline", "dependency", "dep1", "commit1", "commit2", - "--compressed", "--platform", "invalid-platform" + "size", "timeline", "dependency", "dep1", "commit1", "commit2", "--compressed", "--platform", "invalid-platform" ) assert result.exit_code != 0 - - def test_timeline_no_changes_in_integration(ddev): mock_git_repo = MagicMock() mock_git_repo.repo_dir = "/tmp/fake_repo" mock_git_repo.get_module_commits.return_value = [""] - + with ( patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_git_repo), patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), patch("os.path.exists", return_value=True), patch("os.path.isdir", return_value=True), patch("os.listdir", return_value=[]), - ): result = ddev("size", "timeline", "integration", "integration/foo", "commit1", "commit2", "--compressed") assert result.exit_code != 0 assert "No changes found" in result.output + + +def test_timeline_integration_not_found(ddev): + mock_repo = MagicMock() + mock_repo.repo_dir = "/fake" + mock_repo.get_module_commits.return_value = [""] + mock_repo.get_creation_commit_module.return_value = "c1" + mock_repo.checkout_commit.return_value = None + + with ( + patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_repo), + patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), + patch( + "ddev.cli.size.timeline.valid_platforms_versions", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 
'windows-x86_64'}, {'3.12'}), + ), + patch("ddev.cli.size.timeline.module_exists", return_value=False), + ): + result = ddev("size", "timeline", "integration", "missing_module", "c1", "c2") + assert result.exit_code != 0 + assert "not found" in result.output + + +def test_timeline_dependency_missing_no_platform(ddev): + mock_repo = MagicMock() + mock_repo.repo_dir = "/fake" + mock_repo.get_module_commits.return_value = ["c1"] + mock_repo.get_creation_commit_module.return_value = "c1" + mock_repo.checkout_commit.return_value = None + + with ( + patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_repo), + patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), + patch("ddev.cli.size.timeline.valid_platforms_versions", return_value=({"linux-x86_64"}, {"3.12"})), + patch("ddev.cli.size.timeline.get_dependency_list", return_value=set()), + ): + result = ddev("size", "timeline", "dependency", "missing_module", "c1", "c2") + assert result.exit_code != 0 + assert "Dependency missing_module not found in latest commit" in result.output + + +def test_timeline_dependency_missing_for_platform(ddev, app): + mock_repo = MagicMock() + mock_repo.repo_dir = "/fake" + mock_repo.get_module_commits.return_value = ["c1"] + mock_repo.get_creation_commit_module.return_value = "c1" + mock_repo.checkout_commit.return_value = None + + with ( + patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_repo), + patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), + patch("ddev.cli.size.timeline.valid_platforms_versions", return_value=({"linux-x86_64"}, {"3.12"})), + patch("ddev.cli.size.timeline.get_dependency_list", return_value=set()), + ): + + result = ddev( + "size", + "timeline", + "dependency", + "missing_module", + "c1", + "c2", + "--platform", + "linux-x86_64", + ) + + assert result.exit_code != 0 + assert ( + "Dependency missing_module not found in latest commit for the platform linux-x86_64, is the name correct?" 
+ in result.output + ) + + +def test_timeline_dependency_no_changes(ddev, app): + mock_repo = MagicMock() + mock_repo.repo_dir = "/fake" + mock_repo.get_module_commits.return_value = [""] + mock_repo.get_creation_commit_module.return_value = "c1" + mock_repo.checkout_commit.return_value = None + + with ( + patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_repo), + patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), + patch("ddev.cli.size.timeline.valid_platforms_versions", return_value=({"linux-x86_64"}, {"3.12"})), + patch("ddev.cli.size.timeline.get_dependency_list", return_value={"dep1"}), + ): + + result = ddev( + "size", + "timeline", + "dependency", + "dep1", + "c1", + "c2", + "--platform", + "linux-x86_64", + obj=app, + ) + + assert result.exit_code != 0 + assert "no changes found" in result.output.lower() From 0f7b09c37d8691b111d52c4fade755ebdfc8c5fd Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 21 Apr 2025 10:14:09 +0200 Subject: [PATCH 15/70] change --help --- ddev/src/ddev/cli/size/__init__.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/ddev/src/ddev/cli/size/__init__.py b/ddev/src/ddev/cli/size/__init__.py index df20d8628ba2f..63ddba4fa4af8 100644 --- a/ddev/src/ddev/cli/size/__init__.py +++ b/ddev/src/ddev/cli/size/__init__.py @@ -14,15 +14,9 @@ def size(): """ Analyze the download size of integrations and dependencies in various modes. - This command provides tools to inspect, compare, and monitor size changes of modules + This command provides tools to inspect the current status, compare commits and monitor size changes of modules across different commits, platforms, and Python versions. - Available subcommands: - • status Show the current sizes of all modules - • diff Compare sizes between two commits - • timeline Show the size evolution of a module over time - - Use `ddev size --help` for more details on each mode. 
""" pass From 7064fd9ad7e0f43dfe531dff4899ee69b1d7ce9d Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 21 Apr 2025 12:18:55 +0200 Subject: [PATCH 16/70] fix --- .github/workflows/measure-disk-usage.yml | 44 ++++++++++++++++++++ ddev/src/ddev/cli/size/common.py | 16 ++++++-- ddev/src/ddev/cli/size/diff.py | 51 +++++++++++++++--------- ddev/src/ddev/cli/size/status.py | 2 +- ddev/src/ddev/cli/size/timeline.py | 5 ++- 5 files changed, 93 insertions(+), 25 deletions(-) diff --git a/.github/workflows/measure-disk-usage.yml b/.github/workflows/measure-disk-usage.yml index 31f78b1316415..181cdfabd5880 100644 --- a/.github/workflows/measure-disk-usage.yml +++ b/.github/workflows/measure-disk-usage.yml @@ -45,15 +45,59 @@ jobs: echo '```' >> $GITHUB_STEP_SUMMARY cat size-compressed.txt >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY + + - name: Measure disk usage differences from last commit (uncompressed) + if: false # Disabled: size difference is not accurate due to dependency sizes not updated + run: | + BEFORE=$(git rev-parse HEAD^) + AFTER=$(git rev-parse HEAD) + ddev size diff $BEFORE $AFTER --csv > diff-uncompressed.csv + ddev size diff $BEFORE $AFTER > diff-uncompressed.txt + cat diff-uncompressed.txt + echo "# Size diff (uncompressed)" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + cat diff-uncompressed.txt >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + + - name: Measure disk usage differences from last commit (compressed) + if: false # Disabled: size difference is not accurate due to dependency sizes not updated + run: | + BEFORE=$(git rev-parse HEAD^) + AFTER=$(git rev-parse HEAD) + ddev size diff $BEFORE $AFTER --compressed --csv > diff-compressed.csv + ddev size diff $BEFORE $AFTER --compressed > diff-compressed.txt + cat diff-compressed.txt + echo "# Size diff (compressed)" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + cat diff-compressed.txt >> $GITHUB_STEP_SUMMARY + echo '```' >> 
$GITHUB_STEP_SUMMARY + + - name: Upload file sizes (uncompressed) uses: actions/upload-artifact@v4 with: name: size-uncompressed.csv path: size-uncompressed.csv if-no-files-found: error + - name: Upload file sizes (compressed) uses: actions/upload-artifact@v4 with: name: size-compressed.csv path: size-compressed.csv if-no-files-found: error + + - name: Upload file sizes diff (uncompressed) + if: false # Disabled: size difference is not accurate due to dependency sizes not updated + uses: actions/upload-artifact@v4 + with: + name: diff-uncompressed.csv + path: diff-uncompressed.csv + if-no-files-found: error + - name: Upload file sizes diff (compressed) + if: false # Disabled: size difference is not accurate due to dependency sizes not updated + uses: actions/upload-artifact@v4 + with: + name: diff-compressed.csv + path: diff-compressed.csv + if-no-files-found: error diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 491ff024d4d48..7101401c72c7f 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -64,7 +64,8 @@ def print_csv(app: Application, i: Optional[int], modules: List[Dict[str, Union[ app.display(",".join(headers)) for row in modules: - app.display(",".join(format(str(row[h])) for h in headers)) + if any(str(value).strip() not in ("", "0") for value in row.values()): + app.display(",".join(format(str(row[h])) for h in headers)) def format(s: str) -> str: @@ -131,7 +132,17 @@ def group_modules( modules: List[Dict[str, Union[str, int]]], platform: str, version: str, i: Optional[int] ) -> List[Dict[str, Union[str, int]]]: grouped_aux = {} - + if modules == []: + return [ + { + 'Name': '', + 'Type': '', + 'Size (Bytes)': 0, + 'Size': '', + 'Platform': '', + 'Version': '', + } + ] for file in modules: key = (file['Name'], file['Type']) grouped_aux[key] = grouped_aux.get(key, 0) + file["Size (Bytes)"] @@ -234,7 +245,6 @@ def get_commit_metadata(self, commit: str) -> Tuple[str, str, str]: def 
get_creation_commit_module(self, module: str) -> str: return self._run(f'git log --reverse --format="%H" -- {module}')[0] - def __exit__( self, exception_type: Optional[Type[BaseException]], diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index bfad6bf61442a..15ea32a971a1b 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -42,7 +42,13 @@ @click.option('--csv', is_flag=True, help="Output in CSV format") @click.pass_obj def diff( - app: str, before: str, after: str, platform: Optional[str], version: Optional[str], compressed: bool, csv: bool + app: Application, + before: str, + after: str, + platform: Optional[str], + version: Optional[str], + compressed: bool, + csv: bool, ) -> None: """ Compare the size of integrations and dependencies between two commits. @@ -69,7 +75,7 @@ def diff( progress.remove_task(task) for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): - diff_mode(app, gitRepo, before, after, plat, ver, compressed, csv, i,progress) + diff_mode(app, gitRepo, before, after, plat, ver, compressed, csv, i, progress) else: progress.remove_task(task) diff_mode(app, gitRepo, before, after, platform, version, compressed, csv, None, progress) @@ -91,18 +97,19 @@ def diff_mode( progress: Progress, ) -> None: files_b, dependencies_b, files_a, dependencies_a = get_repo_info( - gitRepo, platform, version, before, after, compressed,progress + gitRepo, platform, version, before, after, compressed, progress ) integrations = get_diff(files_b, files_a, 'Integration') dependencies = get_diff(dependencies_b, dependencies_a, 'Dependency') + if integrations + dependencies == [] and not csv: + app.display(f"No size differences were detected between the selected commits for {platform}.") + grouped_modules = group_modules(integrations + dependencies, platform, version, i) grouped_modules.sort(key=lambda x: abs(x['Size (Bytes)']), reverse=True) for module in grouped_modules: if module['Size 
(Bytes)'] > 0: module['Size'] = f"+{module['Size']}" - if grouped_modules == []: - app.display("No size differences were detected between the selected commits.") else: if csv: print_csv(app, i, grouped_modules) @@ -111,21 +118,27 @@ def diff_mode( def get_repo_info( - gitRepo: GitRepo, platform: str, version: str, before: str, after: str, compressed: bool,progress: Progress, + gitRepo: GitRepo, + platform: str, + version: str, + before: str, + after: str, + compressed: bool, + progress: Progress, ) -> Tuple[Dict[str, int], Dict[str, int], Dict[str, int], Dict[str, int]]: - repo = gitRepo.repo_dir - task = progress.add_task("[cyan]Calculating sizes for the first commit...", total=None) - gitRepo.checkout_commit(before) - files_b = get_files(repo, compressed) - dependencies_b = get_dependencies(repo, platform, version, compressed) - progress.remove_task(task) - - task = progress.add_task("[cyan]Calculating sizes for the second commit...", total=None) - gitRepo.checkout_commit(after) - files_a = get_files(repo, compressed) - dependencies_a = get_dependencies(repo, platform, version, compressed) - progress.remove_task(task) - + with progress: + repo = gitRepo.repo_dir + task = progress.add_task("[cyan]Calculating sizes for the first commit...", total=None) + gitRepo.checkout_commit(before) + files_b = get_files(repo, compressed) + dependencies_b = get_dependencies(repo, platform, version, compressed) + progress.remove_task(task) + + task = progress.add_task("[cyan]Calculating sizes for the second commit...", total=None) + gitRepo.checkout_commit(after) + files_a = get_files(repo, compressed) + dependencies_a = get_dependencies(repo, platform, version, compressed) + progress.remove_task(task) return files_b, dependencies_b, files_a, dependencies_a diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index 66efd243b317b..f8877b1c9cbf9 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -69,7 +69,7 @@ def 
status_mode(app: Application, platform: str, version: str, compressed: bool, if csv: print_csv(app, i, grouped_modules) else: - print_table(app, "STATUS", grouped_modules) + print_table(app, "Status", grouped_modules) def get_files(compressed: bool) -> List[Dict[str, Union[str, int]]]: diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index 6cb1b5c3a8775..f41acf71b112c 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -33,7 +33,7 @@ @click.command() @click.argument('type', type=click.Choice(['integration', 'dependency'])) -@click.argument('module') +@click.argument('name') @click.argument('initial', required=False) @click.argument('final', required=False) @click.option( @@ -52,7 +52,7 @@ def timeline( app: Application, type: str, - module: str, + name: str, initial: Optional[str], final: Optional[str], time: Optional[str], @@ -71,6 +71,7 @@ def timeline( TimeElapsedColumn(), transient=True, ) as progress: + module = name # module is the name of the integration or the dependency task = progress.add_task("[cyan]Calculating timeline...", total=None) url = app.repo.path with GitRepo(url) as gitRepo: From 8cc771027ae4eaf2908d1b4015e9a4f8c8ba4a3b Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 21 Apr 2025 14:33:25 +0200 Subject: [PATCH 17/70] fix typing --- ddev/src/ddev/cli/size/common.py | 21 +++++++++++++-------- ddev/src/ddev/cli/size/timeline.py | 30 ++++++++++++++++++------------ 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 7101401c72c7f..5c384a71dceea 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -8,6 +8,7 @@ import tempfile import zipfile import zlib +from datetime import date from pathlib import Path from types import TracebackType from typing import Dict, List, Optional, Set, Tuple, Type, Union @@ -29,7 +30,7 @@ def 
valid_platforms_versions(repo_path: str) -> Tuple[Set[str], Set[str]]: return set(platforms), set(versions) -def convert_size(size_bytes: int) -> str: +def convert_size(size_bytes: float) -> str: for unit in [' B', ' KB', ' MB', ' GB']: if abs(size_bytes) < 1024: return str(round(size_bytes, 2)) + unit @@ -58,7 +59,7 @@ def is_correct_dependency(platform: str, version: str, name: str) -> bool: return platform in name and version in name -def print_csv(app: Application, i: Optional[int], modules: List[Dict[str, Union[str, int]]]) -> None: +def print_csv(app: Application, i: Optional[int], modules: List[Dict[str, Union[str, int, date]]]) -> None: headers = [k for k in modules[0].keys() if k not in ['Size', 'Delta']] if not i: app.display(",".join(headers)) @@ -72,8 +73,8 @@ def format(s: str) -> str: return f'"{s}"' if "," in s else s -def print_table(app: Application, mode: str, modules: List[Dict[str, Union[str, int]]]) -> None: - modules_table = {col: {} for col in modules[0].keys() if '(Bytes)' not in col} +def print_table(app: Application, mode: str, modules: List[Dict[str, Union[str, int, date]]]) -> None: + modules_table : Dict[str, Dict[str, Union[str, int]]] = {col: {} for col in modules[0].keys() if '(Bytes)' not in col} for i, row in enumerate(modules): for key, value in row.items(): if key in modules_table: @@ -89,7 +90,11 @@ def get_dependencies_sizes( if compressed: response = requests.head(url) response.raise_for_status() - size = int(response.headers.get("Content-Length")) + size_str = response.headers.get("Content-Length") + if size_str is None: + raise ValueError(f"Missing size for {dep}") + size = int(size_str) + else: with requests.get(url, stream=True) as response: response.raise_for_status() @@ -131,7 +136,6 @@ def get_dependencies_list(file_path: str) -> Tuple[List[str], List[str]]: def group_modules( modules: List[Dict[str, Union[str, int]]], platform: str, version: str, i: Optional[int] ) -> List[Dict[str, Union[str, int]]]: - grouped_aux = 
{} if modules == []: return [ { @@ -143,6 +147,7 @@ def group_modules( 'Version': '', } ] + grouped_aux : Dict[tuple[str, str], int] = {} for file in modules: key = (file['Name'], file['Type']) grouped_aux[key] = grouped_aux.get(key, 0) + file["Size (Bytes)"] @@ -193,9 +198,9 @@ def __init__(self, mensaje: str) -> None: class GitRepo: - def __init__(self, url: str) -> None: + def __init__(self, url: Union[Path,str]) -> None: self.url = url - self.repo_dir = None + self.repo_dir : str def __enter__(self): self.repo_dir = tempfile.mkdtemp() diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index f41acf71b112c..c17e30e451a44 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -4,7 +4,7 @@ import zipfile from datetime import date, datetime from pathlib import Path -from typing import Dict, List, Optional, Set, Tuple, Union +from typing import Dict, List, Optional, Set, Tuple, Union, cast import click import requests @@ -91,7 +91,7 @@ def timeline( elif ( type == 'dependency' and platform - and module not in get_dependency_list(gitRepo.repo_dir, [platform]) + and module not in get_dependency_list(gitRepo.repo_dir, {platform}) ): raise ValueError( f"Dependency {module} not found in latest commit for the platform {platform}, " @@ -193,8 +193,8 @@ def process_commits( folder = module if type == 'integration' else '.deps/resolved' for commit in commits: gitRepo.sparse_checkout_commit(commit, folder) - date, author, message = gitRepo.get_commit_metadata(commit) - date, message, commit = format_commit_data(date, message, commit, first_commit) + date_str, author, message = gitRepo.get_commit_metadata(commit) + date, message, commit = format_commit_data(date_str, message, commit, first_commit) if type == 'dependency' and date < MINIMUM_DATE: continue elif type == 'dependency': @@ -257,7 +257,7 @@ def get_files( def get_dependencies( repo_path: str, module: str, - platform: Optional[str], + platform: str, 
commit: str, date: date, author: str, @@ -274,6 +274,7 @@ def get_dependencies( return ( get_dependency_size(download_url, commit, date, author, message, compressed) if download_url else None ) + return None def get_dependency(file_path: str, module: str) -> Optional[str]: @@ -295,7 +296,10 @@ def get_dependency_size( if compressed: response = requests.head(download_url) response.raise_for_status() - size = int(response.headers.get("Content-Length")) + size_str = response.headers.get("Content-Length") + if size_str is None: + raise ValueError(f"Missing size for commit {commit}") + size = int(size_str) else: with requests.get(download_url, stream=True) as response: response.raise_for_status() @@ -318,14 +322,14 @@ def get_dependency_size( return {"Size (Bytes)": size, "Date": date, "Author": author, "Commit Message": message, "Commit SHA": commit} -def get_version(files: List[str], platform: Optional[str]) -> str: +def get_version(files: List[str], platform: str) -> str: final_version = '' for file in files: if platform in file: - version = file.split('_')[-1] - match = re.search(r"\d+(?:\.\d+)?", version) + curr_version = file.split('_')[-1] + match = re.search(r"\d+(?:\.\d+)?", curr_version) version = match.group(0) if match else None - if version > final_version: + if version and version > final_version: final_version = version return final_version if len(final_version) != 1 else 'py' + final_version @@ -333,7 +337,7 @@ def get_version(files: List[str], platform: Optional[str]) -> str: def group_modules( modules: List[Dict[str, Union[str, int, date]]], platform: Optional[str], i: Optional[int] ) -> List[Dict[str, Union[str, int, date]]]: - grouped_aux = {} + grouped_aux : Dict[tuple[date, str, str, str], int] = {} for file in modules: key = (file['Date'], file['Author'], file['Commit Message'], file['Commit SHA']) @@ -353,6 +357,7 @@ def group_modules( for (date, author, message, commit), size in grouped_aux.items() ] else: + assert platform is not None return [ 
{ "Commit SHA": commit, @@ -380,7 +385,8 @@ def trim_modules( for i in range(1, len(modules)): prev = modules[i - 1] curr = modules[i] - delta = curr['Size (Bytes)'] - prev['Size (Bytes)'] + delta = cast(int, curr['Size (Bytes)']) - cast(int, prev['Size (Bytes)']) + if abs(delta) > threshold_value or i == len(modules) - 1: curr['Delta (Bytes)'] = delta curr['Delta'] = convert_size(delta) From f1711ccdfb7921ba33ae38495d869e1a6f8adcf4 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 21 Apr 2025 14:39:34 +0200 Subject: [PATCH 18/70] fix typing --- ddev/src/ddev/cli/size/common.py | 10 ++++++---- ddev/src/ddev/cli/size/timeline.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 5c384a71dceea..68cc172689676 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -74,7 +74,9 @@ def format(s: str) -> str: def print_table(app: Application, mode: str, modules: List[Dict[str, Union[str, int, date]]]) -> None: - modules_table : Dict[str, Dict[str, Union[str, int]]] = {col: {} for col in modules[0].keys() if '(Bytes)' not in col} + modules_table: Dict[str, Dict[str, Union[str, int]]] = { + col: {} for col in modules[0].keys() if '(Bytes)' not in col + } for i, row in enumerate(modules): for key, value in row.items(): if key in modules_table: @@ -147,7 +149,7 @@ def group_modules( 'Version': '', } ] - grouped_aux : Dict[tuple[str, str], int] = {} + grouped_aux: Dict[tuple[str, str], int] = {} for file in modules: key = (file['Name'], file['Type']) grouped_aux[key] = grouped_aux.get(key, 0) + file["Size (Bytes)"] @@ -198,9 +200,9 @@ def __init__(self, mensaje: str) -> None: class GitRepo: - def __init__(self, url: Union[Path,str]) -> None: + def __init__(self, url: Union[Path, str]) -> None: self.url = url - self.repo_dir : str + self.repo_dir: str def __enter__(self): self.repo_dir = tempfile.mkdtemp() diff --git 
a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index c17e30e451a44..dae017f410a69 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -337,7 +337,7 @@ def get_version(files: List[str], platform: str) -> str: def group_modules( modules: List[Dict[str, Union[str, int, date]]], platform: Optional[str], i: Optional[int] ) -> List[Dict[str, Union[str, int, date]]]: - grouped_aux : Dict[tuple[date, str, str, str], int] = {} + grouped_aux: Dict[tuple[date, str, str, str], int] = {} for file in modules: key = (file['Date'], file['Author'], file['Commit Message'], file['Commit SHA']) From b33a1fde304144e6f6877510b2d8b109bc280039 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 21 Apr 2025 15:28:55 +0200 Subject: [PATCH 19/70] fix types --- ddev/src/ddev/cli/size/common.py | 16 +++--- ddev/src/ddev/cli/size/diff.py | 5 +- ddev/src/ddev/cli/size/status.py | 1 + ddev/src/ddev/cli/size/timeline.py | 12 +++-- ddev/tests/cli/size/test_diff.py | 73 ++++++++++++++++------------ ddev/tests/cli/size/test_status.py | 24 ++++----- ddev/tests/cli/size/test_timeline.py | 29 ++++++++--- 7 files changed, 98 insertions(+), 62 deletions(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 68cc172689676..f9c70d4747938 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -18,7 +18,7 @@ from ddev.cli.application import Application -def valid_platforms_versions(repo_path: str) -> Tuple[Set[str], Set[str]]: +def valid_platforms_versions(repo_path: Union[Path, str]) -> Tuple[Set[str], Set[str]]: resolved_path = os.path.join(repo_path, ".deps/resolved") platforms = [] versions = [] @@ -74,9 +74,7 @@ def format(s: str) -> str: def print_table(app: Application, mode: str, modules: List[Dict[str, Union[str, int, date]]]) -> None: - modules_table: Dict[str, Dict[str, Union[str, int]]] = { - col: {} for col in modules[0].keys() if '(Bytes)' not in col - 
} + modules_table: Dict[str, Dict[int, str]] = {col: {} for col in modules[0].keys() if '(Bytes)' not in col} for i, row in enumerate(modules): for key, value in row.items(): if key in modules_table: @@ -115,7 +113,7 @@ def get_dependencies_sizes( for name in filenames: file_path = os.path.join(dirpath, name) size += os.path.getsize(file_path) - file_data.append({"File Path": dep, "Type": "Dependency", "Name": dep, "Size (Bytes)": int(size)}) + file_data.append({"File Path": str(dep), "Type": "Dependency", "Name": str(dep), "Size (Bytes)": int(size)}) return file_data @@ -137,7 +135,7 @@ def get_dependencies_list(file_path: str) -> Tuple[List[str], List[str]]: def group_modules( modules: List[Dict[str, Union[str, int]]], platform: str, version: str, i: Optional[int] -) -> List[Dict[str, Union[str, int]]]: +) -> List[Dict[str, Union[str, int, date]]]: if modules == []: return [ { @@ -151,8 +149,8 @@ def group_modules( ] grouped_aux: Dict[tuple[str, str], int] = {} for file in modules: - key = (file['Name'], file['Type']) - grouped_aux[key] = grouped_aux.get(key, 0) + file["Size (Bytes)"] + key = (str(file['Name']), str(file['Type'])) + grouped_aux[key] = grouped_aux.get(key, 0) + int(file["Size (Bytes)"]) if i is None: return [ {'Name': name, 'Type': type, 'Size (Bytes)': size, 'Size': convert_size(size)} @@ -172,7 +170,7 @@ def group_modules( ] -def get_gitignore_files(repo_path: str) -> List[str]: +def get_gitignore_files(repo_path: Union[str, Path]) -> List[str]: gitignore_path = os.path.join(repo_path, ".gitignore") with open(gitignore_path, "r", encoding="utf-8") as file: gitignore_content = file.read() diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index 15ea32a971a1b..e0040e1e94b68 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -224,7 +224,10 @@ def get_dependencies_sizes(deps: List[str], download_urls: List[str], compressed if compressed: response = requests.head(url) 
response.raise_for_status() - size = int(response.headers.get("Content-Length")) + size_str = response.headers.get("Content-Length") + if size_str is None: + raise ValueError(f"Missing size for {dep}") + size = int(size_str) else: with requests.get(url, stream=True) as response: response.raise_for_status() diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index f8877b1c9cbf9..0d0b1126aff08 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -109,3 +109,4 @@ def get_dependencies(platform: str, version: str, compressed: bool) -> List[Dict if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): deps, download_urls = get_dependencies_list(file_path) return get_dependencies_sizes(deps, download_urls, compressed) + return {} diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index dae017f410a69..d268d44c05109 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -198,6 +198,7 @@ def process_commits( if type == 'dependency' and date < MINIMUM_DATE: continue elif type == 'dependency': + assert platform is not None result = get_dependencies(repo, module, platform, commit, date, author, message, compressed) if result: file_data.append(result) @@ -340,8 +341,13 @@ def group_modules( grouped_aux: Dict[tuple[date, str, str, str], int] = {} for file in modules: - key = (file['Date'], file['Author'], file['Commit Message'], file['Commit SHA']) - grouped_aux[key] = grouped_aux.get(key, 0) + file["Size (Bytes)"] + key = ( + cast(date, file['Date']), + cast(str, file['Author']), + cast(str, file['Commit Message']), + cast(str, file['Commit SHA']), + ) + grouped_aux[key] = grouped_aux.get(key, 0) + cast(int, file["Size (Bytes)"]) if i is None: return [ { @@ -407,7 +413,7 @@ def module_exists(path: str, module: str) -> bool: return os.path.exists(os.path.join(path, module)) -def get_dependency_list(path: str, platforms: 
List[str]) -> Set[str]: +def get_dependency_list(path: str, platforms: Set[str]) -> Set[str]: resolved_path = os.path.join(path, ".deps/resolved") all_files = os.listdir(resolved_path) dependencies = set() diff --git a/ddev/tests/cli/size/test_diff.py b/ddev/tests/cli/size/test_diff.py index ba1d0a826ad63..ab2dad24a32de 100644 --- a/ddev/tests/cli/size/test_diff.py +++ b/ddev/tests/cli/size/test_diff.py @@ -231,14 +231,18 @@ def test_diff_invalid_platform(ddev): mock_git_repo.repo_dir = "/tmp/fake_repo" mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) - patch( - "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), - ), - result = ddev( - 'size', 'diff', 'commit1', 'commit2', '--platform', 'linux', '--python', '3.12', '--compressed' # inválido - ) - assert result.exit_code != 0 + mock_git_repo.__enter__.return_value = mock_git_repo + with ( + patch("ddev.cli.size.diff.GitRepo", return_value=mock_git_repo), + patch( + "ddev.cli.size.timeline.valid_platforms_versions", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + ), + ): + result = ddev( + 'size', 'diff', 'commit1', 'commit2', '--platform', 'linux', '--python', '3.12', '--compressed' # inválido + ) + assert result.exit_code != 0 def test_diff_invalid_version(ddev): @@ -246,22 +250,27 @@ def test_diff_invalid_version(ddev): mock_git_repo.repo_dir = "/tmp/fake_repo" mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) - patch( - "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), - ), - result = ddev( - 'size', - 'diff', - 'commit1', - 'commit2', - '--platform', - 
'linux-aarch64', - '--python', - '2.10', # inválido - '--compressed', - ) - assert result.exit_code != 0 + mock_git_repo.__enter__.return_value = mock_git_repo + + with ( + patch("ddev.cli.size.diff.GitRepo", return_value=mock_git_repo), + patch( + "ddev.cli.size.timeline.valid_platforms_versions", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + ), + ): + result = ddev( + 'size', + 'diff', + 'commit1', + 'commit2', + '--platform', + 'linux-aarch64', + '--python', + '2.10', # invalid + '--compressed', + ) + assert result.exit_code != 0 def test_diff_invalid_platform_and_version(ddev): @@ -269,9 +278,13 @@ def test_diff_invalid_platform_and_version(ddev): mock_git_repo.repo_dir = "/tmp/fake_repo" mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) - patch( - "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), - ), - result = ddev('size', 'diff', 'commit1', 'commit2', '--platform', 'linux', '--python', '2.10', '--compressed') - assert result.exit_code != 0 + mock_git_repo.__enter__.return_value = mock_git_repo + with ( + patch("ddev.cli.size.diff.GitRepo", return_value=mock_git_repo), + patch( + "ddev.cli.size.timeline.valid_platforms_versions", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + ), + ): + result = ddev('size', 'diff', 'commit1', 'commit2', '--platform', 'linux', '--python', '2.10', '--compressed') + assert result.exit_code != 0 diff --git a/ddev/tests/cli/size/test_status.py b/ddev/tests/cli/size/test_status.py index cce9193345de8..141b0b5529675 100644 --- a/ddev/tests/cli/size/test_status.py +++ b/ddev/tests/cli/size/test_status.py @@ -140,27 +140,27 @@ def test_status_csv(ddev, mock_size_status): def test_status_wrong_platform(ddev): - patch( + with patch( 
"ddev.cli.size.timeline.valid_platforms_versions", return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), - ), - result = ddev('size', 'status', '--platform', 'linux', '--python', '3.12', '--compressed') - assert result.exit_code != 0 + ): + result = ddev('size', 'status', '--platform', 'linux', '--python', '3.12', '--compressed') + assert result.exit_code != 0 def test_status_wrong_version(ddev): - patch( + with patch( "ddev.cli.size.timeline.valid_platforms_versions", return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), - ), - result = ddev('size', 'status', '--platform', 'linux-aarch64', '--python', '2.10', '--compressed') - assert result.exit_code != 0 + ): + result = ddev('size', 'status', '--platform', 'linux-aarch64', '--python', '2.10', '--compressed') + assert result.exit_code != 0 def test_status_wrong_plat_and_version(ddev): - patch( + with patch( "ddev.cli.size.timeline.valid_platforms_versions", return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), - ), - result = ddev('size', 'status', '--platform', 'linux', '--python', '2.10', '--compressed') - assert result.exit_code != 0 + ): + result = ddev('size', 'status', '--platform', 'linux', '--python', '2.10', '--compressed') + assert result.exit_code != 0 diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 6042f92f85d5a..a55c1e8851333 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -298,13 +298,28 @@ def test_timeline_invalid_platform(ddev): mock_git_repo.repo_dir = "/tmp/fake_repo" mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) - patch( - "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), - ), - 
result = ddev( - "size", "timeline", "dependency", "dep1", "commit1", "commit2", "--compressed", "--platform", "invalid-platform" - ) + mock_git_repo.__enter__.return_value = mock_git_repo + + with ( + patch("ddev.cli.size.timeline.GitRepo", return_value=mock_git_repo), + patch( + "ddev.cli.size.timeline.valid_platforms_versions", + return_value=({'linux-x86_64', 'linux-aarch64', 'macos-x86_64'}, {'3.12'}), + ), + ): + + result = ddev( + "size", + "timeline", + "dependency", + "dep1", + "commit1", + "commit2", + "--compressed", + "--platform", + "invalid-platform", + ) + assert result.exit_code != 0 From f16938b15d0612a594f12ade5863e6cc87dc4c0b Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 21 Apr 2025 15:40:33 +0200 Subject: [PATCH 20/70] fix types --- ddev/src/ddev/cli/size/common.py | 5 +++-- ddev/src/ddev/cli/size/diff.py | 8 ++++---- ddev/src/ddev/cli/size/status.py | 8 ++++---- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index f9c70d4747938..df36acd4ee195 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -11,7 +11,7 @@ from datetime import date from pathlib import Path from types import TracebackType -from typing import Dict, List, Optional, Set, Tuple, Type, Union +from typing import Dict, List, Optional, Set, Tuple, Type, Union, cast import requests @@ -114,7 +114,8 @@ def get_dependencies_sizes( file_path = os.path.join(dirpath, name) size += os.path.getsize(file_path) file_data.append({"File Path": str(dep), "Type": "Dependency", "Name": str(dep), "Size (Bytes)": int(size)}) - return file_data + return cast(List[Dict[str, Union[str, int]]], file_data) + def get_dependencies_list(file_path: str) -> Tuple[List[str], List[str]]: diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index e0040e1e94b68..64b13989ce07a 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py 
@@ -6,7 +6,7 @@ import tempfile import zipfile from pathlib import Path -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Optional, Tuple, cast import click import requests @@ -106,9 +106,9 @@ def diff_mode( app.display(f"No size differences were detected between the selected commits for {platform}.") grouped_modules = group_modules(integrations + dependencies, platform, version, i) - grouped_modules.sort(key=lambda x: abs(x['Size (Bytes)']), reverse=True) + grouped_modules.sort(key=lambda x: abs(int(x['Size (Bytes)'])), reverse=True) for module in grouped_modules: - if module['Size (Bytes)'] > 0: + if int(module['Size (Bytes)']) > 0: module['Size'] = f"+{module['Size']}" else: if csv: @@ -181,7 +181,7 @@ def get_diff(size_before: Dict[str, int], size_after: Dict[str, int], type: str) } ) - return diff_files + return cast(List[Dict[str, str | int]], diff_files) def get_files(repo_path: str, compressed: bool) -> Dict[str, int]: diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index 0d0b1126aff08..ac0b545c481c5 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -4,7 +4,7 @@ import os from pathlib import Path -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional, Union, cast import click from rich.console import Console @@ -95,10 +95,10 @@ def get_files(compressed: bool) -> List[Dict[str, Union[str, int]]]: "File Path": relative_path, "Type": "Integration", "Name": integration, - "Size (Bytes)": size, + "Size (Bytes)": int(size), } ) - return file_data + return cast(List[Dict[str, Union[str, int]]], file_data) def get_dependencies(platform: str, version: str, compressed: bool) -> List[Dict[str, Union[str, int]]]: @@ -109,4 +109,4 @@ def get_dependencies(platform: str, version: str, compressed: bool) -> List[Dict if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): deps, download_urls = 
get_dependencies_list(file_path) return get_dependencies_sizes(deps, download_urls, compressed) - return {} + return [] From 4d2c8e96cbef90727788265f1d2d7730c6853d76 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 21 Apr 2025 15:52:00 +0200 Subject: [PATCH 21/70] fix typing --- ddev/src/ddev/cli/size/diff.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index 64b13989ce07a..37347609a4066 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -217,7 +217,6 @@ def get_dependencies(repo_path: str, platform: str, version: str, compressed: bo return get_dependencies_sizes(deps, download_urls, compressed) return {} - def get_dependencies_sizes(deps: List[str], download_urls: List[str], compressed: bool) -> Dict[str, int]: file_data = {} for dep, url in zip(deps, download_urls, strict=False): From 3d6a2288c7186f68388f73a29ddcf94be7a3797b Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 21 Apr 2025 15:54:12 +0200 Subject: [PATCH 22/70] fix typing --- ddev/src/ddev/cli/size/common.py | 1 - ddev/src/ddev/cli/size/diff.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index df36acd4ee195..1c619c7d4e990 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -117,7 +117,6 @@ def get_dependencies_sizes( return cast(List[Dict[str, Union[str, int]]], file_data) - def get_dependencies_list(file_path: str) -> Tuple[List[str], List[str]]: download_urls = [] deps = [] diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index 37347609a4066..64b13989ce07a 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -217,6 +217,7 @@ def get_dependencies(repo_path: str, platform: str, version: str, compressed: bo return get_dependencies_sizes(deps, download_urls, compressed) return {} + def 
get_dependencies_sizes(deps: List[str], download_urls: List[str], compressed: bool) -> Dict[str, int]: file_data = {} for dep, url in zip(deps, download_urls, strict=False): From d97854beeb51ef8a51a786e1111dbf44eb1aa882 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 21 Apr 2025 16:55:17 +0200 Subject: [PATCH 23/70] fix typing --- ddev/src/ddev/cli/size/diff.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index 64b13989ce07a..83d35a97326fa 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -106,9 +106,9 @@ def diff_mode( app.display(f"No size differences were detected between the selected commits for {platform}.") grouped_modules = group_modules(integrations + dependencies, platform, version, i) - grouped_modules.sort(key=lambda x: abs(int(x['Size (Bytes)'])), reverse=True) + grouped_modules.sort(key=lambda x: abs(cast(int, x['Size (Bytes)'])), reverse=True) for module in grouped_modules: - if int(module['Size (Bytes)']) > 0: + if cast(int,module['Size (Bytes)']) > 0: module['Size'] = f"+{module['Size']}" else: if csv: From f71309711e588dd373d3c165889521164e1049eb Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 21 Apr 2025 16:58:42 +0200 Subject: [PATCH 24/70] fix typing --- ddev/src/ddev/cli/size/diff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index 83d35a97326fa..12e57c313aec4 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -108,7 +108,7 @@ def diff_mode( grouped_modules = group_modules(integrations + dependencies, platform, version, i) grouped_modules.sort(key=lambda x: abs(cast(int, x['Size (Bytes)'])), reverse=True) for module in grouped_modules: - if cast(int,module['Size (Bytes)']) > 0: + if cast(int, module['Size (Bytes)']) > 0: module['Size'] = f"+{module['Size']}" else: if csv: From 
0d750b332df6f5ebee1e38b8d77230fb345d2310 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Tue, 22 Apr 2025 09:36:37 +0200 Subject: [PATCH 25/70] fix tests for Windows --- ddev/tests/cli/size/test_status.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/ddev/tests/cli/size/test_status.py b/ddev/tests/cli/size/test_status.py index 141b0b5529675..ba7e943c59b82 100644 --- a/ddev/tests/cli/size/test_status.py +++ b/ddev/tests/cli/size/test_status.py @@ -2,6 +2,7 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +import os from unittest.mock import MagicMock, mock_open, patch import pytest @@ -12,10 +13,14 @@ ) +def to_native_path(path: str) -> str: + return path.replace("/", os.sep) + + def test_get_files_compressed(): mock_files = [ - ("root/integration/datadog_checks", [], ["file1.py", "file2.py"]), - ("root/integration_b/datadog_checks", [], ["file3.py"]), + (os.path.join("root", "integration", "datadog_checks"), [], ["file1.py", "file2.py"]), + (os.path.join("root", "integration_b", "datadog_checks"), [], ["file3.py"]), ("root", [], ["ignored.py"]), ] @@ -26,7 +31,7 @@ def fake_compress(file_path): with ( patch("os.walk", return_value=mock_files), - patch("os.path.relpath", side_effect=lambda path, _: path.replace("root/", "")), + patch("os.path.relpath", side_effect=lambda path, _: os.path.relpath(path, "root")), patch("ddev.cli.size.status.get_gitignore_files", return_value=fake_gitignore), patch( "ddev.cli.size.status.is_valid_integration", @@ -39,19 +44,19 @@ def fake_compress(file_path): expected = [ { - "File Path": "integration/datadog_checks/file1.py", + "File Path": to_native_path("integration/datadog_checks/file1.py"), "Type": "Integration", "Name": "integration", "Size (Bytes)": 1000, }, { - "File Path": "integration/datadog_checks/file2.py", + "File Path": to_native_path("integration/datadog_checks/file2.py"), "Type": "Integration", "Name": "integration", "Size (Bytes)": 
1000, }, { - "File Path": "integration_b/datadog_checks/file3.py", + "File Path": to_native_path("integration_b/datadog_checks/file3.py"), "Type": "Integration", "Name": "integration_b", "Size (Bytes)": 1000, @@ -126,16 +131,19 @@ def mock_size_status(): def test_status_no_args(ddev, mock_size_status): result = ddev('size', 'status', '--compressed') + print(result.output) assert result.exit_code == 0 def test_status(ddev, mock_size_status): result = ddev('size', 'status', '--platform', 'linux-aarch64', '--python', '3.12', '--compressed') + print(result.output) assert result.exit_code == 0 def test_status_csv(ddev, mock_size_status): result = ddev('size', 'status', '--platform', 'linux-aarch64', '--python', '3.12', '--compressed', '--csv') + print(result.output) assert result.exit_code == 0 From 06a2b5f0109a17ae79cf3d8d769bba20124c629f Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Tue, 22 Apr 2025 10:03:22 +0200 Subject: [PATCH 26/70] Fix tests --- ddev/tests/cli/size/test_status.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddev/tests/cli/size/test_status.py b/ddev/tests/cli/size/test_status.py index ba7e943c59b82..2825afa5e7d43 100644 --- a/ddev/tests/cli/size/test_status.py +++ b/ddev/tests/cli/size/test_status.py @@ -31,7 +31,7 @@ def fake_compress(file_path): with ( patch("os.walk", return_value=mock_files), - patch("os.path.relpath", side_effect=lambda path, _: os.path.relpath(path, "root")), + patch("os.path.relpath", side_effect=lambda path, _: path.replace(f"root{os.sep}", "")), patch("ddev.cli.size.status.get_gitignore_files", return_value=fake_gitignore), patch( "ddev.cli.size.status.is_valid_integration", From 63137be3a5db019c15848ae9ceb0a325fdd9a2ca Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Tue, 22 Apr 2025 12:05:24 +0200 Subject: [PATCH 27/70] fix windows tests --- ddev/pyproject.toml | 4 +- ddev/src/ddev/cli/size/common.py | 126 ++++++++++++++++++++++----- ddev/src/ddev/cli/size/diff.py | 4 +- 
ddev/src/ddev/cli/size/status.py | 30 ++++--- ddev/tests/cli/size/test_common.py | 27 +++--- ddev/tests/cli/size/test_diff.py | 57 ++++++------ ddev/tests/cli/size/test_status.py | 53 +++++------ ddev/tests/cli/size/test_timeline.py | 38 ++++---- 8 files changed, 219 insertions(+), 120 deletions(-) diff --git a/ddev/pyproject.toml b/ddev/pyproject.toml index 8a1ab15edbb2c..78378e34e80de 100644 --- a/ddev/pyproject.toml +++ b/ddev/pyproject.toml @@ -40,7 +40,9 @@ dependencies = [ "tomli-w", "tomlkit", "tqdm", - "requests" + "requests", + "matplotlib", + "squarify" ] dynamic = ["version"] diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 1c619c7d4e990..25c0521e6ecc1 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -13,17 +13,21 @@ from types import TracebackType from typing import Dict, List, Optional, Set, Tuple, Type, Union, cast +import matplotlib.cm as cm +import matplotlib.pyplot as plt import requests +import squarify +from matplotlib.patches import Patch from ddev.cli.application import Application def valid_platforms_versions(repo_path: Union[Path, str]) -> Tuple[Set[str], Set[str]]: - resolved_path = os.path.join(repo_path, ".deps/resolved") + resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved")) platforms = [] versions = [] for file in os.listdir(resolved_path): - platforms.append("_".join(file.split('_')[:-1])) + platforms.append("_".join(file.split("_")[:-1])) match = re.search(r"\d+\.\d+", file) if match: versions.append(match.group()) @@ -31,7 +35,7 @@ def valid_platforms_versions(repo_path: Union[Path, str]) -> Tuple[Set[str], Set def convert_size(size_bytes: float) -> str: - for unit in [' B', ' KB', ' MB', ' GB']: + for unit in [" B", " KB", " MB", " GB"]: if abs(size_bytes) < 1024: return str(round(size_bytes, 2)) + unit size_bytes /= 1024 @@ -40,7 +44,7 @@ def convert_size(size_bytes: float) -> str: def is_valid_integration(path: str, 
included_folder: str, ignored_files: Set[str], git_ignore: List[str]) -> bool: # It is not an integration - if path.startswith('.'): + if path.startswith("."): return False # It is part of an integration and it is not in the datadog_checks folder elif included_folder not in path: @@ -60,7 +64,7 @@ def is_correct_dependency(platform: str, version: str, name: str) -> bool: def print_csv(app: Application, i: Optional[int], modules: List[Dict[str, Union[str, int, date]]]) -> None: - headers = [k for k in modules[0].keys() if k not in ['Size', 'Delta']] + headers = [k for k in modules[0].keys() if k not in ["Size", "Delta"]] if not i: app.display(",".join(headers)) @@ -74,7 +78,7 @@ def format(s: str) -> str: def print_table(app: Application, mode: str, modules: List[Dict[str, Union[str, int, date]]]) -> None: - modules_table: Dict[str, Dict[int, str]] = {col: {} for col in modules[0].keys() if '(Bytes)' not in col} + modules_table: Dict[str, Dict[int, str]] = {col: {} for col in modules[0].keys() if "(Bytes)" not in col} for i, row in enumerate(modules): for key, value in row.items(): if key in modules_table: @@ -82,6 +86,84 @@ def print_table(app: Application, mode: str, modules: List[Dict[str, Union[str, app.display_table(mode, modules_table) +def plot_treemap(modules): + sizes = [mod["Size (Bytes)"] for mod in modules] + + integrations = [mod for mod in modules if mod["Type"] == "Integration"] + dependencies = [mod for mod in modules if mod["Type"] == "Dependency"] + + def normalize(mods): + if not mods: + return [] + sizes = [mod["Size (Bytes)"] for mod in mods] + min_size = min(sizes) + max_size = max(sizes) + range_size = max_size - min_size or 1 + return [(s - min_size) / range_size for s in sizes] + + norm_int = normalize(integrations) + norm_dep = normalize(dependencies) + + # Use lighter color range: from 0.3 to 0.85 + def scale(val, vmin=0.3, vmax=0.85): + return vmin + val * (vmax - vmin) + + cmap_int = cm.get_cmap("Purples") + cmap_dep = 
cm.get_cmap("Reds") + + colors = [] + for mod in modules: + if mod["Type"] == "Integration": + idx = integrations.index(mod) + colors.append(cmap_int(scale(norm_int[idx], 0.6, 0.85))) # lighter start for integrations + elif mod["Type"] == "Dependency": + idx = dependencies.index(mod) + colors.append(cmap_dep(scale(norm_dep[idx], 0.3, 0.85))) + else: + colors.append("#999999") + + plt.figure(figsize=(12, 8)) + ax = plt.gca() + ax.set_axis_off() + + rects = squarify.normalize_sizes(sizes, 100, 100) + rects = squarify.squarify(rects, 0, 0, 100, 100) + + for rect, mod, color in zip(rects, modules, colors, strict=False): + x, y, dx, dy = rect["x"], rect["y"], rect["dx"], rect["dy"] + ax.add_patch(plt.Rectangle((x, y), dx, dy, color=color, ec="white")) + + area = dx * dy + font_size = max(6, min(18, area / 100)) + + if area > 400: + label = f"{mod['Name']}\n({mod['Size']})" + elif area > 40: + label = f"{mod['Name']}" + else: + label = None + + if label: + ax.text( + x + dx / 2, y + dy / 2, label, va="center", ha="center", fontsize=font_size, color="black", wrap=True + ) + + ax.set_xlim(0, 100) + ax.set_ylim(0, 100) + + plt.title("Modules by Disk Usage", fontsize=16) + + legend_handles = [ + Patch(color=cmap_int(0.6), label="Integration"), + Patch(color=cmap_dep(0.6), label="Dependency"), + ] + plt.legend(handles=legend_handles, title="Type", loc="center left", bbox_to_anchor=(1.0, 0.5)) + + plt.subplots_adjust(right=0.8) + plt.tight_layout() + plt.show() + + def get_dependencies_sizes( deps: List[str], download_urls: List[str], compressed: bool ) -> List[Dict[str, Union[str, int]]]: @@ -105,7 +187,7 @@ def get_dependencies_sizes( with open(wheel_path, "wb") as f: f.write(wheel_data) extract_path = Path(tmpdir) / "extracted" - with zipfile.ZipFile(wheel_path, 'r') as zip_ref: + with zipfile.ZipFile(wheel_path, "r") as zip_ref: zip_ref.extractall(extract_path) size = 0 @@ -139,32 +221,32 @@ def group_modules( if modules == []: return [ { - 'Name': '', - 'Type': '', - 
'Size (Bytes)': 0, - 'Size': '', - 'Platform': '', - 'Version': '', + "Name": "", + "Type": "", + "Size (Bytes)": 0, + "Size": "", + "Platform": "", + "Version": "", } ] grouped_aux: Dict[tuple[str, str], int] = {} for file in modules: - key = (str(file['Name']), str(file['Type'])) + key = (str(file["Name"]), str(file["Type"])) grouped_aux[key] = grouped_aux.get(key, 0) + int(file["Size (Bytes)"]) if i is None: return [ - {'Name': name, 'Type': type, 'Size (Bytes)': size, 'Size': convert_size(size)} + {"Name": name, "Type": type, "Size (Bytes)": size, "Size": convert_size(size)} for (name, type), size in grouped_aux.items() ] else: return [ { - 'Name': name, - 'Type': type, - 'Size (Bytes)': size, - 'Size': convert_size(size), - 'Platform': platform, - 'Version': version, + "Name": name, + "Type": type, + "Size (Bytes)": size, + "Size": convert_size(size), + "Platform": platform, + "Version": version, } for (name, type), size in grouped_aux.items() ] @@ -213,7 +295,7 @@ def __enter__(self): def _run(self, command: str) -> List[str]: result = subprocess.run(command, shell=True, capture_output=True, text=True, check=True, cwd=self.repo_dir) - return result.stdout.strip().split('\n') + return result.stdout.strip().split("\n") def get_module_commits( self, module_path: str, initial: Optional[str], final: Optional[str], time: Optional[str] diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index 12e57c313aec4..091f5af8bfe73 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -188,7 +188,7 @@ def get_files(repo_path: str, compressed: bool) -> Dict[str, int]: ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} git_ignore = get_gitignore_files(repo_path) - included_folder = "datadog_checks/" + included_folder = "datadog_checks" + os.sep file_data = {} for root, _, files in os.walk(repo_path): @@ -207,7 +207,7 @@ def get_files(repo_path: str, compressed: bool) -> Dict[str, int]: def 
get_dependencies(repo_path: str, platform: str, version: str, compressed: bool) -> Dict[str, int]: - resolved_path = os.path.join(repo_path, ".deps/resolved") + resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved")) for filename in os.listdir(resolved_path): file_path = os.path.join(resolved_path, filename) diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index ac0b545c481c5..5ea54ac76adc6 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -19,12 +19,13 @@ group_modules, is_correct_dependency, is_valid_integration, + plot_treemap, print_csv, print_table, valid_platforms_versions, ) -REPO_PATH = Path(__file__).resolve().parents[5] +# REPO_PATH = Path(__file__).resolve().parents[5] console = Console() @@ -52,17 +53,19 @@ def status(app: Application, platform: Optional[str], version: Optional[str], co platforms = valid_platforms if platform is None else [platform] versions = valid_versions if version is None else [version] for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): - status_mode(app, plat, ver, compressed, csv, i) + status_mode(app, repo_path, plat, ver, compressed, csv, i) else: - status_mode(app, platform, version, compressed, csv, None) + status_mode(app, repo_path, platform, version, compressed, csv, None) except Exception as e: app.abort(str(e)) -def status_mode(app: Application, platform: str, version: str, compressed: bool, csv: bool, i: Optional[int]) -> None: +def status_mode( + app: Application, repo_path: Path, platform: str, version: str, compressed: bool, csv: bool, i: Optional[int] +) -> None: with console.status("[cyan]Calculating sizes...", spinner="dots"): - modules = get_files(compressed) + get_dependencies(platform, version, compressed) + modules = get_files(compressed, repo_path) + get_dependencies(repo_path, platform, version, compressed) grouped_modules = group_modules(modules, platform, version, i) 
grouped_modules.sort(key=lambda x: x['Size (Bytes)'], reverse=True) @@ -70,21 +73,22 @@ def status_mode(app: Application, platform: str, version: str, compressed: bool, print_csv(app, i, grouped_modules) else: print_table(app, "Status", grouped_modules) + plot_treemap(grouped_modules) -def get_files(compressed: bool) -> List[Dict[str, Union[str, int]]]: +def get_files(compressed: bool, repo_path: Path) -> List[Dict[str, Union[str, int]]]: ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} - git_ignore = get_gitignore_files(REPO_PATH) - included_folder = "datadog_checks/" + git_ignore = get_gitignore_files(repo_path) + included_folder = "datadog_checks" + os.sep file_data = [] - for root, _, files in os.walk(REPO_PATH): + for root, _, files in os.walk(repo_path): for file in files: file_path = os.path.join(root, file) # Convert the path to a relative format within the repo - relative_path = os.path.relpath(file_path, REPO_PATH) + relative_path = os.path.relpath(file_path, repo_path) # Filter files if is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): @@ -101,9 +105,11 @@ def get_files(compressed: bool) -> List[Dict[str, Union[str, int]]]: return cast(List[Dict[str, Union[str, int]]], file_data) -def get_dependencies(platform: str, version: str, compressed: bool) -> List[Dict[str, Union[str, int]]]: +def get_dependencies( + repo_path: Path, platform: str, version: str, compressed: bool +) -> List[Dict[str, Union[str, int]]]: - resolved_path = os.path.join(REPO_PATH, ".deps/resolved") + resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved")) for filename in os.listdir(resolved_path): file_path = os.path.join(resolved_path, filename) if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): diff --git a/ddev/tests/cli/size/test_common.py b/ddev/tests/cli/size/test_common.py index cd11c1ba8c41f..bed031f7e0260 100644 --- a/ddev/tests/cli/size/test_common.py +++ 
b/ddev/tests/cli/size/test_common.py @@ -1,3 +1,4 @@ +import os from unittest.mock import MagicMock, mock_open, patch from ddev.cli.size.common import ( @@ -14,6 +15,10 @@ ) +def to_native_path(path: str) -> str: + return path.replace("/", os.sep) + + def test_valid_platforms_versions(): filenames = [ "linux-aarch64_3.12.txt", @@ -33,7 +38,7 @@ def test_valid_platforms_versions(): expected_platforms = {"linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"} expected_versions = {"3.12"} with patch("os.listdir", return_value=filenames): - platforms, versions = valid_platforms_versions("/tmp/fake_repo") + platforms, versions = valid_platforms_versions("fake_repo") assert platforms == expected_platforms assert versions == expected_versions @@ -52,14 +57,16 @@ def test_convert_size(): def test_is_valid_integration(): - included_folder = "datadog_checks/" + included_folder = "datadog_checks" + os.sep ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} git_ignore = [".git", "__pycache__"] - assert is_valid_integration("datadog_checks/example.py", included_folder, ignored_files, git_ignore) - assert not is_valid_integration("__pycache__/file.py", included_folder, ignored_files, git_ignore) - assert not is_valid_integration("datadog_checks_dev/example.py", included_folder, ignored_files, git_ignore) - assert not is_valid_integration(".git/config", included_folder, ignored_files, git_ignore) + assert is_valid_integration(to_native_path("datadog_checks/example.py"), included_folder, ignored_files, git_ignore) + assert not is_valid_integration(to_native_path("__pycache__/file.py"), included_folder, ignored_files, git_ignore) + assert not is_valid_integration( + to_native_path("datadog_checks_dev/example.py"), included_folder, ignored_files, git_ignore + ) + assert not is_valid_integration(to_native_path(".git/config"), included_folder, ignored_files, git_ignore) def test_get_dependencies_list(): @@ -126,12 +133,12 @@ def test_group_modules(): def 
test_get_gitignore_files(): - mock_gitignore = "__pycache__/\n*.log\n" # Sample .gitignore file - repo_path = "/fake/repo" + mock_gitignore = f"__pycache__{os.sep}\n*.log\n" # Sample .gitignore file + repo_path = "fake_repo" with patch("builtins.open", mock_open(read_data=mock_gitignore)): with patch("os.path.exists", return_value=True): ignored_patterns = get_gitignore_files(repo_path) - assert ignored_patterns == ["__pycache__/", "*.log"] + assert ignored_patterns == ["__pycache__" + os.sep, "*.log"] def test_compress(): @@ -140,7 +147,7 @@ def test_compress(): m = mock_open(read_data=fake_content) with patch("builtins.open", m): - compressed_size = compress("fake/path/file.py") + compressed_size = compress(to_native_path("fake/path/file.py")) assert isinstance(compressed_size, int) assert compressed_size > 0 diff --git a/ddev/tests/cli/size/test_diff.py b/ddev/tests/cli/size/test_diff.py index ab2dad24a32de..04649f6a5e441 100644 --- a/ddev/tests/cli/size/test_diff.py +++ b/ddev/tests/cli/size/test_diff.py @@ -2,6 +2,8 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +import os +from pathlib import Path from unittest.mock import MagicMock, mock_open, patch import pytest @@ -9,12 +11,16 @@ from ddev.cli.size.diff import get_dependencies, get_diff, get_files +def to_native_path(path: str) -> str: + return path.replace("/", os.sep) + + def test_get_compressed_files(): mock_repo_path = "root" mock_files = [ - ("root/integration/datadog_checks", [], ["file1.py", "file2.py"]), - ("root/integration_b/datadog_checks", [], ["file3.py"]), + (os.path.join("root", "integration", "datadog_checks"), [], ["file1.py", "file2.py"]), + (os.path.join("root", "integration_b", "datadog_checks"), [], ["file3.py"]), ("root", [], ["ignored.py"]), ] @@ -25,7 +31,7 @@ def fake_compress(file_path): with ( patch("os.walk", return_value=mock_files), - patch("os.path.relpath", side_effect=lambda path, _: path.replace("root/", "")), + 
patch("os.path.relpath", side_effect=lambda path, _: path.replace(f"root{os.sep}", "")), patch("os.path.exists", return_value=True), patch("builtins.open", mock_open(read_data="__pycache__/\n*.log\n")), patch("ddev.cli.size.diff.get_gitignore_files", return_value=fake_gitignore), @@ -39,9 +45,9 @@ def fake_compress(file_path): result = get_files(mock_repo_path, True) expected = { - "integration/datadog_checks/file1.py": 1000, - "integration/datadog_checks/file2.py": 1000, - "integration_b/datadog_checks/file3.py": 1000, + to_native_path("integration/datadog_checks/file1.py"): 1000, + to_native_path("integration/datadog_checks/file2.py"): 1000, + to_native_path("integration_b/datadog_checks/file3.py"): 1000, } assert result == expected @@ -86,31 +92,31 @@ def test_get_compressed_dependencies(terminal): def test_get_diff(): size_before = { - "integration/foo.py": 1000, - "integration/bar.py": 2000, - "integration/deleted.py": 1500, + to_native_path("integration/foo.py"): 1000, + to_native_path("integration/bar.py"): 2000, + to_native_path("integration/deleted.py"): 1500, } size_after = { - "integration/foo.py": 1200, # modified - "integration/bar.py": 2000, # unchanged - "integration/new.py": 800, # new + to_native_path("integration/foo.py"): 1200, + to_native_path("integration/bar.py"): 2000, + to_native_path("integration/new.py"): 800, } expected = [ { - "File Path": "integration/foo.py", + "File Path": to_native_path("integration/foo.py"), "Type": "Integration", "Name": "integration", "Size (Bytes)": 200, }, { - "File Path": "integration/deleted.py", + "File Path": to_native_path("integration/deleted.py"), "Type": "Integration", "Name": "integration (DELETED)", "Size (Bytes)": -1500, }, { - "File Path": "integration/new.py", + "File Path": to_native_path("integration/new.py"), "Type": "Integration", "Name": "integration (NEW)", "Size (Bytes)": 800, @@ -124,7 +130,7 @@ def test_get_diff(): @pytest.fixture def mock_size_diff_dependencies(): mock_git_repo = 
MagicMock() - mock_git_repo.repo_dir = "/tmp/fake_repo" + mock_git_repo.repo_dir = "fake_repo" def get_compressed_files_side_effect(_, __): get_compressed_files_side_effect.counter += 1 @@ -152,7 +158,7 @@ def get_compressed_dependencies_side_effect(_, __, ___, ____): patch("ddev.cli.size.diff.GitRepo.__enter__", return_value=mock_git_repo), patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None), patch("ddev.cli.size.diff.GitRepo.checkout_commit"), - patch("tempfile.mkdtemp", return_value="/tmp/fake_repo"), + patch("tempfile.mkdtemp", return_value="fake_repo"), patch("ddev.cli.size.diff.get_files", side_effect=get_compressed_files_side_effect), patch("ddev.cli.size.diff.get_dependencies", side_effect=get_compressed_dependencies_side_effect), patch("ddev.cli.size.common.group_modules", side_effect=lambda m, *_: m), @@ -164,9 +170,6 @@ def get_compressed_dependencies_side_effect(_, __, ___, ____): def test_diff_no_args(ddev, mock_size_diff_dependencies): result = ddev('size', 'diff', 'commit1', 'commit2', '--compressed') - print("Exit code:", result.exit_code) - print("Output:\n", result.output) - print("Exception:", result.exception) assert result.exit_code == 0 @@ -186,7 +189,7 @@ def test_diff_csv(ddev, mock_size_diff_dependencies): def test_diff_no_differences(ddev): fake_repo = MagicMock() - fake_repo.repo_dir = "/tmp/fake_repo" + fake_repo.repo_dir = "fake_repo" with ( patch("ddev.cli.size.diff.GitRepo.__enter__", return_value=fake_repo), @@ -196,7 +199,7 @@ def test_diff_no_differences(ddev): ), patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None), patch.object(fake_repo, "checkout_commit"), - patch("tempfile.mkdtemp", return_value="/tmp/fake_repo"), + patch("tempfile.mkdtemp", return_value="fake_repo"), patch("os.path.exists", return_value=True), patch("os.path.isdir", return_value=True), patch("os.path.isfile", return_value=True), @@ -228,7 +231,7 @@ def test_diff_no_differences(ddev): def test_diff_invalid_platform(ddev): mock_git_repo = 
MagicMock() - mock_git_repo.repo_dir = "/tmp/fake_repo" + mock_git_repo.repo_dir = "fake_repo" mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) mock_git_repo.__enter__.return_value = mock_git_repo @@ -239,15 +242,13 @@ def test_diff_invalid_platform(ddev): return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), ), ): - result = ddev( - 'size', 'diff', 'commit1', 'commit2', '--platform', 'linux', '--python', '3.12', '--compressed' # inválido - ) + result = ddev('size', 'diff', 'commit1', 'commit2', '--platform', 'linux', '--python', '3.12', '--compressed') assert result.exit_code != 0 def test_diff_invalid_version(ddev): mock_git_repo = MagicMock() - mock_git_repo.repo_dir = "/tmp/fake_repo" + mock_git_repo.repo_dir = "fake_repo" mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) mock_git_repo.__enter__.return_value = mock_git_repo @@ -275,7 +276,7 @@ def test_diff_invalid_version(ddev): def test_diff_invalid_platform_and_version(ddev): mock_git_repo = MagicMock() - mock_git_repo.repo_dir = "/tmp/fake_repo" + mock_git_repo.repo_dir = "fake_repo" mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) mock_git_repo.__enter__.return_value = mock_git_repo diff --git a/ddev/tests/cli/size/test_status.py b/ddev/tests/cli/size/test_status.py index 2825afa5e7d43..28697345ab783 100644 --- a/ddev/tests/cli/size/test_status.py +++ b/ddev/tests/cli/size/test_status.py @@ -3,6 +3,7 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import os +from pathlib import Path from unittest.mock import MagicMock, mock_open, patch import pytest @@ -23,6 +24,7 @@ def test_get_files_compressed(): (os.path.join("root", 
"integration_b", "datadog_checks"), [], ["file3.py"]), ("root", [], ["ignored.py"]), ] + mock_repo_path = "root" def fake_compress(file_path): return 1000 @@ -39,8 +41,7 @@ def fake_compress(file_path): ), patch("ddev.cli.size.status.compress", side_effect=fake_compress), ): - - result = get_files(True) + result = get_files(True, mock_repo_path) expected = [ { @@ -77,6 +78,7 @@ def test_get_compressed_dependencies(): mock_response = MagicMock() mock_response.status_code = 200 mock_response.headers = {"Content-Length": "12345"} + mock_repo_path = "root" with ( patch("os.path.exists", return_value=True), @@ -86,8 +88,7 @@ def test_get_compressed_dependencies(): patch("builtins.open", mock_open(read_data=fake_file_content)), patch("requests.head", return_value=mock_response), ): - - file_data = get_dependencies(platform, version, True) + file_data = get_dependencies(mock_repo_path, platform, version, True) assert file_data == [ {"File Path": "dependency1", "Type": "Dependency", "Name": "dependency1", "Size (Bytes)": 12345}, @@ -97,52 +98,52 @@ def test_get_compressed_dependencies(): @pytest.fixture() def mock_size_status(): + fake_repo_path = Path(os.path.join("fake_root")).resolve() + + mock_walk = [(os.path.join(str(fake_repo_path), "datadog_checks", "my_check"), [], ["__init__.py"])] + + mock_app = MagicMock() + mock_app.repo.path = fake_repo_path + with ( + patch("ddev.cli.size.status.get_gitignore_files", return_value=set()), patch( "ddev.cli.size.status.valid_platforms_versions", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}, {"3.12"}), ), - patch("ddev.cli.size.status.get_gitignore_files", return_value=set()), patch("ddev.cli.size.status.compress", return_value=1234), patch( "ddev.cli.size.status.get_dependencies_list", return_value=(["dep1"], {"dep1": "https://example.com/dep1"}) ), patch( 
"ddev.cli.size.status.get_dependencies_sizes", - return_value=[ - {"File Path": "dep1.whl", "Type": "Dependency", "Name": "dep1", "Size (Bytes)": 5678}, - ], + return_value=[{"File Path": "dep1.whl", "Type": "Dependency", "Name": "dep1", "Size (Bytes)": 5678}], ), patch("ddev.cli.size.status.is_valid_integration", return_value=True), patch("ddev.cli.size.status.is_correct_dependency", return_value=True), patch("ddev.cli.size.status.print_csv"), patch("ddev.cli.size.status.print_table"), - patch( - "os.walk", - return_value=[ - ("datadog_checks/my_check", [], ["__init__.py"]), - ], - ), + patch("ddev.cli.size.status.plot_treemap"), + patch("os.walk", return_value=mock_walk), patch("os.listdir", return_value=["fake_dep.whl"]), patch("os.path.isfile", return_value=True), ): - yield + yield mock_app def test_status_no_args(ddev, mock_size_status): - result = ddev('size', 'status', '--compressed') - print(result.output) + result = ddev("size", "status", "--compressed") assert result.exit_code == 0 def test_status(ddev, mock_size_status): - result = ddev('size', 'status', '--platform', 'linux-aarch64', '--python', '3.12', '--compressed') + result = ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12", "--compressed") print(result.output) assert result.exit_code == 0 def test_status_csv(ddev, mock_size_status): - result = ddev('size', 'status', '--platform', 'linux-aarch64', '--python', '3.12', '--compressed', '--csv') + result = ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12", "--compressed", "--csv") print(result.output) assert result.exit_code == 0 @@ -150,25 +151,25 @@ def test_status_csv(ddev, mock_size_status): def test_status_wrong_platform(ddev): with patch( "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}, {"3.12"}), ): - result = 
ddev('size', 'status', '--platform', 'linux', '--python', '3.12', '--compressed') + result = ddev("size", "status", "--platform", "linux", "--python", "3.12", "--compressed") assert result.exit_code != 0 def test_status_wrong_version(ddev): with patch( "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}, {"3.12"}), ): - result = ddev('size', 'status', '--platform', 'linux-aarch64', '--python', '2.10', '--compressed') + result = ddev("size", "status", "--platform", "linux-aarch64", "--python", "2.10", "--compressed") assert result.exit_code != 0 def test_status_wrong_plat_and_version(ddev): with patch( "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}, {"3.12"}), ): - result = ddev('size', 'status', '--platform', 'linux', '--python', '2.10', '--compressed') + result = ddev("size", "status", "--platform", "linux", "--python", "2.10", "--compressed") assert result.exit_code != 0 diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index a55c1e8851333..0f3fb40d8d147 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -1,4 +1,6 @@ +import os from datetime import datetime +from pathlib import Path from unittest.mock import MagicMock, mock_open, patch import pytest @@ -17,16 +19,14 @@ def test_get_compressed_files(): with ( - patch("os.walk", return_value=[("/tmp/fake_repo/int1", [], ["int1.py"])]), - patch("os.path.relpath", return_value="int1/int1.py"), + patch("os.walk", return_value=[(os.path.join("fake_repo", "int1"), [], ["int1.py"])]), + patch("os.path.relpath", return_value=os.path.join("int1", "int1.py")), patch("os.path.exists", 
return_value=True), patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), patch("ddev.cli.size.timeline.is_valid_integration", return_value=True), patch("ddev.cli.size.timeline.compress", return_value=1234), ): - result = get_files( - "/tmp/fake_repo", "int1", "abc1234", datetime(2025, 4, 4).date(), "auth", "Added int1", [], True - ) + result = get_files("fake_repo", "int1", "abc1234", datetime(2025, 4, 4).date(), "auth", "Added int1", [], True) assert result == [ { "Size (Bytes)": 1234, @@ -39,7 +39,7 @@ def test_get_compressed_files(): def test_get_compressed_files_deleted_only(): - repo_path = "/tmp/fake_repo" + repo_path = "fake_repo" module = "foo" commit = "abc1234" date = datetime.strptime("Apr 5 2025", "%b %d %Y").date() @@ -49,7 +49,7 @@ def test_get_compressed_files_deleted_only(): with ( patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), patch("os.walk", return_value=[]), - patch("os.path.relpath", side_effect=lambda path, _: path.replace(f"{repo_path}/", "")), + patch("os.path.relpath", side_effect=lambda path, _: path.replace(f"{repo_path}{os.sep}", "")), patch("os.path.exists", return_value=False), ): file_data = get_files(repo_path, module, commit, date, author, message, [], True) @@ -153,7 +153,7 @@ def test_get_dependency(): content = """dep1 @ https://example.com/dep1.whl dep2 @ https://example.com/dep2.whl""" with patch("builtins.open", mock_open(read_data=content)): - url = get_dependency("some/path/file.txt", "dep2") + url = get_dependency(Path("some") / "path" / "file.txt", "dep2") assert url == "https://example.com/dep2.whl" @@ -190,7 +190,7 @@ def test_get_compressed_dependencies(): patch("ddev.cli.size.timeline.requests.head", return_value=make_mock_response("12345")), ): result = get_dependencies( - "/tmp/fake_repo", "dep1", "linux-x86_64", "abc1234", datetime(2025, 4, 4).date(), "auth", "Added dep1", True + "fake_repo", "dep1", "linux-x86_64", "abc1234", datetime(2025, 4, 4).date(), "auth", "Added 
dep1", True ) assert result == { "Size (Bytes)": 12345, @@ -204,7 +204,7 @@ def test_get_compressed_dependencies(): @pytest.fixture def mock_timeline_gitrepo(): mock_git_repo = MagicMock() - mock_git_repo.repo_dir = "/tmp/fake_repo" + mock_git_repo.repo_dir = "fake_repo" mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] mock_git_repo.get_creation_commit_module.return_value = "commit1" mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Initial commit", c) @@ -215,7 +215,7 @@ def mock_timeline_gitrepo(): patch("ddev.cli.size.timeline.GitRepo.sparse_checkout_commit"), patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), patch("ddev.cli.size.timeline.compress", return_value=1234), - patch("os.walk", return_value=[("/tmp/fake_repo/int", [], ["file1.py"])]), + patch("os.walk", return_value=[(Path("/tmp") / "fake_repo" / "int", [], ["file1.py"])]), patch("os.path.exists", return_value=True), patch("ddev.cli.size.timeline.group_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), @@ -232,7 +232,7 @@ def mock_timeline_gitrepo(): @pytest.fixture def app(): mock_app = MagicMock() - mock_app.repo.path = "/tmp/fake_repo" + mock_app.repo.path = "fake_repo" return mock_app @@ -244,7 +244,7 @@ def test_timeline_integration_compressed(ddev, mock_timeline_gitrepo, app): @pytest.fixture def mock_timeline_dependencies(): mock_git_repo = MagicMock() - mock_git_repo.repo_dir = "/tmp/fake_repo" + mock_git_repo.repo_dir = "fake_repo" mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) @@ -295,7 +295,7 @@ def test_timeline_dependency_compressed(ddev, mock_timeline_dependencies, app): def test_timeline_invalid_platform(ddev): mock_git_repo = MagicMock() - mock_git_repo.repo_dir = "/tmp/fake_repo" + mock_git_repo.repo_dir = "fake_repo" 
mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) mock_git_repo.__enter__.return_value = mock_git_repo @@ -325,7 +325,7 @@ def test_timeline_invalid_platform(ddev): def test_timeline_no_changes_in_integration(ddev): mock_git_repo = MagicMock() - mock_git_repo.repo_dir = "/tmp/fake_repo" + mock_git_repo.repo_dir = "fake_repo" mock_git_repo.get_module_commits.return_value = [""] with ( @@ -342,7 +342,7 @@ def test_timeline_no_changes_in_integration(ddev): def test_timeline_integration_not_found(ddev): mock_repo = MagicMock() - mock_repo.repo_dir = "/fake" + mock_repo.repo_dir = "fake" mock_repo.get_module_commits.return_value = [""] mock_repo.get_creation_commit_module.return_value = "c1" mock_repo.checkout_commit.return_value = None @@ -363,7 +363,7 @@ def test_timeline_integration_not_found(ddev): def test_timeline_dependency_missing_no_platform(ddev): mock_repo = MagicMock() - mock_repo.repo_dir = "/fake" + mock_repo.repo_dir = "fake" mock_repo.get_module_commits.return_value = ["c1"] mock_repo.get_creation_commit_module.return_value = "c1" mock_repo.checkout_commit.return_value = None @@ -381,7 +381,7 @@ def test_timeline_dependency_missing_no_platform(ddev): def test_timeline_dependency_missing_for_platform(ddev, app): mock_repo = MagicMock() - mock_repo.repo_dir = "/fake" + mock_repo.repo_dir = "fake" mock_repo.get_module_commits.return_value = ["c1"] mock_repo.get_creation_commit_module.return_value = "c1" mock_repo.checkout_commit.return_value = None @@ -413,7 +413,7 @@ def test_timeline_dependency_missing_for_platform(ddev, app): def test_timeline_dependency_no_changes(ddev, app): mock_repo = MagicMock() - mock_repo.repo_dir = "/fake" + mock_repo.repo_dir = "fake" mock_repo.get_module_commits.return_value = [""] mock_repo.get_creation_commit_module.return_value = "c1" mock_repo.checkout_commit.return_value = None From 
179172823d1edbb0ccce425f0c46ef52b2d7bd3d Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Tue, 22 Apr 2025 12:22:54 +0200 Subject: [PATCH 28/70] Fix tests --- ddev/src/ddev/cli/size/common.py | 26 +++++++++++++++----------- ddev/tests/cli/size/test_diff.py | 1 - 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 25c0521e6ecc1..669180debaa59 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -86,7 +86,7 @@ def print_table(app: Application, mode: str, modules: List[Dict[str, Union[str, app.display_table(mode, modules_table) -def plot_treemap(modules): +def plot_treemap(modules: List[Dict[str, Union[str, int, date]]]) -> None: sizes = [mod["Size (Bytes)"] for mod in modules] integrations = [mod for mod in modules if mod["Type"] == "Integration"] @@ -104,7 +104,6 @@ def normalize(mods): norm_int = normalize(integrations) norm_dep = normalize(dependencies) - # Use lighter color range: from 0.3 to 0.85 def scale(val, vmin=0.3, vmax=0.85): return vmin + val * (vmax - vmin) @@ -115,7 +114,7 @@ def scale(val, vmin=0.3, vmax=0.85): for mod in modules: if mod["Type"] == "Integration": idx = integrations.index(mod) - colors.append(cmap_int(scale(norm_int[idx], 0.6, 0.85))) # lighter start for integrations + colors.append(cmap_int(scale(norm_int[idx], 0.6, 0.85))) elif mod["Type"] == "Dependency": idx = dependencies.index(mod) colors.append(cmap_dep(scale(norm_dep[idx], 0.3, 0.85))) @@ -135,18 +134,23 @@ def scale(val, vmin=0.3, vmax=0.85): area = dx * dy font_size = max(6, min(18, area / 100)) - - if area > 400: - label = f"{mod['Name']}\n({mod['Size']})" - elif area > 40: - label = f"{mod['Name']}" + name = mod["Name"] + size_str = f"({mod['Size']})" + + label = "" + name_fits = 0.5 * (len(name) + 2) < dx + size_fits = 0.5 * (len(size_str) + 2) + both_fit = 5 < dy + + if name_fits and size_fits and both_fit: + label = f"{name}\n{size_str}" + elif 
name_fits: + label = name else: label = None if label: - ax.text( - x + dx / 2, y + dy / 2, label, va="center", ha="center", fontsize=font_size, color="black", wrap=True - ) + ax.text(x + dx / 2, y + dy / 2, label, va="center", ha="center", fontsize=font_size, color="black") ax.set_xlim(0, 100) ax.set_ylim(0, 100) diff --git a/ddev/tests/cli/size/test_diff.py b/ddev/tests/cli/size/test_diff.py index 04649f6a5e441..06c2ba443989f 100644 --- a/ddev/tests/cli/size/test_diff.py +++ b/ddev/tests/cli/size/test_diff.py @@ -3,7 +3,6 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import os -from pathlib import Path from unittest.mock import MagicMock, mock_open, patch import pytest From c2328680f392b7f3b8bede1943280544bf5268f1 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Tue, 22 Apr 2025 12:30:18 +0200 Subject: [PATCH 29/70] Fix tests --- ddev/src/ddev/cli/size/common.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 669180debaa59..86fb0f69ab9d6 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -13,12 +13,12 @@ from types import TracebackType from typing import Dict, List, Optional, Set, Tuple, Type, Union, cast -import matplotlib.cm as cm -import matplotlib.pyplot as plt +# import matplotlib.cm as cm +# import matplotlib.pyplot as plt import requests -import squarify -from matplotlib.patches import Patch +# import squarify +# from matplotlib.patches import Patch from ddev.cli.application import Application @@ -87,6 +87,7 @@ def print_table(app: Application, mode: str, modules: List[Dict[str, Union[str, def plot_treemap(modules: List[Dict[str, Union[str, int, date]]]) -> None: + ''' sizes = [mod["Size (Bytes)"] for mod in modules] integrations = [mod for mod in modules if mod["Type"] == "Integration"] @@ -166,6 +167,7 @@ def scale(val, vmin=0.3, vmax=0.85): plt.subplots_adjust(right=0.8) plt.tight_layout() 
plt.show() + ''' def get_dependencies_sizes( From b02658d42eb6d638e28df58de81c6c426da313e1 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Tue, 22 Apr 2025 14:31:42 +0200 Subject: [PATCH 30/70] fix windows tests --- ddev/tests/cli/size/test_status.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ddev/tests/cli/size/test_status.py b/ddev/tests/cli/size/test_status.py index 28697345ab783..967018f8b7c81 100644 --- a/ddev/tests/cli/size/test_status.py +++ b/ddev/tests/cli/size/test_status.py @@ -119,6 +119,7 @@ def mock_size_status(): "ddev.cli.size.status.get_dependencies_sizes", return_value=[{"File Path": "dep1.whl", "Type": "Dependency", "Name": "dep1", "Size (Bytes)": 5678}], ), + patch("os.path.relpath", side_effect=lambda path, _: path.replace(f"fake_root{os.sep}", "")), patch("ddev.cli.size.status.is_valid_integration", return_value=True), patch("ddev.cli.size.status.is_correct_dependency", return_value=True), patch("ddev.cli.size.status.print_csv"), From 0fcf1628bfea44b459cdb901d095de706a8118ac Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Wed, 23 Apr 2025 10:59:29 +0200 Subject: [PATCH 31/70] Final visualizations --- ddev/src/ddev/cli/size/common.py | 203 ++++++++++++++++++++------- ddev/src/ddev/cli/size/diff.py | 50 ++++++- ddev/src/ddev/cli/size/status.py | 40 +++++- ddev/src/ddev/cli/size/timeline.py | 52 ++++++- ddev/tests/cli/size/test_diff.py | 1 + ddev/tests/cli/size/test_timeline.py | 1 + 6 files changed, 287 insertions(+), 60 deletions(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 86fb0f69ab9d6..c6a01615c79bd 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -11,14 +11,16 @@ from datetime import date from pathlib import Path from types import TracebackType -from typing import Dict, List, Optional, Set, Tuple, Type, Union, cast +from typing import Dict, List, Literal, Optional, Set, Tuple, Type, Union, cast -# import matplotlib.cm as cm -# import 
matplotlib.pyplot as plt +import matplotlib.cm as cm + +# import matplotlib.patheffects as path_effects +import matplotlib.pyplot as plt import requests +import squarify +from matplotlib.patches import Patch -# import squarify -# from matplotlib.patches import Patch from ddev.cli.application import Application @@ -86,62 +88,151 @@ def print_table(app: Application, mode: str, modules: List[Dict[str, Union[str, app.display_table(mode, modules_table) -def plot_treemap(modules: List[Dict[str, Union[str, int, date]]]) -> None: - ''' - sizes = [mod["Size (Bytes)"] for mod in modules] +def plot_treemap( + modules: List[Dict[str, Union[str, int, date]]], + title: str, + show: bool, + mode: Literal["status", "diff"] = "status", + path: Optional[str] = None, +) -> None: + # Always use absolute value for sizing + sizes = [abs(mod["Size (Bytes)"]) for mod in modules] - integrations = [mod for mod in modules if mod["Type"] == "Integration"] - dependencies = [mod for mod in modules if mod["Type"] == "Dependency"] + # Setup figure + plt.figure(figsize=(12, 8)) + ax = plt.gca() + ax.set_axis_off() - def normalize(mods): - if not mods: - return [] - sizes = [mod["Size (Bytes)"] for mod in mods] - min_size = min(sizes) - max_size = max(sizes) - range_size = max_size - min_size or 1 - return [(s - min_size) / range_size for s in sizes] + # Compute layout + rects = squarify.normalize_sizes(sizes, 100, 100) + rects = squarify.squarify(rects, 0, 0, 100, 100) - norm_int = normalize(integrations) - norm_dep = normalize(dependencies) + colors = [] - def scale(val, vmin=0.3, vmax=0.85): - return vmin + val * (vmax - vmin) + if mode == "status": + # Normalization by type + integrations = [mod for mod in modules if mod["Type"] == "Integration"] + dependencies = [mod for mod in modules if mod["Type"] == "Dependency"] + + def normalize(mods): + if not mods: + return [] + sizes = [mod["Size (Bytes)"] for mod in mods] + min_size = min(sizes) + max_size = max(sizes) + range_size = max_size - 
min_size or 1 + return [(s - min_size) / range_size for s in sizes] + + norm_int = normalize(integrations) + norm_dep = normalize(dependencies) + + def scale(val, vmin=0.3, vmax=0.85): + return vmin + val * (vmax - vmin) + + cmap_int = cm.get_cmap("Purples") + cmap_dep = cm.get_cmap("Reds") + + for mod in modules: + if mod["Type"] == "Integration": + idx = integrations.index(mod) + colors.append(cmap_int(scale(norm_int[idx], 0.3, 0.6))) + elif mod["Type"] == "Dependency": + idx = dependencies.index(mod) + colors.append(cmap_dep(scale(norm_dep[idx], 0.3, 0.85))) + else: + colors.append("#999999") - cmap_int = cm.get_cmap("Purples") - cmap_dep = cm.get_cmap("Reds") + elif mode == "diff": + ''' + # ------- BOTH POSITIVE AND NEGATIVE IN THE SAME TREEMAP -------- + # Reds for positive, Greens for negative + cmap_pos = cm.get_cmap("Reds") + cmap_neg = cm.get_cmap("Greens") - colors = [] - for mod in modules: - if mod["Type"] == "Integration": - idx = integrations.index(mod) - colors.append(cmap_int(scale(norm_int[idx], 0.6, 0.85))) - elif mod["Type"] == "Dependency": - idx = dependencies.index(mod) - colors.append(cmap_dep(scale(norm_dep[idx], 0.3, 0.85))) + max_size = max(abs(mod["Size (Bytes)"]) for mod in modules) + + for mod in modules: + value = mod["Size (Bytes)"] + intensity = abs(value) / max_size + color = cmap_pos(intensity) if value > 0 else cmap_neg(intensity) + colors.append(color) + + ''' + cmap_pos = cm.get_cmap("Oranges") + cmap_neg = cm.get_cmap("Blues") + + positives = [mod for mod in modules if mod["Size (Bytes)"] > 0] + negatives = [mod for mod in modules if mod["Size (Bytes)"] < 0] + + sizes_pos = [mod["Size (Bytes)"] for mod in positives] + sizes_neg = [abs(mod["Size (Bytes)"]) for mod in negatives] + + sum_pos = sum(sizes_pos) + sum_neg = sum(sizes_neg) + + canvas_area = 50 * 100 # each half has same max area + + # Determine which side is dominant (fills fully) + if sum_pos >= sum_neg: + # Red fills right, green scales left + norm_sizes_pos = [s / 
sum_pos * canvas_area for s in sizes_pos] + norm_sizes_neg = [s / sum_pos * canvas_area for s in sizes_neg] + rects_pos = squarify.squarify(norm_sizes_pos, 50, 0, 50, 100) + rects_neg = squarify.squarify(norm_sizes_neg, 0, 0, 50, 100) else: - colors.append("#999999") + # Green fills left, red scales right + norm_sizes_neg = [s / sum_neg * canvas_area for s in sizes_neg] + norm_sizes_pos = [s / sum_neg * canvas_area for s in sizes_pos] + rects_neg = squarify.squarify(norm_sizes_neg, 0, 0, 50, 100) + rects_pos = squarify.squarify(norm_sizes_pos, 50, 0, 50, 100) - plt.figure(figsize=(12, 8)) - ax = plt.gca() - ax.set_axis_off() + rects = rects_neg + rects_pos + modules = negatives + positives - rects = squarify.normalize_sizes(sizes, 100, 100) - rects = squarify.squarify(rects, 0, 0, 100, 100) + # Draw colors + def rescale_intensity(val, min_val=0.3, max_val=0.8): + return min_val + (max_val - min_val) * val + + max_size = max(sizes_pos + sizes_neg) or 1 + colors = [] + + for mod in negatives: + raw = abs(mod["Size (Bytes)"]) / max_size + intensity = rescale_intensity(raw) + colors.append(cmap_neg(intensity)) + for mod in positives: + raw = mod["Size (Bytes)"] / max_size + intensity = rescale_intensity(raw) + colors.append(cmap_pos(intensity)) + + # Draw rectangles and labels for rect, mod, color in zip(rects, modules, colors, strict=False): x, y, dx, dy = rect["x"], rect["y"], rect["dx"], rect["dy"] ax.add_patch(plt.Rectangle((x, y), dx, dy, color=color, ec="white")) - area = dx * dy - font_size = max(6, min(18, area / 100)) + # Font size config + MIN_FONT_SIZE = 6 + MAX_FONT_SIZE = 12 + FONT_SIZE_SCALE = 0.4 + AVG_SIDE = (dx * dy) ** 0.5 + font_size = max(MIN_FONT_SIZE, min(MAX_FONT_SIZE, AVG_SIDE * FONT_SIZE_SCALE)) name = mod["Name"] size_str = f"({mod['Size']})" - label = "" - name_fits = 0.5 * (len(name) + 2) < dx - size_fits = 0.5 * (len(size_str) + 2) - both_fit = 5 < dy + CHAR_WIDTH_FACTOR = 0.1 + CHAR_HEIGHT_FACTOR = 0.5 + name_fits = (len(name) + 2) * 
font_size * CHAR_WIDTH_FACTOR < dx and dy > font_size * CHAR_HEIGHT_FACTOR + size_fits = (len(size_str) + 2) * font_size * CHAR_WIDTH_FACTOR < dx + both_fit = dy > font_size * CHAR_HEIGHT_FACTOR * 2 + + if dx < 5 or dy < 5: + label = None + elif not name_fits and dx > 5: + max_chars = int(dx / (font_size * CHAR_WIDTH_FACTOR)) - 2 + if 4 <= max_chars: + name = name[: max_chars - 3] + "..." + name_fits = True if name_fits and size_fits and both_fit: label = f"{name}\n{size_str}" @@ -156,18 +247,26 @@ def scale(val, vmin=0.3, vmax=0.85): ax.set_xlim(0, 100) ax.set_ylim(0, 100) - plt.title("Modules by Disk Usage", fontsize=16) + plt.title(title, fontsize=16) - legend_handles = [ - Patch(color=cmap_int(0.6), label="Integration"), - Patch(color=cmap_dep(0.6), label="Dependency"), - ] - plt.legend(handles=legend_handles, title="Type", loc="center left", bbox_to_anchor=(1.0, 0.5)) + if mode == "status": + legend_handles = [ + Patch(color=cm.get_cmap("Purples")(0.6), label="Integration"), + Patch(color=cm.get_cmap("Reds")(0.6), label="Dependency"), + ] + elif mode == "diff": + legend_handles = [ + Patch(color=cm.get_cmap("Oranges")(0.7), label="Increase"), + Patch(color=cm.get_cmap("Blues")(0.7), label="Decrease"), + ] + plt.legend(handles=legend_handles, title="Type", loc="center left", bbox_to_anchor=(1.0, 0.5)) plt.subplots_adjust(right=0.8) plt.tight_layout() - plt.show() - ''' + if show: + plt.show() + if path: + plt.savefig(path, bbox_inches='tight') def get_dependencies_sizes( diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index 091f5af8bfe73..dcce6a37f1521 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -23,6 +23,7 @@ group_modules, is_correct_dependency, is_valid_integration, + plot_treemap, print_csv, print_table, valid_platforms_versions, @@ -40,6 +41,12 @@ @click.option('--python', 'version', help="Python version (e.g 3.12). 
If not specified, all versions will be analyzed") @click.option('--compressed', is_flag=True, help="Measure compressed size") @click.option('--csv', is_flag=True, help="Output in CSV format") +@click.option('--save_to_png_path', help="Path to save the treemap as PNG") +@click.option( + '--show_gui', + is_flag=True, + help="Display a pop-up window with a treemap showing size differences between the two commits.", +) @click.pass_obj def diff( app: Application, @@ -49,6 +56,8 @@ def diff( version: Optional[str], compressed: bool, csv: bool, + save_to_png_path: str, + show_gui: bool, ) -> None: """ Compare the size of integrations and dependencies between two commits. @@ -75,10 +84,36 @@ def diff( progress.remove_task(task) for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): - diff_mode(app, gitRepo, before, after, plat, ver, compressed, csv, i, progress) + diff_mode( + app, + gitRepo, + before, + after, + plat, + ver, + compressed, + csv, + i, + progress, + save_to_png_path, + show_gui, + ) else: progress.remove_task(task) - diff_mode(app, gitRepo, before, after, platform, version, compressed, csv, None, progress) + diff_mode( + app, + gitRepo, + before, + after, + platform, + version, + compressed, + csv, + None, + progress, + save_to_png_path, + show_gui, + ) except Exception as e: app.abort(str(e)) @@ -95,6 +130,8 @@ def diff_mode( csv: bool, i: Optional[int], progress: Progress, + save_to_png_path: str, + show_gui: bool, ) -> None: files_b, dependencies_b, files_a, dependencies_a = get_repo_info( gitRepo, platform, version, before, after, compressed, progress @@ -113,6 +150,15 @@ def diff_mode( else: if csv: print_csv(app, i, grouped_modules) + elif show_gui or save_to_png_path: + print_table(app, "Diff", grouped_modules) + plot_treemap( + grouped_modules, + f"Disk Usage Differences for {platform} and Python version {version}", + show_gui, + "diff", + save_to_png_path, + ) else: print_table(app, "Diff", grouped_modules) diff --git 
a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index 5ea54ac76adc6..35f122984f9e6 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -37,8 +37,22 @@ @click.option('--python', 'version', help="Python version (e.g 3.12). If not specified, all versions will be analyzed") @click.option('--compressed', is_flag=True, help="Measure compressed size") @click.option('--csv', is_flag=True, help="Output in CSV format") +@click.option('--save_to_png_path', help="Path to save the treemap as PNG") +@click.option( + '--show_gui', + is_flag=True, + help="Display a pop-up window with a treemap showing the current size distribution of modules.", +) @click.pass_obj -def status(app: Application, platform: Optional[str], version: Optional[str], compressed: bool, csv: bool) -> None: +def status( + app: Application, + platform: Optional[str], + version: Optional[str], + compressed: bool, + csv: bool, + save_to_png_path: str, + show_gui: bool, +) -> None: """ Show the current size of all integrations and dependencies. 
""" @@ -53,16 +67,24 @@ def status(app: Application, platform: Optional[str], version: Optional[str], co platforms = valid_platforms if platform is None else [platform] versions = valid_versions if version is None else [version] for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): - status_mode(app, repo_path, plat, ver, compressed, csv, i) + status_mode(app, repo_path, plat, ver, compressed, csv, i, save_to_png_path, show_gui) else: - status_mode(app, repo_path, platform, version, compressed, csv, None) + status_mode(app, repo_path, platform, version, compressed, csv, None, save_to_png_path, show_gui) except Exception as e: app.abort(str(e)) def status_mode( - app: Application, repo_path: Path, platform: str, version: str, compressed: bool, csv: bool, i: Optional[int] + app: Application, + repo_path: Path, + platform: str, + version: str, + compressed: bool, + csv: bool, + i: Optional[int], + save_to_png_path: str, + show_gui: bool, ) -> None: with console.status("[cyan]Calculating sizes...", spinner="dots"): modules = get_files(compressed, repo_path) + get_dependencies(repo_path, platform, version, compressed) @@ -71,9 +93,17 @@ def status_mode( if csv: print_csv(app, i, grouped_modules) + elif show_gui or save_to_png_path: + print_table(app, "Status", grouped_modules) + plot_treemap( + grouped_modules, + f"Disk Usage Status for {platform} and Python version {version}", + show_gui, + "status", + save_to_png_path, + ) else: print_table(app, "Status", grouped_modules) - plot_treemap(grouped_modules) def get_files(compressed: bool, repo_path: Path) -> List[Dict[str, Union[str, int]]]: diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index d268d44c05109..08ec61be68fb3 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -7,6 +7,7 @@ from typing import Dict, List, Optional, Set, Tuple, Union, cast import click +import matplotlib.pyplot as plt import requests from 
rich.console import Console from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn @@ -48,6 +49,12 @@ ) @click.option('--compressed', is_flag=True, help="Measure compressed size") @click.option('--csv', is_flag=True, help="Output results in CSV format") +@click.option('--save_to_png_path', help="Path to save the treemap as PNG") +@click.option( + '--show_gui', + is_flag=True, + help="Display a pop-up window with a line chart showing the size evolution of the selected module over time.", +) @click.pass_obj def timeline( app: Application, @@ -60,6 +67,8 @@ def timeline( platform: Optional[str], compressed: bool, csv: bool, + save_to_png_path: str, + show_gui: bool, ) -> None: """ Show the size evolution of a module (integration or dependency) over time. @@ -109,7 +118,20 @@ def timeline( progress.remove_task(task) for i, plat in enumerate(valid_platforms): timeline_mode( - app, gitRepo, type, module, commits, threshold, plat, compressed, csv, i, None, progress + app, + gitRepo, + type, + module, + commits, + threshold, + plat, + compressed, + csv, + i, + None, + progress, + save_to_png_path, + show_gui, ) else: progress.remove_task(task) @@ -127,6 +149,8 @@ def timeline( None, first_commit, progress, + save_to_png_path, + show_gui, ) except Exception as e: @@ -147,6 +171,8 @@ def timeline_mode( i: Optional[int], first_commit: Optional[str], progress: Progress, + save_to_png_path: str, + show_gui: bool, ) -> None: modules = get_repo_info(gitRepo, type, platform, module, commits, compressed, first_commit, progress) if modules != []: @@ -154,6 +180,9 @@ def timeline_mode( trimmed_modules = trim_modules(grouped_modules, threshold) if csv: print_csv(app, i, trimmed_modules) + elif show_gui or save_to_png_path: + print_table(app, "Timeline for " + module, trimmed_modules) + plot_linegraph(trimmed_modules, module, platform, show_gui, save_to_png_path) else: print_table(app, "Timeline for " + module, trimmed_modules) @@ -427,3 +456,24 @@ 
def get_dependency_list(path: str, platforms: Set[str]) -> Set[str]: matches = re.findall(r"([\w\-\d\.]+) @ https?://[^\s#]+", file.read()) dependencies.update(matches) return dependencies + + +def plot_linegraph(modules, module, platform, show, path): + dates = [entry["Date"] for entry in modules] + sizes = [entry["Size (Bytes)"] for entry in modules] + title = f"Disk Usage Evolution of {module} for {platform}" if platform else f"Disk Usage Evolution of {module}" + + plt.figure(figsize=(10, 6)) + plt.plot(dates, sizes, linestyle='-') + plt.title(title) + plt.xlabel("Date") + plt.ylabel("Size (Bytes)") + plt.grid(True) + plt.xticks(rotation=45) + plt.tight_layout() + + if path: + plt.savefig(path) + if show: + plt.show() + plt.close() diff --git a/ddev/tests/cli/size/test_diff.py b/ddev/tests/cli/size/test_diff.py index 06c2ba443989f..038c4b227d9d1 100644 --- a/ddev/tests/cli/size/test_diff.py +++ b/ddev/tests/cli/size/test_diff.py @@ -163,6 +163,7 @@ def get_compressed_dependencies_side_effect(_, __, ___, ____): patch("ddev.cli.size.common.group_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.common.print_csv"), patch("ddev.cli.size.common.print_table"), + patch("ddev.cli.size.common.plot_treemap"), ): yield diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 0f3fb40d8d147..7e7f7abc163b8 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -267,6 +267,7 @@ def mock_timeline_dependencies(): patch("ddev.cli.size.timeline.group_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.print_table"), + patch("ddev.cli.size.timeline.plot_linegraph"), ): mock_response = MagicMock() mock_response.headers = {"Content-Length": "1024"} From dc6a9db268277f7691b2cc79c967f6148c0815cd Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Wed, 23 Apr 2025 11:13:49 +0200 Subject: [PATCH 32/70] 
Changelog --- ddev/changelog.d/20128.added | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 ddev/changelog.d/20128.added diff --git a/ddev/changelog.d/20128.added b/ddev/changelog.d/20128.added new file mode 100644 index 0000000000000..6b95fbdfccc6f --- /dev/null +++ b/ddev/changelog.d/20128.added @@ -0,0 +1,4 @@ +Added new commands to track and analyze size changes in integrations and dependencies: +- **`ddev size status`**: Shows current sizes of all modules. +- **`ddev size diff [COMMIT_BEFORE] [COMMIT_AFTER]`**: Compares size changes between two commits. +- **`ddev size timeline {integration | dependency} [INTEGRATION_NAME/DEPENDENCY_NAME]`**: Visualizes the size evolution of a module over time. From 90d10e77cc71db9d4275caaf4c738caa9950cba1 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Wed, 23 Apr 2025 12:32:26 +0200 Subject: [PATCH 33/70] CI images integration and fixed typing --- .github/workflows/measure-disk-usage.yml | 38 +++++++++++--- ddev/src/ddev/cli/size/common.py | 64 +++++++++++------------- ddev/src/ddev/cli/size/diff.py | 7 ++- ddev/src/ddev/cli/size/status.py | 6 ++- ddev/src/ddev/cli/size/timeline.py | 5 +- 5 files changed, 73 insertions(+), 47 deletions(-) diff --git a/.github/workflows/measure-disk-usage.yml b/.github/workflows/measure-disk-usage.yml index 181cdfabd5880..b7596e218972a 100644 --- a/.github/workflows/measure-disk-usage.yml +++ b/.github/workflows/measure-disk-usage.yml @@ -29,30 +29,35 @@ jobs: ddev config set repo core - name: Measure disk usage (uncompressed) run: | + mkdir -p status_visualizations ddev size status --csv > size-uncompressed.csv - ddev size status > size-uncompressed.txt + ddev size status --save_to_png_path status_visualizations/uncompressed.png > size-uncompressed.txt cat size-uncompressed.txt echo "# Size (uncompressed)" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY cat size-uncompressed.txt >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY + - name: Measure disk 
usage (compressed) run: | + mkdir -p status_visualizations ddev size status --csv --compressed > size-compressed.csv - ddev size status --compressed > size-compressed.txt + ddev size status --compressed --save_to_png_path status_visualizations/compressed.png > size-compressed.txt cat size-compressed.txt echo "# Size (compressed)" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY cat size-compressed.txt >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY + - name: Measure disk usage differences from last commit (uncompressed) if: false # Disabled: size difference is not accurate due to dependency sizes not updated run: | + mkdir -p diff_visualizations BEFORE=$(git rev-parse HEAD^) AFTER=$(git rev-parse HEAD) ddev size diff $BEFORE $AFTER --csv > diff-uncompressed.csv - ddev size diff $BEFORE $AFTER > diff-uncompressed.txt + ddev size diff $BEFORE $AFTER --save_to_png_path diff_visualizations/diff-uncompressed-linux.png > diff-uncompressed.txt cat diff-uncompressed.txt echo "# Size diff (uncompressed)" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY @@ -62,38 +67,39 @@ jobs: - name: Measure disk usage differences from last commit (compressed) if: false # Disabled: size difference is not accurate due to dependency sizes not updated run: | + mkdir -p diff_visualizations BEFORE=$(git rev-parse HEAD^) AFTER=$(git rev-parse HEAD) ddev size diff $BEFORE $AFTER --compressed --csv > diff-compressed.csv - ddev size diff $BEFORE $AFTER --compressed > diff-compressed.txt + ddev size diff $BEFORE $AFTER --compressed --save_to_png_path diff_visualizations/diff-compressed-linux.png > diff-compressed.txt cat diff-compressed.txt echo "# Size diff (compressed)" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY cat diff-compressed.txt >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY - - name: Upload file sizes (uncompressed) uses: actions/upload-artifact@v4 with: name: size-uncompressed.csv path: size-uncompressed.csv if-no-files-found: 
error - + - name: Upload file sizes (compressed) uses: actions/upload-artifact@v4 with: name: size-compressed.csv path: size-compressed.csv if-no-files-found: error - + - name: Upload file sizes diff (uncompressed) - if: false # Disabled: size difference is not accurate due to dependency sizes not updated + if: false # Disabled: size difference is not accurate due to dependency sizes not updated uses: actions/upload-artifact@v4 with: name: diff-uncompressed.csv path: diff-uncompressed.csv if-no-files-found: error + - name: Upload file sizes diff (compressed) if: false # Disabled: size difference is not accurate due to dependency sizes not updated uses: actions/upload-artifact@v4 @@ -101,3 +107,19 @@ jobs: name: diff-compressed.csv path: diff-compressed.csv if-no-files-found: error + + - name: Upload status PNGs + uses: actions/upload-artifact@v4 + with: + name: size-visuals + path: status_visualizations/ + if-no-files-found: error + + - name: Upload diff PNGs + if: false # Disabled: size difference is not accurate due to dependency sizes not updated + uses: actions/upload-artifact@v4 + with: + name: diff-visuals + path: diff_visualizations/ + if-no-files-found: error + diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index c6a01615c79bd..d159e6fcd7a5d 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -87,7 +87,6 @@ def print_table(app: Application, mode: str, modules: List[Dict[str, Union[str, modules_table[key][i] = str(value) app.display_table(mode, modules_table) - def plot_treemap( modules: List[Dict[str, Union[str, int, date]]], title: str, @@ -95,29 +94,30 @@ def plot_treemap( mode: Literal["status", "diff"] = "status", path: Optional[str] = None, ) -> None: - # Always use absolute value for sizing - sizes = [abs(mod["Size (Bytes)"]) for mod in modules] + # Convert sizes to absolute values for layout computation + sizes = [abs(cast(int, mod["Size (Bytes)"])) for mod in modules] - # Setup 
figure + # Initialize figure and axis plt.figure(figsize=(12, 8)) ax = plt.gca() ax.set_axis_off() - # Compute layout + # Compute layout rectangles based on size rects = squarify.normalize_sizes(sizes, 100, 100) rects = squarify.squarify(rects, 0, 0, 100, 100) colors = [] if mode == "status": - # Normalization by type + # Separate modules by type integrations = [mod for mod in modules if mod["Type"] == "Integration"] dependencies = [mod for mod in modules if mod["Type"] == "Dependency"] + # Normalize sizes within each group def normalize(mods): if not mods: return [] - sizes = [mod["Size (Bytes)"] for mod in mods] + sizes = [cast(int, mod["Size (Bytes)"]) for mod in mods] min_size = min(sizes) max_size = max(sizes) range_size = max_size - min_size or 1 @@ -126,12 +126,14 @@ def normalize(mods): norm_int = normalize(integrations) norm_dep = normalize(dependencies) + # Map normalized values to color intensity def scale(val, vmin=0.3, vmax=0.85): return vmin + val * (vmax - vmin) cmap_int = cm.get_cmap("Purples") cmap_dep = cm.get_cmap("Reds") + # Assign colors based on type and normalized size for mod in modules: if mod["Type"] == "Integration": idx = integrations.index(mod) @@ -143,53 +145,38 @@ def scale(val, vmin=0.3, vmax=0.85): colors.append("#999999") elif mode == "diff": - ''' - # ------- BOTH POSITIVE AND NEGATIVE IN THE SAME TREEMAP -------- - # Reds for positive, Greens for negative - cmap_pos = cm.get_cmap("Reds") - cmap_neg = cm.get_cmap("Greens") - - max_size = max(abs(mod["Size (Bytes)"]) for mod in modules) - - for mod in modules: - value = mod["Size (Bytes)"] - intensity = abs(value) / max_size - color = cmap_pos(intensity) if value > 0 else cmap_neg(intensity) - colors.append(color) - - ''' + # Separate modules by positive and negative size change cmap_pos = cm.get_cmap("Oranges") cmap_neg = cm.get_cmap("Blues") - positives = [mod for mod in modules if mod["Size (Bytes)"] > 0] - negatives = [mod for mod in modules if mod["Size (Bytes)"] < 0] + 
positives = [mod for mod in modules if cast(int, mod["Size (Bytes)"]) > 0] + negatives = [mod for mod in modules if cast(int, mod["Size (Bytes)"]) < 0] - sizes_pos = [mod["Size (Bytes)"] for mod in positives] - sizes_neg = [abs(mod["Size (Bytes)"]) for mod in negatives] + sizes_pos = [cast(int, mod["Size (Bytes)"]) for mod in positives] + sizes_neg = [abs(cast(int, mod["Size (Bytes)"])) for mod in negatives] sum_pos = sum(sizes_pos) sum_neg = sum(sizes_neg) - canvas_area = 50 * 100 # each half has same max area + canvas_area = 50 * 100 - # Determine which side is dominant (fills fully) + # Determine dominant side and scale layout accordingly if sum_pos >= sum_neg: - # Red fills right, green scales left norm_sizes_pos = [s / sum_pos * canvas_area for s in sizes_pos] norm_sizes_neg = [s / sum_pos * canvas_area for s in sizes_neg] rects_pos = squarify.squarify(norm_sizes_pos, 50, 0, 50, 100) rects_neg = squarify.squarify(norm_sizes_neg, 0, 0, 50, 100) else: - # Green fills left, red scales right norm_sizes_neg = [s / sum_neg * canvas_area for s in sizes_neg] norm_sizes_pos = [s / sum_neg * canvas_area for s in sizes_pos] rects_neg = squarify.squarify(norm_sizes_neg, 0, 0, 50, 100) rects_pos = squarify.squarify(norm_sizes_pos, 50, 0, 50, 100) + # Merge layout and module lists for unified drawing rects = rects_neg + rects_pos modules = negatives + positives - # Draw colors + # Compute color intensity for each module def rescale_intensity(val, min_val=0.3, max_val=0.8): return min_val + (max_val - min_val) * val @@ -197,21 +184,21 @@ def rescale_intensity(val, min_val=0.3, max_val=0.8): colors = [] for mod in negatives: - raw = abs(mod["Size (Bytes)"]) / max_size + raw = abs(cast(int, mod["Size (Bytes)"])) / max_size intensity = rescale_intensity(raw) colors.append(cmap_neg(intensity)) for mod in positives: - raw = mod["Size (Bytes)"] / max_size + raw = cast(int, mod["Size (Bytes)"]) / max_size intensity = rescale_intensity(raw) colors.append(cmap_pos(intensity)) - # 
Draw rectangles and labels + # Manual treemap layout and coloring to personalize labels for rect, mod, color in zip(rects, modules, colors, strict=False): x, y, dx, dy = rect["x"], rect["y"], rect["dx"], rect["dy"] ax.add_patch(plt.Rectangle((x, y), dx, dy, color=color, ec="white")) - # Font size config + # Determine font size based on rectangle area MIN_FONT_SIZE = 6 MAX_FONT_SIZE = 12 FONT_SIZE_SCALE = 0.4 @@ -220,12 +207,14 @@ def rescale_intensity(val, min_val=0.3, max_val=0.8): name = mod["Name"] size_str = f"({mod['Size']})" + # Check whether text fits inside the rectangle CHAR_WIDTH_FACTOR = 0.1 CHAR_HEIGHT_FACTOR = 0.5 name_fits = (len(name) + 2) * font_size * CHAR_WIDTH_FACTOR < dx and dy > font_size * CHAR_HEIGHT_FACTOR size_fits = (len(size_str) + 2) * font_size * CHAR_WIDTH_FACTOR < dx both_fit = dy > font_size * CHAR_HEIGHT_FACTOR * 2 + # Possibly truncate name if it doesn't fit if dx < 5 or dy < 5: label = None elif not name_fits and dx > 5: @@ -234,6 +223,7 @@ def rescale_intensity(val, min_val=0.3, max_val=0.8): name = name[: max_chars - 3] + "..." 
name_fits = True + # Construct label if there's space if name_fits and size_fits and both_fit: label = f"{name}\n{size_str}" elif name_fits: @@ -241,9 +231,11 @@ def rescale_intensity(val, min_val=0.3, max_val=0.8): else: label = None + # Draw label if label: ax.text(x + dx / 2, y + dy / 2, label, va="center", ha="center", fontsize=font_size, color="black") + # Finalize layout and show/save plot ax.set_xlim(0, 100) ax.set_ylim(0, 100) @@ -263,12 +255,12 @@ def rescale_intensity(val, min_val=0.3, max_val=0.8): plt.legend(handles=legend_handles, title="Type", loc="center left", bbox_to_anchor=(1.0, 0.5)) plt.subplots_adjust(right=0.8) plt.tight_layout() + if show: plt.show() if path: plt.savefig(path, bbox_inches='tight') - def get_dependencies_sizes( deps: List[str], download_urls: List[str], compressed: bool ) -> List[Dict[str, Union[str, int]]]: diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index dcce6a37f1521..392d3f6354d26 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -84,6 +84,11 @@ def diff( progress.remove_task(task) for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): + path = None + if save_to_png_path: + base, ext = os.path.splitext(save_to_png_path) + path = f"{base}_{plat}_{ver}{ext}" + diff_mode( app, gitRepo, @@ -95,7 +100,7 @@ def diff( csv, i, progress, - save_to_png_path, + path, show_gui, ) else: diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index 35f122984f9e6..334ff7e264f14 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -67,7 +67,11 @@ def status( platforms = valid_platforms if platform is None else [platform] versions = valid_versions if version is None else [version] for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): - status_mode(app, repo_path, plat, ver, compressed, csv, i, save_to_png_path, show_gui) + path = None + if save_to_png_path: + 
base, ext = os.path.splitext(save_to_png_path) + path = f"{base}_{plat}_{ver}{ext}" + status_mode(app, repo_path, plat, ver, compressed, csv, i, path, show_gui) else: status_mode(app, repo_path, platform, version, compressed, csv, None, save_to_png_path, show_gui) diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index 08ec61be68fb3..5f205eaddbd55 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -117,6 +117,9 @@ def timeline( if type == "dependency" and platform is None: progress.remove_task(task) for i, plat in enumerate(valid_platforms): + if save_to_png_path: + base, ext = os.path.splitext(save_to_png_path) + path = f"{base}_{plat}{ext}" timeline_mode( app, gitRepo, @@ -130,7 +133,7 @@ def timeline( i, None, progress, - save_to_png_path, + path, show_gui, ) else: From c97eaad8ff983da576707d02fc24d669d60d2d67 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Fri, 25 Apr 2025 16:10:47 +0200 Subject: [PATCH 34/70] user errors and dep and int versions --- ddev/src/ddev/cli/size/common.py | 267 +++++++++++++++++------ ddev/src/ddev/cli/size/diff.py | 221 ++++++++----------- ddev/src/ddev/cli/size/status.py | 62 +----- ddev/src/ddev/cli/size/timeline.py | 305 +++++++++++++++------------ ddev/tests/cli/size/test_common.py | 116 +++++++--- ddev/tests/cli/size/test_diff.py | 137 ++++++------ ddev/tests/cli/size/test_status.py | 120 +++-------- ddev/tests/cli/size/test_timeline.py | 139 +++++------- 8 files changed, 712 insertions(+), 655 deletions(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index d159e6fcd7a5d..35413e37eac61 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -11,11 +11,9 @@ from datetime import date from pathlib import Path from types import TracebackType -from typing import Dict, List, Literal, Optional, Set, Tuple, Type, Union, cast +from typing import Dict, List, Literal, Optional, Set, 
Tuple, Type, TypedDict, Union, cast import matplotlib.cm as cm - -# import matplotlib.patheffects as path_effects import matplotlib.pyplot as plt import requests import squarify @@ -24,6 +22,37 @@ from ddev.cli.application import Application +class FileDataEntry(TypedDict): + Name: str + Version: str + Size_Bytes: int + Size: str + Type: str + + +class FileDataEntryPlatformVersion(FileDataEntry): + Platform: str + Python_Version: str + + +class CommitEntry(TypedDict): + Size_Bytes: int + Version: str + Date: date + Author: str + Commit_Message: str + Commit_SHA: str + + +class CommitEntryWithDelta(CommitEntry): + Delta_Bytes: int + Delta: str + + +class CommitEntryPlatformWithDelta(CommitEntryWithDelta): + Platform: str + + def valid_platforms_versions(repo_path: Union[Path, str]) -> Tuple[Set[str], Set[str]]: resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved")) platforms = [] @@ -65,37 +94,58 @@ def is_correct_dependency(platform: str, version: str, name: str) -> bool: return platform in name and version in name -def print_csv(app: Application, i: Optional[int], modules: List[Dict[str, Union[str, int, date]]]) -> None: +def print_csv( + app: Application, + i: Optional[int], + modules: ( + List[FileDataEntry] + | List[FileDataEntryPlatformVersion] + | List[CommitEntryWithDelta] + | List[CommitEntryPlatformWithDelta] + ), +) -> None: headers = [k for k in modules[0].keys() if k not in ["Size", "Delta"]] if not i: app.display(",".join(headers)) for row in modules: if any(str(value).strip() not in ("", "0") for value in row.values()): - app.display(",".join(format(str(row[h])) for h in headers)) + app.display(",".join(format(str(row.get(h, ""))) for h in headers)) def format(s: str) -> str: return f'"{s}"' if "," in s else s -def print_table(app: Application, mode: str, modules: List[Dict[str, Union[str, int, date]]]) -> None: - modules_table: Dict[str, Dict[int, str]] = {col: {} for col in modules[0].keys() if "(Bytes)" not in col} 
+def print_table( + app: Application, + mode: str, + modules: ( + List[FileDataEntry] + | List[FileDataEntryPlatformVersion] + | List[CommitEntryWithDelta] + | List[CommitEntryPlatformWithDelta] + ), +) -> None: + columns = [col for col in modules[0].keys() if "Bytes" not in col] + modules_table: Dict[str, Dict[int, str]] = {col: {} for col in columns} + for i, row in enumerate(modules): - for key, value in row.items(): - if key in modules_table: - modules_table[key][i] = str(value) + for key in columns: + modules_table[key][i] = str(row.get(key, "")) + app.display_table(mode, modules_table) + def plot_treemap( - modules: List[Dict[str, Union[str, int, date]]], + modules: List[FileDataEntry] | List[FileDataEntryPlatformVersion], title: str, show: bool, mode: Literal["status", "diff"] = "status", path: Optional[str] = None, ) -> None: # Convert sizes to absolute values for layout computation - sizes = [abs(cast(int, mod["Size (Bytes)"])) for mod in modules] + sizes = [abs(mod["Size_Bytes"]) for mod in modules] # Initialize figure and axis plt.figure(figsize=(12, 8)) @@ -117,7 +167,7 @@ def plot_treemap( def normalize(mods): if not mods: return [] - sizes = [cast(int, mod["Size (Bytes)"]) for mod in mods] + sizes = [mod["Size_Bytes"] for mod in mods] min_size = min(sizes) max_size = max(sizes) range_size = max_size - min_size or 1 @@ -149,11 +199,11 @@ def scale(val, vmin=0.3, vmax=0.85): cmap_pos = cm.get_cmap("Oranges") cmap_neg = cm.get_cmap("Blues") - positives = [mod for mod in modules if cast(int, mod["Size (Bytes)"]) > 0] - negatives = [mod for mod in modules if cast(int, mod["Size (Bytes)"]) < 0] + positives = [mod for mod in modules if cast(int, mod["Size_Bytes"]) > 0] + negatives = [mod for mod in modules if cast(int, mod["Size_Bytes"]) < 0] - sizes_pos = [cast(int, mod["Size (Bytes)"]) for mod in positives] - sizes_neg = [abs(cast(int, mod["Size (Bytes)"])) for mod in negatives] + sizes_pos = [mod["Size_Bytes"] for mod in positives] + sizes_neg = 
[abs(mod["Size_Bytes"]) for mod in negatives] sum_pos = sum(sizes_pos) sum_neg = sum(sizes_neg) @@ -184,12 +234,12 @@ def rescale_intensity(val, min_val=0.3, max_val=0.8): colors = [] for mod in negatives: - raw = abs(cast(int, mod["Size (Bytes)"])) / max_size + raw = abs(mod["Size_Bytes"]) / max_size intensity = rescale_intensity(raw) colors.append(cmap_neg(intensity)) for mod in positives: - raw = cast(int, mod["Size (Bytes)"]) / max_size + raw = mod["Size_Bytes"] / max_size intensity = rescale_intensity(raw) colors.append(cmap_pos(intensity)) @@ -259,13 +309,14 @@ def rescale_intensity(val, min_val=0.3, max_val=0.8): if show: plt.show() if path: - plt.savefig(path, bbox_inches='tight') + plt.savefig(path, bbox_inches="tight", format="png") + def get_dependencies_sizes( - deps: List[str], download_urls: List[str], compressed: bool -) -> List[Dict[str, Union[str, int]]]: - file_data = [] - for dep, url in zip(deps, download_urls, strict=False): + deps: List[str], download_urls: List[str], versions: List[str], compressed: bool +) -> List[FileDataEntry]: + file_data: List[FileDataEntry] = [] + for dep, url, version in zip(deps, download_urls, versions, strict=False): if compressed: response = requests.head(url) response.raise_for_status() @@ -292,64 +343,140 @@ def get_dependencies_sizes( for name in filenames: file_path = os.path.join(dirpath, name) size += os.path.getsize(file_path) - file_data.append({"File Path": str(dep), "Type": "Dependency", "Name": str(dep), "Size (Bytes)": int(size)}) - return cast(List[Dict[str, Union[str, int]]], file_data) + file_data.append( + { + "Name": str(dep), + "Version": version, + "Size_Bytes": int(size), + "Size": convert_size(size), + "Type": "Dependency", + } + ) + + return file_data + + +def get_files(repo_path: str | Path, compressed: bool) -> List[FileDataEntry]: + ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} + git_ignore = get_gitignore_files(repo_path) + included_folder = "datadog_checks/" + + 
integration_sizes: Dict[str, int] = {} + integration_versions: Dict[str, str] = {} + for root, _, files in os.walk(repo_path): + for file in files: + file_path = os.path.join(root, file) + relative_path = os.path.relpath(file_path, repo_path) -def get_dependencies_list(file_path: str) -> Tuple[List[str], List[str]]: + if not is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): + continue + path = Path(relative_path) + parts = path.parts + + integration_name = parts[0] + + size = compress(file_path) if compressed else os.path.getsize(file_path) + integration_sizes[integration_name] = integration_sizes.get(integration_name, 0) + size + + if integration_name not in integration_versions: + about_path = os.path.join( + repo_path, integration_name, "datadog_checks", integration_name, "__about__.py" + ) + version = extract_version_from_about_py(about_path) + integration_versions[integration_name] = version + + return [ + { + "Name": name, + "Version": integration_versions.get(name, ""), + "Size_Bytes": size, + "Size": convert_size(size), + "Type": "Integration", + } + for name, size in integration_sizes.items() + ] + + +def get_dependencies_list(file_path: str) -> Tuple[List[str], List[str], List[str]]: download_urls = [] deps = [] + versions = [] with open(file_path, "r", encoding="utf-8") as file: file_content = file.read() for line in file_content.splitlines(): match = re.search(r"([\w\-\d\.]+) @ (https?://[^\s#]+)", line) - if match: - deps.append(match.group(1)) - download_urls.append(match.group(2)) - else: + if not match: raise WrongDependencyFormat("The dependency format 'name @ link' is no longer supported.") + name = match.group(1) + url = match.group(2) + + deps.append(name) + download_urls.append(url) + version_match = re.search(rf"{re.escape(name)}-([0-9]+(?:\.[0-9]+)*)-", url) + if version_match: + versions.append(version_match.group(1)) - return deps, download_urls + return deps, download_urls, versions def group_modules( - 
modules: List[Dict[str, Union[str, int]]], platform: str, version: str, i: Optional[int] -) -> List[Dict[str, Union[str, int, date]]]: - if modules == []: - return [ - { - "Name": "", - "Type": "", - "Size (Bytes)": 0, - "Size": "", - "Platform": "", - "Version": "", - } - ] - grouped_aux: Dict[tuple[str, str], int] = {} - for file in modules: - key = (str(file["Name"]), str(file["Type"])) - grouped_aux[key] = grouped_aux.get(key, 0) + int(file["Size (Bytes)"]) - if i is None: - return [ - {"Name": name, "Type": type, "Size (Bytes)": size, "Size": convert_size(size)} - for (name, type), size in grouped_aux.items() + modules: List[FileDataEntry], platform: str, version: str, i: Optional[int] +) -> List[FileDataEntryPlatformVersion] | List[FileDataEntry]: + if modules == [] and i is None: + empty_entry: FileDataEntry = { + "Name": "", + "Version": "", + "Size_Bytes": 0, + "Size": "", + "Type": "", + } + return [empty_entry] + elif modules == []: + empty_entry_with_platform: FileDataEntryPlatformVersion = { + "Name": "", + "Version": "", + "Size_Bytes": 0, + "Size": "", + "Type": "", + "Platform": "", + "Python_Version": "", + } + return [empty_entry_with_platform] + elif i is not None: + new_modules: List[FileDataEntryPlatformVersion] = [ + {**entry, "Platform": platform, "Python_Version": version} for entry in modules ] + return new_modules else: - return [ - { - "Name": name, - "Type": type, - "Size (Bytes)": size, - "Size": convert_size(size), - "Platform": platform, - "Version": version, - } - for (name, type), size in grouped_aux.items() - ] + return modules + + +def extract_version_from_about_py(path: str) -> str: + try: + with open(path, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if line.startswith("__version__"): + return line.split("=")[1].strip().strip("'\"") + except Exception: + pass + return "" -def get_gitignore_files(repo_path: Union[str, Path]) -> List[str]: +def get_dependencies(repo_path: str | Path, platform: str, 
version: str, compressed: bool) -> List[FileDataEntry]: + resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved")) + + for filename in os.listdir(resolved_path): + file_path = os.path.join(resolved_path, filename) + + if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): + deps, download_urls, versions = get_dependencies_list(file_path) + return get_dependencies_sizes(deps, download_urls, versions, compressed) + return [] + + +def get_gitignore_files(repo_path: str | Path) -> List[str]: gitignore_path = os.path.join(repo_path, ".gitignore") with open(gitignore_path, "r", encoding="utf-8") as file: gitignore_content = file.read() @@ -413,7 +540,15 @@ def get_module_commits( return self._run(f"git log --reverse --pretty=format:%H {initial}..{final} -- {module_path}") def checkout_commit(self, commit: str) -> None: - self._run(f"git fetch --quiet --depth 1 origin {commit}") + try: + self._run(f"git fetch --quiet --depth 1 origin {commit}") + except subprocess.CalledProcessError as e: + if e.returncode == 128: + raise ValueError( + f"Failed to fetch commit '{commit}'.\n" + f"Make sure the commit hash is correct and that your local repository " + "is up to date with the remote.\n" + ) from e self._run(f"git checkout --quiet {commit}") def sparse_checkout_commit(self, commit_sha: str, module: str) -> None: diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index 392d3f6354d26..ab0899f870e0c 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -3,26 +3,21 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import os -import tempfile -import zipfile -from pathlib import Path -from typing import Dict, List, Optional, Tuple, cast +from typing import List, Optional, Tuple, cast import click -import requests from rich.console import Console from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn from 
ddev.cli.application import Application from .common import ( + FileDataEntry, GitRepo, - compress, - get_dependencies_list, - get_gitignore_files, + convert_size, + get_dependencies, + get_files, group_modules, - is_correct_dependency, - is_valid_integration, plot_treemap, print_csv, print_table, @@ -33,8 +28,8 @@ @click.command() -@click.argument("before") -@click.argument("after") +@click.argument("first_commit") +@click.argument("second_commit") @click.option( '--platform', help="Target platform (e.g. linux-aarch64). If not specified, all platforms will be analyzed" ) @@ -50,8 +45,8 @@ @click.pass_obj def diff( app: Application, - before: str, - after: str, + first_commit: str, + second_commit: str, platform: Optional[str], version: Optional[str], compressed: bool, @@ -70,6 +65,17 @@ def diff( transient=True, ) as progress: task = progress.add_task("[cyan]Calculating differences...", total=None) + if len(first_commit) < 7 and len(second_commit) < 7: + raise click.BadParameter("Commit hashes must be at least 7 characters long") + elif len(first_commit) < 7: + raise click.BadParameter("First commit hash must be at least 7 characters long.", param_hint="first_commit") + elif len(second_commit) < 7: + raise click.BadParameter( + "Second commit hash must be at least 7 characters long.", param_hint="second_commit" + ) + # if first_commit == second_commit: + # raise click.BadParameter("Commit hashes must be different") + repo_url = app.repo.path with GitRepo(repo_url) as gitRepo: try: @@ -92,8 +98,8 @@ def diff( diff_mode( app, gitRepo, - before, - after, + first_commit, + second_commit, plat, ver, compressed, @@ -108,8 +114,8 @@ def diff( diff_mode( app, gitRepo, - before, - after, + first_commit, + second_commit, platform, version, compressed, @@ -121,42 +127,47 @@ def diff( ) except Exception as e: + if progress and progress.tasks: + progress.remove_task(task) + progress.stop() + app.abort(str(e)) def diff_mode( app: Application, gitRepo: GitRepo, - before: str, - 
after: str, + first_commit: str, + second_commit: str, platform: str, version: str, compressed: bool, csv: bool, i: Optional[int], progress: Progress, - save_to_png_path: str, + save_to_png_path: Optional[str], show_gui: bool, ) -> None: files_b, dependencies_b, files_a, dependencies_a = get_repo_info( - gitRepo, platform, version, before, after, compressed, progress + gitRepo, platform, version, first_commit, second_commit, compressed, progress ) integrations = get_diff(files_b, files_a, 'Integration') dependencies = get_diff(dependencies_b, dependencies_a, 'Dependency') if integrations + dependencies == [] and not csv: app.display(f"No size differences were detected between the selected commits for {platform}.") - - grouped_modules = group_modules(integrations + dependencies, platform, version, i) - grouped_modules.sort(key=lambda x: abs(cast(int, x['Size (Bytes)'])), reverse=True) - for module in grouped_modules: - if cast(int, module['Size (Bytes)']) > 0: - module['Size'] = f"+{module['Size']}" else: + grouped_modules = group_modules(integrations + dependencies, platform, version, i) + grouped_modules.sort(key=lambda x: abs(cast(int, x['Size_Bytes'])), reverse=True) + for module in grouped_modules: + if cast(int, module['Size_Bytes']) > 0: + module['Size'] = f"+{module['Size']}" if csv: print_csv(app, i, grouped_modules) - elif show_gui or save_to_png_path: + else: print_table(app, "Diff", grouped_modules) + + if show_gui or save_to_png_path: plot_treemap( grouped_modules, f"Disk Usage Differences for {platform} and Python version {version}", @@ -164,29 +175,27 @@ def diff_mode( "diff", save_to_png_path, ) - else: - print_table(app, "Diff", grouped_modules) def get_repo_info( gitRepo: GitRepo, platform: str, version: str, - before: str, - after: str, + first_commit: str, + second_commit: str, compressed: bool, progress: Progress, -) -> Tuple[Dict[str, int], Dict[str, int], Dict[str, int], Dict[str, int]]: +) -> Tuple[List[FileDataEntry], List[FileDataEntry], 
List[FileDataEntry], List[FileDataEntry]]: with progress: repo = gitRepo.repo_dir task = progress.add_task("[cyan]Calculating sizes for the first commit...", total=None) - gitRepo.checkout_commit(before) + gitRepo.checkout_commit(first_commit) files_b = get_files(repo, compressed) dependencies_b = get_dependencies(repo, platform, version, compressed) progress.remove_task(task) task = progress.add_task("[cyan]Calculating sizes for the second commit...", total=None) - gitRepo.checkout_commit(after) + gitRepo.checkout_commit(second_commit) files_a = get_files(repo, compressed) dependencies_a = get_dependencies(repo, platform, version, compressed) progress.remove_task(task) @@ -194,108 +203,54 @@ def get_repo_info( return files_b, dependencies_b, files_a, dependencies_a -def get_diff(size_before: Dict[str, int], size_after: Dict[str, int], type: str) -> List[Dict[str, str | int]]: - all_paths = set(size_before.keys()) | set(size_after.keys()) - diff_files = [] - - for path in all_paths: - size_b = size_before.get(path, 0) - size_a = size_after.get(path, 0) - size_delta = size_a - size_b - module = Path(path).parts[0] - if size_delta != 0: - if size_b == 0: - diff_files.append( - { - 'File Path': path, - 'Type': type, - 'Name': module + " (NEW)", - 'Size (Bytes)': size_delta, - } - ) - elif size_a == 0: - diff_files.append( - { - 'File Path': path, - 'Type': type, - 'Name': module + " (DELETED)", - 'Size (Bytes)': size_delta, - } - ) - else: - diff_files.append( - { - 'File Path': path, - 'Type': type, - 'Name': module, - 'Size (Bytes)': size_delta, - } - ) +def get_diff( + size_first_commit: List[FileDataEntry], size_second_commit: List[FileDataEntry], type: str +) -> List[FileDataEntry]: - return cast(List[Dict[str, str | int]], diff_files) + first_commit = {entry["Name"]: entry for entry in size_first_commit} + second_commit = {entry["Name"]: entry for entry in size_second_commit} + all_names = set(first_commit) | set(second_commit) + diffs: List[FileDataEntry] = [] 
-def get_files(repo_path: str, compressed: bool) -> Dict[str, int]: + for name in all_names: + b = first_commit.get(name) + a = second_commit.get(name) - ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} - git_ignore = get_gitignore_files(repo_path) - included_folder = "datadog_checks" + os.sep + size_b = b["Size_Bytes"] if b else 0 + size_a = a["Size_Bytes"] if a else 0 + delta = size_a - size_b - file_data = {} - for root, _, files in os.walk(repo_path): - for file in files: - file_path = os.path.join(root, file) + if delta == 0: + continue - # Convert the path to a relative format within the repo - relative_path = os.path.relpath(file_path, repo_path) + ver_b = b["Version"] if b else "" + ver_a = a["Version"] if a else "" - # Filter files - if is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): - size = compress(file_path) if compressed else os.path.getsize(file_path) - file_data[relative_path] = size - return file_data - - -def get_dependencies(repo_path: str, platform: str, version: str, compressed: bool) -> Dict[str, int]: - - resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved")) - - for filename in os.listdir(resolved_path): - file_path = os.path.join(resolved_path, filename) - - if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): - deps, download_urls = get_dependencies_list(file_path) - return get_dependencies_sizes(deps, download_urls, compressed) - return {} - - -def get_dependencies_sizes(deps: List[str], download_urls: List[str], compressed: bool) -> Dict[str, int]: - file_data = {} - for dep, url in zip(deps, download_urls, strict=False): - if compressed: - response = requests.head(url) - response.raise_for_status() - size_str = response.headers.get("Content-Length") - if size_str is None: - raise ValueError(f"Missing size for {dep}") - size = int(size_str) + if size_b == 0: + name_str = f"{name} (NEW)" + version_str = ver_a + elif size_a 
== 0: + name_str = f"{name} (DELETED)" + version_str = ver_b else: - with requests.get(url, stream=True) as response: - response.raise_for_status() - wheel_data = response.content - - with tempfile.TemporaryDirectory() as tmpdir: - wheel_path = Path(tmpdir) / "package.whl" - with open(wheel_path, "wb") as f: - f.write(wheel_data) - extract_path = Path(tmpdir) / "extracted" - with zipfile.ZipFile(wheel_path, 'r') as zip_ref: - zip_ref.extractall(extract_path) - - size = 0 - for dirpath, _, filenames in os.walk(extract_path): - for name in filenames: - file_path = os.path.join(dirpath, name) - size += os.path.getsize(file_path) - file_data[dep] = size - return file_data + name_str = name + version_str = f"{ver_b} -> {ver_a}" if ver_a != ver_b else ver_a + if a: + type = a["Type"] + elif b: + type = b["Type"] + else: + type = "" + + diffs.append( + { + "Name": name_str, + "Version": version_str, + "Type": type, + "Size_Bytes": delta, + "Size": convert_size(delta), + } + ) + + return diffs diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index 334ff7e264f14..cef211e254884 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -4,7 +4,7 @@ import os from pathlib import Path -from typing import Dict, List, Optional, Union, cast +from typing import Optional import click from rich.console import Console @@ -12,21 +12,15 @@ from ddev.cli.application import Application from .common import ( - compress, - get_dependencies_list, - get_dependencies_sizes, - get_gitignore_files, + get_dependencies, + get_files, group_modules, - is_correct_dependency, - is_valid_integration, plot_treemap, print_csv, print_table, valid_platforms_versions, ) -# REPO_PATH = Path(__file__).resolve().parents[5] - console = Console() @@ -50,7 +44,7 @@ def status( version: Optional[str], compressed: bool, csv: bool, - save_to_png_path: str, + save_to_png_path: Optional[str], show_gui: bool, ) -> None: """ @@ -87,13 +81,13 @@ def 
status_mode( compressed: bool, csv: bool, i: Optional[int], - save_to_png_path: str, + save_to_png_path: Optional[str], show_gui: bool, ) -> None: with console.status("[cyan]Calculating sizes...", spinner="dots"): - modules = get_files(compressed, repo_path) + get_dependencies(repo_path, platform, version, compressed) + modules = get_files(repo_path, compressed) + get_dependencies(repo_path, platform, version, compressed) grouped_modules = group_modules(modules, platform, version, i) - grouped_modules.sort(key=lambda x: x['Size (Bytes)'], reverse=True) + grouped_modules.sort(key=lambda x: x['Size_Bytes'], reverse=True) if csv: print_csv(app, i, grouped_modules) @@ -108,45 +102,3 @@ def status_mode( ) else: print_table(app, "Status", grouped_modules) - - -def get_files(compressed: bool, repo_path: Path) -> List[Dict[str, Union[str, int]]]: - - ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} - git_ignore = get_gitignore_files(repo_path) - included_folder = "datadog_checks" + os.sep - - file_data = [] - for root, _, files in os.walk(repo_path): - for file in files: - file_path = os.path.join(root, file) - - # Convert the path to a relative format within the repo - relative_path = os.path.relpath(file_path, repo_path) - - # Filter files - if is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): - size = compress(file_path) if compressed else os.path.getsize(file_path) - integration = relative_path.split(os.sep)[0] - file_data.append( - { - "File Path": relative_path, - "Type": "Integration", - "Name": integration, - "Size (Bytes)": int(size), - } - ) - return cast(List[Dict[str, Union[str, int]]], file_data) - - -def get_dependencies( - repo_path: Path, platform: str, version: str, compressed: bool -) -> List[Dict[str, Union[str, int]]]: - - resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved")) - for filename in os.listdir(resolved_path): - file_path = os.path.join(resolved_path, filename) 
- if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): - deps, download_urls = get_dependencies_list(file_path) - return get_dependencies_sizes(deps, download_urls, compressed) - return [] diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index 5f205eaddbd55..e0c3fc962d2e2 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -4,7 +4,7 @@ import zipfile from datetime import date, datetime from pathlib import Path -from typing import Dict, List, Optional, Set, Tuple, Union, cast +from typing import List, Optional, Set, Tuple import click import matplotlib.pyplot as plt @@ -15,10 +15,14 @@ from ddev.cli.application import Application from .common import ( + CommitEntry, + CommitEntryPlatformWithDelta, + CommitEntryWithDelta, GitRepo, WrongDependencyFormat, compress, convert_size, + extract_version_from_about_py, get_gitignore_files, is_correct_dependency, is_valid_integration, @@ -28,30 +32,35 @@ ) DEPENDENCY_FILE_CHANGE = datetime.strptime("Sep 17 2024", "%b %d %Y").date() -MINIMUM_DATE = datetime.strptime("Apr 3 2024", "%b %d %Y").date() +MINIMUM_DATE_DEPENDENCIES = datetime.strptime("Apr 3 2024", "%b %d %Y").date() +MINIMUM_DATE_INTEGRATIONS = datetime.strptime("Feb 1 2024", "%b %d %Y").date() console = Console() @click.command() -@click.argument('type', type=click.Choice(['integration', 'dependency'])) -@click.argument('name') -@click.argument('initial', required=False) -@click.argument('final', required=False) +@click.argument("type", type=click.Choice(["integration", "dependency"])) +@click.argument("name") +@click.argument("initial_commit", required=False) +@click.argument("final_commit", required=False) @click.option( - '--time', + "--time", help="Filter commits starting from a specific date. 
Accepts both absolute and relative formats, " "such as '2025-03-01', '2 weeks ago', or 'yesterday'", ) -@click.option('--threshold', help="Only show modules with size differences greater than a threshold in bytes") @click.option( - '--platform', + "--threshold", + type=click.IntRange(min=0), + help="Only show modules with size differences greater than a threshold in bytes", +) +@click.option( + "--platform", help="Target platform to analyze. Only required for dependencies. If not specified, all platforms will be analyzed", ) -@click.option('--compressed', is_flag=True, help="Measure compressed size") -@click.option('--csv', is_flag=True, help="Output results in CSV format") -@click.option('--save_to_png_path', help="Path to save the treemap as PNG") +@click.option("--compressed", is_flag=True, help="Measure compressed size") +@click.option("--csv", is_flag=True, help="Output results in CSV format") +@click.option("--save_to_png_path", help="Path to save the treemap as PNG") @click.option( - '--show_gui', + "--show_gui", is_flag=True, help="Display a pop-up window with a line chart showing the size evolution of the selected module over time.", ) @@ -60,10 +69,10 @@ def timeline( app: Application, type: str, name: str, - initial: Optional[str], - final: Optional[str], + initial_commit: Optional[str], + final_commit: Optional[str], time: Optional[str], - threshold: Optional[str], + threshold: Optional[int], platform: Optional[str], compressed: bool, csv: bool, @@ -81,24 +90,41 @@ def timeline( transient=True, ) as progress: module = name # module is the name of the integration or the dependency + if initial_commit and final_commit and len(initial_commit) < 7 and len(final_commit) < 7: + raise click.BadParameter("Commit hashes must be at least 7 characters long") + elif initial_commit and len(initial_commit) < 7: + raise click.BadParameter("Initial commit hash must be at least 7 characters long.", param_hint="initial") + elif final_commit and len(final_commit) < 7: + 
raise click.BadParameter("Final commit hash must be at least 7 characters long.", param_hint="final_commit") + task = progress.add_task("[cyan]Calculating timeline...", total=None) url = app.repo.path with GitRepo(url) as gitRepo: try: - # with console.status("[cyan]Fetching commits...", spinner="dots"): - folder = module if type == 'integration' else '.deps/resolved' - commits = gitRepo.get_module_commits(folder, initial, final, time) + folder = module if type == "integration" else ".deps/resolved" + commits = gitRepo.get_module_commits(folder, initial_commit, final_commit, time) first_commit = gitRepo.get_creation_commit_module(module) gitRepo.checkout_commit(commits[-1]) + date_str, _, _ = gitRepo.get_commit_metadata(commits[-1]) + date = datetime.strptime(date_str, "%b %d %Y").date() + if final_commit and ( + (type == "integration" and date < MINIMUM_DATE_INTEGRATIONS) + or (type == "dependency" and date < MINIMUM_DATE_DEPENDENCIES) + ): + raise ValueError( + f"Final commit must be after {MINIMUM_DATE_INTEGRATIONS.strftime('%b %d %Y')} " + "in case of Integrations " + f"and after {MINIMUM_DATE_DEPENDENCIES.strftime('%b %d %Y')} in case of Dependencies" + ) valid_platforms, _ = valid_platforms_versions(gitRepo.repo_dir) if platform and platform not in valid_platforms: raise ValueError(f"Invalid platform: {platform}") - elif commits == [''] and type == "integration" and module_exists(gitRepo.repo_dir, module): + elif commits == [""] and type == "integration" and module_exists(gitRepo.repo_dir, module): raise ValueError(f"No changes found: {module}") - elif commits == [''] and type == "integration" and not module_exists(gitRepo.repo_dir, module): + elif commits == [""] and type == "integration" and not module_exists(gitRepo.repo_dir, module): raise ValueError(f"Integration {module} not found in latest commit, is the name correct?") elif ( - type == 'dependency' + type == "dependency" and platform and module not in get_dependency_list(gitRepo.repo_dir, {platform}) ): @@ 
-107,16 +133,17 @@ def timeline( "is the name correct?" ) elif ( - type == 'dependency' + type == "dependency" and not platform and module not in get_dependency_list(gitRepo.repo_dir, valid_platforms) ): raise ValueError(f"Dependency {module} not found in latest commit, is the name correct?") - elif type == 'dependency' and commits == ['']: + elif type == "dependency" and commits == [""]: raise ValueError(f"No changes found: {module}") if type == "dependency" and platform is None: progress.remove_task(task) for i, plat in enumerate(valid_platforms): + path = save_to_png_path if save_to_png_path: base, ext = os.path.splitext(save_to_png_path) path = f"{base}_{plat}{ext}" @@ -158,6 +185,8 @@ def timeline( except Exception as e: progress.remove_task(task) + progress.stop() + app.abort(str(e)) @@ -167,7 +196,7 @@ def timeline_mode( type: str, module: str, commits: List[str], - threshold: Optional[str], + threshold: Optional[int], platform: Optional[str], compressed: bool, csv: bool, @@ -179,15 +208,14 @@ def timeline_mode( ) -> None: modules = get_repo_info(gitRepo, type, platform, module, commits, compressed, first_commit, progress) if modules != []: - grouped_modules = group_modules(modules, platform, i) - trimmed_modules = trim_modules(grouped_modules, threshold) + trimmed_modules = trim_modules(modules, threshold) + grouped_modules = group_modules(trimmed_modules, platform, i) if csv: - print_csv(app, i, trimmed_modules) - elif show_gui or save_to_png_path: - print_table(app, "Timeline for " + module, trimmed_modules) - plot_linegraph(trimmed_modules, module, platform, show_gui, save_to_png_path) + print_csv(app, i, grouped_modules) else: - print_table(app, "Timeline for " + module, trimmed_modules) + print_table(app, "Timeline for " + module, grouped_modules) + if show_gui or save_to_png_path: + plot_linegraph(grouped_modules, module, platform, show_gui, save_to_png_path) def get_repo_info( @@ -199,7 +227,7 @@ def get_repo_info( compressed: bool, first_commit: 
Optional[str], progress: Progress, -) -> List[Dict[str, Union[str, int, date]]]: +) -> List[CommitEntry]: with progress: if type == "integration": file_data = process_commits(commits, module, gitRepo, progress, platform, type, compressed, first_commit) @@ -217,24 +245,22 @@ def process_commits( type: str, compressed: bool, first_commit: Optional[str], -) -> List[Dict[str, Union[str, int, date]]]: - file_data = [] +) -> List[CommitEntry]: + file_data: List[CommitEntry] = [] task = progress.add_task("[cyan]Processing commits...", total=len(commits)) repo = gitRepo.repo_dir - folder = module if type == 'integration' else '.deps/resolved' + folder = module if type == "integration" else ".deps/resolved" for commit in commits: gitRepo.sparse_checkout_commit(commit, folder) date_str, author, message = gitRepo.get_commit_metadata(commit) date, message, commit = format_commit_data(date_str, message, commit, first_commit) - if type == 'dependency' and date < MINIMUM_DATE: - continue - elif type == 'dependency': + if type == "dependency": assert platform is not None result = get_dependencies(repo, module, platform, commit, date, author, message, compressed) if result: file_data.append(result) - elif type == 'integration': + elif type == "integration": file_data = get_files(repo, module, commit, date, author, message, file_data, compressed) progress.advance(task) progress.remove_task(task) @@ -249,41 +275,55 @@ def get_files( date: date, author: str, message: str, - file_data: List[Dict[str, Union[str, int, date]]], + file_data: List[CommitEntry], compressed: bool, -) -> List[Dict[str, Union[str, int, date]]]: +) -> List[CommitEntry]: + module_path = os.path.join(repo_path, module) + if not module_exists(repo_path, module): file_data.append( { - "Size (Bytes)": 0, + "Size_Bytes": 0, + "Version": "", "Date": date, "Author": author, - "Commit Message": "(DELETED) " + message, - "Commit SHA": commit, + "Commit_Message": f"(DELETED) {message}", + "Commit_SHA": commit, } ) return 
file_data ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} - git_ignore = get_gitignore_files(repo_path) included_folder = "datadog_checks/" - for root, _, files in os.walk(repo_path): + + total_size = 0 + version = "" + + for root, _, files in os.walk(module_path): for file in files: file_path = os.path.join(root, file) relative_path = os.path.relpath(file_path, repo_path) - if is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): - size = compress(file_path) if compressed else os.path.getsize(file_path) - file_data.append( - { - "Size (Bytes)": size, - "Date": date, - "Author": author, - "Commit Message": message, - "Commit SHA": commit, - } - ) + if not is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): + continue + + if file == "__about__.py" and "datadog_checks" in relative_path: + version = extract_version_from_about_py(file_path) + + size = compress(file_path) if compressed else os.path.getsize(file_path) + total_size += size + + file_data.append( + { + "Size_Bytes": total_size, + "Version": version, + "Date": date, + "Author": author, + "Commit_Message": message, + "Commit_SHA": commit, + } + ) return file_data @@ -296,21 +336,23 @@ def get_dependencies( author: str, message: str, compressed: bool, -) -> Optional[Dict[str, Union[str, int, date]]]: +) -> Optional[CommitEntry]: resolved_path = os.path.join(repo_path, ".deps/resolved") paths = os.listdir(resolved_path) version = get_version(paths, platform) for filename in paths: file_path = os.path.join(resolved_path, filename) if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): - download_url = get_dependency(file_path, module) + download_url, dep_version = get_dependency(file_path, module) return ( - get_dependency_size(download_url, commit, date, author, message, compressed) if download_url else None + get_dependency_size(download_url, dep_version, commit, date, author, message, compressed) + if 
download_url and dep_version is not None + else None ) return None -def get_dependency(file_path: str, module: str) -> Optional[str]: +def get_dependency(file_path: str, module: str) -> Tuple[Optional[str], Optional[str]]: with open(file_path, "r", encoding="utf-8") as file: file_content = file.read() for line in file_content.splitlines(): @@ -319,13 +361,15 @@ def get_dependency(file_path: str, module: str) -> Optional[str]: raise WrongDependencyFormat("The dependency format 'name @ link' is no longer supported.") name, url = match.groups() if name == module: - return url - return None + version_match = re.search(rf"{re.escape(name)}-([0-9]+(?:\.[0-9]+)*)-", url) + version = version_match.group(1) if version_match else "" + return url, version + return None, None def get_dependency_size( - download_url: str, commit: str, date: date, author: str, message: str, compressed: bool -) -> Dict[str, Union[str, int, date]]: + download_url: str, version: str, commit: str, date: date, author: str, message: str, compressed: bool +) -> CommitEntry: if compressed: response = requests.head(download_url) response.raise_for_status() @@ -343,7 +387,7 @@ def get_dependency_size( with open(wheel_path, "wb") as f: f.write(wheel_data) extract_path = Path(tmpdir) / "extracted" - with zipfile.ZipFile(wheel_path, 'r') as zip_ref: + with zipfile.ZipFile(wheel_path, "r") as zip_ref: zip_ref.extractall(extract_path) size = 0 @@ -352,83 +396,74 @@ def get_dependency_size( file_path = os.path.join(dirpath, name) size += os.path.getsize(file_path) - return {"Size (Bytes)": size, "Date": date, "Author": author, "Commit Message": message, "Commit SHA": commit} + commit_entry: CommitEntry = { + "Size_Bytes": size, + "Version": version, + "Date": date, + "Author": author, + "Commit_Message": message, + "Commit_SHA": commit, + } + return commit_entry def get_version(files: List[str], platform: str) -> str: - final_version = '' + final_version = "" for file in files: if platform in file: - 
curr_version = file.split('_')[-1] + curr_version = file.split("_")[-1] match = re.search(r"\d+(?:\.\d+)?", curr_version) version = match.group(0) if match else None if version and version > final_version: final_version = version - return final_version if len(final_version) != 1 else 'py' + final_version + return final_version if len(final_version) != 1 else "py" + final_version def group_modules( - modules: List[Dict[str, Union[str, int, date]]], platform: Optional[str], i: Optional[int] -) -> List[Dict[str, Union[str, int, date]]]: - grouped_aux: Dict[tuple[date, str, str, str], int] = {} - - for file in modules: - key = ( - cast(date, file['Date']), - cast(str, file['Author']), - cast(str, file['Commit Message']), - cast(str, file['Commit SHA']), - ) - grouped_aux[key] = grouped_aux.get(key, 0) + cast(int, file["Size (Bytes)"]) - if i is None: - return [ - { - "Commit SHA": commit, - "Size (Bytes)": size, - 'Size': convert_size(size), - 'Delta (Bytes)': 'N/A', - 'Delta': 'N/A', - "Date": date, - "Author": author, - "Commit Message": message, - } - for (date, author, message, commit), size in grouped_aux.items() - ] + modules: List[CommitEntryWithDelta], platform: Optional[str], i: Optional[int] +) -> List[CommitEntryWithDelta] | List[CommitEntryPlatformWithDelta]: + if i is not None and platform: + new_modules: List[CommitEntryPlatformWithDelta] = [{**entry, "Platform": platform} for entry in modules] + return new_modules else: - assert platform is not None - return [ - { - "Commit SHA": commit, - "Size (Bytes)": size, - 'Size': convert_size(size), - 'Delta (Bytes)': 'N/A', - 'Delta': 'N/A', - "Date": date, - "Author": author, - "Commit Message": message, - 'Platform': platform, - } - for (date, author, message, commit), size in grouped_aux.items() - ] + return modules def trim_modules( - modules: List[Dict[str, Union[str, int, date]]], threshold: Optional[str] = None -) -> List[Dict[str, Union[str, int, date]]]: - modules[0]['Delta (Bytes)'] = 0 - 
modules[0]['Delta'] = ' ' - trimmed_modules = [modules[0]] - threshold_value = int(threshold) if threshold else 0 - - for i in range(1, len(modules)): - prev = modules[i - 1] - curr = modules[i] - delta = cast(int, curr['Size (Bytes)']) - cast(int, prev['Size (Bytes)']) - - if abs(delta) > threshold_value or i == len(modules) - 1: - curr['Delta (Bytes)'] = delta - curr['Delta'] = convert_size(delta) - trimmed_modules.append(curr) + modules: List[CommitEntry], + threshold: Optional[int] = None, +) -> List[CommitEntryWithDelta]: + threshold = threshold or 0 + + trimmed_modules: List[CommitEntryWithDelta] = [] + + first: CommitEntryWithDelta = { + **modules[0], + "Delta_Bytes": 0, + "Delta": " ", + } + trimmed_modules.append(first) + + last_version = modules[0]["Version"] + + for j in range(1, len(modules)): + prev = modules[j - 1] + curr = modules[j] + delta = curr["Size_Bytes"] - prev["Size_Bytes"] + + if abs(delta) > threshold or j == len(modules) - 1: + new_entry: CommitEntryWithDelta = { + **curr, + "Delta_Bytes": delta, + "Delta": convert_size(delta), + } + + curr_version = curr["Version"] + if curr_version != "" and curr_version != last_version: + new_entry["Version"] = f"{last_version} -> {curr_version}" + last_version = curr_version + + trimmed_modules.append(new_entry) return trimmed_modules @@ -461,16 +496,22 @@ def get_dependency_list(path: str, platforms: Set[str]) -> Set[str]: return dependencies -def plot_linegraph(modules, module, platform, show, path): +def plot_linegraph( + modules: List[CommitEntryWithDelta] | List[CommitEntryPlatformWithDelta], + module: str, + platform: Optional[str], + show: bool, + path: Optional[str], +) -> None: dates = [entry["Date"] for entry in modules] - sizes = [entry["Size (Bytes)"] for entry in modules] + sizes = [entry["Size_Bytes"] for entry in modules] title = f"Disk Usage Evolution of {module} for {platform}" if platform else f"Disk Usage Evolution of {module}" plt.figure(figsize=(10, 6)) - plt.plot(dates, sizes, 
linestyle='-') + plt.plot(dates, sizes, linestyle="-") plt.title(title) plt.xlabel("Date") - plt.ylabel("Size (Bytes)") + plt.ylabel("Size_Bytes") plt.grid(True) plt.xticks(rotation=45) plt.tight_layout() diff --git a/ddev/tests/cli/size/test_common.py b/ddev/tests/cli/size/test_common.py index bed031f7e0260..486d8955bbcfc 100644 --- a/ddev/tests/cli/size/test_common.py +++ b/ddev/tests/cli/size/test_common.py @@ -1,11 +1,14 @@ import os +from pathlib import Path from unittest.mock import MagicMock, mock_open, patch from ddev.cli.size.common import ( compress, convert_size, + extract_version_from_about_py, get_dependencies_list, get_dependencies_sizes, + get_files, get_gitignore_files, group_modules, is_correct_dependency, @@ -70,14 +73,13 @@ def test_is_valid_integration(): def test_get_dependencies_list(): - file_content = ( - "dependency1 @ https://example.com/dependency1.whl\ndependency2 @ https://example.com/dependency2.whl" - ) + file_content = "dependency1 @ https://example.com/dependency1-1.1.1-.whl\ndependency2 @ https://example.com/dependency2-1.1.1-.whl" mock_open_obj = mock_open(read_data=file_content) with patch("builtins.open", mock_open_obj): - deps, urls = get_dependencies_list("fake_path") + deps, urls, versions = get_dependencies_list("fake_path") assert deps == ["dependency1", "dependency2"] - assert urls == ["https://example.com/dependency1.whl", "https://example.com/dependency2.whl"] + assert urls == ["https://example.com/dependency1-1.1.1-.whl", "https://example.com/dependency2-1.1.1-.whl"] + assert versions == ["1.1.1", "1.1.1"] def test_get_dependencies_sizes(): @@ -85,20 +87,23 @@ def test_get_dependencies_sizes(): mock_response.status_code = 200 mock_response.headers = {"Content-Length": "12345"} with patch("requests.head", return_value=mock_response): - file_data = get_dependencies_sizes(["dependency1"], ["https://example.com/dependency1.whl"], True) + file_data = get_dependencies_sizes(["dependency1"], 
["https://example.com/dependency1.whl"], ["1.1.1"], True) assert file_data == [ - {"File Path": "dependency1", "Type": "Dependency", "Name": "dependency1", "Size (Bytes)": 12345} + { + "Name": "dependency1", + "Version": "1.1.1", + "Size_Bytes": 12345, + "Size": convert_size(12345), + "Type": "Dependency", + } ] def test_group_modules(): modules = [ - {"Name": "module1", "Type": "A", "Size (Bytes)": 1500}, - {"Name": "module2", "Type": "B", "Size (Bytes)": 3000}, - {"Name": "module1", "Type": "A", "Size (Bytes)": 2500}, - {"Name": "module3", "Type": "A", "Size (Bytes)": 4000}, + {"Name": "module1", "Type": "A", "Size_Bytes": 1500}, + {"Name": "module2", "Type": "B", "Size_Bytes": 3000}, ] - platform = "linux-aarch64" version = "3.12" @@ -106,30 +111,70 @@ def test_group_modules(): { "Name": "module1", "Type": "A", - "Size (Bytes)": 4000, - "Size": "3.91 KB", + "Size_Bytes": 1500, "Platform": "linux-aarch64", - "Version": "3.12", + "Python_Version": "3.12", }, { "Name": "module2", "Type": "B", - "Size (Bytes)": 3000, - "Size": "2.93 KB", + "Size_Bytes": 3000, "Platform": "linux-aarch64", - "Version": "3.12", + "Python_Version": "3.12", }, + ] + + assert group_modules(modules, platform, version, 0) == expected_output + + +def test_get_files_grouped_and_with_versions(): + repo_path = Path("fake_repo") + + os_walk_output = [ + (repo_path / "integration1" / "datadog_checks", [], ["file1.py", "file2.py"]), + (repo_path / "integration2" / "datadog_checks", [], ["file3.py"]), + ] + + def mock_is_valid_integration(path, included_folder, ignored, ignored_files): + return True + + def mock_getsize(path): + file_sizes = { + repo_path / "integration1" / "datadog_checks" / "file1.py": 1000, + repo_path / "integration1" / "datadog_checks" / "file2.py": 2000, + repo_path / "integration2" / "datadog_checks" / "file3.py": 3000, + } + return file_sizes[Path(path)] + + with ( + patch("os.walk", return_value=[(str(p), dirs, files) for p, dirs, files in os_walk_output]), + 
patch("os.path.getsize", side_effect=mock_getsize), + patch("ddev.cli.size.common.get_gitignore_files", return_value=set()), + patch("ddev.cli.size.common.is_valid_integration", side_effect=mock_is_valid_integration), + patch("ddev.cli.size.common.extract_version_from_about_py", return_value="1.2.3"), + patch("ddev.cli.size.common.convert_size", side_effect=lambda s: f"{s / 1024:.2f} KB"), + ): + + result = get_files(repo_path, compressed=False) + + expected = [ { - "Name": "module3", - "Type": "A", - "Size (Bytes)": 4000, - "Size": "3.91 KB", - "Platform": "linux-aarch64", - "Version": "3.12", + "Name": "integration1", + "Version": "1.2.3", + "Size_Bytes": 3000, + "Size": "2.93 KB", + "Type": "Integration", + }, + { + "Name": "integration2", + "Version": "1.2.3", + "Size_Bytes": 3000, + "Size": "2.93 KB", + "Type": "Integration", }, ] - assert group_modules(modules, platform, version, 0) == expected_output + assert result == expected def test_get_gitignore_files(): @@ -171,3 +216,24 @@ def test_print_csv(): actual_calls = mock_app.display.call_args_list assert actual_calls == expected_calls + + +def test_extract_version_from_about_py_pathlib(): + # Use Path for cross-platform compatibility + fake_path = Path("some") / "module" / "__about__.py" + fake_content = "__version__ = '1.2.3'\n" + + with patch("builtins.open", mock_open(read_data=fake_content)): + version = extract_version_from_about_py(str(fake_path)) + + assert version == "1.2.3" + + +def test_extract_version_from_about_py_no_version_pathlib(): + fake_path = Path("another") / "module" / "__about__.py" + fake_content = "version = 'not_defined'\n" + + with patch("builtins.open", mock_open(read_data=fake_content)): + version = extract_version_from_about_py(str(fake_path)) + + assert version == "" diff --git a/ddev/tests/cli/size/test_diff.py b/ddev/tests/cli/size/test_diff.py index 038c4b227d9d1..c43b456be2ee9 100644 --- a/ddev/tests/cli/size/test_diff.py +++ b/ddev/tests/cli/size/test_diff.py @@ -3,17 
+3,19 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import os -from unittest.mock import MagicMock, mock_open, patch +from unittest.mock import MagicMock, patch import pytest -from ddev.cli.size.diff import get_dependencies, get_diff, get_files +from ddev.cli.size.common import convert_size +from ddev.cli.size.diff import get_diff def to_native_path(path: str) -> str: return path.replace("/", os.sep) +""" def test_get_compressed_files(): mock_repo_path = "root" @@ -88,42 +90,49 @@ def test_get_compressed_dependencies(terminal): "dependency2": 12345, } +""" + def test_get_diff(): - size_before = { - to_native_path("integration/foo.py"): 1000, - to_native_path("integration/bar.py"): 2000, - to_native_path("integration/deleted.py"): 1500, - } - size_after = { - to_native_path("integration/foo.py"): 1200, - to_native_path("integration/bar.py"): 2000, - to_native_path("integration/new.py"): 800, - } + size_before = [ + {"Name": "foo", "Version": "1.0.0", "Size_Bytes": 1000, "Type": "Integration"}, + {"Name": "bar", "Version": "2.0.0", "Size_Bytes": 2000, "Type": "Integration"}, + {"Name": "deleted", "Version": "3.0.0", "Size_Bytes": 1500, "Type": "Integration"}, + ] + + size_after = [ + {"Name": "foo", "Version": "1.1.0", "Size_Bytes": 1200, "Type": "Integration"}, + {"Name": "bar", "Version": "2.0.0", "Size_Bytes": 2000, "Type": "Integration"}, + {"Name": "new", "Version": "0.1.0", "Size_Bytes": 800, "Type": "Integration"}, + ] + + result = get_diff(size_before, size_after, "Integration") expected = [ { - "File Path": to_native_path("integration/foo.py"), + "Name": "deleted (DELETED)", + "Version": "3.0.0", "Type": "Integration", - "Name": "integration", - "Size (Bytes)": 200, + "Size_Bytes": -1500, + "Size": convert_size(-1500), }, { - "File Path": to_native_path("integration/deleted.py"), + "Name": "foo", + "Version": "1.0.0 -> 1.1.0", "Type": "Integration", - "Name": "integration (DELETED)", - "Size (Bytes)": -1500, + "Size_Bytes": 200, + "Size": 
convert_size(200), }, { - "File Path": to_native_path("integration/new.py"), + "Name": "new (NEW)", + "Version": "0.1.0", "Type": "Integration", - "Name": "integration (NEW)", - "Size (Bytes)": 800, + "Size_Bytes": 800, + "Size": convert_size(800), }, ] - result = get_diff(size_before, size_after, "Integration") - assert sorted(result, key=lambda x: x["File Path"]) == sorted(expected, key=lambda x: x["File Path"]) + assert sorted(result, key=lambda x: x["Name"]) == expected @pytest.fixture @@ -134,25 +143,31 @@ def mock_size_diff_dependencies(): def get_compressed_files_side_effect(_, __): get_compressed_files_side_effect.counter += 1 if get_compressed_files_side_effect.counter % 2 == 1: - return {"path1.py": 1000} # before + return [{"Name": "path1.py", "Version": "1.1.1", "Size_Bytes": 1000, "Type": "Integration"}] # before else: - return {"path1.py": 1200, "path2.py": 500} # after + return [ + {"Name": "path1.py", "Version": "1.1.2", "Size_Bytes": 1200, "Type": "Integration"}, + {"Name": "path2.py", "Version": "1.1.1", "Size_Bytes": 500, "Type": "Integration"}, + ] # after get_compressed_files_side_effect.counter = 0 def get_compressed_dependencies_side_effect(_, __, ___, ____): get_compressed_dependencies_side_effect.counter += 1 if get_compressed_dependencies_side_effect.counter % 2 == 1: - return {"dep1.whl": 2000} # before + return [{"Name": "dep1", "Version": "1.0.0", "Size_Bytes": 2000, "Type": "Dependency"}] # before else: - return {"dep1.whl": 2500, "dep2.whl": 1000} # after + return [ + {"Name": "dep1", "Version": "1.1.0", "Size_Bytes": 2500, "Type": "Dependency"}, + {"Name": "dep2", "Version": "1.0.0", "Size_Bytes": 1000, "Type": "Dependency"}, + ] # after get_compressed_dependencies_side_effect.counter = 0 with ( patch( "ddev.cli.size.diff.valid_platforms_versions", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}, 
{"3.12"}), ), patch("ddev.cli.size.diff.GitRepo.__enter__", return_value=mock_git_repo), patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None), @@ -160,7 +175,7 @@ def get_compressed_dependencies_side_effect(_, __, ___, ____): patch("tempfile.mkdtemp", return_value="fake_repo"), patch("ddev.cli.size.diff.get_files", side_effect=get_compressed_files_side_effect), patch("ddev.cli.size.diff.get_dependencies", side_effect=get_compressed_dependencies_side_effect), - patch("ddev.cli.size.common.group_modules", side_effect=lambda m, *_: m), + patch("ddev.cli.size.diff.group_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.common.print_csv"), patch("ddev.cli.size.common.print_table"), patch("ddev.cli.size.common.plot_treemap"), @@ -169,20 +184,20 @@ def get_compressed_dependencies_side_effect(_, __, ___, ____): def test_diff_no_args(ddev, mock_size_diff_dependencies): - result = ddev('size', 'diff', 'commit1', 'commit2', '--compressed') + result = ddev("size", "diff", "commit1", "commit2", "--compressed") assert result.exit_code == 0 def test_diff_with_platform_and_version(ddev, mock_size_diff_dependencies): result = ddev( - 'size', 'diff', 'commit1', 'commit2', '--platform', 'linux-aarch64', '--python', '3.12', '--compressed' + "size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--compressed" ) assert result.exit_code == 0 def test_diff_csv(ddev, mock_size_diff_dependencies): result = ddev( - 'size', 'diff', 'commit1', 'commit2', '--platform', 'linux-aarch64', '--python', '3.12', '--compressed', '--csv' + "size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--compressed", "--csv" ) assert result.exit_code == 0 @@ -193,11 +208,8 @@ def test_diff_no_differences(ddev): with ( patch("ddev.cli.size.diff.GitRepo.__enter__", return_value=fake_repo), - patch( - "ddev.cli.size.diff.valid_platforms_versions", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 
'windows-x86_64'}, {'3.12'}), - ), patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None), + patch("ddev.cli.size.diff.valid_platforms_versions", return_value=({"linux-aarch64"}, {"3.12"})), patch.object(fake_repo, "checkout_commit"), patch("tempfile.mkdtemp", return_value="fake_repo"), patch("os.path.exists", return_value=True), @@ -206,27 +218,26 @@ def test_diff_no_differences(ddev): patch("os.listdir", return_value=["linux-aarch64_3.12"]), patch( "ddev.cli.size.diff.get_files", - return_value={ - "path1.py": 1000, - "path2.py": 500, - }, + return_value=[ + {"Name": "path1.py", "Version": "1.0.0", "Size_Bytes": 1000}, + {"Name": "path2.py", "Version": "1.0.0", "Size_Bytes": 500}, + ], ), patch( "ddev.cli.size.diff.get_dependencies", - return_value={ - "dep1.whl": 2000, - "dep2.whl": 1000, - }, + return_value=[ + {"Name": "dep1.whl", "Version": "2.0.0", "Size_Bytes": 2000}, + {"Name": "dep2.whl", "Version": "2.0.0", "Size_Bytes": 1000}, + ], ), - patch("ddev.cli.size.common.group_modules", side_effect=lambda m, *_: m), ): result = ddev( - 'size', 'diff', 'commit1', 'commit2', '--platform', 'linux-aarch64', '--python', '3.12', '--compressed' + "size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--compressed" ) - print(result.output) - print(result.exit_code) - assert result.exit_code == 0 + print(result.output) + assert result.exit_code == 0, result.output + assert "No size differences were detected" in result.output def test_diff_invalid_platform(ddev): @@ -239,10 +250,10 @@ def test_diff_invalid_platform(ddev): patch("ddev.cli.size.diff.GitRepo", return_value=mock_git_repo), patch( "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}, {"3.12"}), ), ): - result = ddev('size', 'diff', 'commit1', 'commit2', '--platform', 'linux', '--python', 
'3.12', '--compressed') + result = ddev("size", "diff", "commit1", "commit2", "--platform", "linux", "--python", "3.12", "--compressed") assert result.exit_code != 0 @@ -257,19 +268,19 @@ def test_diff_invalid_version(ddev): patch("ddev.cli.size.diff.GitRepo", return_value=mock_git_repo), patch( "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}, {"3.12"}), ), ): result = ddev( - 'size', - 'diff', - 'commit1', - 'commit2', - '--platform', - 'linux-aarch64', - '--python', - '2.10', # invalid - '--compressed', + "size", + "diff", + "commit1", + "commit2", + "--platform", + "linux-aarch64", + "--python", + "2.10", # invalid + "--compressed", ) assert result.exit_code != 0 @@ -284,8 +295,8 @@ def test_diff_invalid_platform_and_version(ddev): patch("ddev.cli.size.diff.GitRepo", return_value=mock_git_repo), patch( "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}, {"3.12"}), ), ): - result = ddev('size', 'diff', 'commit1', 'commit2', '--platform', 'linux', '--python', '2.10', '--compressed') + result = ddev("size", "diff", "commit1", "commit2", "--platform", "linux", "--python", "2.10", "--compressed") assert result.exit_code != 0 diff --git a/ddev/tests/cli/size/test_status.py b/ddev/tests/cli/size/test_status.py index 967018f8b7c81..1a96e1fe37b6c 100644 --- a/ddev/tests/cli/size/test_status.py +++ b/ddev/tests/cli/size/test_status.py @@ -4,98 +4,15 @@ import os from pathlib import Path -from unittest.mock import MagicMock, mock_open, patch +from unittest.mock import MagicMock, patch import pytest -from ddev.cli.size.status import ( - get_dependencies, - get_files, -) - def to_native_path(path: str) -> str: return 
path.replace("/", os.sep) -def test_get_files_compressed(): - mock_files = [ - (os.path.join("root", "integration", "datadog_checks"), [], ["file1.py", "file2.py"]), - (os.path.join("root", "integration_b", "datadog_checks"), [], ["file3.py"]), - ("root", [], ["ignored.py"]), - ] - mock_repo_path = "root" - - def fake_compress(file_path): - return 1000 - - fake_gitignore = {"ignored.py"} - - with ( - patch("os.walk", return_value=mock_files), - patch("os.path.relpath", side_effect=lambda path, _: path.replace(f"root{os.sep}", "")), - patch("ddev.cli.size.status.get_gitignore_files", return_value=fake_gitignore), - patch( - "ddev.cli.size.status.is_valid_integration", - side_effect=lambda path, folder, ignored, git_ignore: path.startswith("integration"), - ), - patch("ddev.cli.size.status.compress", side_effect=fake_compress), - ): - result = get_files(True, mock_repo_path) - - expected = [ - { - "File Path": to_native_path("integration/datadog_checks/file1.py"), - "Type": "Integration", - "Name": "integration", - "Size (Bytes)": 1000, - }, - { - "File Path": to_native_path("integration/datadog_checks/file2.py"), - "Type": "Integration", - "Name": "integration", - "Size (Bytes)": 1000, - }, - { - "File Path": to_native_path("integration_b/datadog_checks/file3.py"), - "Type": "Integration", - "Name": "integration_b", - "Size (Bytes)": 1000, - }, - ] - - assert result == expected - - -def test_get_compressed_dependencies(): - platform = "windows-x86_64" - version = "3.12" - - fake_file_content = ( - "dependency1 @ https://example.com/dependency1.whl\ndependency2 @ https://example.com/dependency2.whl" - ) - - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.headers = {"Content-Length": "12345"} - mock_repo_path = "root" - - with ( - patch("os.path.exists", return_value=True), - patch("os.path.isdir", return_value=True), - patch("os.listdir", return_value=[f"{platform}-{version}"]), - patch("os.path.isfile", return_value=True), - 
patch("builtins.open", mock_open(read_data=fake_file_content)), - patch("requests.head", return_value=mock_response), - ): - file_data = get_dependencies(mock_repo_path, platform, version, True) - - assert file_data == [ - {"File Path": "dependency1", "Type": "Dependency", "Name": "dependency1", "Size (Bytes)": 12345}, - {"File Path": "dependency2", "Type": "Dependency", "Name": "dependency2", "Size (Bytes)": 12345}, - ] - - @pytest.fixture() def mock_size_status(): fake_repo_path = Path(os.path.join("fake_root")).resolve() @@ -105,24 +22,37 @@ def mock_size_status(): mock_app = MagicMock() mock_app.repo.path = fake_repo_path + fake_files = [ + { + "Name": "int1", + "Version": "1.1.1", + "Size_Bytes": 1234, + "Size": 100, + "Type": "Integration", + } + ] + + fake_deps = [ + { + "Name": "dep1", + "Version": "1.1.1", + "Size_Bytes": 5678, + "Size": 123, + "Type": "Dependency", + } + ] + with ( - patch("ddev.cli.size.status.get_gitignore_files", return_value=set()), + patch("ddev.cli.size.common.get_gitignore_files", return_value=set()), patch( "ddev.cli.size.status.valid_platforms_versions", return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}, {"3.12"}), ), - patch("ddev.cli.size.status.compress", return_value=1234), - patch( - "ddev.cli.size.status.get_dependencies_list", return_value=(["dep1"], {"dep1": "https://example.com/dep1"}) - ), - patch( - "ddev.cli.size.status.get_dependencies_sizes", - return_value=[{"File Path": "dep1.whl", "Type": "Dependency", "Name": "dep1", "Size (Bytes)": 5678}], - ), + patch("ddev.cli.size.common.get_files", return_value=fake_files), + patch("ddev.cli.size.common.get_dependencies", return_value=fake_deps), patch("os.path.relpath", side_effect=lambda path, _: path.replace(f"fake_root{os.sep}", "")), - patch("ddev.cli.size.status.is_valid_integration", return_value=True), - patch("ddev.cli.size.status.is_correct_dependency", return_value=True), patch("ddev.cli.size.status.print_csv"), + 
patch("ddev.cli.size.common.compress", return_value=1234), patch("ddev.cli.size.status.print_table"), patch("ddev.cli.size.status.plot_treemap"), patch("os.walk", return_value=mock_walk), diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 7e7f7abc163b8..fbb5f694d0f88 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -12,28 +12,30 @@ get_dependency_size, get_files, get_version, - group_modules, trim_modules, ) def test_get_compressed_files(): with ( - patch("os.walk", return_value=[(os.path.join("fake_repo", "int1"), [], ["int1.py"])]), - patch("os.path.relpath", return_value=os.path.join("int1", "int1.py")), + patch("os.walk", return_value=[(os.path.join("fake_repo", "datadog_checks"), [], ["__about__.py"])]), + patch("os.path.relpath", return_value=os.path.join("datadog_checks", "__about__.py")), patch("os.path.exists", return_value=True), patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), patch("ddev.cli.size.timeline.is_valid_integration", return_value=True), patch("ddev.cli.size.timeline.compress", return_value=1234), + patch("ddev.cli.size.timeline.extract_version_from_about_py", return_value='1.1.1'), ): result = get_files("fake_repo", "int1", "abc1234", datetime(2025, 4, 4).date(), "auth", "Added int1", [], True) + print(result) assert result == [ { - "Size (Bytes)": 1234, + "Size_Bytes": 1234, + "Version": '1.1.1', "Date": datetime(2025, 4, 4).date(), "Author": "auth", - "Commit Message": "Added int1", - "Commit SHA": "abc1234", + "Commit_Message": "Added int1", + "Commit_SHA": "abc1234", } ] @@ -56,11 +58,12 @@ def test_get_compressed_files_deleted_only(): assert file_data == [ { - "Size (Bytes)": 0, + "Size_Bytes": 0, + "Version": "", "Date": date, "Author": author, - "Commit Message": "(DELETED) " + message, - "Commit SHA": commit, + "Commit_Message": "(DELETED) " + message, + "Commit_SHA": commit, } ] @@ -85,76 +88,24 @@ def 
test_format_commit_data(): def test_trim_modules_keep_some_remove_some(): modules = [ - {"Size (Bytes)": 1000}, - {"Size (Bytes)": 1100}, # diff = 100 -> should be removed if threshold = 200 - {"Size (Bytes)": 1400}, # diff = 300 -> should be kept + {"Size_Bytes": 1000, "Version": "1.0.0"}, + {"Size_Bytes": 1100, "Version": "1.0.0"}, + {"Size_Bytes": 1400, "Version": "1.1.0"}, ] expected = [ - {"Size (Bytes)": 1000, "Delta (Bytes)": 0, "Delta": " "}, - {"Size (Bytes)": 1400, "Delta (Bytes)": 300, "Delta": "300 B"}, + {"Size_Bytes": 1000, "Delta (Bytes)": 0, "Delta": " ", "Version": "1.0.0"}, + {"Size_Bytes": 1400, "Delta (Bytes)": 300, "Delta": "300 B", "Version": "1.0.0 -> 1.1.0"}, ] trimmed = trim_modules(modules, threshold=200) assert trimmed == expected -def test_group_modules(): - modules = [ - { - "Size (Bytes)": 1000, - "Date": datetime(2025, 4, 4).date(), - "Author": "A", - "Commit Message": "msg", - "Commit SHA": "c1", - }, - { - "Size (Bytes)": 500, - "Date": datetime(2025, 4, 4).date(), - "Author": "A", - "Commit Message": "msg", - "Commit SHA": "c1", - }, - { - "Size (Bytes)": 1500, - "Date": datetime(2025, 4, 5).date(), - "Author": "A", - "Commit Message": "msg2", - "Commit SHA": "c2", - }, - ] - expected = [ - { - "Commit SHA": "c1", - "Size (Bytes)": 1500, - "Size": "1.46 KB", - "Delta (Bytes)": "N/A", - "Delta": "N/A", - "Date": datetime(2025, 4, 4).date(), - "Author": "A", - "Commit Message": "msg", - "Platform": "linux-x86_64", - }, - { - "Commit SHA": "c2", - "Size (Bytes)": 1500, - "Size": "1.46 KB", - "Delta (Bytes)": "N/A", - "Delta": "N/A", - "Date": datetime(2025, 4, 5).date(), - "Author": "A", - "Commit Message": "msg2", - "Platform": "linux-x86_64", - }, - ] - grouped = group_modules(modules, "linux-x86_64", 0) - assert grouped == expected - - def test_get_dependency(): - content = """dep1 @ https://example.com/dep1.whl -dep2 @ https://example.com/dep2.whl""" + content = """dep1 @ https://example.com/dep1-1.1.1-.whl +dep2 @ 
https://example.com/dep2-1.1.2-.whl""" with patch("builtins.open", mock_open(read_data=content)): - url = get_dependency(Path("some") / "path" / "file.txt", "dep2") - assert url == "https://example.com/dep2.whl" + url, version = get_dependency(Path("some") / "path" / "file.txt", "dep2") + assert (url, version) == ("https://example.com/dep2-1.1.2-.whl", "1.1.2") def make_mock_response(size): @@ -169,14 +120,21 @@ def test_get_dependency_size(): mock_response = make_mock_response("45678") with patch("requests.head", return_value=mock_response): info = get_dependency_size( - "https://example.com/file.whl", "abc1234", datetime(2025, 4, 4).date(), "auth", "Fixed bug", True + "https://example.com/file-1.1.1-.whl", + "1.1.1", + "abc1234", + datetime(2025, 4, 4).date(), + "auth", + "Fixed bug", + True, ) assert info == { - "Size (Bytes)": 45678, + "Size_Bytes": 45678, + "Version": "1.1.1", "Date": datetime(2025, 4, 4).date(), "Author": "auth", - "Commit Message": "Fixed bug", - "Commit SHA": "abc1234", + "Commit_Message": "Fixed bug", + "Commit_SHA": "abc1234", } @@ -186,18 +144,19 @@ def test_get_compressed_dependencies(): patch("os.path.isdir", return_value=True), patch("os.path.isfile", return_value=True), patch("os.listdir", return_value=["linux-x86_64_3.12.txt"]), - patch("ddev.cli.size.timeline.get_dependency", return_value="https://example.com/dep1.whl"), + patch("ddev.cli.size.timeline.get_dependency", return_value=("https://example.com/dep1.whl", '1.1.1')), patch("ddev.cli.size.timeline.requests.head", return_value=make_mock_response("12345")), ): result = get_dependencies( "fake_repo", "dep1", "linux-x86_64", "abc1234", datetime(2025, 4, 4).date(), "auth", "Added dep1", True ) assert result == { - "Size (Bytes)": 12345, + "Size_Bytes": 12345, + "Version": '1.1.1', "Date": datetime(2025, 4, 4).date(), "Author": "auth", - "Commit Message": "Added dep1", - "Commit SHA": "abc1234", + "Commit_Message": "Added dep1", + "Commit_SHA": "abc1234", } @@ -221,6 +180,7 @@ def 
mock_timeline_gitrepo(): patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.print_table"), patch("ddev.cli.size.timeline.print_csv"), + patch("ddev.cli.size.timeline.plot_linegraph"), patch("os.path.exists", return_value=True), patch("os.path.isdir", return_value=True), patch("os.path.isfile", return_value=True), @@ -262,7 +222,7 @@ def mock_timeline_dependencies(): patch("os.listdir", return_value=["linux-x86_64-3.12"]), patch("os.path.isfile", return_value=True), patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), - patch("ddev.cli.size.timeline.get_dependency", return_value="https://example.com/dep1.whl"), + patch("ddev.cli.size.timeline.get_dependency", return_value=("https://example.com/dep1.whl", '1.1.1)')), patch("ddev.cli.size.timeline.requests.head") as mock_head, patch("ddev.cli.size.timeline.group_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), @@ -328,6 +288,7 @@ def test_timeline_no_changes_in_integration(ddev): mock_git_repo = MagicMock() mock_git_repo.repo_dir = "fake_repo" mock_git_repo.get_module_commits.return_value = [""] + mock_git_repo.get_commit_metadata.return_value = ("Feb 1 2025", "", "") with ( patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_git_repo), @@ -335,8 +296,9 @@ def test_timeline_no_changes_in_integration(ddev): patch("os.path.exists", return_value=True), patch("os.path.isdir", return_value=True), patch("os.listdir", return_value=[]), + patch("ddev.cli.size.timeline.valid_platforms_versions", return_value=("", "")), ): - result = ddev("size", "timeline", "integration", "integration/foo", "commit1", "commit2", "--compressed") + result = ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--compressed") assert result.exit_code != 0 assert "No changes found" in result.output @@ -347,17 +309,19 @@ def test_timeline_integration_not_found(ddev): 
mock_repo.get_module_commits.return_value = [""] mock_repo.get_creation_commit_module.return_value = "c1" mock_repo.checkout_commit.return_value = None + mock_repo.get_commit_metadata.return_value = ("Feb 1 2025", "", "") with ( patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_repo), patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), + patch("ddev.cli.size.timeline.valid_platforms_versions", return_value=("", "")), patch( "ddev.cli.size.timeline.valid_platforms_versions", return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), ), patch("ddev.cli.size.timeline.module_exists", return_value=False), ): - result = ddev("size", "timeline", "integration", "missing_module", "c1", "c2") + result = ddev("size", "timeline", "integration", "missing_module", "c123456", "c2345667") assert result.exit_code != 0 assert "not found" in result.output @@ -368,6 +332,7 @@ def test_timeline_dependency_missing_no_platform(ddev): mock_repo.get_module_commits.return_value = ["c1"] mock_repo.get_creation_commit_module.return_value = "c1" mock_repo.checkout_commit.return_value = None + mock_repo.get_commit_metadata.return_value = ("Feb 1 2025", "", "") with ( patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_repo), @@ -375,7 +340,7 @@ def test_timeline_dependency_missing_no_platform(ddev): patch("ddev.cli.size.timeline.valid_platforms_versions", return_value=({"linux-x86_64"}, {"3.12"})), patch("ddev.cli.size.timeline.get_dependency_list", return_value=set()), ): - result = ddev("size", "timeline", "dependency", "missing_module", "c1", "c2") + result = ddev("size", "timeline", "dependency", "missing_module", "c123456", "c2345667") assert result.exit_code != 0 assert "Dependency missing_module not found in latest commit" in result.output @@ -386,6 +351,7 @@ def test_timeline_dependency_missing_for_platform(ddev, app): mock_repo.get_module_commits.return_value = ["c1"] 
mock_repo.get_creation_commit_module.return_value = "c1" mock_repo.checkout_commit.return_value = None + mock_repo.get_commit_metadata.return_value = ("Feb 1 2025", "", "") with ( patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_repo), @@ -399,8 +365,8 @@ def test_timeline_dependency_missing_for_platform(ddev, app): "timeline", "dependency", "missing_module", - "c1", - "c2", + "c123456", + "c2345667", "--platform", "linux-x86_64", ) @@ -418,6 +384,7 @@ def test_timeline_dependency_no_changes(ddev, app): mock_repo.get_module_commits.return_value = [""] mock_repo.get_creation_commit_module.return_value = "c1" mock_repo.checkout_commit.return_value = None + mock_repo.get_commit_metadata.return_value = ("Feb 1 2025", "", "") with ( patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_repo), @@ -431,8 +398,8 @@ def test_timeline_dependency_no_changes(ddev, app): "timeline", "dependency", "dep1", - "c1", - "c2", + "c123456", + "c2345667", "--platform", "linux-x86_64", obj=app, From 6c4390debc130f07c921de3ae682edc720cda3c8 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Fri, 25 Apr 2025 16:23:52 +0200 Subject: [PATCH 35/70] user errors and dep and int versions --- ddev/src/ddev/cli/size/diff.py | 4 ++-- ddev/tests/cli/size/test_timeline.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index ab0899f870e0c..e9e3a5826309f 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -73,8 +73,8 @@ def diff( raise click.BadParameter( "Second commit hash must be at least 7 characters long.", param_hint="second_commit" ) - # if first_commit == second_commit: - # raise click.BadParameter("Commit hashes must be different") + if first_commit == second_commit: + raise click.BadParameter("Commit hashes must be different") repo_url = app.repo.path with GitRepo(repo_url) as gitRepo: diff --git a/ddev/tests/cli/size/test_timeline.py 
b/ddev/tests/cli/size/test_timeline.py index fbb5f694d0f88..77fd9205b8ec2 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -93,8 +93,8 @@ def test_trim_modules_keep_some_remove_some(): {"Size_Bytes": 1400, "Version": "1.1.0"}, ] expected = [ - {"Size_Bytes": 1000, "Delta (Bytes)": 0, "Delta": " ", "Version": "1.0.0"}, - {"Size_Bytes": 1400, "Delta (Bytes)": 300, "Delta": "300 B", "Version": "1.0.0 -> 1.1.0"}, + {"Size_Bytes": 1000, "Delta_Bytes": 0, "Delta": " ", "Version": "1.0.0"}, + {"Size_Bytes": 1400, "Delta_Bytes": 300, "Delta": "300 B", "Version": "1.0.0 -> 1.1.0"}, ] trimmed = trim_modules(modules, threshold=200) assert trimmed == expected From 7f587e15e19d397df5d2e5a17bfa8c9104664ece Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Fri, 25 Apr 2025 18:45:38 +0200 Subject: [PATCH 36/70] user errors and dep and int versions --- ddev/src/ddev/cli/size/common.py | 42 ++++++++++++++++-------------- ddev/src/ddev/cli/size/diff.py | 34 ++++++++++++------------ ddev/src/ddev/cli/size/status.py | 20 +++++++------- ddev/src/ddev/cli/size/timeline.py | 10 +++---- ddev/tests/cli/size/test_common.py | 8 +++--- ddev/tests/cli/size/test_diff.py | 3 +++ 6 files changed, 63 insertions(+), 54 deletions(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 35413e37eac61..bb03b1b9dbd56 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -379,11 +379,8 @@ def get_files(repo_path: str | Path, compressed: bool) -> List[FileDataEntry]: size = compress(file_path) if compressed else os.path.getsize(file_path) integration_sizes[integration_name] = integration_sizes.get(integration_name, 0) + size - if integration_name not in integration_versions: - about_path = os.path.join( - repo_path, integration_name, "datadog_checks", integration_name, "__about__.py" - ) - version = extract_version_from_about_py(about_path) + if integration_name not in 
integration_versions and file == "__about__.py": + version = extract_version_from_about_py(file_path) integration_versions[integration_name] = version return [ @@ -526,18 +523,25 @@ def get_module_commits( ) -> List[str]: self._run("git fetch origin --quiet") self._run("git checkout origin/HEAD") - if time: - return self._run(f'git log --since="{time}" --reverse --pretty=format:%H -- {module_path}') - elif not initial and not final: - return self._run(f"git log --reverse --pretty=format:%H -- {module_path}") - elif not final: - return self._run(f"git log --reverse --pretty=format:%H {initial}..HEAD -- {module_path}") - else: - try: - self._run(f"git merge-base --is-ancestor {initial} {final}") - except subprocess.CalledProcessError: - raise ValueError(f"Commit {initial} does not come before {final}") - return self._run(f"git log --reverse --pretty=format:%H {initial}..{final} -- {module_path}") + try: + if time: + return self._run(f'git log --since="{time}" --reverse --pretty=format:%H -- {module_path}') + elif not initial and not final: + return self._run(f"git log --reverse --pretty=format:%H -- {module_path}") + elif not final: + return self._run(f"git log --reverse --pretty=format:%H {initial}..HEAD -- {module_path}") + else: + try: + self._run(f"git merge-base --is-ancestor {initial} {final}") + except subprocess.CalledProcessError: + raise ValueError(f"Commit {initial} does not come before {final}") + return self._run(f"git log --reverse --pretty=format:%H {initial}..{final} -- {module_path}") + except subprocess.CalledProcessError as e: + raise ValueError( + "Failed to retrieve commit history.\n" + "Make sure that the provided commits are correct and that your local repository is up to" + "date with the remote" + ) from e def checkout_commit(self, commit: str) -> None: try: @@ -546,8 +550,8 @@ def checkout_commit(self, commit: str) -> None: if e.returncode == 128: raise ValueError( f"Failed to fetch commit '{commit}'.\n" - f"Make sure the commit hash is 
correct and that your local repository " - "is up to date with the remote.\n" + f"Make sure the provided commit hash is correct and that your local repository " + "is up to date with the remote\n" ) from e self._run(f"git checkout --quiet {commit}") diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index e9e3a5826309f..44ecfeb07adfb 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -3,6 +3,7 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import os +from datetime import datetime from typing import List, Optional, Tuple, cast import click @@ -25,20 +26,21 @@ ) console = Console() +MINIMUM_DATE = datetime.strptime("Sep 17 2024", "%b %d %Y").date() @click.command() @click.argument("first_commit") @click.argument("second_commit") @click.option( - '--platform', help="Target platform (e.g. linux-aarch64). If not specified, all platforms will be analyzed" + "--platform", help="Target platform (e.g. linux-aarch64). If not specified, all platforms will be analyzed" ) -@click.option('--python', 'version', help="Python version (e.g 3.12). If not specified, all versions will be analyzed") -@click.option('--compressed', is_flag=True, help="Measure compressed size") -@click.option('--csv', is_flag=True, help="Output in CSV format") -@click.option('--save_to_png_path', help="Path to save the treemap as PNG") +@click.option("--python", "version", help="Python version (e.g 3.12). 
If not specified, all versions will be analyzed") +@click.option("--compressed", is_flag=True, help="Measure compressed size") +@click.option("--csv", is_flag=True, help="Output in CSV format") +@click.option("--save_to_png_path", help="Path to save the treemap as PNG") @click.option( - '--show_gui', + "--show_gui", is_flag=True, help="Display a pop-up window with a treemap showing size differences between the two commits.", ) @@ -79,6 +81,10 @@ def diff( repo_url = app.repo.path with GitRepo(repo_url) as gitRepo: try: + date_str, _, _ = gitRepo.get_commit_metadata(first_commit) + date = datetime.strptime(date_str, "%b %d %Y").date() + if date < MINIMUM_DATE: + raise ValueError(f"First commit must be after {MINIMUM_DATE.strftime('%b %d %Y')} ") valid_platforms, valid_versions = valid_platforms_versions(gitRepo.repo_dir) if platform and platform not in valid_platforms: raise ValueError(f"Invalid platform: {platform}") @@ -127,10 +133,7 @@ def diff( ) except Exception as e: - if progress and progress.tasks: - progress.remove_task(task) - progress.stop() - + progress.stop() app.abort(str(e)) @@ -152,16 +155,16 @@ def diff_mode( gitRepo, platform, version, first_commit, second_commit, compressed, progress ) - integrations = get_diff(files_b, files_a, 'Integration') - dependencies = get_diff(dependencies_b, dependencies_a, 'Dependency') + integrations = get_diff(files_b, files_a, "Integration") + dependencies = get_diff(dependencies_b, dependencies_a, "Dependency") if integrations + dependencies == [] and not csv: app.display(f"No size differences were detected between the selected commits for {platform}.") else: grouped_modules = group_modules(integrations + dependencies, platform, version, i) - grouped_modules.sort(key=lambda x: abs(cast(int, x['Size_Bytes'])), reverse=True) + grouped_modules.sort(key=lambda x: abs(cast(int, x["Size_Bytes"])), reverse=True) for module in grouped_modules: - if cast(int, module['Size_Bytes']) > 0: - module['Size'] = f"+{module['Size']}" 
+ if cast(int, module["Size_Bytes"]) > 0: + module["Size"] = f"+{module['Size']}" if csv: print_csv(app, i, grouped_modules) else: @@ -206,7 +209,6 @@ def get_repo_info( def get_diff( size_first_commit: List[FileDataEntry], size_second_commit: List[FileDataEntry], type: str ) -> List[FileDataEntry]: - first_commit = {entry["Name"]: entry for entry in size_first_commit} second_commit = {entry["Name"]: entry for entry in size_second_commit} diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index cef211e254884..2892f7e60b191 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -26,14 +26,14 @@ @click.command() @click.option( - '--platform', help="Target platform (e.g. linux-aarch64). If not specified, all platforms will be analyzed" + "--platform", help="Target platform (e.g. linux-aarch64). If not specified, all platforms will be analyzed" ) -@click.option('--python', 'version', help="Python version (e.g 3.12). If not specified, all versions will be analyzed") -@click.option('--compressed', is_flag=True, help="Measure compressed size") -@click.option('--csv', is_flag=True, help="Output in CSV format") -@click.option('--save_to_png_path', help="Path to save the treemap as PNG") +@click.option("--python", "version", help="Python version (e.g 3.12). 
If not specified, all versions will be analyzed") +@click.option("--compressed", is_flag=True, help="Measure compressed size") +@click.option("--csv", is_flag=True, help="Output in CSV format") +@click.option("--save_to_png_path", help="Path to save the treemap as PNG") @click.option( - '--show_gui', + "--show_gui", is_flag=True, help="Display a pop-up window with a treemap showing the current size distribution of modules.", ) @@ -87,12 +87,14 @@ def status_mode( with console.status("[cyan]Calculating sizes...", spinner="dots"): modules = get_files(repo_path, compressed) + get_dependencies(repo_path, platform, version, compressed) grouped_modules = group_modules(modules, platform, version, i) - grouped_modules.sort(key=lambda x: x['Size_Bytes'], reverse=True) + grouped_modules.sort(key=lambda x: x["Size_Bytes"], reverse=True) if csv: print_csv(app, i, grouped_modules) - elif show_gui or save_to_png_path: + else: print_table(app, "Status", grouped_modules) + + if show_gui or save_to_png_path: plot_treemap( grouped_modules, f"Disk Usage Status for {platform} and Python version {version}", @@ -100,5 +102,3 @@ def status_mode( "status", save_to_png_path, ) - else: - print_table(app, "Status", grouped_modules) diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index e0c3fc962d2e2..75401f244bf6c 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -96,7 +96,8 @@ def timeline( raise click.BadParameter("Initial commit hash must be at least 7 characters long.", param_hint="initial") elif final_commit and len(final_commit) < 7: raise click.BadParameter("Final commit hash must be at least 7 characters long.", param_hint="final") - + elif final_commit and initial_commit and final_commit == initial_commit: + raise click.BadParameter("Commit hashes must be different") task = progress.add_task("[cyan]Calculating timeline...", total=None) url = app.repo.path with GitRepo(url) as gitRepo: @@ -114,13 +115,13 @@ 
def timeline( raise ValueError( f"Final commit must be after {MINIMUM_DATE_INTEGRATIONS.strftime('%b %d %Y')} " "in case of Integrations " - "and after {MINIMUM_DATE_DEPENDENCIES.strftime('%b %d %Y')} in case of Dependencies" + f"and after {MINIMUM_DATE_DEPENDENCIES.strftime('%b %d %Y')} in case of Dependencies" ) valid_platforms, _ = valid_platforms_versions(gitRepo.repo_dir) if platform and platform not in valid_platforms: raise ValueError(f"Invalid platform: {platform}") elif commits == [""] and type == "integration" and module_exists(gitRepo.repo_dir, module): - raise ValueError(f"No changes found: {module}") + raise ValueError(f"No changes found for {type}: {module}") elif commits == [""] and type == "integration" and not module_exists(gitRepo.repo_dir, module): raise ValueError(f"Integration {module} not found in latest commit, is the name correct?") elif ( @@ -139,7 +140,7 @@ def timeline( ): raise ValueError(f"Dependency {module} not found in latest commit, is the name correct?") elif type == "dependency" and commits == [""]: - raise ValueError(f"No changes found: {module}") + raise ValueError(f"No changes found for {type}: {module}") if type == "dependency" and platform is None: progress.remove_task(task) for i, plat in enumerate(valid_platforms): @@ -184,7 +185,6 @@ def timeline( ) except Exception as e: - progress.remove_task(task) progress.stop() app.abort(str(e)) diff --git a/ddev/tests/cli/size/test_common.py b/ddev/tests/cli/size/test_common.py index 486d8955bbcfc..285bf77ce66ab 100644 --- a/ddev/tests/cli/size/test_common.py +++ b/ddev/tests/cli/size/test_common.py @@ -131,8 +131,8 @@ def test_get_files_grouped_and_with_versions(): repo_path = Path("fake_repo") os_walk_output = [ - (repo_path / "integration1" / "datadog_checks", [], ["file1.py", "file2.py"]), - (repo_path / "integration2" / "datadog_checks", [], ["file3.py"]), + (repo_path / "integration1" / "datadog_checks", [], ["__about__.py", "file2.py"]), + (repo_path / "integration2" / 
"datadog_checks", [], ["__about__.py"]), ] def mock_is_valid_integration(path, included_folder, ignored, ignored_files): @@ -140,9 +140,9 @@ def mock_is_valid_integration(path, included_folder, ignored, ignored_files): def mock_getsize(path): file_sizes = { - repo_path / "integration1" / "datadog_checks" / "file1.py": 1000, repo_path / "integration1" / "datadog_checks" / "file2.py": 2000, - repo_path / "integration2" / "datadog_checks" / "file3.py": 3000, + repo_path / "integration1" / "datadog_checks" / "__about__.py": 1000, + repo_path / "integration2" / "datadog_checks" / "__about__.py": 3000, } return file_sizes[Path(path)] diff --git a/ddev/tests/cli/size/test_diff.py b/ddev/tests/cli/size/test_diff.py index c43b456be2ee9..cecb58bc6cad9 100644 --- a/ddev/tests/cli/size/test_diff.py +++ b/ddev/tests/cli/size/test_diff.py @@ -139,6 +139,7 @@ def test_get_diff(): def mock_size_diff_dependencies(): mock_git_repo = MagicMock() mock_git_repo.repo_dir = "fake_repo" + mock_git_repo.get_commit_metadata.return_value = ("Feb 1 2025", "", "") def get_compressed_files_side_effect(_, __): get_compressed_files_side_effect.counter += 1 @@ -185,6 +186,7 @@ def get_compressed_dependencies_side_effect(_, __, ___, ____): def test_diff_no_args(ddev, mock_size_diff_dependencies): result = ddev("size", "diff", "commit1", "commit2", "--compressed") + print(result.output) assert result.exit_code == 0 @@ -205,6 +207,7 @@ def test_diff_csv(ddev, mock_size_diff_dependencies): def test_diff_no_differences(ddev): fake_repo = MagicMock() fake_repo.repo_dir = "fake_repo" + fake_repo.get_commit_metadata.return_value = ("Feb 1 2025", "", "") with ( patch("ddev.cli.size.diff.GitRepo.__enter__", return_value=fake_repo), From d556d043105d8485534a64d031407df9dd97d3ff Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 28 Apr 2025 09:12:19 +0200 Subject: [PATCH 37/70] fix timeline error --- ddev/src/ddev/cli/size/timeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index 75401f244bf6c..2082a4e0df2f5 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -255,7 +255,7 @@ def process_commits( gitRepo.sparse_checkout_commit(commit, folder) date_str, author, message = gitRepo.get_commit_metadata(commit) date, message, commit = format_commit_data(date_str, message, commit, first_commit) - if type == "dependency": + if type == "dependency" and date > MINIMUM_DATE_DEPENDENCIES: assert platform is not None result = get_dependencies(repo, module, platform, commit, date, author, message, compressed) if result: From 91b68c9758aa455149da4b3910c1f7693571535f Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 5 May 2025 10:28:17 +0200 Subject: [PATCH 38/70] Adding version, and json and markdown formats --- .github/workflows/measure-disk-usage.yml | 11 +- .github/workflows/slapr.yml | 75 ++---- ddev/src/ddev/cli/size/common.py | 309 ++++++++++++++++++++--- ddev/src/ddev/cli/size/diff.py | 109 ++++++-- ddev/src/ddev/cli/size/status.py | 55 +++- ddev/src/ddev/cli/size/timeline.py | 307 +++++++++++++++++++--- ddev/tests/cli/size/test_diff.py | 167 +++--------- ddev/tests/cli/size/test_status.py | 48 +++- ddev/tests/cli/size/test_timeline.py | 207 +++------------ ddev/tests/size/__init__.py | 3 + ddev/tests/{cli => }/size/test_common.py | 164 ++++++++++-- ddev/tests/size/test_diff.py | 54 ++++ ddev/tests/size/test_timeline.py | 158 ++++++++++++ 13 files changed, 1169 insertions(+), 498 deletions(-) create mode 100644 ddev/tests/size/__init__.py rename ddev/tests/{cli => }/size/test_common.py (63%) create mode 100644 ddev/tests/size/test_diff.py create mode 100644 ddev/tests/size/test_timeline.py diff --git a/.github/workflows/measure-disk-usage.yml b/.github/workflows/measure-disk-usage.yml index b7596e218972a..55d332586c5d9 100644 --- a/.github/workflows/measure-disk-usage.yml +++ b/.github/workflows/measure-disk-usage.yml 
@@ -51,7 +51,7 @@ jobs: - name: Measure disk usage differences from last commit (uncompressed) - if: false # Disabled: size difference is not accurate due to dependency sizes not updated + if: false # Disabled for now: size difference can be misleading due to dependencies not being built in the same PR run: | mkdir -p diff_visualizations BEFORE=$(git rev-parse HEAD^) @@ -65,7 +65,7 @@ jobs: echo '```' >> $GITHUB_STEP_SUMMARY - name: Measure disk usage differences from last commit (compressed) - if: false # Disabled: size difference is not accurate due to dependency sizes not updated + if: false # Disabled for now: size difference can be misleading due to dependencies not being built in the same PR run: | mkdir -p diff_visualizations BEFORE=$(git rev-parse HEAD^) @@ -93,7 +93,7 @@ jobs: if-no-files-found: error - name: Upload file sizes diff (uncompressed) - if: false # Disabled: size difference is not accurate due to dependency sizes not updated + if: false # DDisabled for now: size difference can be misleading due to dependencies not being built in the same PR uses: actions/upload-artifact@v4 with: name: diff-uncompressed.csv @@ -101,7 +101,7 @@ jobs: if-no-files-found: error - name: Upload file sizes diff (compressed) - if: false # Disabled: size difference is not accurate due to dependency sizes not updated + if: false # Disabled for now: size difference can be misleading due to dependencies not being built in the same PR uses: actions/upload-artifact@v4 with: name: diff-compressed.csv @@ -116,8 +116,7 @@ jobs: if-no-files-found: error - name: Upload diff PNGs - if: false # Disabled: size difference is not accurate due to dependency sizes not updated - uses: actions/upload-artifact@v4 + if: false # Disabled for now: size difference can be misleading due to dependencies not being built in the same PR with: name: diff-visuals path: diff_visualizations/ diff --git a/.github/workflows/slapr.yml b/.github/workflows/slapr.yml index 1ea4befc58971..dbec90dedbd4c 100644 
--- a/.github/workflows/slapr.yml +++ b/.github/workflows/slapr.yml @@ -1,57 +1,30 @@ -name: Measure Disk Usage +# https://github.com/DataDog/slapr +name: Slack emoji PR updates on: + pull_request_review: + types: [submitted] pull_request: - branches: - - master -env: - PYTHON_VERSION: "3.12" + types: [closed] jobs: - measure-disk-usage: - runs-on: ubuntu-22.04 + run_slapr_agent_integrations: + runs-on: ubuntu-latest + strategy: + matrix: + slack_channel_variable: + - SLACK_CHANNEL_ID + - SLACK_CHANNEL_ID_AGENT_INTEGRATIONS_REVIEWS + - SLACK_CHANNEL_ID_INFRA_INTEGRATIONS steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - name: Set up Python ${{ env.PYTHON_VERSION }} - uses: actions/setup-python@v5 - with: - python-version: ${{ env.PYTHON_VERSION }} - - name: Install ddev - run: | - pip install -e ./datadog_checks_dev[cli] - pip install -e ./ddev - - - name: Configure ddev - run: | - ddev config set repos.core . - ddev config set repo core - - name: Measure disk usage (uncompressed) - run: | - ddev size status --csv > size-uncompressed.csv - ddev size status - echo "# Size (uncompressed)" >> $GITHUB_STEP_SUMMARY - echo '```' >> $GITHUB_STEP_SUMMARY - ddev size status >> $GITHUB_STEP_SUMMARY - echo '```' >> $GITHUB_STEP_SUMMARY - - name: Measure disk usage (compressed) - run: | - ddev size status --csv --compressed > size-compressed.csv - ddev size status --compressed - echo "# Size (compressed)" >> $GITHUB_STEP_SUMMARY - echo '```' >> $GITHUB_STEP_SUMMARY - ddev size status --compressed >> $GITHUB_STEP_SUMMARY - echo '```' >> $GITHUB_STEP_SUMMARY - - name: Upload file sizes (uncompressed) - uses: actions/upload-artifact@v4 - with: - name: size-uncompressed.csv - path: size-uncompressed.csv - if-no-files-found: error - - name: Upload file sizes (compressed) - uses: actions/upload-artifact@v4 - with: - name: size-compressed.csv - path: size-compressed.csv - if-no-files-found: error + - uses: DataDog/slapr@master + env: + GITHUB_TOKEN: "${{ 
secrets.GITHUB_TOKEN }}" + SLACK_CHANNEL_ID: "${{ secrets[matrix.slack_channel_variable] }}" + SLACK_API_TOKEN: "${{ secrets.SLACK_API_TOKEN }}" + SLAPR_BOT_USER_ID: "${{ secrets.SLAPR_BOT_USER_ID }}" + SLAPR_EMOJI_REVIEW_STARTED: "review_started" + SLAPR_EMOJI_APPROVED: "approved2" + SLAPR_EMOJI_CHANGES_REQUESTED: "changes_requested" + SLAPR_EMOJI_MERGED: "merged" + SLAPR_EMOJI_CLOSED: "closed" \ No newline at end of file diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index bb03b1b9dbd56..8212b8b385b40 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -1,6 +1,7 @@ # (C) Datadog, Inc. 2022-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +import json import os import re import shutil @@ -21,51 +22,71 @@ from ddev.cli.application import Application +''' + Custom typed dictionaries +''' + class FileDataEntry(TypedDict): - Name: str - Version: str - Size_Bytes: int - Size: str - Type: str + Name: str # Integration/Dependency name + Version: str # Version of the Integration/Dependency + Size_Bytes: int # Size in bytes + Size: str # Human-readable size + Type: str # Integration/Dependency class FileDataEntryPlatformVersion(FileDataEntry): - Platform: str - Python_Version: str + Platform: str # Target platform (e.g. linux-aarch64) + Python_Version: str # Target Python version (e.g. 
3.12) class CommitEntry(TypedDict): - Size_Bytes: int - Version: str - Date: date - Author: str - Commit_Message: str - Commit_SHA: str + Size_Bytes: int # Total size in bytes at commit + Version: str # Version of the Integration/Dependency at commit + Date: date # Commit date + Author: str # Commit author + Commit_Message: str # Commit message + Commit_SHA: str # Commit SHA hash class CommitEntryWithDelta(CommitEntry): - Delta_Bytes: int - Delta: str + Delta_Bytes: int # Size change in bytes compared to previous commit + Delta: str # Human-readable size change class CommitEntryPlatformWithDelta(CommitEntryWithDelta): - Platform: str + Platform: str # Target platform (e.g. linux-aarch64) + +def get_valid_platforms(repo_path: Union[Path, str]) -> Set[str]: + """ + Extracts the platforms we support from the .deps/resolved file names. + """ -def valid_platforms_versions(repo_path: Union[Path, str]) -> Tuple[Set[str], Set[str]]: resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved")) platforms = [] - versions = [] for file in os.listdir(resolved_path): platforms.append("_".join(file.split("_")[:-1])) + return set(platforms) + + +def get_valid_versions(repo_path: Union[Path, str]) -> Set[str]: + """ + Extracts the Python versions we support from the .deps/resolved file names. 
+ """ + resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved")) + versions = [] + for file in os.listdir(resolved_path): match = re.search(r"\d+\.\d+", file) if match: versions.append(match.group()) - return set(platforms), set(versions) + return set(versions) -def convert_size(size_bytes: float) -> str: +def convert_to_human_readable_size(size_bytes: float) -> str: + """ + Converts a size in bytes into a human-readable string (B, KB, MB, GB, or TB) + """ for unit in [" B", " KB", " MB", " GB"]: if abs(size_bytes) < 1024: return str(round(size_bytes, 2)) + unit @@ -74,6 +95,18 @@ def convert_size(size_bytes: float) -> str: def is_valid_integration(path: str, included_folder: str, ignored_files: Set[str], git_ignore: List[str]) -> bool: + """ + Determines whether a given file path corresponds to a valid integration file. + + Args: + path: The file path to check. + included_folder: Required subfolder (e.g. 'datadog_checks') that marks valid integrations. + ignored_files: Set of filenames or patterns to exclude. + git_ignore: List of .gitignore patterns to exclude. + + Returns: + True if the file should be considered part of a valid integration, False otherwise. + """ # It is not an integration if path.startswith("."): return False @@ -91,9 +124,56 @@ def is_valid_integration(path: str, included_folder: str, ignored_files: Set[str def is_correct_dependency(platform: str, version: str, name: str) -> bool: + """ + Checks whether a dependency filename matches a given platform and Python version. + """ + return platform in name and version in name +def print_json( + app: Application, + i: Optional[int], + n_iterations: Optional[int], + printed_yet: bool, + modules: ( + List[FileDataEntry] + | List[FileDataEntryPlatformVersion] + | List[CommitEntryWithDelta] + | List[CommitEntryPlatformWithDelta] + ), +) -> None: + """ + Prints a list of data entries as part of a JSON array. 
+ + This function is designed to be called multiple times, and ensures that: + - The opening bracket "[" is printed only once at the start (when i is None or 0). + - Each valid entry is printed on a separate line using JSON format. + - Commas are inserted appropriately between entries, but not before the first one. + - The closing bracket "]" is printed only at the final call (when i == n_iterations - 1). + + Args: + app: Application instance used to display output. + i: Index of the current batch of data being printed. If None or 0, this is the first chunk. + n_iterations: Total number of iterations (chunks). Used to detect the last chunk. + printed_yet: Whether at least one entry has already been printed before this call. + modules: List of dictionaries to print. Only non-empty entries are included. + """ + + if not i: + app.display("[") + + for idx, row in enumerate(modules): + if any(str(value).strip() not in ("", "0", "0001-01-01") for value in row.values()): + if printed_yet or (i != 0 and idx != 0): + app.display(",") + app.display(json.dumps(row, default=str)) + printed_yet = True + + if not n_iterations or i == n_iterations - 1: + app.display("]") + + def print_csv( app: Application, i: Optional[int], @@ -104,19 +184,57 @@ def print_csv( | List[CommitEntryPlatformWithDelta] ), ) -> None: + """ + Prints a list of data entries in CSV format. + + This function is designed to be called multiple times, and ensures that: + - The headers are printed only once at the start (when i is None or 0). + - Each valid entry is printed on a separate line using CSV format. + Args: + app: Application instance used to display output. + i: Index of the current batch of data being printed. If None or 0, this is the first chunk. + modules: List of dictionaries to print. Only non-empty entries are included. 
+ """ headers = [k for k in modules[0].keys() if k not in ["Size", "Delta"]] if not i: app.display(",".join(headers)) for row in modules: - if any(str(value).strip() not in ("", "0") for value in row.values()): + if any(str(value).strip() not in ("", "0", "0001-01-01") for value in row.values()): app.display(",".join(format(str(row.get(h, ""))) for h in headers)) def format(s: str) -> str: + """ + Adds brackets to a value if it has a comma inside for the CSV + """ return f'"{s}"' if "," in s else s +def print_markdown( + app: Application, + title: str, + modules: ( + List[FileDataEntry] + | List[FileDataEntryPlatformVersion] + | List[CommitEntryWithDelta] + | List[CommitEntryPlatformWithDelta] + ), +) -> None: + """ + Prints a list of entries as a Markdown table. + Only non-empty tables are printed. + """ + if any(str(value).strip() not in ("", "0", "0001-01-01") for value in modules[0].values()): # table is not empty + headers = [k for k in modules[0].keys() if "Bytes" not in k] + app.display(f"### {title}") + app.display("| " + " | ".join(headers) + " |") + app.display("| " + " | ".join("---" for _ in headers) + " |") + + for row in modules: + app.display("| " + " | ".join(format(str(row.get(h, ""))) for h in headers) + " |") + + def print_table( app: Application, mode: str, @@ -127,12 +245,17 @@ def print_table( | List[CommitEntryPlatformWithDelta] ), ) -> None: + """ + Prints a list of entries as a Rich table. + Only non-empty tables are printed. 
+ """ + # if any(str(value).strip() not in ("", "0", "0001-01-01") for value in modules[0].values()): # table is not empty columns = [col for col in modules[0].keys() if "Bytes" not in col] modules_table: Dict[str, Dict[int, str]] = {col: {} for col in columns} - for i, row in enumerate(modules): - for key in columns: - modules_table[key][i] = str(row.get(key, "")) + if any(str(value).strip() not in ("", "0", "0001-01-01") for value in row.values()): + for key in columns: + modules_table[key][i] = str(row.get(key, "")) app.display_table(mode, modules_table) @@ -144,6 +267,37 @@ def plot_treemap( mode: Literal["status", "diff"] = "status", path: Optional[str] = None, ) -> None: + """ + Generates and displays or saves a treemap visualization of module sizes. + + The plot layout is computed using the size of each module (in bytes), and color is used to + encode either the type of module or the direction/magnitude of size change, depending on the mode. + + - Modules with very small area may not show labels to avoid overlap. + - Labels display module name and size if space allows. + - Color intensity reflects relative size (or change) within its group. + - A legend is added depending on the selected mode. + + Args: + modules: List of module entries. Each entry must contain at least: + - 'Name': The module name, + - 'Size_Bytes': Module size in bytes (can be negative in 'diff' mode), + - 'Size': Human-readable size string, + - 'Type': Either 'Integration' or 'Dependency'. + title: Title to display at the top of the plot. + show: If True, the plot is shown interactively using matplotlib. + mode: + - 'status': Shows the current sizes of modules. + Integrations and dependencies are grouped and colored separately (Purples/Reds), + with size intensity mapped to color darkness. + - 'diff': Shows the size change between two commits. + Positive changes are colored in Oranges, negative changes in Blues. + The plot is split in half: left for decreases, right for increases. 
+ path: Optional path to save the plot as a PNG file. If not provided, nothing is saved. + """ + if not any(str(value).strip() not in ("", "0") for value in modules[0].values()): # table is empty + return + # Convert sizes to absolute values for layout computation sizes = [abs(mod["Size_Bytes"]) for mod in modules] @@ -315,6 +469,19 @@ def rescale_intensity(val, min_val=0.3, max_val=0.8): def get_dependencies_sizes( deps: List[str], download_urls: List[str], versions: List[str], compressed: bool ) -> List[FileDataEntry]: + """ + Calculates the sizes of dependencies, either compressed or uncompressed. + + Args: + deps: List of dependency names. + download_urls: Corresponding download URLs for the dependencies. + versions: Corresponding version strings for the dependencies. + compressed: If True, use the Content-Length from the HTTP headers. + If False, download, extract, and compute actual uncompressed size. + + Returns: + A list of FileDataEntry dictionaries with name, version, size in bytes, and human-readable size. + """ file_data: List[FileDataEntry] = [] for dep, url, version in zip(deps, download_urls, versions, strict=False): if compressed: @@ -348,7 +515,7 @@ def get_dependencies_sizes( "Name": str(dep), "Version": version, "Size_Bytes": int(size), - "Size": convert_size(size), + "Size": convert_to_human_readable_size(size), "Type": "Dependency", } ) @@ -357,6 +524,16 @@ def get_dependencies_sizes( def get_files(repo_path: str | Path, compressed: bool) -> List[FileDataEntry]: + """ + Calculates integration file sizes and versions from a repository. + + Args: + repo_path: Path to the repository root. + compressed: If True, measure compressed file sizes. If False, measure uncompressed sizes. + + Returns: + A list of FileDataEntry dictionaries with name, version, size in bytes, and human-readable size. 
+ """ ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} git_ignore = get_gitignore_files(repo_path) included_folder = "datadog_checks/" @@ -388,7 +565,7 @@ def get_files(repo_path: str | Path, compressed: bool) -> List[FileDataEntry]: "Name": name, "Version": integration_versions.get(name, ""), "Size_Bytes": size, - "Size": convert_size(size), + "Size": convert_to_human_readable_size(size), "Type": "Integration", } for name, size in integration_sizes.items() @@ -396,6 +573,18 @@ def get_files(repo_path: str | Path, compressed: bool) -> List[FileDataEntry]: def get_dependencies_list(file_path: str) -> Tuple[List[str], List[str], List[str]]: + """ + Parses a dependency file and extracts the dependency names, download URLs, and versions. + + Args: + file_path: Path to the file containing the dependencies. + + Returns: + A tuple of three lists: + - List of dependency names + - List of download URLs + - List of extracted version strings + """ download_urls = [] deps = [] versions = [] @@ -417,9 +606,24 @@ def get_dependencies_list(file_path: str) -> Tuple[List[str], List[str], List[st return deps, download_urls, versions -def group_modules( +def format_modules( modules: List[FileDataEntry], platform: str, version: str, i: Optional[int] ) -> List[FileDataEntryPlatformVersion] | List[FileDataEntry]: + """ + Formats the modules list, adding platform and Python version information if needed. + + If the modules list is empty, returns a default empty entry (with or without platform information). + + Args: + modules: List of modules to format. + platform: Platform string to add to each entry if needed. + version: Python version string to add to each entry if needed. + i: Index of the current (platform, version) combination being processed. + If None, it means the data is being processed for only one combination of platform and version. + + Returns: + A list of formatted entries. 
+ """ if modules == [] and i is None: empty_entry: FileDataEntry = { "Name": "", @@ -450,6 +654,15 @@ def group_modules( def extract_version_from_about_py(path: str) -> str: + """ + Extracts the __version__ string from a given __about__.py file. + + Args: + path: Path to the __about__.py file. + + Returns: + The extracted version string if found, otherwise an empty string. + """ try: with open(path, "r", encoding="utf-8") as f: for line in f: @@ -462,6 +675,19 @@ def extract_version_from_about_py(path: str) -> str: def get_dependencies(repo_path: str | Path, platform: str, version: str, compressed: bool) -> List[FileDataEntry]: + """ + Gets the list of dependencies for a given platform and Python version. + Each FileDataEntry includes: Name, Version, Size_Bytes, Size, and Type. + + Args: + repo_path: Path to the repository. + platform: Target platform. + version: Target Python version. + compressed: If True, measure compressed file sizes. If False, measure uncompressed sizes. + + Returns: + A list of FileDataEntry dictionaries containing the dependency information. + """ resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved")) for filename in os.listdir(resolved_path): @@ -474,6 +700,9 @@ def get_dependencies(repo_path: str | Path, platform: str, version: str, compres def get_gitignore_files(repo_path: str | Path) -> List[str]: + """ + Returns the list of non-commented files from the .gitignore file. 
+ """ gitignore_path = os.path.join(repo_path, ".gitignore") with open(gitignore_path, "r", encoding="utf-8") as file: gitignore_content = file.read() @@ -484,6 +713,9 @@ def get_gitignore_files(repo_path: str | Path) -> List[str]: def compress(file_path: str) -> int: + ''' + Returns the compressed size (in bytes) of a file using zlib + ''' compressor = zlib.compressobj() compressed_size = 0 # original_size = os.path.getsize(file_path) @@ -501,6 +733,10 @@ def __init__(self, mensaje: str) -> None: class GitRepo: + """ + Clones the repo to a temp folder and deletes the folder on exit. + """ + def __init__(self, url: Union[Path, str]) -> None: self.url = url self.repo_dir: str @@ -521,6 +757,18 @@ def _run(self, command: str) -> List[str]: def get_module_commits( self, module_path: str, initial: Optional[str], final: Optional[str], time: Optional[str] ) -> List[str]: + """ + Returns the list of commits (SHA) that modified a given module, filtered by time or commit range. + + Args: + module_path: Integration name or path to the .deps/resolved file (for dependencies). + initial: Optional initial commit hash. + final: Optional final commit hash. + time: Optional time filter (e.g. '2 weeks ago'). + + Returns: + List of commit SHAs (oldest to newest). + """ self._run("git fetch origin --quiet") self._run("git checkout origin/HEAD") try: @@ -565,8 +813,11 @@ def get_commit_metadata(self, commit: str) -> Tuple[str, str, str]: date, author, message = result return date, author, message - def get_creation_commit_module(self, module: str) -> str: - return self._run(f'git log --reverse --format="%H" -- {module}')[0] + def get_creation_commit_module(self, integration: str) -> str: + ''' + Returns the first commit (SHA) where the given integration was introduced. 
+ ''' + return self._run(f'git log --reverse --format="%H" -- {integration}')[0] def __exit__( self, diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index 44ecfeb07adfb..86f4dadd49ec9 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -15,14 +15,17 @@ from .common import ( FileDataEntry, GitRepo, - convert_size, + convert_to_human_readable_size, + format_modules, get_dependencies, get_files, - group_modules, + get_valid_platforms, + get_valid_versions, plot_treemap, print_csv, + print_json, + print_markdown, print_table, - valid_platforms_versions, ) console = Console() @@ -38,6 +41,8 @@ @click.option("--python", "version", help="Python version (e.g 3.12). If not specified, all versions will be analyzed") @click.option("--compressed", is_flag=True, help="Measure compressed size") @click.option("--csv", is_flag=True, help="Output in CSV format") +@click.option("--markdown", is_flag=True, help="Output in Markdown format") +@click.option("--json", is_flag=True, help="Output in JSON format") @click.option("--save_to_png_path", help="Path to save the treemap as PNG") @click.option( "--show_gui", @@ -53,6 +58,8 @@ def diff( version: Optional[str], compressed: bool, csv: bool, + markdown: bool, + json: bool, save_to_png_path: str, show_gui: bool, ) -> None: @@ -67,6 +74,8 @@ def diff( transient=True, ) as progress: task = progress.add_task("[cyan]Calculating differences...", total=None) + if sum([csv, markdown, json]) > 1: + raise click.BadParameter("Only one output format can be selected: --csv, --markdown, or --json") if len(first_commit) < 7 and len(second_commit) < 7: raise click.BadParameter("Commit hashes must be at least 7 characters long") elif len(first_commit) < 7: @@ -85,7 +94,8 @@ def diff( date = datetime.strptime(date_str, "%b %d %Y").date() if date < MINIMUM_DATE: raise ValueError(f"First commit must be after {MINIMUM_DATE.strftime('%b %d %Y')} ") - valid_platforms, valid_versions = 
valid_platforms_versions(gitRepo.repo_dir) + valid_platforms = get_valid_platforms(gitRepo.repo_dir) + valid_versions = get_valid_versions(gitRepo.repo_dir) if platform and platform not in valid_platforms: raise ValueError(f"Invalid platform: {platform}") elif version and version not in valid_versions: @@ -94,14 +104,15 @@ def diff( platforms = valid_platforms if platform is None else [platform] versions = valid_versions if version is None else [version] progress.remove_task(task) - - for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): + printed_yet = False + combinations = [(p, v) for p in platforms for v in versions] + for i, (plat, ver) in enumerate(combinations): path = None if save_to_png_path: base, ext = os.path.splitext(save_to_png_path) path = f"{base}_{plat}_{ver}{ext}" - diff_mode( + printed_yet = diff_mode( app, gitRepo, first_commit, @@ -110,13 +121,18 @@ def diff( ver, compressed, csv, + markdown, + json, i, progress, path, show_gui, + len(combinations), + printed_yet, ) else: progress.remove_task(task) + diff_mode( app, gitRepo, @@ -126,15 +142,19 @@ def diff( version, compressed, csv, + markdown, + json, None, progress, save_to_png_path, show_gui, + None, + False, ) - except Exception as e: progress.stop() app.abort(str(e)) + return None def diff_mode( @@ -146,38 +166,50 @@ def diff_mode( version: str, compressed: bool, csv: bool, + markdown: bool, + json: bool, i: Optional[int], progress: Progress, save_to_png_path: Optional[str], show_gui: bool, -) -> None: + n_iterations: Optional[int], + printed_yet: bool, +) -> bool: files_b, dependencies_b, files_a, dependencies_a = get_repo_info( gitRepo, platform, version, first_commit, second_commit, compressed, progress ) integrations = get_diff(files_b, files_a, "Integration") dependencies = get_diff(dependencies_b, dependencies_a, "Dependency") - if integrations + dependencies == [] and not csv: - app.display(f"No size differences were detected between the selected commits for 
{platform}.") + if integrations + dependencies == [] and not csv and not json: + app.display(f"No size differences were detected between the selected commits for {platform}") else: - grouped_modules = group_modules(integrations + dependencies, platform, version, i) - grouped_modules.sort(key=lambda x: abs(cast(int, x["Size_Bytes"])), reverse=True) - for module in grouped_modules: - if cast(int, module["Size_Bytes"]) > 0: + formated_modules = format_modules(integrations + dependencies, platform, version, i) + formated_modules.sort(key=lambda x: abs(cast(int, x["Size_Bytes"])), reverse=True) + for module in formated_modules: + if module["Size_Bytes"] > 0: module["Size"] = f"+{module['Size']}" if csv: - print_csv(app, i, grouped_modules) + print_csv(app, i, formated_modules) + elif json: + print_json(app, i, n_iterations, printed_yet, formated_modules) + elif markdown: + print_markdown(app, "Differences between selected commits", formated_modules) else: - print_table(app, "Diff", grouped_modules) + print_table(app, "Differences between selected commits", formated_modules) if show_gui or save_to_png_path: plot_treemap( - grouped_modules, + formated_modules, f"Disk Usage Differences for {platform} and Python version {version}", show_gui, "diff", save_to_png_path, ) + if integrations + dependencies != []: + printed_yet = True + + return printed_yet def get_repo_info( @@ -190,6 +222,26 @@ def get_repo_info( progress: Progress, ) -> Tuple[List[FileDataEntry], List[FileDataEntry], List[FileDataEntry], List[FileDataEntry]]: with progress: + """ + Retrieves integration and dependency sizes for two commits in the repo. + + Args: + gitRepo: An instance of GitRepo for accessing the repository. + platform: Target platform for dependency resolution. + version: Python version for dependency resolution. + first_commit: The earlier commit SHA to compare. + second_commit: The later commit SHA to compare. + compressed: Whether to measure compressed sizes. + progress: Rich Progress bar. 
+ + Returns: + A tuple of four lists: + - files_b: Integration sizes at first_commit + - dependencies_b: Dependency sizes at first_commit + - files_a: Integration sizes at second_commit + - dependencies_a: Dependency sizes at second_commit + """ + repo = gitRepo.repo_dir task = progress.add_task("[cyan]Calculating sizes for the first commit...", total=None) gitRepo.checkout_commit(first_commit) @@ -209,6 +261,19 @@ def get_repo_info( def get_diff( size_first_commit: List[FileDataEntry], size_second_commit: List[FileDataEntry], type: str ) -> List[FileDataEntry]: + """ + Computes size differences between two sets of integrations or dependencies. + + Args: + size_first_commit: Entries from the first (earlier) commit. + size_second_commit: Entries from the second (later) commit. + type: Integration/Dependency + + Returns: + A list of FileDataEntry items representing only the entries with a size difference. + Entries include new, deleted, or changed modules, with delta size in bytes and human-readable format. 
+ """ + first_commit = {entry["Name"]: entry for entry in size_first_commit} second_commit = {entry["Name"]: entry for entry in size_second_commit} @@ -238,12 +303,6 @@ def get_diff( else: name_str = name version_str = f"{ver_b} -> {ver_a}" if ver_a != ver_b else ver_a - if a: - type = a["Type"] - elif b: - type = b["Type"] - else: - type = "" diffs.append( { @@ -251,7 +310,7 @@ def get_diff( "Version": version_str, "Type": type, "Size_Bytes": delta, - "Size": convert_size(delta), + "Size": convert_to_human_readable_size(delta), } ) diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index 2892f7e60b191..7f09ec3558dc2 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -12,13 +12,16 @@ from ddev.cli.application import Application from .common import ( + format_modules, get_dependencies, get_files, - group_modules, + get_valid_platforms, + get_valid_versions, plot_treemap, print_csv, + print_json, + print_markdown, print_table, - valid_platforms_versions, ) console = Console() @@ -31,6 +34,8 @@ @click.option("--python", "version", help="Python version (e.g 3.12). If not specified, all versions will be analyzed") @click.option("--compressed", is_flag=True, help="Measure compressed size") @click.option("--csv", is_flag=True, help="Output in CSV format") +@click.option("--markdown", is_flag=True, help="Output in Markdown format") +@click.option("--json", is_flag=True, help="Output in JSON format") @click.option("--save_to_png_path", help="Path to save the treemap as PNG") @click.option( "--show_gui", @@ -44,6 +49,8 @@ def status( version: Optional[str], compressed: bool, csv: bool, + markdown: bool, + json: bool, save_to_png_path: Optional[str], show_gui: bool, ) -> None: @@ -51,8 +58,11 @@ def status( Show the current size of all integrations and dependencies. 
""" try: + if sum([csv, markdown, json]) > 1: + raise click.BadParameter("Only one output format can be selected: --csv, --markdown, or --json") repo_path = app.repo.path - valid_platforms, valid_versions = valid_platforms_versions(repo_path) + valid_platforms = get_valid_platforms(repo_path) + valid_versions = get_valid_versions(repo_path) if platform and platform not in valid_platforms: raise ValueError(f"Invalid platform: {platform}") elif version and version not in valid_versions: @@ -60,14 +70,30 @@ def status( if platform is None or version is None: platforms = valid_platforms if platform is None else [platform] versions = valid_versions if version is None else [version] - for i, (plat, ver) in enumerate([(p, v) for p in platforms for v in versions]): + combinations = [(p, v) for p in platforms for v in versions] + for i, (plat, ver) in enumerate(combinations): path = None if save_to_png_path: base, ext = os.path.splitext(save_to_png_path) path = f"{base}_{plat}_{ver}{ext}" - status_mode(app, repo_path, plat, ver, compressed, csv, i, path, show_gui) + status_mode( + app, repo_path, plat, ver, compressed, csv, markdown, json, i, path, show_gui, len(combinations) + ) else: - status_mode(app, repo_path, platform, version, compressed, csv, None, save_to_png_path, show_gui) + status_mode( + app, + repo_path, + platform, + version, + compressed, + csv, + markdown, + json, + None, + save_to_png_path, + show_gui, + None, + ) except Exception as e: app.abort(str(e)) @@ -80,23 +106,30 @@ def status_mode( version: str, compressed: bool, csv: bool, + markdown: bool, + json: bool, i: Optional[int], save_to_png_path: Optional[str], show_gui: bool, + n_iterations: Optional[int], ) -> None: with console.status("[cyan]Calculating sizes...", spinner="dots"): modules = get_files(repo_path, compressed) + get_dependencies(repo_path, platform, version, compressed) - grouped_modules = group_modules(modules, platform, version, i) - grouped_modules.sort(key=lambda x: x["Size_Bytes"], 
reverse=True) + formated_modules = format_modules(modules, platform, version, i) + formated_modules.sort(key=lambda x: x["Size_Bytes"], reverse=True) if csv: - print_csv(app, i, grouped_modules) + print_csv(app, i, formated_modules) + elif json: + print_json(app, i, n_iterations, False, formated_modules) + elif markdown: + print_markdown(app, "Status", formated_modules) else: - print_table(app, "Status", grouped_modules) + print_table(app, "Status", formated_modules) if show_gui or save_to_png_path: plot_treemap( - grouped_modules, + formated_modules, f"Disk Usage Status for {platform} and Python version {version}", show_gui, "status", diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index 2082a4e0df2f5..d88daa4fd9df2 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -21,19 +21,21 @@ GitRepo, WrongDependencyFormat, compress, - convert_size, + convert_to_human_readable_size, extract_version_from_about_py, get_gitignore_files, + get_valid_platforms, is_correct_dependency, is_valid_integration, print_csv, + print_json, + print_markdown, print_table, - valid_platforms_versions, ) -DEPENDENCY_FILE_CHANGE = datetime.strptime("Sep 17 2024", "%b %d %Y").date() -MINIMUM_DATE_DEPENDENCIES = datetime.strptime("Apr 3 2024", "%b %d %Y").date() -MINIMUM_DATE_INTEGRATIONS = datetime.strptime("Feb 1 2024", "%b %d %Y").date() +MINIMUM_DATE_DEPENDENCIES = datetime.strptime( + "Apr 3 2024", "%b %d %Y" +).date() # Dependencies not available before this date due to a storage change console = Console() @@ -58,6 +60,8 @@ ) @click.option("--compressed", is_flag=True, help="Measure compressed size") @click.option("--csv", is_flag=True, help="Output results in CSV format") +@click.option("--markdown", is_flag=True, help="Output in Markdown format") +@click.option("--json", is_flag=True, help="Output in JSON format") @click.option("--save_to_png_path", help="Path to save the treemap as PNG") @click.option( 
"--show_gui", @@ -76,6 +80,8 @@ def timeline( platform: Optional[str], compressed: bool, csv: bool, + markdown: bool, + json: bool, save_to_png_path: str, show_gui: bool, ) -> None: @@ -102,26 +108,27 @@ def timeline( url = app.repo.path with GitRepo(url) as gitRepo: try: + if final_commit and type == "dependency": + date_str, _, _ = gitRepo.get_commit_metadata(final_commit) + date = datetime.strptime(date_str, "%b %d %Y").date() + if date < MINIMUM_DATE_DEPENDENCIES: + raise ValueError( + f"Final commit must be after {MINIMUM_DATE_DEPENDENCIES.strftime('%b %d %Y')}" + " in case of Dependencies" + ) folder = module if type == "integration" else ".deps/resolved" commits = gitRepo.get_module_commits(folder, initial_commit, final_commit, time) first_commit = gitRepo.get_creation_commit_module(module) gitRepo.checkout_commit(commits[-1]) - date_str, _, _ = gitRepo.get_commit_metadata(commits[-1]) - date = datetime.strptime(date_str, "%b %d %Y").date() - if final_commit and ( - (type == "integration" and date < MINIMUM_DATE_INTEGRATIONS) - or (type == "dependency" and date < MINIMUM_DATE_DEPENDENCIES) - ): - raise ValueError( - f"Final commit must be after {MINIMUM_DATE_INTEGRATIONS.strftime('%b %d %Y')} " - "in case of Integrations " - f"and after {MINIMUM_DATE_DEPENDENCIES.strftime('%b %d %Y')} in case of Dependencies" - ) - valid_platforms, _ = valid_platforms_versions(gitRepo.repo_dir) - if platform and platform not in valid_platforms: - raise ValueError(f"Invalid platform: {platform}") - elif commits == [""] and type == "integration" and module_exists(gitRepo.repo_dir, module): - raise ValueError(f"No changes found for {type}: {module}") + if type == 'dependency': + valid_platforms = get_valid_platforms(gitRepo.repo_dir) + if platform and platform not in valid_platforms: + raise ValueError(f"Invalid platform: {platform}") + if commits == [""] and type == "integration" and module_exists(gitRepo.repo_dir, module): + progress.remove_task(task) + progress.stop() + 
app.display(f"No changes found for {type}: {module}") + return elif commits == [""] and type == "integration" and not module_exists(gitRepo.repo_dir, module): raise ValueError(f"Integration {module} not found in latest commit, is the name correct?") elif ( @@ -140,7 +147,12 @@ def timeline( ): raise ValueError(f"Dependency {module} not found in latest commit, is the name correct?") elif type == "dependency" and commits == [""]: - raise ValueError(f"No changes found for {type}: {module}") + progress.remove_task(task) + progress.stop() + + app.display(f"No changes found for {type}: {module}") + return + printed_yet = False if type == "dependency" and platform is None: progress.remove_task(task) for i, plat in enumerate(valid_platforms): @@ -148,7 +160,7 @@ def timeline( if save_to_png_path: base, ext = os.path.splitext(save_to_png_path) path = f"{base}_{plat}{ext}" - timeline_mode( + printed_yet = timeline_mode( app, gitRepo, type, @@ -158,15 +170,18 @@ def timeline( plat, compressed, csv, + markdown, + json, i, None, progress, path, show_gui, + len(valid_platforms), + printed_yet, ) else: progress.remove_task(task) - timeline_mode( app, gitRepo, @@ -177,11 +192,15 @@ def timeline( platform, compressed, csv, + markdown, + json, None, first_commit, progress, save_to_png_path, show_gui, + None, + printed_yet, ) except Exception as e: @@ -200,22 +219,33 @@ def timeline_mode( platform: Optional[str], compressed: bool, csv: bool, + markdown: bool, + json: bool, i: Optional[int], first_commit: Optional[str], progress: Progress, save_to_png_path: str, show_gui: bool, -) -> None: + n_iterations: Optional[int], + printed_yet: bool, +) -> bool: modules = get_repo_info(gitRepo, type, platform, module, commits, compressed, first_commit, progress) + trimmed_modules = trim_modules(modules, threshold) + grouped_modules = format_modules(trimmed_modules, platform, i) + if csv: + print_csv(app, i, grouped_modules) + elif json: + print_json(app, i, n_iterations, printed_yet, 
grouped_modules) + elif markdown: + print_markdown(app, "Timeline for " + module, grouped_modules) + else: + print_table(app, "Timeline for " + module, grouped_modules) + if show_gui or save_to_png_path: + plot_linegraph(grouped_modules, module, platform, show_gui, save_to_png_path) if modules != []: - trimmed_modules = trim_modules(modules, threshold) - grouped_modules = group_modules(trimmed_modules, platform, i) - if csv: - print_csv(app, i, grouped_modules) - else: - print_table(app, "Timeline for " + module, grouped_modules) - if show_gui or save_to_png_path: - plot_linegraph(grouped_modules, module, platform, show_gui, save_to_png_path) + printed_yet = True + + return printed_yet def get_repo_info( @@ -228,6 +258,22 @@ def get_repo_info( first_commit: Optional[str], progress: Progress, ) -> List[CommitEntry]: + """ + Retrieves size and metadata info for a module across multiple commits. + + Args: + gitRepo: Active GitRepo instance. + type: integration/dependency. + platform: Target platform (only used for dependencies). + module: Integration or dependency name. + commits: List of commits to process. + compressed: Whether to measure compressed sizes. + first_commit: First commit hash where the given integration was introduced (only for integrations). + progress: Progress bar instance. + + Returns: + A list of CommitEntry objects with size, version, date, author, commit message and commit hash. + """ with progress: if type == "integration": file_data = process_commits(commits, module, gitRepo, progress, platform, type, compressed, first_commit) @@ -246,6 +292,25 @@ def process_commits( compressed: bool, first_commit: Optional[str], ) -> List[CommitEntry]: + """ + Processes a list of commits for a given integration or dependency. + + For each commit, it checks out the corresponding version of the module, + retrieves its metadata, and calculates its size. + + Args: + commits: List of commit SHAs to process. + module: Integration or dependency name. 
+ gitRepo: GitRepo instance managing the repository. + progress: Progress bar instance. + platform: Target platform name (only for dependencies). + type: integration/dependency. + compressed: Whether to measure compressed sizes. + first_commit: First commit hash where the given integration was introduced (only for integrations). + + Returns: + A list of CommitEntry objects with commit metadata and size information. + """ file_data: List[CommitEntry] = [] task = progress.add_task("[cyan]Processing commits...", total=len(commits)) repo = gitRepo.repo_dir @@ -278,13 +343,32 @@ def get_files( file_data: List[CommitEntry], compressed: bool, ) -> List[CommitEntry]: + """ + Calculates integration file sizes and versions from a repository. + + If the integration folder no longer exists, a 'Deleted' entry is added. Otherwise, + it walks the module directory, sums file sizes, extracts the version, and appends a CommitEntry. + + Args: + repo_path: Path to the local Git repository. + module: Name of the integration. + commit: Commit SHA being analyzed. + date: Commit date. + author: Commit author. + message: Commit message. + file_data: List to append the result to. + compressed: Whether to use compressed file sizes. + + Returns: + The updated file_data list with one new CommitEntry appended. + """ module_path = os.path.join(repo_path, module) if not module_exists(repo_path, module): file_data.append( { "Size_Bytes": 0, - "Version": "", + "Version": "Deleted", "Date": date, "Author": author, "Commit_Message": f"(DELETED) {message}", @@ -337,13 +421,29 @@ def get_dependencies( message: str, compressed: bool, ) -> Optional[CommitEntry]: + """ + Returns the size and metadata of a dependency for a given commit and platform. + + Args: + repo_path: Path to the repository. + module: Dependency name to look for. + platform: Target platform to match (e.g., 'linux-x86_64'). + commit: Commit SHA being analyzed. + date: Commit date. + author: Commit author. + message: Commit message. 
+        compressed: Whether to calculate compressed size or uncompressed.
+
+    Returns:
+        A CommitEntry with size and metadata if the dependency is found, else None.
+    """
     resolved_path = os.path.join(repo_path, ".deps/resolved")
     paths = os.listdir(resolved_path)
     version = get_version(paths, platform)
     for filename in paths:
         file_path = os.path.join(resolved_path, filename)
         if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename):
-            download_url, dep_version = get_dependency(file_path, module)
+            download_url, dep_version = get_dependency_data(file_path, module)
             return (
                 get_dependency_size(download_url, dep_version, commit, date, author, message, compressed)
                 if download_url and dep_version is not None
@@ -352,7 +452,19 @@ def get_dependencies(
     return None
 
 
-def get_dependency(file_path: str, module: str) -> Tuple[Optional[str], Optional[str]]:
+def get_dependency_data(file_path: str, module: str) -> Tuple[Optional[str], Optional[str]]:
+    """
+    Parses a dependency file and extracts the download URL and version of the given dependency.
+
+    Args:
+        file_path: Path to the file containing the dependencies.
+        module: Name of the dependency.
+
+    Returns:
+        A tuple of two strings:
+        - Download URL
+        - Extracted dependency version
+    """
     with open(file_path, "r", encoding="utf-8") as file:
         file_content = file.read()
         for line in file_content.splitlines():
@@ -370,6 +482,21 @@ def get_dependency(file_path: str, module: str) -> Tuple[Optional[str], Optional
 def get_dependency_size(
     download_url: str, version: str, commit: str, date: date, author: str, message: str, compressed: bool
 ) -> CommitEntry:
+    """
+    Calculates the size of a dependency wheel at a given commit.
+
+    Args:
+        download_url: URL to download the wheel file.
+        version: Dependency version.
+        commit: Commit SHA being analyzed.
+        date: Commit date.
+        author: Commit author.
+        message: Commit message.
+        compressed: If True, use Content-Length. If False, download and decompress to calculate size.
+
+    Returns:
+        A CommitEntry with size and metadata for the given dependency and commit.
+    """
     if compressed:
         response = requests.head(download_url)
         response.raise_for_status()
@@ -408,6 +535,17 @@ def get_dependency_size(
 
 
 def get_version(files: List[str], platform: str) -> str:
+    """
+    Returns the latest Python version for the given target platform based on .deps/resolved filenames.
+
+    Args:
+        files: List of filenames from the .deps/resolved folder.
+        platform: Target platform.
+
+    Returns:
+        If the version is a single digit (e.g., '3'), returns 'py3';
+        otherwise (e.g., '3.12'), returns it as-is.
+    """
     final_version = ""
     for file in files:
         if platform in file:
@@ -419,10 +557,50 @@ def get_version(files: List[str], platform: str) -> str:
     return final_version if len(final_version) != 1 else "py" + final_version
 
 
-def group_modules(
+def format_modules(
     modules: List[CommitEntryWithDelta], platform: Optional[str], i: Optional[int]
 ) -> List[CommitEntryWithDelta] | List[CommitEntryPlatformWithDelta]:
-    if i is not None and platform:
+    """
+    Formats the modules list, adding platform and Python version information if needed.
+
+    If the modules list is empty, returns a default empty entry (with or without platform information).
+
+    Args:
+        modules: List of modules to format.
+        platform: Platform string to add to each entry if needed.
+            (Note: this timeline variant takes no separate Python version argument.)
+        i: Index of the current (platform, version) combination being processed.
+            If None, it means the data is being processed for only one platform.
+
+    Returns:
+        A list of formatted entries.
+ """ + if modules == [] and i is not None and platform: + empty_module_platform: CommitEntryPlatformWithDelta = { + "Size_Bytes": 0, + "Version": "", + "Date": datetime.min.date(), + "Author": "", + "Commit_Message": "", + "Commit_SHA": "", + "Delta_Bytes": 0, + "Delta": " ", + "Platform": "", + } + return [empty_module_platform] + elif modules == []: + empty_module: CommitEntryWithDelta = { + "Size_Bytes": 0, + "Version": "", + "Date": datetime.min.date(), + "Author": "", + "Commit_Message": "", + "Commit_SHA": "", + "Delta_Bytes": 0, + "Delta": " ", + } + return [empty_module] + elif i is not None and platform: new_modules: List[CommitEntryPlatformWithDelta] = [{**entry, "Platform": platform} for entry in modules] return new_modules else: @@ -433,6 +611,24 @@ def trim_modules( modules: List[CommitEntry], threshold: Optional[int] = None, ) -> List[CommitEntryWithDelta]: + """ + Filters a list of commit entries, keeping only those with significant size changes. + + Args: + modules: List of CommitEntry items ordered by commit date. + threshold: Minimum size change (in bytes) required to keep an entry. Defaults to 0. + + Returns: + A list of CommitEntryWithDelta objects: + - Always includes the first and last entry. + - Includes intermediate entries where size difference exceeds the threshold. + - Adds Delta_Bytes and human-readable Delta for each included entry. + - Marks version transitions as 'X -> Y' when the version changes. 
+ """ + if modules == []: + empty_modules: List[CommitEntryWithDelta] = [] + return empty_modules + threshold = threshold or 0 trimmed_modules: List[CommitEntryWithDelta] = [] @@ -455,7 +651,7 @@ def trim_modules( new_entry: CommitEntryWithDelta = { **curr, "Delta_Bytes": delta, - "Delta": convert_size(delta), + "Delta": convert_to_human_readable_size(delta), } curr_version = curr["Version"] @@ -469,6 +665,20 @@ def trim_modules( def format_commit_data(date_str: str, message: str, commit: str, first_commit: Optional[str]) -> Tuple[date, str, str]: + """ + Formats commit metadata by shortening the message, marking the first commit, and parsing the date. + Args: + date_str: Commit date as a string (e.g., 'Apr 3 2024'). + message: Original commit message. + commit: commit SHA. + first_commit: First commit hash where the given integration was introduced (only for integrations). + + Returns: + A tuple containing: + - Parsed date object, + - Shortened and possibly annotated message, + - Shortened commit SHA (first 7 characters). + """ if commit == first_commit: message = "(NEW) " + message message = message if len(message) <= 35 else message[:30].rsplit(" ", 1)[0] + "..." + message.split()[-1] @@ -477,10 +687,16 @@ def format_commit_data(date_str: str, message: str, commit: str, first_commit: O def module_exists(path: str, module: str) -> bool: + """ + Checks if the given module exists at the specified path + """ return os.path.exists(os.path.join(path, module)) def get_dependency_list(path: str, platforms: Set[str]) -> Set[str]: + """ + Returns the set of dependencies from the .deps/resolved folder for the latest version of the given platform. + """ resolved_path = os.path.join(path, ".deps/resolved") all_files = os.listdir(resolved_path) dependencies = set() @@ -503,6 +719,19 @@ def plot_linegraph( show: bool, path: Optional[str], ) -> None: + """ + Plots the disk usage evolution over time for a given module. 
+ + Args: + modules: List of commit entries with size and date information. + module: Name of the module to display in the title. + platform: Target platform (used in the title if provided). + show: If True, displays the plot interactively. + path: If provided, saves the plot to this file path. + """ + if not any(str(value).strip() not in ("", "0", "0001-01-01") for value in modules[0].values()): # table is empty + return + dates = [entry["Date"] for entry in modules] sizes = [entry["Size_Bytes"] for entry in modules] title = f"Disk Usage Evolution of {module} for {platform}" if platform else f"Disk Usage Evolution of {module}" diff --git a/ddev/tests/cli/size/test_diff.py b/ddev/tests/cli/size/test_diff.py index cecb58bc6cad9..c6f1110153973 100644 --- a/ddev/tests/cli/size/test_diff.py +++ b/ddev/tests/cli/size/test_diff.py @@ -7,134 +7,11 @@ import pytest -from ddev.cli.size.common import convert_size -from ddev.cli.size.diff import get_diff - def to_native_path(path: str) -> str: return path.replace("/", os.sep) -""" -def test_get_compressed_files(): - mock_repo_path = "root" - - mock_files = [ - (os.path.join("root", "integration", "datadog_checks"), [], ["file1.py", "file2.py"]), - (os.path.join("root", "integration_b", "datadog_checks"), [], ["file3.py"]), - ("root", [], ["ignored.py"]), - ] - - def fake_compress(file_path): - return 1000 - - fake_gitignore = {"ignored.py"} - - with ( - patch("os.walk", return_value=mock_files), - patch("os.path.relpath", side_effect=lambda path, _: path.replace(f"root{os.sep}", "")), - patch("os.path.exists", return_value=True), - patch("builtins.open", mock_open(read_data="__pycache__/\n*.log\n")), - patch("ddev.cli.size.diff.get_gitignore_files", return_value=fake_gitignore), - patch( - "ddev.cli.size.diff.is_valid_integration", - side_effect=lambda path, folder, ignored, git_ignore: path.startswith("integration"), - ), - patch("ddev.cli.size.diff.compress", side_effect=fake_compress), - ): - - result = 
get_files(mock_repo_path, True) - - expected = { - to_native_path("integration/datadog_checks/file1.py"): 1000, - to_native_path("integration/datadog_checks/file2.py"): 1000, - to_native_path("integration_b/datadog_checks/file3.py"): 1000, - } - - assert result == expected - - -def test_get_compressed_dependencies(terminal): - platform = "windows-x86_64" - version = "3.12" - - fake_file_content = ( - "dependency1 @ https://example.com/dependency1.whl\ndependency2 @ https://example.com/dependency2.whl" - ) - - mock_head_response = MagicMock() - mock_head_response.status_code = 200 - mock_head_response.headers = {"Content-Length": "12345"} - - mock_get_response = MagicMock() - mock_get_response.__enter__.return_value = mock_get_response # for use in `with` block - mock_get_response.status_code = 200 - mock_get_response.headers = {"Content-Length": "12345"} - mock_get_response.content = b"Fake wheel file content" - - mock_repo_path = "root" - - with ( - patch("os.path.exists", return_value=True), - patch("os.path.isdir", return_value=True), - patch("os.listdir", return_value=[f"{platform}-{version}"]), - patch("os.path.isfile", return_value=True), - patch("builtins.open", mock_open(read_data=fake_file_content)), - patch("requests.head", return_value=mock_head_response), - patch("requests.get", return_value=mock_get_response), - ): - file_data = get_dependencies(mock_repo_path, platform, version, True) - - assert file_data == { - "dependency1": 12345, - "dependency2": 12345, - } - -""" - - -def test_get_diff(): - size_before = [ - {"Name": "foo", "Version": "1.0.0", "Size_Bytes": 1000, "Type": "Integration"}, - {"Name": "bar", "Version": "2.0.0", "Size_Bytes": 2000, "Type": "Integration"}, - {"Name": "deleted", "Version": "3.0.0", "Size_Bytes": 1500, "Type": "Integration"}, - ] - - size_after = [ - {"Name": "foo", "Version": "1.1.0", "Size_Bytes": 1200, "Type": "Integration"}, - {"Name": "bar", "Version": "2.0.0", "Size_Bytes": 2000, "Type": "Integration"}, - {"Name": 
"new", "Version": "0.1.0", "Size_Bytes": 800, "Type": "Integration"}, - ] - - result = get_diff(size_before, size_after, "Integration") - - expected = [ - { - "Name": "deleted (DELETED)", - "Version": "3.0.0", - "Type": "Integration", - "Size_Bytes": -1500, - "Size": convert_size(-1500), - }, - { - "Name": "foo", - "Version": "1.0.0 -> 1.1.0", - "Type": "Integration", - "Size_Bytes": 200, - "Size": convert_size(200), - }, - { - "Name": "new (NEW)", - "Version": "0.1.0", - "Type": "Integration", - "Size_Bytes": 800, - "Size": convert_size(800), - }, - ] - - assert sorted(result, key=lambda x: x["Name"]) == expected - - @pytest.fixture def mock_size_diff_dependencies(): mock_git_repo = MagicMock() @@ -167,8 +44,12 @@ def get_compressed_dependencies_side_effect(_, __, ___, ____): with ( patch( - "ddev.cli.size.diff.valid_platforms_versions", - return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}, {"3.12"}), + "ddev.cli.size.diff.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), + patch( + "ddev.cli.size.diff.get_valid_versions", + return_value=({'3.12'}), ), patch("ddev.cli.size.diff.GitRepo.__enter__", return_value=mock_git_repo), patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None), @@ -176,7 +57,7 @@ def get_compressed_dependencies_side_effect(_, __, ___, ____): patch("tempfile.mkdtemp", return_value="fake_repo"), patch("ddev.cli.size.diff.get_files", side_effect=get_compressed_files_side_effect), patch("ddev.cli.size.diff.get_dependencies", side_effect=get_compressed_dependencies_side_effect), - patch("ddev.cli.size.diff.group_modules", side_effect=lambda m, *_: m), + patch("ddev.cli.size.diff.format_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.common.print_csv"), patch("ddev.cli.size.common.print_table"), patch("ddev.cli.size.common.plot_treemap"), @@ -201,6 +82,7 @@ def test_diff_csv(ddev, mock_size_diff_dependencies): result = ddev( "size", 
"diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--compressed", "--csv" ) + print(result.output) assert result.exit_code == 0 @@ -212,7 +94,14 @@ def test_diff_no_differences(ddev): with ( patch("ddev.cli.size.diff.GitRepo.__enter__", return_value=fake_repo), patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None), - patch("ddev.cli.size.diff.valid_platforms_versions", return_value=({"linux-aarch64"}, {"3.12"})), + patch( + "ddev.cli.size.status.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), + patch( + "ddev.cli.size.status.get_valid_versions", + return_value=({'3.12'}), + ), patch.object(fake_repo, "checkout_commit"), patch("tempfile.mkdtemp", return_value="fake_repo"), patch("os.path.exists", return_value=True), @@ -252,8 +141,12 @@ def test_diff_invalid_platform(ddev): with ( patch("ddev.cli.size.diff.GitRepo", return_value=mock_git_repo), patch( - "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}, {"3.12"}), + "ddev.cli.size.status.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), + patch( + "ddev.cli.size.status.get_valid_versions", + return_value=({'3.12'}), ), ): result = ddev("size", "diff", "commit1", "commit2", "--platform", "linux", "--python", "3.12", "--compressed") @@ -270,8 +163,12 @@ def test_diff_invalid_version(ddev): with ( patch("ddev.cli.size.diff.GitRepo", return_value=mock_git_repo), patch( - "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}, {"3.12"}), + "ddev.cli.size.status.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), + patch( + "ddev.cli.size.status.get_valid_versions", + return_value=({'3.12'}), ), ): result = ddev( @@ -297,8 +194,12 @@ 
def test_diff_invalid_platform_and_version(ddev): with ( patch("ddev.cli.size.diff.GitRepo", return_value=mock_git_repo), patch( - "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}, {"3.12"}), + "ddev.cli.size.status.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), + patch( + "ddev.cli.size.status.get_valid_versions", + return_value=({'3.12'}), ), ): result = ddev("size", "diff", "commit1", "commit2", "--platform", "linux", "--python", "2.10", "--compressed") diff --git a/ddev/tests/cli/size/test_status.py b/ddev/tests/cli/size/test_status.py index 1a96e1fe37b6c..69656f2c46fbb 100644 --- a/ddev/tests/cli/size/test_status.py +++ b/ddev/tests/cli/size/test_status.py @@ -45,11 +45,15 @@ def mock_size_status(): with ( patch("ddev.cli.size.common.get_gitignore_files", return_value=set()), patch( - "ddev.cli.size.status.valid_platforms_versions", - return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}, {"3.12"}), + "ddev.cli.size.status.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), ), - patch("ddev.cli.size.common.get_files", return_value=fake_files), - patch("ddev.cli.size.common.get_dependencies", return_value=fake_deps), + patch( + "ddev.cli.size.status.get_valid_versions", + return_value=({'3.12'}), + ), + patch("ddev.cli.size.status.get_files", return_value=fake_files), + patch("ddev.cli.size.status.get_dependencies", return_value=fake_deps), patch("os.path.relpath", side_effect=lambda path, _: path.replace(f"fake_root{os.sep}", "")), patch("ddev.cli.size.status.print_csv"), patch("ddev.cli.size.common.compress", return_value=1234), @@ -80,27 +84,45 @@ def test_status_csv(ddev, mock_size_status): def test_status_wrong_platform(ddev): - with patch( - "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({"linux-x86_64", 
"macos-x86_64", "linux-aarch64", "windows-x86_64"}, {"3.12"}), + with ( + patch( + "ddev.cli.size.status.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), + patch( + "ddev.cli.size.status.get_valid_versions", + return_value=({'3.12'}), + ), ): result = ddev("size", "status", "--platform", "linux", "--python", "3.12", "--compressed") assert result.exit_code != 0 def test_status_wrong_version(ddev): - with patch( - "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}, {"3.12"}), + with ( + patch( + "ddev.cli.size.status.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), + patch( + "ddev.cli.size.status.get_valid_versions", + return_value=({'3.12'}), + ), ): result = ddev("size", "status", "--platform", "linux-aarch64", "--python", "2.10", "--compressed") assert result.exit_code != 0 def test_status_wrong_plat_and_version(ddev): - with patch( - "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}, {"3.12"}), + with ( + patch( + "ddev.cli.size.status.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), + patch( + "ddev.cli.size.status.get_valid_versions", + return_value=({'3.12'}), + ), ): result = ddev("size", "status", "--platform", "linux", "--python", "2.10", "--compressed") assert result.exit_code != 0 diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 77fd9205b8ec2..895cf710a9e92 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -1,164 +1,8 @@ -import os -from datetime import datetime from pathlib import Path -from unittest.mock import MagicMock, mock_open, patch +from unittest.mock import MagicMock, patch import pytest -from 
ddev.cli.size.timeline import ( - format_commit_data, - get_dependencies, - get_dependency, - get_dependency_size, - get_files, - get_version, - trim_modules, -) - - -def test_get_compressed_files(): - with ( - patch("os.walk", return_value=[(os.path.join("fake_repo", "datadog_checks"), [], ["__about__.py"])]), - patch("os.path.relpath", return_value=os.path.join("datadog_checks", "__about__.py")), - patch("os.path.exists", return_value=True), - patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), - patch("ddev.cli.size.timeline.is_valid_integration", return_value=True), - patch("ddev.cli.size.timeline.compress", return_value=1234), - patch("ddev.cli.size.timeline.extract_version_from_about_py", return_value='1.1.1'), - ): - result = get_files("fake_repo", "int1", "abc1234", datetime(2025, 4, 4).date(), "auth", "Added int1", [], True) - print(result) - assert result == [ - { - "Size_Bytes": 1234, - "Version": '1.1.1', - "Date": datetime(2025, 4, 4).date(), - "Author": "auth", - "Commit_Message": "Added int1", - "Commit_SHA": "abc1234", - } - ] - - -def test_get_compressed_files_deleted_only(): - repo_path = "fake_repo" - module = "foo" - commit = "abc1234" - date = datetime.strptime("Apr 5 2025", "%b %d %Y").date() - author = "Author" - message = "deleted module" - - with ( - patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), - patch("os.walk", return_value=[]), - patch("os.path.relpath", side_effect=lambda path, _: path.replace(f"{repo_path}{os.sep}", "")), - patch("os.path.exists", return_value=False), - ): - file_data = get_files(repo_path, module, commit, date, author, message, [], True) - - assert file_data == [ - { - "Size_Bytes": 0, - "Version": "", - "Date": date, - "Author": author, - "Commit_Message": "(DELETED) " + message, - "Commit_SHA": commit, - } - ] - - -def test_get_version(): - files = ["linux-x86_64_3.12.txt", "linux-x86_64_3.10.txt"] - version = get_version(files, "linux-x86_64") - assert version == 
"3.12" - - -def test_format_commit_data(): - date, message, commit = format_commit_data( - "Apr 4 2025", "this is a very long commit message that should be trimmed (#1234)", "abc1234def", "abc1234def" - ) - expected_date = datetime.strptime("Apr 4 2025", "%b %d %Y").date() - expected_message = "(NEW) this is a very long...(#1234)" - expected_commit = "abc1234" - assert date == expected_date - assert message == expected_message - assert commit == expected_commit - - -def test_trim_modules_keep_some_remove_some(): - modules = [ - {"Size_Bytes": 1000, "Version": "1.0.0"}, - {"Size_Bytes": 1100, "Version": "1.0.0"}, - {"Size_Bytes": 1400, "Version": "1.1.0"}, - ] - expected = [ - {"Size_Bytes": 1000, "Delta_Bytes": 0, "Delta": " ", "Version": "1.0.0"}, - {"Size_Bytes": 1400, "Delta_Bytes": 300, "Delta": "300 B", "Version": "1.0.0 -> 1.1.0"}, - ] - trimmed = trim_modules(modules, threshold=200) - assert trimmed == expected - - -def test_get_dependency(): - content = """dep1 @ https://example.com/dep1-1.1.1-.whl -dep2 @ https://example.com/dep2-1.1.2-.whl""" - with patch("builtins.open", mock_open(read_data=content)): - url, version = get_dependency(Path("some") / "path" / "file.txt", "dep2") - assert (url, version) == ("https://example.com/dep2-1.1.2-.whl", "1.1.2") - - -def make_mock_response(size): - mock_response = MagicMock() - mock_response.__enter__.return_value = mock_response - mock_response.headers = {"Content-Length": size} - mock_response.raise_for_status = lambda: None - return mock_response - - -def test_get_dependency_size(): - mock_response = make_mock_response("45678") - with patch("requests.head", return_value=mock_response): - info = get_dependency_size( - "https://example.com/file-1.1.1-.whl", - "1.1.1", - "abc1234", - datetime(2025, 4, 4).date(), - "auth", - "Fixed bug", - True, - ) - assert info == { - "Size_Bytes": 45678, - "Version": "1.1.1", - "Date": datetime(2025, 4, 4).date(), - "Author": "auth", - "Commit_Message": "Fixed bug", - 
"Commit_SHA": "abc1234", - } - - -def test_get_compressed_dependencies(): - with ( - patch("os.path.exists", return_value=True), - patch("os.path.isdir", return_value=True), - patch("os.path.isfile", return_value=True), - patch("os.listdir", return_value=["linux-x86_64_3.12.txt"]), - patch("ddev.cli.size.timeline.get_dependency", return_value=("https://example.com/dep1.whl", '1.1.1')), - patch("ddev.cli.size.timeline.requests.head", return_value=make_mock_response("12345")), - ): - result = get_dependencies( - "fake_repo", "dep1", "linux-x86_64", "abc1234", datetime(2025, 4, 4).date(), "auth", "Added dep1", True - ) - assert result == { - "Size_Bytes": 12345, - "Version": '1.1.1', - "Date": datetime(2025, 4, 4).date(), - "Author": "auth", - "Commit_Message": "Added dep1", - "Commit_SHA": "abc1234", - } - @pytest.fixture def mock_timeline_gitrepo(): @@ -176,15 +20,15 @@ def mock_timeline_gitrepo(): patch("ddev.cli.size.timeline.compress", return_value=1234), patch("os.walk", return_value=[(Path("/tmp") / "fake_repo" / "int", [], ["file1.py"])]), patch("os.path.exists", return_value=True), - patch("ddev.cli.size.timeline.group_modules", side_effect=lambda m, *_: m), + patch("ddev.cli.size.timeline.format_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.print_table"), patch("ddev.cli.size.timeline.print_csv"), patch("ddev.cli.size.timeline.plot_linegraph"), - patch("os.path.exists", return_value=True), - patch("os.path.isdir", return_value=True), - patch("os.path.isfile", return_value=True), - patch("os.listdir", return_value=["linux-x86_64_3.12_dep1.whl", "linux-x86_64_3.12_dep2.whl"]), + patch( + "ddev.cli.size.timeline.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), ): yield @@ -213,8 +57,8 @@ def mock_timeline_dependencies(): patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), 
patch("ddev.cli.size.timeline.GitRepo.sparse_checkout_commit"), patch( - "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + "ddev.cli.size.timeline.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), ), patch("ddev.cli.size.timeline.get_dependency_list", return_value={"dep1"}), patch("os.path.exists", return_value=True), @@ -222,9 +66,9 @@ def mock_timeline_dependencies(): patch("os.listdir", return_value=["linux-x86_64-3.12"]), patch("os.path.isfile", return_value=True), patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), - patch("ddev.cli.size.timeline.get_dependency", return_value=("https://example.com/dep1.whl", '1.1.1)')), + patch("ddev.cli.size.timeline.get_dependency_data", return_value=("https://example.com/dep1.whl", '1.1.1)')), patch("ddev.cli.size.timeline.requests.head") as mock_head, - patch("ddev.cli.size.timeline.group_modules", side_effect=lambda m, *_: m), + patch("ddev.cli.size.timeline.format_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.print_table"), patch("ddev.cli.size.timeline.plot_linegraph"), @@ -264,8 +108,8 @@ def test_timeline_invalid_platform(ddev): with ( patch("ddev.cli.size.timeline.GitRepo", return_value=mock_git_repo), patch( - "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({'linux-x86_64', 'linux-aarch64', 'macos-x86_64'}, {'3.12'}), + "ddev.cli.size.timeline.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), ), ): @@ -296,7 +140,10 @@ def test_timeline_no_changes_in_integration(ddev): patch("os.path.exists", return_value=True), patch("os.path.isdir", return_value=True), patch("os.listdir", return_value=[]), - patch("ddev.cli.size.timeline.valid_platforms_versions", return_value=("", 
"")), + patch( + "ddev.cli.size.timeline.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), ): result = ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--compressed") assert result.exit_code != 0 @@ -314,10 +161,9 @@ def test_timeline_integration_not_found(ddev): with ( patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_repo), patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), - patch("ddev.cli.size.timeline.valid_platforms_versions", return_value=("", "")), patch( - "ddev.cli.size.timeline.valid_platforms_versions", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}, {'3.12'}), + "ddev.cli.size.timeline.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), ), patch("ddev.cli.size.timeline.module_exists", return_value=False), ): @@ -337,7 +183,10 @@ def test_timeline_dependency_missing_no_platform(ddev): with ( patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_repo), patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), - patch("ddev.cli.size.timeline.valid_platforms_versions", return_value=({"linux-x86_64"}, {"3.12"})), + patch( + "ddev.cli.size.timeline.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), patch("ddev.cli.size.timeline.get_dependency_list", return_value=set()), ): result = ddev("size", "timeline", "dependency", "missing_module", "c123456", "c2345667") @@ -356,7 +205,10 @@ def test_timeline_dependency_missing_for_platform(ddev, app): with ( patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_repo), patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), - patch("ddev.cli.size.timeline.valid_platforms_versions", return_value=({"linux-x86_64"}, {"3.12"})), + patch( + "ddev.cli.size.timeline.get_valid_platforms", + 
return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), patch("ddev.cli.size.timeline.get_dependency_list", return_value=set()), ): @@ -389,7 +241,10 @@ def test_timeline_dependency_no_changes(ddev, app): with ( patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_repo), patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), - patch("ddev.cli.size.timeline.valid_platforms_versions", return_value=({"linux-x86_64"}, {"3.12"})), + patch( + "ddev.cli.size.timeline.get_valid_platforms", + return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + ), patch("ddev.cli.size.timeline.get_dependency_list", return_value={"dep1"}), ): @@ -405,5 +260,5 @@ def test_timeline_dependency_no_changes(ddev, app): obj=app, ) - assert result.exit_code != 0 + assert result.exit_code == 0 assert "no changes found" in result.output.lower() diff --git a/ddev/tests/size/__init__.py b/ddev/tests/size/__init__.py new file mode 100644 index 0000000000000..3eff9712cbcf5 --- /dev/null +++ b/ddev/tests/size/__init__.py @@ -0,0 +1,3 @@ +# (C) Datadog, Inc. 
2022-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) diff --git a/ddev/tests/cli/size/test_common.py b/ddev/tests/size/test_common.py similarity index 63% rename from ddev/tests/cli/size/test_common.py rename to ddev/tests/size/test_common.py index 285bf77ce66ab..5e10d14f0c730 100644 --- a/ddev/tests/cli/size/test_common.py +++ b/ddev/tests/size/test_common.py @@ -1,20 +1,23 @@ +import json import os from pathlib import Path from unittest.mock import MagicMock, mock_open, patch from ddev.cli.size.common import ( compress, - convert_size, + convert_to_human_readable_size, extract_version_from_about_py, + format_modules, get_dependencies_list, get_dependencies_sizes, get_files, get_gitignore_files, - group_modules, + get_valid_platforms, + get_valid_versions, is_correct_dependency, is_valid_integration, print_csv, - valid_platforms_versions, + print_json, ) @@ -22,7 +25,7 @@ def to_native_path(path: str) -> str: return path.replace("/", os.sep) -def test_valid_platforms_versions(): +def test_get_valid_platforms(): filenames = [ "linux-aarch64_3.12.txt", "linux-aarch64_py2.txt", @@ -39,10 +42,30 @@ def test_valid_platforms_versions(): ] expected_platforms = {"linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"} - expected_versions = {"3.12"} with patch("os.listdir", return_value=filenames): - platforms, versions = valid_platforms_versions("fake_repo") + platforms = get_valid_platforms("fake_repo") assert platforms == expected_platforms + + +def test_get_valid_versions(): + filenames = [ + "linux-aarch64_3.12.txt", + "linux-aarch64_py2.txt", + "linux-aarch64_py3.txt", + "linux-x86_64_3.12.txt", + "linux-x86_64_py2.txt", + "linux-x86_64_py3.txt", + "macos-x86_64_3.12.txt", + "macos-x86_64_py2.txt", + "macos-x86_64_py3.txt", + "windows-x86_64_3.12.txt", + "windows-x86_64_py2.txt", + "windows-x86_64_py3.txt", + ] + + expected_versions = {"3.12"} + with patch("os.listdir", return_value=filenames): + versions = 
get_valid_versions("fake_repo") assert versions == expected_versions @@ -52,11 +75,11 @@ def test_is_correct_dependency(): assert not is_correct_dependency("windows-x86_64", "3.13", "windows-x86_64-3.12") -def test_convert_size(): - assert convert_size(500) == "500 B" - assert convert_size(1024) == "1.0 KB" - assert convert_size(1048576) == "1.0 MB" - assert convert_size(1073741824) == "1.0 GB" +def test_convert_to_human_readable_size(): + assert convert_to_human_readable_size(500) == "500 B" + assert convert_to_human_readable_size(1024) == "1.0 KB" + assert convert_to_human_readable_size(1048576) == "1.0 MB" + assert convert_to_human_readable_size(1073741824) == "1.0 GB" def test_is_valid_integration(): @@ -93,13 +116,13 @@ def test_get_dependencies_sizes(): "Name": "dependency1", "Version": "1.1.1", "Size_Bytes": 12345, - "Size": convert_size(12345), + "Size": convert_to_human_readable_size(12345), "Type": "Dependency", } ] -def test_group_modules(): +def test_format_modules(): modules = [ {"Name": "module1", "Type": "A", "Size_Bytes": 1500}, {"Name": "module2", "Type": "B", "Size_Bytes": 3000}, @@ -124,7 +147,7 @@ def test_group_modules(): }, ] - assert group_modules(modules, platform, version, 0) == expected_output + assert format_modules(modules, platform, version, 0) == expected_output def test_get_files_grouped_and_with_versions(): @@ -152,7 +175,7 @@ def mock_getsize(path): patch("ddev.cli.size.common.get_gitignore_files", return_value=set()), patch("ddev.cli.size.common.is_valid_integration", side_effect=mock_is_valid_integration), patch("ddev.cli.size.common.extract_version_from_about_py", return_value="1.2.3"), - patch("ddev.cli.size.common.convert_size", side_effect=lambda s: f"{s / 1024:.2f} KB"), + patch("ddev.cli.size.common.convert_to_human_readable_size", side_effect=lambda s: f"{s / 1024:.2f} KB"), ): result = get_files(repo_path, compressed=False) @@ -218,6 +241,117 @@ def test_print_csv(): assert actual_calls == expected_calls +def 
test_print_json_multiple_calls(): + mock_app = MagicMock() + printed_yet = False + n_iterations = 3 + + modules_list = [ + [{"name": "mod1", "size": "100"}], + [{"name": "mod2", "size": "200"}], + [{"name": "mod3", "size": "300"}], + ] + + for i, modules in enumerate(modules_list): + if i != 0: + printed_yet = True + print_json(mock_app, i, n_iterations, printed_yet, modules) + + expected_calls = [ + (("[",),), + (('{"name": "mod1", "size": "100"}',),), + ((",",),), + (('{"name": "mod2", "size": "200"}',),), + ((",",),), + (('{"name": "mod3", "size": "300"}',),), + (("]",),), + ] + + actual_calls = mock_app.display.call_args_list + print(actual_calls) + assert actual_calls == expected_calls + + result = "".join(call[0][0] for call in actual_calls) + parsed = json.loads(result) + assert parsed == [ + {"name": "mod1", "size": "100"}, + {"name": "mod2", "size": "200"}, + {"name": "mod3", "size": "300"}, + ] + + +def test_print_json_no_first(): + mock_app = MagicMock() + printed_yet = False + n_iterations = 3 + + modules_list = [ + [{"name": "", "size": ""}], + [{"name": "mod2", "size": "200"}], + [{"name": "mod3", "size": "300"}], + ] + + for i, modules in enumerate(modules_list): + print_json(mock_app, i, n_iterations, printed_yet, modules) + if i == 1: + printed_yet = True + + expected_calls = [ + (("[",),), + (('{"name": "mod2", "size": "200"}',),), + ((",",),), + (('{"name": "mod3", "size": "300"}',),), + (("]",),), + ] + + actual_calls = mock_app.display.call_args_list + print(actual_calls) + assert actual_calls == expected_calls + + result = "".join(call[0][0] for call in actual_calls) + parsed = json.loads(result) + assert parsed == [ + {"name": "mod2", "size": "200"}, + {"name": "mod3", "size": "300"}, + ] + + +def test_print_json_no_last(): + mock_app = MagicMock() + printed_yet = False + n_iterations = 3 + + modules_list = [ + [{"name": "mod1", "size": "100"}], + [{"name": "mod2", "size": "200"}], + [{"name": "", "size": ""}], + ] + + for i, modules in 
enumerate(modules_list): + if i != 0: + printed_yet = True + print_json(mock_app, i, n_iterations, printed_yet, modules) + + expected_calls = [ + (("[",),), + (('{"name": "mod1", "size": "100"}',),), + ((",",),), + (('{"name": "mod2", "size": "200"}',),), + (("]",),), + ] + + actual_calls = mock_app.display.call_args_list + print(actual_calls) + assert actual_calls == expected_calls + + result = "".join(call[0][0] for call in actual_calls) + parsed = json.loads(result) + assert parsed == [ + {"name": "mod1", "size": "100"}, + {"name": "mod2", "size": "200"}, + ] + + def test_extract_version_from_about_py_pathlib(): # Usa Path para compatibilidad multiplataforma fake_path = Path("some") / "module" / "__about__.py" diff --git a/ddev/tests/size/test_diff.py b/ddev/tests/size/test_diff.py new file mode 100644 index 0000000000000..f5ff3fc5000c4 --- /dev/null +++ b/ddev/tests/size/test_diff.py @@ -0,0 +1,54 @@ +# (C) Datadog, Inc. 2022-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +import os + +from ddev.cli.size.common import convert_to_human_readable_size +from ddev.cli.size.diff import get_diff + + +def to_native_path(path: str) -> str: + return path.replace("/", os.sep) + + +def test_get_diff(): + size_before = [ + {"Name": "foo", "Version": "1.0.0", "Size_Bytes": 1000, "Type": "Integration"}, + {"Name": "bar", "Version": "2.0.0", "Size_Bytes": 2000, "Type": "Integration"}, + {"Name": "deleted", "Version": "3.0.0", "Size_Bytes": 1500, "Type": "Integration"}, + ] + + size_after = [ + {"Name": "foo", "Version": "1.1.0", "Size_Bytes": 1200, "Type": "Integration"}, + {"Name": "bar", "Version": "2.0.0", "Size_Bytes": 2000, "Type": "Integration"}, + {"Name": "new", "Version": "0.1.0", "Size_Bytes": 800, "Type": "Integration"}, + ] + + result = get_diff(size_before, size_after, "Integration") + + expected = [ + { + "Name": "deleted (DELETED)", + "Version": "3.0.0", + "Type": "Integration", + "Size_Bytes": -1500, + "Size": 
convert_to_human_readable_size(-1500), + }, + { + "Name": "foo", + "Version": "1.0.0 -> 1.1.0", + "Type": "Integration", + "Size_Bytes": 200, + "Size": convert_to_human_readable_size(200), + }, + { + "Name": "new (NEW)", + "Version": "0.1.0", + "Type": "Integration", + "Size_Bytes": 800, + "Size": convert_to_human_readable_size(800), + }, + ] + + assert sorted(result, key=lambda x: x["Name"]) == expected diff --git a/ddev/tests/size/test_timeline.py b/ddev/tests/size/test_timeline.py new file mode 100644 index 0000000000000..0942efc3bb393 --- /dev/null +++ b/ddev/tests/size/test_timeline.py @@ -0,0 +1,158 @@ +import os +from datetime import datetime +from pathlib import Path +from unittest.mock import MagicMock, mock_open, patch + +from ddev.cli.size.timeline import ( + format_commit_data, + get_dependencies, + get_dependency_data, + get_dependency_size, + get_files, + get_version, + trim_modules, +) + + +def test_get_compressed_files(): + with ( + patch("os.walk", return_value=[(os.path.join("fake_repo", "datadog_checks"), [], ["__about__.py"])]), + patch("os.path.relpath", return_value=os.path.join("datadog_checks", "__about__.py")), + patch("os.path.exists", return_value=True), + patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), + patch("ddev.cli.size.timeline.is_valid_integration", return_value=True), + patch("ddev.cli.size.timeline.compress", return_value=1234), + patch("ddev.cli.size.timeline.extract_version_from_about_py", return_value='1.1.1'), + ): + result = get_files("fake_repo", "int1", "abc1234", datetime(2025, 4, 4).date(), "auth", "Added int1", [], True) + print(result) + assert result == [ + { + "Size_Bytes": 1234, + "Version": '1.1.1', + "Date": datetime(2025, 4, 4).date(), + "Author": "auth", + "Commit_Message": "Added int1", + "Commit_SHA": "abc1234", + } + ] + + +def test_get_compressed_files_deleted_only(): + repo_path = "fake_repo" + module = "foo" + commit = "abc1234" + date = datetime.strptime("Apr 5 2025", "%b %d 
%Y").date() + author = "Author" + message = "deleted module" + + with ( + patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), + patch("os.walk", return_value=[]), + patch("os.path.relpath", side_effect=lambda path, _: path.replace(f"{repo_path}{os.sep}", "")), + patch("os.path.exists", return_value=False), + ): + file_data = get_files(repo_path, module, commit, date, author, message, [], True) + + assert file_data == [ + { + "Size_Bytes": 0, + "Version": "Deleted", + "Date": date, + "Author": author, + "Commit_Message": "(DELETED) " + message, + "Commit_SHA": commit, + } + ] + + +def test_get_version(): + files = ["linux-x86_64_3.12.txt", "linux-x86_64_3.10.txt"] + version = get_version(files, "linux-x86_64") + assert version == "3.12" + + +def test_format_commit_data(): + date, message, commit = format_commit_data( + "Apr 4 2025", "this is a very long commit message that should be trimmed (#1234)", "abc1234def", "abc1234def" + ) + expected_date = datetime.strptime("Apr 4 2025", "%b %d %Y").date() + expected_message = "(NEW) this is a very long...(#1234)" + expected_commit = "abc1234" + assert date == expected_date + assert message == expected_message + assert commit == expected_commit + + +def test_trim_modules_keep_some_remove_some(): + modules = [ + {"Size_Bytes": 1000, "Version": "1.0.0"}, + {"Size_Bytes": 1100, "Version": "1.0.0"}, + {"Size_Bytes": 1400, "Version": "1.1.0"}, + ] + expected = [ + {"Size_Bytes": 1000, "Delta_Bytes": 0, "Delta": " ", "Version": "1.0.0"}, + {"Size_Bytes": 1400, "Delta_Bytes": 300, "Delta": "300 B", "Version": "1.0.0 -> 1.1.0"}, + ] + trimmed = trim_modules(modules, threshold=200) + assert trimmed == expected + + +def test_get_dependency(): + content = """dep1 @ https://example.com/dep1-1.1.1-.whl +dep2 @ https://example.com/dep2-1.1.2-.whl""" + with patch("builtins.open", mock_open(read_data=content)): + url, version = get_dependency_data(Path("some") / "path" / "file.txt", "dep2") + assert (url, version) == 
("https://example.com/dep2-1.1.2-.whl", "1.1.2") + + +def make_mock_response(size): + mock_response = MagicMock() + mock_response.__enter__.return_value = mock_response + mock_response.headers = {"Content-Length": size} + mock_response.raise_for_status = lambda: None + return mock_response + + +def test_get_dependency_size(): + mock_response = make_mock_response("45678") + with patch("requests.head", return_value=mock_response): + info = get_dependency_size( + "https://example.com/file-1.1.1-.whl", + "1.1.1", + "abc1234", + datetime(2025, 4, 4).date(), + "auth", + "Fixed bug", + True, + ) + assert info == { + "Size_Bytes": 45678, + "Version": "1.1.1", + "Date": datetime(2025, 4, 4).date(), + "Author": "auth", + "Commit_Message": "Fixed bug", + "Commit_SHA": "abc1234", + } + + +def test_get_compressed_dependencies(): + with ( + patch("os.path.exists", return_value=True), + patch("os.path.isdir", return_value=True), + patch("os.path.isfile", return_value=True), + patch("os.listdir", return_value=["linux-x86_64_3.12.txt"]), + patch("ddev.cli.size.timeline.get_dependency_data", return_value=("https://example.com/dep1.whl", '1.1.1')), + patch("ddev.cli.size.timeline.requests.head", return_value=make_mock_response("12345")), + ): + result = get_dependencies( + "fake_repo", "dep1", "linux-x86_64", "abc1234", datetime(2025, 4, 4).date(), "auth", "Added dep1", True + ) + assert result == { + "Size_Bytes": 12345, + "Version": '1.1.1', + "Date": datetime(2025, 4, 4).date(), + "Author": "auth", + "Commit_Message": "Added dep1", + "Commit_SHA": "abc1234", + } From 192b71894c5f5d30e0e5f8cd808e5c44511116a9 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 5 May 2025 11:33:15 +0200 Subject: [PATCH 39/70] Fix test --- ddev/src/ddev/cli/size/status.py | 1 + ddev/tests/cli/size/test_timeline.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index 7f09ec3558dc2..eedb7481fec00 100644 
--- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -79,6 +79,7 @@ def status( status_mode( app, repo_path, plat, ver, compressed, csv, markdown, json, i, path, show_gui, len(combinations) ) + else: status_mode( app, diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 895cf710a9e92..782477366258b 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -146,7 +146,7 @@ def test_timeline_no_changes_in_integration(ddev): ), ): result = ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--compressed") - assert result.exit_code != 0 + assert result.exit_code == 0 assert "No changes found" in result.output From 929adcae412349433aee58a4e641fc15f0e8f865 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Fri, 9 May 2025 09:21:28 +0200 Subject: [PATCH 40/70] simplify code --- ddev/src/ddev/cli/size/common.py | 638 ++++++++++++--------------- ddev/src/ddev/cli/size/diff.py | 175 +++++--- ddev/src/ddev/cli/size/status.py | 134 ++++-- ddev/src/ddev/cli/size/timeline.py | 352 ++++++++++----- ddev/tests/cli/size/test_diff.py | 5 +- ddev/tests/cli/size/test_timeline.py | 3 +- ddev/tests/size/test_common.py | 120 ++--- 7 files changed, 753 insertions(+), 674 deletions(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 8212b8b385b40..a105574993322 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -22,10 +22,6 @@ from ddev.cli.application import Application -''' - Custom typed dictionaries -''' - class FileDataEntry(TypedDict): Name: str # Integration/Dependency name @@ -58,11 +54,46 @@ class CommitEntryPlatformWithDelta(CommitEntryWithDelta): Platform: str # Target platform (e.g. 
linux-aarch64) +class Parameters(TypedDict): + app: Application + platform: str + version: str + compressed: bool + csv: bool + markdown: bool + json: bool + save_to_png_path: Optional[str] + show_gui: bool + + +class ParametersTimeline(TypedDict): + app: Application + module: str + threshold: Optional[int] + compressed: bool + csv: bool + markdown: bool + json: bool + save_to_png_path: Optional[str] + show_gui: bool + + +class ParametersTimelineIntegration(ParametersTimeline): + type: Literal["integration"] + first_commit: str + platform: None + + +class ParametersTimelineDependency(ParametersTimeline): + type: Literal["dependency"] + first_commit: None + platform: str + + def get_valid_platforms(repo_path: Union[Path, str]) -> Set[str]: """ Extracts the platforms we support from the .deps/resolved file names. """ - resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved")) platforms = [] for file in os.listdir(resolved_path): @@ -83,30 +114,11 @@ def get_valid_versions(repo_path: Union[Path, str]) -> Set[str]: return set(versions) -def convert_to_human_readable_size(size_bytes: float) -> str: - """ - Converts a size in bytes into a human-readable string (B, KB, MB, GB, or TB) - """ - for unit in [" B", " KB", " MB", " GB"]: - if abs(size_bytes) < 1024: - return str(round(size_bytes, 2)) + unit - size_bytes /= 1024 - return str(round(size_bytes, 2)) + " TB" +def is_correct_dependency(platform: str, version: str, name: str) -> bool: + return platform in name and version in name def is_valid_integration(path: str, included_folder: str, ignored_files: Set[str], git_ignore: List[str]) -> bool: - """ - Determines whether a given file path corresponds to a valid integration file. - - Args: - path: The file path to check. - included_folder: Required subfolder (e.g. 'datadog_checks') that marks valid integrations. - ignored_files: Set of filenames or patterns to exclude. - git_ignore: List of .gitignore patterns to exclude. 
- - Returns: - True if the file should be considered part of a valid integration, False otherwise. - """ # It is not an integration if path.startswith("."): return False @@ -123,60 +135,270 @@ def is_valid_integration(path: str, included_folder: str, ignored_files: Set[str return True -def is_correct_dependency(platform: str, version: str, name: str) -> bool: +def get_gitignore_files(repo_path: str | Path) -> List[str]: + gitignore_path = os.path.join(repo_path, ".gitignore") + with open(gitignore_path, "r", encoding="utf-8") as file: + gitignore_content = file.read() + ignored_patterns = [ + line.strip() for line in gitignore_content.splitlines() if line.strip() and not line.startswith("#") + ] + return ignored_patterns + + +def convert_to_human_readable_size(size_bytes: float) -> str: + for unit in [" B", " KB", " MB", " GB"]: + if abs(size_bytes) < 1024: + return str(round(size_bytes, 2)) + unit + size_bytes /= 1024 + return str(round(size_bytes, 2)) + " TB" + + +def compress(file_path: str) -> int: + compressor = zlib.compressobj() + compressed_size = 0 + # original_size = os.path.getsize(file_path) + with open(file_path, "rb") as f: + while chunk := f.read(8192): # Read in 8KB chunks + compressed_chunk = compressor.compress(chunk) + compressed_size += len(compressed_chunk) + compressed_size += len(compressor.flush()) + return compressed_size + + +def get_files(repo_path: str | Path, compressed: bool) -> List[FileDataEntry]: """ - Checks whether a dependency filename matches a given platform and Python version. + Calculates integration file sizes and versions from a repository. 
""" + ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} + git_ignore = get_gitignore_files(repo_path) + included_folder = "datadog_checks/" - return platform in name and version in name + integration_sizes: Dict[str, int] = {} + integration_versions: Dict[str, str] = {} + for root, _, files in os.walk(repo_path): + for file in files: + file_path = os.path.join(root, file) + relative_path = os.path.relpath(file_path, repo_path) -def print_json( - app: Application, - i: Optional[int], - n_iterations: Optional[int], - printed_yet: bool, - modules: ( - List[FileDataEntry] - | List[FileDataEntryPlatformVersion] - | List[CommitEntryWithDelta] - | List[CommitEntryPlatformWithDelta] - ), -) -> None: + if not is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): + continue + path = Path(relative_path) + parts = path.parts + + integration_name = parts[0] + + size = compress(file_path) if compressed else os.path.getsize(file_path) + integration_sizes[integration_name] = integration_sizes.get(integration_name, 0) + size + + if integration_name not in integration_versions and file == "__about__.py": + version = extract_version_from_about_py(file_path) + integration_versions[integration_name] = version + + return [ + { + "Name": name, + "Version": integration_versions.get(name, ""), + "Size_Bytes": size, + "Size": convert_to_human_readable_size(size), + "Type": "Integration", + } + for name, size in integration_sizes.items() + ] + + +def extract_version_from_about_py(path: str) -> str: + """ + Extracts the __version__ string from a given __about__.py file. 
+ """ + try: + with open(path, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if line.startswith("__version__"): + return line.split("=")[1].strip().strip("'\"") + except Exception: + pass + return "" + + +def get_dependencies(repo_path: str | Path, platform: str, version: str, compressed: bool) -> List[FileDataEntry]: + """ + Gets the list of dependencies for a given platform and Python version. + Each FileDataEntry includes: Name, Version, Size_Bytes, Size, and Type. + + Args: + repo_path: Path to the repository. + platform: Target platform. + version: Target Python version. + compressed: If True, measure compressed file sizes. If False, measure uncompressed sizes. + + Returns: + A list of FileDataEntry dictionaries containing the dependency information. + """ + resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved")) + + for filename in os.listdir(resolved_path): + file_path = os.path.join(resolved_path, filename) + + if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): + deps, download_urls, versions = get_dependencies_list(file_path) + return get_dependencies_sizes(deps, download_urls, versions, compressed) + return [] + + +def get_dependencies_list(file_path: str) -> Tuple[List[str], List[str], List[str]]: + """ + Parses a dependency file and extracts the dependency names, download URLs, and versions. """ - Prints a list of data entries as part of a JSON array. 
+ download_urls = [] + deps = [] + versions = [] + with open(file_path, "r", encoding="utf-8") as file: + file_content = file.read() + for line in file_content.splitlines(): + match = re.search(r"([\w\-\d\.]+) @ (https?://[^\s#]+)", line) + if not match: + raise WrongDependencyFormat("The dependency format 'name @ link' is no longer supported.") + name = match.group(1) + url = match.group(2) - This function is designed to be called multiple times, and ensures that: - - The opening bracket "[" is printed only once at the start (when i is None or 0). - - Each valid entry is printed on a separate line using JSON format. - - Commas are inserted appropriately between entries, but not before the first one. - - The closing bracket "]" is printed only at the final call (when i == n_iterations - 1). + deps.append(name) + download_urls.append(url) + version_match = re.search(rf"{re.escape(name)}-([0-9]+(?:\.[0-9]+)*)-", url) + if version_match: + versions.append(version_match.group(1)) + + return deps, download_urls, versions + + +def get_dependencies_sizes( + deps: List[str], download_urls: List[str], versions: List[str], compressed: bool +) -> List[FileDataEntry]: + """ + Calculates the sizes of dependencies, either compressed or uncompressed. Args: - app: Application instance used to display output. - i: Index of the current batch of data being printed. If None or 0, this is the first chunk. - n_iterations: Total number of iterations (chunks). Used to detect the last chunk. - printed_yet: Whether at least one entry has already been printed before this call. - modules: List of dictionaries to print. Only non-empty entries are included. + deps: List of dependency names. + download_urls: Corresponding download URLs for the dependencies. + versions: Corresponding version strings for the dependencies. + compressed: If True, use the Content-Length from the HTTP headers. + If False, download, extract, and compute actual uncompressed size. 
""" + file_data: List[FileDataEntry] = [] + for dep, url, version in zip(deps, download_urls, versions, strict=False): + if compressed: + response = requests.head(url) + response.raise_for_status() + size_str = response.headers.get("Content-Length") + if size_str is None: + raise ValueError(f"Missing size for {dep}") + size = int(size_str) + + else: + with requests.get(url, stream=True) as response: + response.raise_for_status() + wheel_data = response.content - if not i: - app.display("[") + with tempfile.TemporaryDirectory() as tmpdir: + wheel_path = Path(tmpdir) / "package.whl" + with open(wheel_path, "wb") as f: + f.write(wheel_data) + extract_path = Path(tmpdir) / "extracted" + with zipfile.ZipFile(wheel_path, "r") as zip_ref: + zip_ref.extractall(extract_path) - for idx, row in enumerate(modules): + size = 0 + for dirpath, _, filenames in os.walk(extract_path): + for name in filenames: + file_path = os.path.join(dirpath, name) + size += os.path.getsize(file_path) + file_data.append( + { + "Name": str(dep), + "Version": version, + "Size_Bytes": int(size), + "Size": convert_to_human_readable_size(size), + "Type": "Dependency", + } + ) + + return file_data + + +def format_modules( + modules: List[FileDataEntry], + platform: str, + py_version: str, + multiple_plats_and_vers: bool, +) -> List[FileDataEntryPlatformVersion] | List[FileDataEntry]: + """ + Formats the modules list, adding platform and Python version information if needed. + + If the modules list is empty, returns a default empty entry (with or without platform information). + + Args: + modules: List of modules to format. + platform: Platform string to add to each entry if needed. + version: Python version string to add to each entry if needed. + i: Index of the current (platform, version) combination being processed. + If None, it means the data is being processed for only one combination of platform and version. + + Returns: + A list of formatted entries. 
+ """ + if modules == [] and not multiple_plats_and_vers: + empty_entry: FileDataEntry = { + "Name": "", + "Version": "", + "Size_Bytes": 0, + "Size": "", + "Type": "", + } + return [empty_entry] + elif modules == []: + empty_entry_with_platform: FileDataEntryPlatformVersion = { + "Name": "", + "Version": "", + "Size_Bytes": 0, + "Size": "", + "Type": "", + "Platform": "", + "Python_Version": "", + } + return [empty_entry_with_platform] + elif multiple_plats_and_vers: + new_modules: List[FileDataEntryPlatformVersion] = [ + {**entry, "Platform": platform, "Python_Version": py_version} for entry in modules + ] + return new_modules + else: + return modules + + +def print_json( + app: Application, + modules: ( + List[FileDataEntry] + | List[FileDataEntryPlatformVersion] + | List[CommitEntryWithDelta] + | List[CommitEntryPlatformWithDelta] + ), +) -> None: + printed_yet = False + app.display("[") + for row in modules: if any(str(value).strip() not in ("", "0", "0001-01-01") for value in row.values()): - if printed_yet or (i != 0 and idx != 0): + if printed_yet: app.display(",") app.display(json.dumps(row, default=str)) printed_yet = True - if not n_iterations or i == n_iterations - 1: - app.display("]") + app.display("]") def print_csv( app: Application, - i: Optional[int], modules: ( List[FileDataEntry] | List[FileDataEntryPlatformVersion] @@ -184,20 +406,8 @@ def print_csv( | List[CommitEntryPlatformWithDelta] ), ) -> None: - """ - Prints a list of data entries in CSV format. - - This function is designed to be called multiple times, and ensures that: - - The headers are printed only once at the start (when i is None or 0). - - Each valid entry is printed on a separate line using CSV format. - Args: - app: Application instance used to display output. - i: Index of the current batch of data being printed. If None or 0, this is the first chunk. - modules: List of dictionaries to print. Only non-empty entries are included. 
- """ headers = [k for k in modules[0].keys() if k not in ["Size", "Delta"]] - if not i: - app.display(",".join(headers)) + app.display(",".join(headers)) for row in modules: if any(str(value).strip() not in ("", "0", "0001-01-01") for value in row.values()): @@ -206,7 +416,7 @@ def print_csv( def format(s: str) -> str: """ - Adds brackets to a value if it has a comma inside for the CSV + Wraps the string in double quotes if it contains a comma, for safe CSV formatting. """ return f'"{s}"' if "," in s else s @@ -221,18 +431,14 @@ def print_markdown( | List[CommitEntryPlatformWithDelta] ), ) -> None: - """ - Prints a list of entries as a Markdown table. - Only non-empty tables are printed. - """ if any(str(value).strip() not in ("", "0", "0001-01-01") for value in modules[0].values()): # table is not empty headers = [k for k in modules[0].keys() if "Bytes" not in k] - app.display(f"### {title}") - app.display("| " + " | ".join(headers) + " |") - app.display("| " + " | ".join("---" for _ in headers) + " |") + app.display_markdown(f"### {title}") + app.display_markdown("| " + " | ".join(headers) + " |") + app.display_markdown("| " + " | ".join("---" for _ in headers) + " |") for row in modules: - app.display("| " + " | ".join(format(str(row.get(h, ""))) for h in headers) + " |") + app.display_markdown("| " + " | ".join(format(str(row.get(h, ""))) for h in headers) + " |") def print_table( @@ -245,11 +451,6 @@ def print_table( | List[CommitEntryPlatformWithDelta] ), ) -> None: - """ - Prints a list of entries as a Rich table. - Only non-empty tables are printed. 
- """ - # if any(str(value).strip() not in ("", "0", "0001-01-01") for value in modules[0].values()): # table is not empty columns = [col for col in modules[0].keys() if "Bytes" not in col] modules_table: Dict[str, Dict[int, str]] = {col: {} for col in columns} for i, row in enumerate(modules): @@ -466,267 +667,6 @@ def rescale_intensity(val, min_val=0.3, max_val=0.8): plt.savefig(path, bbox_inches="tight", format="png") -def get_dependencies_sizes( - deps: List[str], download_urls: List[str], versions: List[str], compressed: bool -) -> List[FileDataEntry]: - """ - Calculates the sizes of dependencies, either compressed or uncompressed. - - Args: - deps: List of dependency names. - download_urls: Corresponding download URLs for the dependencies. - versions: Corresponding version strings for the dependencies. - compressed: If True, use the Content-Length from the HTTP headers. - If False, download, extract, and compute actual uncompressed size. - - Returns: - A list of FileDataEntry dictionaries with name, version, size in bytes, and human-readable size. 
- """ - file_data: List[FileDataEntry] = [] - for dep, url, version in zip(deps, download_urls, versions, strict=False): - if compressed: - response = requests.head(url) - response.raise_for_status() - size_str = response.headers.get("Content-Length") - if size_str is None: - raise ValueError(f"Missing size for {dep}") - size = int(size_str) - - else: - with requests.get(url, stream=True) as response: - response.raise_for_status() - wheel_data = response.content - - with tempfile.TemporaryDirectory() as tmpdir: - wheel_path = Path(tmpdir) / "package.whl" - with open(wheel_path, "wb") as f: - f.write(wheel_data) - extract_path = Path(tmpdir) / "extracted" - with zipfile.ZipFile(wheel_path, "r") as zip_ref: - zip_ref.extractall(extract_path) - - size = 0 - for dirpath, _, filenames in os.walk(extract_path): - for name in filenames: - file_path = os.path.join(dirpath, name) - size += os.path.getsize(file_path) - file_data.append( - { - "Name": str(dep), - "Version": version, - "Size_Bytes": int(size), - "Size": convert_to_human_readable_size(size), - "Type": "Dependency", - } - ) - - return file_data - - -def get_files(repo_path: str | Path, compressed: bool) -> List[FileDataEntry]: - """ - Calculates integration file sizes and versions from a repository. - - Args: - repo_path: Path to the repository root. - compressed: If True, measure compressed file sizes. If False, measure uncompressed sizes. - - Returns: - A list of FileDataEntry dictionaries with name, version, size in bytes, and human-readable size. 
- """ - ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} - git_ignore = get_gitignore_files(repo_path) - included_folder = "datadog_checks/" - - integration_sizes: Dict[str, int] = {} - integration_versions: Dict[str, str] = {} - - for root, _, files in os.walk(repo_path): - for file in files: - file_path = os.path.join(root, file) - relative_path = os.path.relpath(file_path, repo_path) - - if not is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): - continue - path = Path(relative_path) - parts = path.parts - - integration_name = parts[0] - - size = compress(file_path) if compressed else os.path.getsize(file_path) - integration_sizes[integration_name] = integration_sizes.get(integration_name, 0) + size - - if integration_name not in integration_versions and file == "__about__.py": - version = extract_version_from_about_py(file_path) - integration_versions[integration_name] = version - - return [ - { - "Name": name, - "Version": integration_versions.get(name, ""), - "Size_Bytes": size, - "Size": convert_to_human_readable_size(size), - "Type": "Integration", - } - for name, size in integration_sizes.items() - ] - - -def get_dependencies_list(file_path: str) -> Tuple[List[str], List[str], List[str]]: - """ - Parses a dependency file and extracts the dependency names, download URLs, and versions. - - Args: - file_path: Path to the file containing the dependencies. 
- - Returns: - A tuple of three lists: - - List of dependency names - - List of download URLs - - List of extracted version strings - """ - download_urls = [] - deps = [] - versions = [] - with open(file_path, "r", encoding="utf-8") as file: - file_content = file.read() - for line in file_content.splitlines(): - match = re.search(r"([\w\-\d\.]+) @ (https?://[^\s#]+)", line) - if not match: - raise WrongDependencyFormat("The dependency format 'name @ link' is no longer supported.") - name = match.group(1) - url = match.group(2) - - deps.append(name) - download_urls.append(url) - version_match = re.search(rf"{re.escape(name)}-([0-9]+(?:\.[0-9]+)*)-", url) - if version_match: - versions.append(version_match.group(1)) - - return deps, download_urls, versions - - -def format_modules( - modules: List[FileDataEntry], platform: str, version: str, i: Optional[int] -) -> List[FileDataEntryPlatformVersion] | List[FileDataEntry]: - """ - Formats the modules list, adding platform and Python version information if needed. - - If the modules list is empty, returns a default empty entry (with or without platform information). - - Args: - modules: List of modules to format. - platform: Platform string to add to each entry if needed. - version: Python version string to add to each entry if needed. - i: Index of the current (platform, version) combination being processed. - If None, it means the data is being processed for only one combination of platform and version. - - Returns: - A list of formatted entries. 
- """ - if modules == [] and i is None: - empty_entry: FileDataEntry = { - "Name": "", - "Version": "", - "Size_Bytes": 0, - "Size": "", - "Type": "", - } - return [empty_entry] - elif modules == []: - empty_entry_with_platform: FileDataEntryPlatformVersion = { - "Name": "", - "Version": "", - "Size_Bytes": 0, - "Size": "", - "Type": "", - "Platform": "", - "Python_Version": "", - } - return [empty_entry_with_platform] - elif i is not None: - new_modules: List[FileDataEntryPlatformVersion] = [ - {**entry, "Platform": platform, "Python_Version": version} for entry in modules - ] - return new_modules - else: - return modules - - -def extract_version_from_about_py(path: str) -> str: - """ - Extracts the __version__ string from a given __about__.py file. - - Args: - path: Path to the __about__.py file. - - Returns: - The extracted version string if found, otherwise an empty string. - """ - try: - with open(path, "r", encoding="utf-8") as f: - for line in f: - line = line.strip() - if line.startswith("__version__"): - return line.split("=")[1].strip().strip("'\"") - except Exception: - pass - return "" - - -def get_dependencies(repo_path: str | Path, platform: str, version: str, compressed: bool) -> List[FileDataEntry]: - """ - Gets the list of dependencies for a given platform and Python version. - Each FileDataEntry includes: Name, Version, Size_Bytes, Size, and Type. - - Args: - repo_path: Path to the repository. - platform: Target platform. - version: Target Python version. - compressed: If True, measure compressed file sizes. If False, measure uncompressed sizes. - - Returns: - A list of FileDataEntry dictionaries containing the dependency information. 
- """ - resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved")) - - for filename in os.listdir(resolved_path): - file_path = os.path.join(resolved_path, filename) - - if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): - deps, download_urls, versions = get_dependencies_list(file_path) - return get_dependencies_sizes(deps, download_urls, versions, compressed) - return [] - - -def get_gitignore_files(repo_path: str | Path) -> List[str]: - """ - Returns the list of non-commented files from the .gitignore file. - """ - gitignore_path = os.path.join(repo_path, ".gitignore") - with open(gitignore_path, "r", encoding="utf-8") as file: - gitignore_content = file.read() - ignored_patterns = [ - line.strip() for line in gitignore_content.splitlines() if line.strip() and not line.startswith("#") - ] - return ignored_patterns - - -def compress(file_path: str) -> int: - ''' - Returns the compressed size (in bytes) of a file using zlib - ''' - compressor = zlib.compressobj() - compressed_size = 0 - # original_size = os.path.getsize(file_path) - with open(file_path, "rb") as f: - while chunk := f.read(8192): # Read in 8KB chunks - compressed_chunk = compressor.compress(chunk) - compressed_size += len(compressed_chunk) - compressed_size += len(compressor.flush()) - return compressed_size - - class WrongDependencyFormat(Exception): def __init__(self, mensaje: str) -> None: super().__init__(mensaje) @@ -814,9 +754,9 @@ def get_commit_metadata(self, commit: str) -> Tuple[str, str, str]: return date, author, message def get_creation_commit_module(self, integration: str) -> str: - ''' + """ Returns the first commit (SHA) where the given integration was introduced. 
- ''' + """ return self._run(f'git log --reverse --format="%H" -- {integration}')[0] def __exit__( diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index 86f4dadd49ec9..489cebdb4ae8b 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -4,7 +4,7 @@ import os from datetime import datetime -from typing import List, Optional, Tuple, cast +from typing import List, Literal, Optional, Tuple, cast, overload import click from rich.console import Console @@ -14,7 +14,9 @@ from .common import ( FileDataEntry, + FileDataEntryPlatformVersion, GitRepo, + Parameters, convert_to_human_readable_size, format_modules, get_dependencies, @@ -28,7 +30,7 @@ print_table, ) -console = Console() +console = Console(stderr=True) MINIMUM_DATE = datetime.strptime("Sep 17 2024", "%b %d %Y").date() @@ -72,6 +74,7 @@ def diff( BarColumn(), TimeElapsedColumn(), transient=True, + console=console, ) as progress: task = progress.add_task("[cyan]Calculating differences...", total=None) if sum([csv, markdown, json]) > 1: @@ -88,6 +91,7 @@ def diff( raise click.BadParameter("Commit hashes must be different") repo_url = app.repo.path + with GitRepo(repo_url) as gitRepo: try: date_str, _, _ = gitRepo.get_commit_metadata(first_commit) @@ -101,115 +105,140 @@ def diff( elif version and version not in valid_versions: raise ValueError(f"Invalid version: {version}") if platform is None or version is None: + modules_plat_ver: List[FileDataEntryPlatformVersion] = [] platforms = valid_platforms if platform is None else [platform] versions = valid_versions if version is None else [version] progress.remove_task(task) - printed_yet = False combinations = [(p, v) for p in platforms for v in versions] - for i, (plat, ver) in enumerate(combinations): + for plat, ver in combinations: path = None if save_to_png_path: base, ext = os.path.splitext(save_to_png_path) path = f"{base}_{plat}_{ver}{ext}" - - printed_yet = diff_mode( - app, + parameters: Parameters = { + 
"app": app, + "platform": plat, + "version": ver, + "compressed": compressed, + "csv": csv, + "markdown": markdown, + "json": json, + "save_to_png_path": path, + "show_gui": show_gui, + } + multiple_plats_and_vers: Literal[True] = True + modules_plat_ver.extend( + diff_mode( + gitRepo, + first_commit, + second_commit, + parameters, + progress, + multiple_plats_and_vers, + ) + ) + if csv: + print_csv(app, modules_plat_ver) + elif json: + print_json(app, modules_plat_ver) + else: + progress.remove_task(task) + modules: List[FileDataEntry] = [] + multiple_plat_and_ver: Literal[False] = False + base_parameters: Parameters = { + "app": app, + "platform": platform, + "version": version, + "compressed": compressed, + "csv": csv, + "markdown": markdown, + "json": json, + "save_to_png_path": save_to_png_path, + "show_gui": show_gui, + } + modules.extend( + diff_mode( gitRepo, first_commit, second_commit, - plat, - ver, - compressed, - csv, - markdown, - json, - i, + base_parameters, progress, - path, - show_gui, - len(combinations), - printed_yet, + multiple_plat_and_ver, ) - else: - progress.remove_task(task) - - diff_mode( - app, - gitRepo, - first_commit, - second_commit, - platform, - version, - compressed, - csv, - markdown, - json, - None, - progress, - save_to_png_path, - show_gui, - None, - False, ) + if csv: + print_csv(app, modules) + elif json: + print_json(app, modules) except Exception as e: progress.stop() app.abort(str(e)) return None +@overload def diff_mode( - app: Application, gitRepo: GitRepo, first_commit: str, second_commit: str, - platform: str, - version: str, - compressed: bool, - csv: bool, - markdown: bool, - json: bool, - i: Optional[int], + params: Parameters, progress: Progress, - save_to_png_path: Optional[str], - show_gui: bool, - n_iterations: Optional[int], - printed_yet: bool, -) -> bool: + multiple_plats_and_vers: Literal[True], +) -> List[FileDataEntryPlatformVersion]: ... 
+@overload +def diff_mode( + gitRepo: GitRepo, + first_commit: str, + second_commit: str, + params: Parameters, + progress: Progress, + multiple_plats_and_vers: Literal[False], +) -> List[FileDataEntry]: ... +def diff_mode( + gitRepo: GitRepo, + first_commit: str, + second_commit: str, + params: Parameters, + progress: Progress, + multiple_plats_and_vers: bool, +) -> List[FileDataEntryPlatformVersion] | List[FileDataEntry]: files_b, dependencies_b, files_a, dependencies_a = get_repo_info( - gitRepo, platform, version, first_commit, second_commit, compressed, progress + gitRepo, params["platform"], params["version"], first_commit, second_commit, params["compressed"], progress ) integrations = get_diff(files_b, files_a, "Integration") dependencies = get_diff(dependencies_b, dependencies_a, "Dependency") - if integrations + dependencies == [] and not csv and not json: - app.display(f"No size differences were detected between the selected commits for {platform}") + + if integrations + dependencies == [] and not params["csv"] and not params["json"]: + params["app"].display( + f"No size differences were detected between the selected commits for {params['platform']}" + ) else: - formated_modules = format_modules(integrations + dependencies, platform, version, i) - formated_modules.sort(key=lambda x: abs(cast(int, x["Size_Bytes"])), reverse=True) - for module in formated_modules: + formatted_modules = format_modules( + integrations + dependencies, params["platform"], params["version"], multiple_plats_and_vers + ) + formatted_modules.sort(key=lambda x: abs(cast(int, x["Size_Bytes"])), reverse=True) + for module in formatted_modules: if module["Size_Bytes"] > 0: module["Size"] = f"+{module['Size']}" - if csv: - print_csv(app, i, formated_modules) - elif json: - print_json(app, i, n_iterations, printed_yet, formated_modules) - elif markdown: - print_markdown(app, "Differences between selected commits", formated_modules) - else: - print_table(app, "Differences between selected 
commits", formated_modules) - if show_gui or save_to_png_path: + if params["markdown"]: + print_markdown(params["app"], "Differences between selected commits", formatted_modules) + elif not params["csv"] and not params["json"]: + print_table(params["app"], "Differences between selected commits", formatted_modules) + + if params["show_gui"] or params["save_to_png_path"]: plot_treemap( - formated_modules, - f"Disk Usage Differences for {platform} and Python version {version}", - show_gui, + formatted_modules, + f"Disk Usage Differences for {params['platform']} and Python version {params['version']}", + params["show_gui"], "diff", - save_to_png_path, + params["save_to_png_path"], ) - if integrations + dependencies != []: - printed_yet = True - return printed_yet + return formatted_modules + + return [] def get_repo_info( diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index eedb7481fec00..ccb29e1289e91 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -2,9 +2,9 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import os +import os # noqa: I001 from pathlib import Path -from typing import Optional +from typing import List, Optional, Literal, overload import click from rich.console import Console @@ -12,6 +12,9 @@ from ddev.cli.application import Application from .common import ( + FileDataEntry, + FileDataEntryPlatformVersion, + Parameters, format_modules, get_dependencies, get_files, @@ -24,7 +27,7 @@ print_table, ) -console = Console() +console = Console(stderr=True) @click.command() @@ -67,72 +70,107 @@ def status( raise ValueError(f"Invalid platform: {platform}") elif version and version not in valid_versions: raise ValueError(f"Invalid version: {version}") + if platform is None or version is None: + modules_plat_ver: List[FileDataEntryPlatformVersion] = [] platforms = valid_platforms if platform is None else [platform] versions = valid_versions if version is 
None else [version] combinations = [(p, v) for p in platforms for v in versions] - for i, (plat, ver) in enumerate(combinations): + for plat, ver in combinations: + multiple_plats_and_vers: Literal[True] = True path = None if save_to_png_path: base, ext = os.path.splitext(save_to_png_path) path = f"{base}_{plat}_{ver}{ext}" - status_mode( - app, repo_path, plat, ver, compressed, csv, markdown, json, i, path, show_gui, len(combinations) + parameters: Parameters = { + "app": app, + "platform": plat, + "version": ver, + "compressed": compressed, + "csv": csv, + "markdown": markdown, + "json": json, + "save_to_png_path": path, + "show_gui": show_gui, + } + modules_plat_ver.extend( + status_mode( + repo_path, + parameters, + multiple_plats_and_vers, + ) ) - + if csv: + print_csv(app, modules_plat_ver) + elif json: + print_json(app, modules_plat_ver) else: - status_mode( - app, - repo_path, - platform, - version, - compressed, - csv, - markdown, - json, - None, - save_to_png_path, - show_gui, - None, + modules: List[FileDataEntry] = [] + multiple_plat_and_ver: Literal[False] = False + base_parameters: Parameters = { + "app": app, + "platform": platform, + "version": version, + "compressed": compressed, + "csv": csv, + "markdown": markdown, + "json": json, + "save_to_png_path": save_to_png_path, + "show_gui": show_gui, + } + modules.extend( + status_mode( + repo_path, + base_parameters, + multiple_plat_and_ver, + ) ) + if csv: + print_csv(app, modules) + elif json: + print_json(app, modules) except Exception as e: app.abort(str(e)) +@overload def status_mode( - app: Application, repo_path: Path, - platform: str, - version: str, - compressed: bool, - csv: bool, - markdown: bool, - json: bool, - i: Optional[int], - save_to_png_path: Optional[str], - show_gui: bool, - n_iterations: Optional[int], -) -> None: + params: Parameters, + multiple_plats_and_vers: Literal[True], +) -> List[FileDataEntryPlatformVersion]: ... 
+@overload +def status_mode( + repo_path: Path, + params: Parameters, + multiple_plats_and_vers: Literal[False], +) -> List[FileDataEntry]: ... +def status_mode( + repo_path: Path, + params: Parameters, + multiple_plats_and_vers: bool, +) -> List[FileDataEntryPlatformVersion] | List[FileDataEntry]: with console.status("[cyan]Calculating sizes...", spinner="dots"): - modules = get_files(repo_path, compressed) + get_dependencies(repo_path, platform, version, compressed) - formated_modules = format_modules(modules, platform, version, i) - formated_modules.sort(key=lambda x: x["Size_Bytes"], reverse=True) + modules = get_files(repo_path, params["compressed"]) + get_dependencies( + repo_path, params["platform"], params["version"], params["compressed"] + ) - if csv: - print_csv(app, i, formated_modules) - elif json: - print_json(app, i, n_iterations, False, formated_modules) - elif markdown: - print_markdown(app, "Status", formated_modules) - else: - print_table(app, "Status", formated_modules) + formatted_modules = format_modules(modules, params["platform"], params["version"], multiple_plats_and_vers) + formatted_modules.sort(key=lambda x: x["Size_Bytes"], reverse=True) - if show_gui or save_to_png_path: + if params["markdown"]: + print_markdown(params["app"], "Status", formatted_modules) + elif not params["csv"] and not params["json"]: + print_table(params["app"], "Status", formatted_modules) + + if params["show_gui"] or params["save_to_png_path"]: plot_treemap( - formated_modules, - f"Disk Usage Status for {platform} and Python version {version}", - show_gui, + formatted_modules, + f"Disk Usage Status for {params['platform']} and Python version {params['version']}", + params["show_gui"], "status", - save_to_png_path, + params["save_to_png_path"], ) + + return formatted_modules diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index d88daa4fd9df2..be190f880b1d9 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ 
b/ddev/src/ddev/cli/size/timeline.py @@ -4,7 +4,7 @@ import zipfile from datetime import date, datetime from pathlib import Path -from typing import List, Optional, Set, Tuple +from typing import List, Literal, Optional, Set, Tuple, overload import click import matplotlib.pyplot as plt @@ -19,6 +19,8 @@ CommitEntryPlatformWithDelta, CommitEntryWithDelta, GitRepo, + ParametersTimelineDependency, + ParametersTimelineIntegration, WrongDependencyFormat, compress, convert_to_human_readable_size, @@ -36,7 +38,7 @@ MINIMUM_DATE_DEPENDENCIES = datetime.strptime( "Apr 3 2024", "%b %d %Y" ).date() # Dependencies not available before this date due to a storage change -console = Console() +console = Console(stderr=True) @click.command() @@ -94,9 +96,12 @@ def timeline( BarColumn(), TimeElapsedColumn(), transient=True, + console=console, ) as progress: module = name # module is the name of the integration or the dependency - if initial_commit and final_commit and len(initial_commit) < 7 and len(final_commit) < 7: + if sum([csv, markdown, json]) > 1: + raise click.BadParameter("Only one output format can be selected: --csv, --markdown, or --json") + elif initial_commit and final_commit and len(initial_commit) < 7 and len(final_commit) < 7: raise click.BadParameter("Commit hashes must be at least 7 characters long") elif initial_commit and len(initial_commit) < 7: raise click.BadParameter("Initial commit hash must be at least 7 characters long.", param_hint="initial") @@ -106,6 +111,7 @@ def timeline( raise click.BadParameter("Commit hashes must be different") task = progress.add_task("[cyan]Calculating timeline...", total=None) url = app.repo.path + with GitRepo(url) as gitRepo: try: if final_commit and type == "dependency": @@ -120,14 +126,14 @@ def timeline( commits = gitRepo.get_module_commits(folder, initial_commit, final_commit, time) first_commit = gitRepo.get_creation_commit_module(module) gitRepo.checkout_commit(commits[-1]) - if type == 'dependency': + if type == 
"dependency": valid_platforms = get_valid_platforms(gitRepo.repo_dir) if platform and platform not in valid_platforms: raise ValueError(f"Invalid platform: {platform}") if commits == [""] and type == "integration" and module_exists(gitRepo.repo_dir, module): progress.remove_task(task) progress.stop() - app.display(f"No changes found for {type}: {module}") + app.display_error(f"No changes found for {type}: {module}") return elif commits == [""] and type == "integration" and not module_exists(gitRepo.repo_dir, module): raise ValueError(f"Integration {module} not found in latest commit, is the name correct?") @@ -149,113 +155,197 @@ def timeline( elif type == "dependency" and commits == [""]: progress.remove_task(task) progress.stop() - - app.display(f"No changes found for {type}: {module}") + app.display_error(f"No changes found for {type}: {module}") return - printed_yet = False - if type == "dependency" and platform is None: + if type == "dependency": + modules_plat: List[CommitEntryPlatformWithDelta] = [] + multiple_plats_and_vers: Literal[True] = True + progress.remove_task(task) + dep_parameters: ParametersTimelineDependency + if not platform: + for plat in valid_platforms: + path = None + if save_to_png_path: + base, ext = os.path.splitext(save_to_png_path) + path = f"{base}_{plat}{ext}" + dep_parameters = { + "app": app, + "type": "dependency", + "module": module, + "threshold": threshold, + "platform": plat, + "compressed": compressed, + "csv": csv, + "markdown": markdown, + "json": json, + "save_to_png_path": path, + "show_gui": show_gui, + "first_commit": None, + } + modules_plat.extend( + timeline_mode( + gitRepo, + commits, + dep_parameters, + multiple_plats_and_vers, + progress, + ) + ) + else: + dep_parameters = { + "app": app, + "type": "dependency", + "module": module, + "threshold": threshold, + "platform": platform, + "compressed": compressed, + "csv": csv, + "markdown": markdown, + "json": json, + "save_to_png_path": save_to_png_path, + "show_gui": 
show_gui, + "first_commit": None, + } + modules_plat.extend( + timeline_mode( + gitRepo, + commits, + dep_parameters, + multiple_plats_and_vers, + progress, + ) + ) + if csv: + print_csv(app, modules_plat) + elif json: + print_json(app, modules_plat) + else: + modules: List[CommitEntryWithDelta] = [] + multiple_plat_and_ver: Literal[False] = False + int_parameters: ParametersTimelineIntegration = { + "app": app, + "type": "integration", + "module": module, + "threshold": threshold, + "platform": None, + "compressed": compressed, + "csv": csv, + "markdown": markdown, + "json": json, + "save_to_png_path": save_to_png_path, + "show_gui": show_gui, + "first_commit": first_commit, + } progress.remove_task(task) - for i, plat in enumerate(valid_platforms): - path = save_to_png_path - if save_to_png_path: - base, ext = os.path.splitext(save_to_png_path) - path = f"{base}_{plat}{ext}" - printed_yet = timeline_mode( - app, + modules.extend( + timeline_mode( gitRepo, - type, - module, commits, - threshold, - plat, - compressed, - csv, - markdown, - json, - i, - None, + int_parameters, + multiple_plat_and_ver, progress, - path, - show_gui, - len(valid_platforms), - printed_yet, ) - else: - progress.remove_task(task) - timeline_mode( - app, - gitRepo, - type, - module, - commits, - threshold, - platform, - compressed, - csv, - markdown, - json, - None, - first_commit, - progress, - save_to_png_path, - show_gui, - None, - printed_yet, ) + if csv: + print_csv(app, modules) + elif json: + print_json(app, modules) except Exception as e: progress.stop() - app.abort(str(e)) +@overload def timeline_mode( - app: Application, gitRepo: GitRepo, - type: str, - module: str, commits: List[str], - threshold: Optional[int], - platform: Optional[str], - compressed: bool, - csv: bool, - markdown: bool, - json: bool, - i: Optional[int], - first_commit: Optional[str], + params: ParametersTimelineDependency, + multiple_plats_and_vers: Literal[True], progress: Progress, - save_to_png_path: str, - 
show_gui: bool, - n_iterations: Optional[int], - printed_yet: bool, -) -> bool: - modules = get_repo_info(gitRepo, type, platform, module, commits, compressed, first_commit, progress) - trimmed_modules = trim_modules(modules, threshold) - grouped_modules = format_modules(trimmed_modules, platform, i) - if csv: - print_csv(app, i, grouped_modules) - elif json: - print_json(app, i, n_iterations, printed_yet, grouped_modules) - elif markdown: - print_markdown(app, "Timeline for " + module, grouped_modules) +) -> List[CommitEntryPlatformWithDelta]: ... + + +@overload +def timeline_mode( + gitRepo: GitRepo, + commits: List[str], + params: ParametersTimelineIntegration, + multiple_plats_and_vers: Literal[False], + progress: Progress, +) -> List[CommitEntryWithDelta]: ... + + +@overload +def timeline_mode( + gitRepo: GitRepo, + commits: List[str], + params: ParametersTimelineDependency, + multiple_plats_and_vers: Literal[False], + progress: Progress, +) -> List[CommitEntryWithDelta]: ... + + +def timeline_mode( + gitRepo: GitRepo, + commits: List[str], + params: ParametersTimelineIntegration | ParametersTimelineDependency, + multiple_plats_and_vers: bool, + progress: Progress, +) -> List[CommitEntryWithDelta] | List[CommitEntryPlatformWithDelta]: + if params["type"] == "integration": + modules = get_repo_info( + gitRepo, + params, + commits, + progress, + ) else: - print_table(app, "Timeline for " + module, grouped_modules) - if show_gui or save_to_png_path: - plot_linegraph(grouped_modules, module, platform, show_gui, save_to_png_path) - if modules != []: - printed_yet = True + modules = get_repo_info( + gitRepo, + params, + commits, + progress, + ) + + trimmed_modules = trim_modules(modules, params["threshold"]) + formatted_modules = format_modules(trimmed_modules, params["platform"], multiple_plats_and_vers) + + if params["markdown"]: + print_markdown(params["app"], "Timeline for " + params["module"], formatted_modules) + elif not params["csv"] and not params["json"]: 
+ print_table(params["app"], "Timeline for " + params["module"], formatted_modules) + + if params["show_gui"] or params["save_to_png_path"]: + plot_linegraph( + formatted_modules, params["module"], params["platform"], params["show_gui"], params["save_to_png_path"] + ) - return printed_yet + return formatted_modules +@overload def get_repo_info( gitRepo: GitRepo, - type: str, - platform: Optional[str], - module: str, + params: ParametersTimelineIntegration, + commits: List[str], + progress: Progress, +) -> List[CommitEntry]: ... + + +@overload +def get_repo_info( + gitRepo: GitRepo, + params: ParametersTimelineDependency, + commits: List[str], + progress: Progress, +) -> List[CommitEntry]: ... + + +def get_repo_info( + gitRepo: GitRepo, + params: ParametersTimelineIntegration | ParametersTimelineDependency, commits: List[str], - compressed: bool, - first_commit: Optional[str], progress: Progress, ) -> List[CommitEntry]: """ @@ -263,11 +353,8 @@ def get_repo_info( Args: gitRepo: Active GitRepo instance. - type: integration/dependency. - platform: Target platform (only used for dependencies). - module: Integration or dependency name. + params: Parameters Typed Dictionary containing module name, type, platform, and other configuration options. commits: List of commits to process. - compressed: Whether to measure compressed sizes. first_commit: First commit hash where the given integration was introduced (only for integrations). progress: Progress bar instance. @@ -275,21 +362,39 @@ def get_repo_info( A list of CommitEntry objects with size, version, date, author, commit message and commit hash. 
""" with progress: - if type == "integration": - file_data = process_commits(commits, module, gitRepo, progress, platform, type, compressed, first_commit) + if params["type"] == "integration": + file_data = process_commits(commits, params, gitRepo, progress, params["first_commit"]) else: - file_data = process_commits(commits, module, gitRepo, progress, platform, type, compressed, None) + file_data = process_commits(commits, params, gitRepo, progress, params["first_commit"]) + return file_data +@overload def process_commits( commits: List[str], - module: str, + params: ParametersTimelineIntegration, + gitRepo: GitRepo, + progress: Progress, + first_commit: str, +) -> List[CommitEntry]: ... + + +@overload +def process_commits( + commits: List[str], + params: ParametersTimelineDependency, + gitRepo: GitRepo, + progress: Progress, + first_commit: None, +) -> List[CommitEntry]: ... + + +def process_commits( + commits: List[str], + params: ParametersTimelineIntegration | ParametersTimelineDependency, gitRepo: GitRepo, progress: Progress, - platform: Optional[str], - type: str, - compressed: bool, first_commit: Optional[str], ) -> List[CommitEntry]: """ @@ -300,12 +405,9 @@ def process_commits( Args: commits: List of commit SHAs to process. - module: Integration or dependency name. + params: ParametersTimeline dict containing module name, type, platform, and other configuration options. gitRepo: GitRepo instance managing the repository. progress: Progress bar instance. - platform: Target platform name (only for dependencies). - type: integration/dependency. - compressed: Whether to measure compressed sizes. first_commit: First commit hash where the given integration was introduced (only for integrations). 
Returns: @@ -315,21 +417,43 @@ def process_commits( task = progress.add_task("[cyan]Processing commits...", total=len(commits)) repo = gitRepo.repo_dir - folder = module if type == "integration" else ".deps/resolved" + folder = params["module"] if params["type"] == "integration" else ".deps/resolved" + for commit in commits: gitRepo.sparse_checkout_commit(commit, folder) date_str, author, message = gitRepo.get_commit_metadata(commit) date, message, commit = format_commit_data(date_str, message, commit, first_commit) - if type == "dependency" and date > MINIMUM_DATE_DEPENDENCIES: - assert platform is not None - result = get_dependencies(repo, module, platform, commit, date, author, message, compressed) + + if params["type"] == "dependency" and date > MINIMUM_DATE_DEPENDENCIES: + assert params["platform"] is not None + result = get_dependencies( + repo, + params["module"], + params["platform"], + commit, + date, + author, + message, + params["compressed"], + ) if result: file_data.append(result) - elif type == "integration": - file_data = get_files(repo, module, commit, date, author, message, file_data, compressed) + + elif params["type"] == "integration": + file_data = get_files( + repo, + params["module"], + commit, + date, + author, + message, + file_data, + params["compressed"], + ) + progress.advance(task) - progress.remove_task(task) + progress.remove_task(task) return file_data @@ -558,7 +682,9 @@ def get_version(files: List[str], platform: str) -> str: def format_modules( - modules: List[CommitEntryWithDelta], platform: Optional[str], i: Optional[int] + modules: List[CommitEntryWithDelta], + platform: Optional[str], + multiple_plats_and_vers: bool, ) -> List[CommitEntryWithDelta] | List[CommitEntryPlatformWithDelta]: """ Formats the modules list, adding platform and Python version information if needed. @@ -575,7 +701,7 @@ def format_modules( Returns: A list of formatted entries. 
""" - if modules == [] and i is not None and platform: + if modules == [] and multiple_plats_and_vers and platform: empty_module_platform: CommitEntryPlatformWithDelta = { "Size_Bytes": 0, "Version": "", @@ -600,7 +726,7 @@ def format_modules( "Delta": " ", } return [empty_module] - elif i is not None and platform: + elif multiple_plats_and_vers and platform: new_modules: List[CommitEntryPlatformWithDelta] = [{**entry, "Platform": platform} for entry in modules] return new_modules else: diff --git a/ddev/tests/cli/size/test_diff.py b/ddev/tests/cli/size/test_diff.py index c6f1110153973..1ce2d1c1db1c1 100644 --- a/ddev/tests/cli/size/test_diff.py +++ b/ddev/tests/cli/size/test_diff.py @@ -67,7 +67,6 @@ def get_compressed_dependencies_side_effect(_, __, ___, ____): def test_diff_no_args(ddev, mock_size_diff_dependencies): result = ddev("size", "diff", "commit1", "commit2", "--compressed") - print(result.output) assert result.exit_code == 0 @@ -95,11 +94,11 @@ def test_diff_no_differences(ddev): patch("ddev.cli.size.diff.GitRepo.__enter__", return_value=fake_repo), patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None), patch( - "ddev.cli.size.status.get_valid_platforms", + "ddev.cli.size.diff.get_valid_platforms", return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), ), patch( - "ddev.cli.size.status.get_valid_versions", + "ddev.cli.size.diff.get_valid_versions", return_value=({'3.12'}), ), patch.object(fake_repo, "checkout_commit"), diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 782477366258b..436547cf9ce86 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -42,6 +42,7 @@ def app(): def test_timeline_integration_compressed(ddev, mock_timeline_gitrepo, app): result = ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--compressed", obj=app) + print(result.output) assert result.exit_code == 0 @@ -128,7 +129,7 @@ def 
test_timeline_invalid_platform(ddev): assert result.exit_code != 0 -def test_timeline_no_changes_in_integration(ddev): +def test_timeline_integration_no_changes(ddev): mock_git_repo = MagicMock() mock_git_repo.repo_dir = "fake_repo" mock_git_repo.get_module_commits.return_value = [""] diff --git a/ddev/tests/size/test_common.py b/ddev/tests/size/test_common.py index 5e10d14f0c730..b8ae30ba35cc1 100644 --- a/ddev/tests/size/test_common.py +++ b/ddev/tests/size/test_common.py @@ -122,7 +122,7 @@ def test_get_dependencies_sizes(): ] -def test_format_modules(): +def test_format_modules_multiple_platform(): modules = [ {"Name": "module1", "Type": "A", "Size_Bytes": 1500}, {"Name": "module2", "Type": "B", "Size_Bytes": 3000}, @@ -147,7 +147,31 @@ def test_format_modules(): }, ] - assert format_modules(modules, platform, version, 0) == expected_output + assert format_modules(modules, platform, version, True) == expected_output + + +def test_format_modules_one_plat(): + modules = [ + {"Name": "module1", "Type": "A", "Size_Bytes": 1500}, + {"Name": "module2", "Type": "B", "Size_Bytes": 3000}, + ] + platform = "linux-aarch64" + version = "3.12" + + expected_output = [ + { + "Name": "module1", + "Type": "A", + "Size_Bytes": 1500, + }, + { + "Name": "module2", + "Type": "B", + "Size_Bytes": 3000, + }, + ] + + assert format_modules(modules, platform, version, False) == expected_output def test_get_files_grouped_and_with_versions(): @@ -229,7 +253,7 @@ def test_print_csv(): {"Name": "module,with,comma", "Size B": 456, "Size": "2 B"}, ] - print_csv(mock_app, i=0, modules=modules) + print_csv(mock_app, modules=modules) expected_calls = [ (("Name,Size B",),), @@ -241,102 +265,23 @@ def test_print_csv(): assert actual_calls == expected_calls -def test_print_json_multiple_calls(): +def test_print_json(): mock_app = MagicMock() - printed_yet = False - n_iterations = 3 - - modules_list = [ - [{"name": "mod1", "size": "100"}], - [{"name": "mod2", "size": "200"}], - [{"name": "mod3", 
"size": "300"}], - ] - - for i, modules in enumerate(modules_list): - if i != 0: - printed_yet = True - print_json(mock_app, i, n_iterations, printed_yet, modules) - - expected_calls = [ - (("[",),), - (('{"name": "mod1", "size": "100"}',),), - ((",",),), - (('{"name": "mod2", "size": "200"}',),), - ((",",),), - (('{"name": "mod3", "size": "300"}',),), - (("]",),), - ] - - actual_calls = mock_app.display.call_args_list - print(actual_calls) - assert actual_calls == expected_calls - result = "".join(call[0][0] for call in actual_calls) - parsed = json.loads(result) - assert parsed == [ + modules = [ {"name": "mod1", "size": "100"}, {"name": "mod2", "size": "200"}, {"name": "mod3", "size": "300"}, ] - - -def test_print_json_no_first(): - mock_app = MagicMock() - printed_yet = False - n_iterations = 3 - - modules_list = [ - [{"name": "", "size": ""}], - [{"name": "mod2", "size": "200"}], - [{"name": "mod3", "size": "300"}], - ] - - for i, modules in enumerate(modules_list): - print_json(mock_app, i, n_iterations, printed_yet, modules) - if i == 1: - printed_yet = True - - expected_calls = [ - (("[",),), - (('{"name": "mod2", "size": "200"}',),), - ((",",),), - (('{"name": "mod3", "size": "300"}',),), - (("]",),), - ] - - actual_calls = mock_app.display.call_args_list - print(actual_calls) - assert actual_calls == expected_calls - - result = "".join(call[0][0] for call in actual_calls) - parsed = json.loads(result) - assert parsed == [ - {"name": "mod2", "size": "200"}, - {"name": "mod3", "size": "300"}, - ] - - -def test_print_json_no_last(): - mock_app = MagicMock() - printed_yet = False - n_iterations = 3 - - modules_list = [ - [{"name": "mod1", "size": "100"}], - [{"name": "mod2", "size": "200"}], - [{"name": "", "size": ""}], - ] - - for i, modules in enumerate(modules_list): - if i != 0: - printed_yet = True - print_json(mock_app, i, n_iterations, printed_yet, modules) + print_json(mock_app, modules) expected_calls = [ (("[",),), (('{"name": "mod1", "size": 
"100"}',),), ((",",),), (('{"name": "mod2", "size": "200"}',),), + ((",",),), + (('{"name": "mod3", "size": "300"}',),), (("]",),), ] @@ -349,6 +294,7 @@ def test_print_json_no_last(): assert parsed == [ {"name": "mod1", "size": "100"}, {"name": "mod2", "size": "200"}, + {"name": "mod3", "size": "300"}, ] From 837e9be24437751d2adf9b904910b2f0703c8536 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Fri, 9 May 2025 16:57:15 +0200 Subject: [PATCH 41/70] final fixes --- ddev/src/ddev/cli/size/common.py | 373 ++++++++++++++------------- ddev/src/ddev/cli/size/diff.py | 33 +-- ddev/src/ddev/cli/size/status.py | 12 +- ddev/src/ddev/cli/size/timeline.py | 123 +++++---- ddev/tests/cli/size/test_diff.py | 77 ++++-- ddev/tests/cli/size/test_status.py | 29 ++- ddev/tests/cli/size/test_timeline.py | 227 +++++++++++++--- ddev/tests/size/test_timeline.py | 2 +- 8 files changed, 554 insertions(+), 322 deletions(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index a105574993322..4f5994839bd24 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -12,7 +12,7 @@ from datetime import date from pathlib import Path from types import TracebackType -from typing import Dict, List, Literal, Optional, Set, Tuple, Type, TypedDict, Union, cast +from typing import Literal, Optional, Type, TypedDict import matplotlib.cm as cm import matplotlib.pyplot as plt @@ -90,7 +90,7 @@ class ParametersTimelineDependency(ParametersTimeline): platform: str -def get_valid_platforms(repo_path: Union[Path, str]) -> Set[str]: +def get_valid_platforms(repo_path: Path | str) -> set[str]: """ Extracts the platforms we support from the .deps/resolved file names. 
""" @@ -101,7 +101,7 @@ def get_valid_platforms(repo_path: Union[Path, str]) -> Set[str]: return set(platforms) -def get_valid_versions(repo_path: Union[Path, str]) -> Set[str]: +def get_valid_versions(repo_path: Path | str) -> set[str]: """ Extracts the Python versions we support from the .deps/resolved file names. """ @@ -118,7 +118,7 @@ def is_correct_dependency(platform: str, version: str, name: str) -> bool: return platform in name and version in name -def is_valid_integration(path: str, included_folder: str, ignored_files: Set[str], git_ignore: List[str]) -> bool: +def is_valid_integration(path: str, included_folder: str, ignored_files: set[str], git_ignore: list[str]) -> bool: # It is not an integration if path.startswith("."): return False @@ -135,7 +135,7 @@ def is_valid_integration(path: str, included_folder: str, ignored_files: Set[str return True -def get_gitignore_files(repo_path: str | Path) -> List[str]: +def get_gitignore_files(repo_path: str | Path) -> list[str]: gitignore_path = os.path.join(repo_path, ".gitignore") with open(gitignore_path, "r", encoding="utf-8") as file: gitignore_content = file.read() @@ -156,16 +156,16 @@ def convert_to_human_readable_size(size_bytes: float) -> str: def compress(file_path: str) -> int: compressor = zlib.compressobj() compressed_size = 0 - # original_size = os.path.getsize(file_path) + chunk_size = 8192 with open(file_path, "rb") as f: - while chunk := f.read(8192): # Read in 8KB chunks + while chunk := f.read(chunk_size): compressed_chunk = compressor.compress(chunk) compressed_size += len(compressed_chunk) compressed_size += len(compressor.flush()) return compressed_size -def get_files(repo_path: str | Path, compressed: bool) -> List[FileDataEntry]: +def get_files(repo_path: str | Path, compressed: bool) -> list[FileDataEntry]: """ Calculates integration file sizes and versions from a repository. 
""" @@ -173,8 +173,8 @@ def get_files(repo_path: str | Path, compressed: bool) -> List[FileDataEntry]: git_ignore = get_gitignore_files(repo_path) included_folder = "datadog_checks/" - integration_sizes: Dict[str, int] = {} - integration_versions: Dict[str, str] = {} + integration_sizes: dict[str, int] = {} + integration_versions: dict[str, str] = {} for root, _, files in os.walk(repo_path): for file in files: @@ -222,7 +222,7 @@ def extract_version_from_about_py(path: str) -> str: return "" -def get_dependencies(repo_path: str | Path, platform: str, version: str, compressed: bool) -> List[FileDataEntry]: +def get_dependencies(repo_path: str | Path, platform: str, version: str, compressed: bool) -> list[FileDataEntry]: """ Gets the list of dependencies for a given platform and Python version. Each FileDataEntry includes: Name, Version, Size_Bytes, Size, and Type. @@ -247,7 +247,7 @@ def get_dependencies(repo_path: str | Path, platform: str, version: str, compres return [] -def get_dependencies_list(file_path: str) -> Tuple[List[str], List[str], List[str]]: +def get_dependencies_list(file_path: str) -> tuple[list[str], list[str], list[str]]: """ Parses a dependency file and extracts the dependency names, download URLs, and versions. """ @@ -273,8 +273,8 @@ def get_dependencies_list(file_path: str) -> Tuple[List[str], List[str], List[st def get_dependencies_sizes( - deps: List[str], download_urls: List[str], versions: List[str], compressed: bool -) -> List[FileDataEntry]: + deps: list[str], download_urls: list[str], versions: list[str], compressed: bool +) -> list[FileDataEntry]: """ Calculates the sizes of dependencies, either compressed or uncompressed. @@ -285,7 +285,7 @@ def get_dependencies_sizes( compressed: If True, use the Content-Length from the HTTP headers. If False, download, extract, and compute actual uncompressed size. 
""" - file_data: List[FileDataEntry] = [] + file_data: list[FileDataEntry] = [] for dep, url, version in zip(deps, download_urls, versions, strict=False): if compressed: response = requests.head(url) @@ -327,11 +327,11 @@ def get_dependencies_sizes( def format_modules( - modules: List[FileDataEntry], + modules: list[FileDataEntry], platform: str, py_version: str, multiple_plats_and_vers: bool, -) -> List[FileDataEntryPlatformVersion] | List[FileDataEntry]: +) -> list[FileDataEntryPlatformVersion] | list[FileDataEntry]: """ Formats the modules list, adding platform and Python version information if needed. @@ -368,7 +368,7 @@ def format_modules( } return [empty_entry_with_platform] elif multiple_plats_and_vers: - new_modules: List[FileDataEntryPlatformVersion] = [ + new_modules: list[FileDataEntryPlatformVersion] = [ {**entry, "Platform": platform, "Python_Version": py_version} for entry in modules ] return new_modules @@ -379,10 +379,10 @@ def format_modules( def print_json( app: Application, modules: ( - List[FileDataEntry] - | List[FileDataEntryPlatformVersion] - | List[CommitEntryWithDelta] - | List[CommitEntryPlatformWithDelta] + list[FileDataEntry] + | list[FileDataEntryPlatformVersion] + | list[CommitEntryWithDelta] + | list[CommitEntryPlatformWithDelta] ), ) -> None: printed_yet = False @@ -400,10 +400,10 @@ def print_json( def print_csv( app: Application, modules: ( - List[FileDataEntry] - | List[FileDataEntryPlatformVersion] - | List[CommitEntryWithDelta] - | List[CommitEntryPlatformWithDelta] + list[FileDataEntry] + | list[FileDataEntryPlatformVersion] + | list[CommitEntryWithDelta] + | list[CommitEntryPlatformWithDelta] ), ) -> None: headers = [k for k in modules[0].keys() if k not in ["Size", "Delta"]] @@ -425,10 +425,10 @@ def print_markdown( app: Application, title: str, modules: ( - List[FileDataEntry] - | List[FileDataEntryPlatformVersion] - | List[CommitEntryWithDelta] - | List[CommitEntryPlatformWithDelta] + list[FileDataEntry] + | 
list[FileDataEntryPlatformVersion] + | list[CommitEntryWithDelta] + | list[CommitEntryPlatformWithDelta] ), ) -> None: if any(str(value).strip() not in ("", "0", "0001-01-01") for value in modules[0].values()): # table is not empty @@ -445,14 +445,14 @@ def print_table( app: Application, mode: str, modules: ( - List[FileDataEntry] - | List[FileDataEntryPlatformVersion] - | List[CommitEntryWithDelta] - | List[CommitEntryPlatformWithDelta] + list[FileDataEntry] + | list[FileDataEntryPlatformVersion] + | list[CommitEntryWithDelta] + | list[CommitEntryPlatformWithDelta] ), ) -> None: columns = [col for col in modules[0].keys() if "Bytes" not in col] - modules_table: Dict[str, Dict[int, str]] = {col: {} for col in columns} + modules_table: dict[str, dict[int, str]] = {col: {} for col in columns} for i, row in enumerate(modules): if any(str(value).strip() not in ("", "0", "0001-01-01") for value in row.values()): for key in columns: @@ -462,209 +462,210 @@ def print_table( def plot_treemap( - modules: List[FileDataEntry] | List[FileDataEntryPlatformVersion], + modules: list[FileDataEntry] | list[FileDataEntryPlatformVersion], title: str, show: bool, mode: Literal["status", "diff"] = "status", path: Optional[str] = None, ) -> None: - """ - Generates and displays or saves a treemap visualization of module sizes. - - The plot layout is computed using the size of each module (in bytes), and color is used to - encode either the type of module or the direction/magnitude of size change, depending on the mode. - - - Modules with very small area may not show labels to avoid overlap. - - Labels display module name and size if space allows. - - Color intensity reflects relative size (or change) within its group. - - A legend is added depending on the selected mode. - - Args: - modules: List of module entries. 
Each entry must contain at least: - - 'Name': The module name, - - 'Size_Bytes': Module size in bytes (can be negative in 'diff' mode), - - 'Size': Human-readable size string, - - 'Type': Either 'Integration' or 'Dependency'. - title: Title to display at the top of the plot. - show: If True, the plot is shown interactively using matplotlib. - mode: - - 'status': Shows the current sizes of modules. - Integrations and dependencies are grouped and colored separately (Purples/Reds), - with size intensity mapped to color darkness. - - 'diff': Shows the size change between two commits. - Positive changes are colored in Oranges, negative changes in Blues. - The plot is split in half: left for decreases, right for increases. - path: Optional path to save the plot as a PNG file. If not provided, nothing is saved. - """ - if not any(str(value).strip() not in ("", "0") for value in modules[0].values()): # table is empty + if not any(str(value).strip() not in ("", "0") for value in modules[0].values()): + # table is empty return - # Convert sizes to absolute values for layout computation - sizes = [abs(mod["Size_Bytes"]) for mod in modules] - # Initialize figure and axis plt.figure(figsize=(12, 8)) ax = plt.gca() ax.set_axis_off() - # Compute layout rectangles based on size - rects = squarify.normalize_sizes(sizes, 100, 100) - rects = squarify.squarify(rects, 0, 0, 100, 100) + # Calculate the rectangles + if mode == "status": + rects, colors, legend_handles = plot_status_treemap(modules) - colors = [] + if mode == "diff": + rects, colors, legend_handles = plot_diff_treemap(modules) - if mode == "status": - # Separate modules by type - integrations = [mod for mod in modules if mod["Type"] == "Integration"] - dependencies = [mod for mod in modules if mod["Type"] == "Dependency"] - - # Normalize sizes within each group - def normalize(mods): - if not mods: - return [] - sizes = [mod["Size_Bytes"] for mod in mods] - min_size = min(sizes) - max_size = max(sizes) - range_size = 
max_size - min_size or 1 - return [(s - min_size) / range_size for s in sizes] - - norm_int = normalize(integrations) - norm_dep = normalize(dependencies) - - # Map normalized values to color intensity - def scale(val, vmin=0.3, vmax=0.85): - return vmin + val * (vmax - vmin) - - cmap_int = cm.get_cmap("Purples") - cmap_dep = cm.get_cmap("Reds") - - # Assign colors based on type and normalized size - for mod in modules: - if mod["Type"] == "Integration": - idx = integrations.index(mod) - colors.append(cmap_int(scale(norm_int[idx], 0.3, 0.6))) - elif mod["Type"] == "Dependency": - idx = dependencies.index(mod) - colors.append(cmap_dep(scale(norm_dep[idx], 0.3, 0.85))) - else: - colors.append("#999999") + draw_treemap_rects_with_labels(ax, rects, modules, colors) - elif mode == "diff": - # Separate modules by positive and negative size change - cmap_pos = cm.get_cmap("Oranges") - cmap_neg = cm.get_cmap("Blues") + # Finalize layout and show/save plot + ax.set_xlim(0, 100) + ax.set_ylim(0, 100) - positives = [mod for mod in modules if cast(int, mod["Size_Bytes"]) > 0] - negatives = [mod for mod in modules if cast(int, mod["Size_Bytes"]) < 0] + plt.title(title, fontsize=16) - sizes_pos = [mod["Size_Bytes"] for mod in positives] - sizes_neg = [abs(mod["Size_Bytes"]) for mod in negatives] + plt.legend(handles=legend_handles, title="Type", loc="center left", bbox_to_anchor=(1.0, 0.5)) + plt.subplots_adjust(right=0.8) + plt.tight_layout() + + if show: + plt.show() + if path: + plt.savefig(path, bbox_inches="tight", format="png") - sum_pos = sum(sizes_pos) - sum_neg = sum(sizes_neg) - canvas_area = 50 * 100 +def plot_status_treemap( + modules: list[FileDataEntry] | list[FileDataEntryPlatformVersion], +) -> tuple[list[dict[str, float]], list[tuple[float, float, float, float]], list[Patch]]: + # Calculate the area of the rectangles + sizes = [mod["Size_Bytes"] for mod in modules] + norm_sizes = squarify.normalize_sizes(sizes, 100, 100) + rects = squarify.squarify(norm_sizes, 
0, 0, 100, 100) - # Determine dominant side and scale layout accordingly - if sum_pos >= sum_neg: - norm_sizes_pos = [s / sum_pos * canvas_area for s in sizes_pos] - norm_sizes_neg = [s / sum_pos * canvas_area for s in sizes_neg] - rects_pos = squarify.squarify(norm_sizes_pos, 50, 0, 50, 100) - rects_neg = squarify.squarify(norm_sizes_neg, 0, 0, 50, 100) + # Define the colors for each type + cmap_int = cm.get_cmap("Purples") + cmap_dep = cm.get_cmap("Reds") + + # Assign colors based on type and normalized size + colors = [] + max_area = max(norm_sizes) or 1 + for mod, area in zip(modules, norm_sizes, strict=False): + intensity = scale_colors_treemap(area, max_area) + if mod["Type"] == "Integration": + colors.append(cmap_int(intensity)) + elif mod["Type"] == "Dependency": + colors.append(cmap_dep(intensity)) else: - norm_sizes_neg = [s / sum_neg * canvas_area for s in sizes_neg] - norm_sizes_pos = [s / sum_neg * canvas_area for s in sizes_pos] - rects_neg = squarify.squarify(norm_sizes_neg, 0, 0, 50, 100) - rects_pos = squarify.squarify(norm_sizes_pos, 50, 0, 50, 100) + colors.append("#999999") + # Define the legend + legend_handles = [ + Patch(color=cm.get_cmap("Purples")(0.6), label="Integration"), + Patch(color=cm.get_cmap("Reds")(0.6), label="Dependency"), + ] + return rects, colors, legend_handles + - # Merge layout and module lists for unified drawing - rects = rects_neg + rects_pos - modules = negatives + positives +def plot_diff_treemap( + modules: list[FileDataEntry] | list[FileDataEntryPlatformVersion], +) -> tuple[list[dict[str, float]], list[tuple[float, float, float, float]], list[Patch]]: + # Define the colors for each type + cmap_pos = cm.get_cmap("Oranges") + cmap_neg = cm.get_cmap("Blues") - # Compute color intensity for each module - def rescale_intensity(val, min_val=0.3, max_val=0.8): - return min_val + (max_val - min_val) * val + # Separate in negative and positive differences + positives = [mod for mod in modules if mod["Size_Bytes"] > 0] + 
negatives = [mod for mod in modules if mod["Size_Bytes"] < 0] - max_size = max(sizes_pos + sizes_neg) or 1 - colors = [] + sizes_pos = [mod["Size_Bytes"] for mod in positives] + sizes_neg = [abs(mod["Size_Bytes"]) for mod in negatives] + + sum_pos = sum(sizes_pos) + sum_neg = sum(sizes_neg) + + canvas_area = 50 * 100 + + # Determine dominant side and scale layout accordingly + if sum_pos >= sum_neg: + norm_sizes_pos = [s / sum_pos * canvas_area for s in sizes_pos] + norm_sizes_neg = [s / sum_pos * canvas_area for s in sizes_neg] + rects_neg = squarify.squarify(norm_sizes_neg, 0, 0, 50, 100) + rects_pos = squarify.squarify(norm_sizes_pos, 50, 0, 50, 100) + + else: + norm_sizes_neg = [s / sum_neg * canvas_area for s in sizes_neg] + norm_sizes_pos = [s / sum_neg * canvas_area for s in sizes_pos] + rects_neg = squarify.squarify(norm_sizes_neg, 0, 0, 50, 100) + rects_pos = squarify.squarify(norm_sizes_pos, 50, 0, 50, 100) + + # Merge layout and module lists + rects = rects_neg + rects_pos + modules = negatives + positives + + # Assign colors based on type and normalized size + colors = [] + max_area = max(norm_sizes_pos + norm_sizes_neg) or 1 - for mod in negatives: - raw = abs(mod["Size_Bytes"]) / max_size - intensity = rescale_intensity(raw) - colors.append(cmap_neg(intensity)) + for area in norm_sizes_neg: + intensity = scale_colors_treemap(area, max_area) + colors.append(cmap_neg(intensity)) - for mod in positives: - raw = mod["Size_Bytes"] / max_size - intensity = rescale_intensity(raw) - colors.append(cmap_pos(intensity)) + for area in norm_sizes_pos: + intensity = scale_colors_treemap(area, max_area) + colors.append(cmap_pos(intensity)) - # Manual treemap layout and coloring to personalize labels + legend_handles = [ + Patch(color=cm.get_cmap("Oranges")(0.7), label="Increase"), + Patch(color=cm.get_cmap("Blues")(0.7), label="Decrease"), + ] + + return rects, colors, legend_handles + + +# Map normalized values to color intensity +def scale_colors_treemap(area: 
float, max_area: float) -> float: + vmin = 0.3 + vmax = 0.65 + return vmin + (area / max_area) * (vmax - vmin) + + +def draw_treemap_rects_with_labels( + ax: plt.Axes, + rects: list[dict], + modules: list[FileDataEntry] | list[FileDataEntryPlatformVersion], + colors: list[tuple[float, float, float, float]], +) -> None: + """ + Draw treemap rectangles with their assigned colors and optional text labels. + + Args: + ax: Matplotlib Axes to draw on. + rects: List of rectangle dicts from squarify, each with 'x', 'y', 'dx', 'dy'. + modules: List of modules associated with each rectangle (same order). + colors: List of colors for each module (same order). + """ for rect, mod, color in zip(rects, modules, colors, strict=False): x, y, dx, dy = rect["x"], rect["y"], rect["dx"], rect["dy"] + + # Draw the rectangle with a white border ax.add_patch(plt.Rectangle((x, y), dx, dy, color=color, ec="white")) # Determine font size based on rectangle area MIN_FONT_SIZE = 6 MAX_FONT_SIZE = 12 FONT_SIZE_SCALE = 0.4 - AVG_SIDE = (dx * dy) ** 0.5 + AVG_SIDE = (dx * dy) ** 0.5 # Geometric mean font_size = max(MIN_FONT_SIZE, min(MAX_FONT_SIZE, AVG_SIDE * FONT_SIZE_SCALE)) + + # Determine the info for the labels name = mod["Name"] size_str = f"({mod['Size']})" - # Check whether text fits inside the rectangle - CHAR_WIDTH_FACTOR = 0.1 - CHAR_HEIGHT_FACTOR = 0.5 + # Estimate if there's enough space for text + CHAR_WIDTH_FACTOR = 0.1 # Width of each character relative to font size + CHAR_HEIGHT_FACTOR = 0.5 # Minimum height for readable text + name_fits = (len(name) + 2) * font_size * CHAR_WIDTH_FACTOR < dx and dy > font_size * CHAR_HEIGHT_FACTOR size_fits = (len(size_str) + 2) * font_size * CHAR_WIDTH_FACTOR < dx - both_fit = dy > font_size * CHAR_HEIGHT_FACTOR * 2 + both_fit = dy > font_size * CHAR_HEIGHT_FACTOR * 2 # Enough room for two lines - # Possibly truncate name if it doesn't fit + # If the rectangle is too small, skip the label if dx < 5 or dy < 5: label = None + + # If the name 
doesn't fit, truncate it with "..." elif not name_fits and dx > 5: max_chars = int(dx / (font_size * CHAR_WIDTH_FACTOR)) - 2 - if 4 <= max_chars: + if max_chars >= 4: name = name[: max_chars - 3] + "..." name_fits = True - # Construct label if there's space + # Build the label based on available space if name_fits and size_fits and both_fit: - label = f"{name}\n{size_str}" + label = f"{name}\n{size_str}" # Two-line label elif name_fits: label = name else: label = None - # Draw label + # Draw label centered inside the rectangle if label: - ax.text(x + dx / 2, y + dy / 2, label, va="center", ha="center", fontsize=font_size, color="black") - - # Finalize layout and show/save plot - ax.set_xlim(0, 100) - ax.set_ylim(0, 100) - - plt.title(title, fontsize=16) - - if mode == "status": - legend_handles = [ - Patch(color=cm.get_cmap("Purples")(0.6), label="Integration"), - Patch(color=cm.get_cmap("Reds")(0.6), label="Dependency"), - ] - elif mode == "diff": - legend_handles = [ - Patch(color=cm.get_cmap("Oranges")(0.7), label="Increase"), - Patch(color=cm.get_cmap("Blues")(0.7), label="Decrease"), - ] - - plt.legend(handles=legend_handles, title="Type", loc="center left", bbox_to_anchor=(1.0, 0.5)) - plt.subplots_adjust(right=0.8) - plt.tight_layout() - - if show: - plt.show() - if path: - plt.savefig(path, bbox_inches="tight", format="png") + ax.text( + x + dx / 2, + y + dy / 2, + label, + va="center", + ha="center", + fontsize=font_size, + color="black", + ) class WrongDependencyFormat(Exception): @@ -677,7 +678,7 @@ class GitRepo: Clones the repo to a temp folder and deletes the folder on exit. 
""" - def __init__(self, url: Union[Path, str]) -> None: + def __init__(self, url: Path | str) -> None: self.url = url self.repo_dir: str @@ -690,13 +691,13 @@ def __enter__(self): self._run(f"git clone --quiet {self.url} {self.repo_dir}") return self - def _run(self, command: str) -> List[str]: + def _run(self, command: str) -> list[str]: result = subprocess.run(command, shell=True, capture_output=True, text=True, check=True, cwd=self.repo_dir) return result.stdout.strip().split("\n") def get_module_commits( self, module_path: str, initial: Optional[str], final: Optional[str], time: Optional[str] - ) -> List[str]: + ) -> list[str]: """ Returns the list of commits (SHA) that modified a given module, filtered by time or commit range. @@ -748,7 +749,7 @@ def sparse_checkout_commit(self, commit_sha: str, module: str) -> None: self._run(f"git sparse-checkout set {module}") self._run(f"git checkout {commit_sha}") - def get_commit_metadata(self, commit: str) -> Tuple[str, str, str]: + def get_commit_metadata(self, commit: str) -> tuple[str, str, str]: result = self._run(f'git log -1 --date=format:"%b %d %Y" --pretty=format:"%ad\n%an\n%s" {commit}') date, author, message = result return date, author, message diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index 489cebdb4ae8b..92a4bf70215e9 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -4,7 +4,7 @@ import os from datetime import datetime -from typing import List, Literal, Optional, Tuple, cast, overload +from typing import Literal, Optional, overload import click from rich.console import Console @@ -105,7 +105,7 @@ def diff( elif version and version not in valid_versions: raise ValueError(f"Invalid version: {version}") if platform is None or version is None: - modules_plat_ver: List[FileDataEntryPlatformVersion] = [] + modules_plat_ver: list[FileDataEntryPlatformVersion] = [] platforms = valid_platforms if platform is None else [platform] versions = 
valid_versions if version is None else [version] progress.remove_task(task) @@ -143,7 +143,7 @@ def diff( print_json(app, modules_plat_ver) else: progress.remove_task(task) - modules: List[FileDataEntry] = [] + modules: list[FileDataEntry] = [] multiple_plat_and_ver: Literal[False] = False base_parameters: Parameters = { "app": app, @@ -184,7 +184,7 @@ def diff_mode( params: Parameters, progress: Progress, multiple_plats_and_vers: Literal[True], -) -> List[FileDataEntryPlatformVersion]: ... +) -> list[FileDataEntryPlatformVersion]: ... @overload def diff_mode( gitRepo: GitRepo, @@ -193,7 +193,7 @@ def diff_mode( params: Parameters, progress: Progress, multiple_plats_and_vers: Literal[False], -) -> List[FileDataEntry]: ... +) -> list[FileDataEntry]: ... def diff_mode( gitRepo: GitRepo, first_commit: str, @@ -201,7 +201,7 @@ def diff_mode( params: Parameters, progress: Progress, multiple_plats_and_vers: bool, -) -> List[FileDataEntryPlatformVersion] | List[FileDataEntry]: +) -> list[FileDataEntryPlatformVersion] | list[FileDataEntry]: files_b, dependencies_b, files_a, dependencies_a = get_repo_info( gitRepo, params["platform"], params["version"], first_commit, second_commit, params["compressed"], progress ) @@ -209,15 +209,18 @@ def diff_mode( integrations = get_diff(files_b, files_a, "Integration") dependencies = get_diff(dependencies_b, dependencies_a, "Dependency") - if integrations + dependencies == [] and not params["csv"] and not params["json"]: - params["app"].display( + if integrations + dependencies == []: + params["app"].display_error( f"No size differences were detected between the selected commits for {params['platform']}" ) + formatted_modules = format_modules( + integrations + dependencies, params["platform"], params["version"], multiple_plats_and_vers + ) else: formatted_modules = format_modules( integrations + dependencies, params["platform"], params["version"], multiple_plats_and_vers ) - formatted_modules.sort(key=lambda x: abs(cast(int, 
x["Size_Bytes"])), reverse=True) + formatted_modules.sort(key=lambda x: x["Size_Bytes"], reverse=True) for module in formatted_modules: if module["Size_Bytes"] > 0: module["Size"] = f"+{module['Size']}" @@ -236,9 +239,7 @@ def diff_mode( params["save_to_png_path"], ) - return formatted_modules - - return [] + return formatted_modules def get_repo_info( @@ -249,7 +250,7 @@ def get_repo_info( second_commit: str, compressed: bool, progress: Progress, -) -> Tuple[List[FileDataEntry], List[FileDataEntry], List[FileDataEntry], List[FileDataEntry]]: +) -> tuple[list[FileDataEntry], list[FileDataEntry], list[FileDataEntry], list[FileDataEntry]]: with progress: """ Retrieves integration and dependency sizes for two commits in the repo. @@ -288,8 +289,8 @@ def get_repo_info( def get_diff( - size_first_commit: List[FileDataEntry], size_second_commit: List[FileDataEntry], type: str -) -> List[FileDataEntry]: + size_first_commit: list[FileDataEntry], size_second_commit: list[FileDataEntry], type: str +) -> list[FileDataEntry]: """ Computes size differences between two sets of integrations or dependencies. 
@@ -307,7 +308,7 @@ def get_diff( second_commit = {entry["Name"]: entry for entry in size_second_commit} all_names = set(first_commit) | set(second_commit) - diffs: List[FileDataEntry] = [] + diffs: list[FileDataEntry] = [] for name in all_names: b = first_commit.get(name) diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index ccb29e1289e91..ad1272cc18cdb 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -4,7 +4,7 @@ import os # noqa: I001 from pathlib import Path -from typing import List, Optional, Literal, overload +from typing import Optional, Literal, overload import click from rich.console import Console @@ -72,7 +72,7 @@ def status( raise ValueError(f"Invalid version: {version}") if platform is None or version is None: - modules_plat_ver: List[FileDataEntryPlatformVersion] = [] + modules_plat_ver: list[FileDataEntryPlatformVersion] = [] platforms = valid_platforms if platform is None else [platform] versions = valid_versions if version is None else [version] combinations = [(p, v) for p in platforms for v in versions] @@ -105,7 +105,7 @@ def status( elif json: print_json(app, modules_plat_ver) else: - modules: List[FileDataEntry] = [] + modules: list[FileDataEntry] = [] multiple_plat_and_ver: Literal[False] = False base_parameters: Parameters = { "app": app, @@ -139,18 +139,18 @@ def status_mode( repo_path: Path, params: Parameters, multiple_plats_and_vers: Literal[True], -) -> List[FileDataEntryPlatformVersion]: ... +) -> list[FileDataEntryPlatformVersion]: ... @overload def status_mode( repo_path: Path, params: Parameters, multiple_plats_and_vers: Literal[False], -) -> List[FileDataEntry]: ... +) -> list[FileDataEntry]: ... 
def status_mode( repo_path: Path, params: Parameters, multiple_plats_and_vers: bool, -) -> List[FileDataEntryPlatformVersion] | List[FileDataEntry]: +) -> list[FileDataEntryPlatformVersion] | list[FileDataEntry]: with console.status("[cyan]Calculating sizes...", spinner="dots"): modules = get_files(repo_path, params["compressed"]) + get_dependencies( repo_path, params["platform"], params["version"], params["compressed"] diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index be190f880b1d9..cbaa50d74d0b9 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -4,7 +4,7 @@ import zipfile from datetime import date, datetime from pathlib import Path -from typing import List, Literal, Optional, Set, Tuple, overload +from typing import Literal, Optional, overload import click import matplotlib.pyplot as plt @@ -38,6 +38,7 @@ MINIMUM_DATE_DEPENDENCIES = datetime.strptime( "Apr 3 2024", "%b %d %Y" ).date() # Dependencies not available before this date due to a storage change +MINIMUM_LENGTH_COMMIT = 7 console = Console(stderr=True) @@ -101,12 +102,21 @@ def timeline( module = name # module is the name of the integration or the dependency if sum([csv, markdown, json]) > 1: raise click.BadParameter("Only one output format can be selected: --csv, --markdown, or --json") - elif initial_commit and final_commit and len(initial_commit) < 7 and len(final_commit) < 7: - raise click.BadParameter("Commit hashes must be at least 7 characters long") - elif initial_commit and len(initial_commit) < 7: - raise click.BadParameter("Initial commit hash must be at least 7 characters long.", param_hint="initial") - elif final_commit and len(final_commit) < 7: - raise click.BadParameter("Final commit hash must be at least 7 characters long.", param_hint="final") + elif ( + initial_commit + and final_commit + and len(initial_commit) < MINIMUM_LENGTH_COMMIT + and len(final_commit) < MINIMUM_LENGTH_COMMIT + ): + raise 
click.BadParameter(f"Commit hashes must be at least {MINIMUM_LENGTH_COMMIT} characters long") + elif initial_commit and len(initial_commit) < MINIMUM_LENGTH_COMMIT: + raise click.BadParameter( + f"Initial commit hash must be at least {MINIMUM_LENGTH_COMMIT} characters long.", param_hint="initial" + ) + elif final_commit and len(final_commit) < MINIMUM_LENGTH_COMMIT: + raise click.BadParameter( + f"Final commit hash must be at least {MINIMUM_LENGTH_COMMIT} characters long.", param_hint="final" + ) elif final_commit and initial_commit and final_commit == initial_commit: raise click.BadParameter("Commit hashes must be different") task = progress.add_task("[cyan]Calculating timeline...", total=None) @@ -125,7 +135,10 @@ def timeline( folder = module if type == "integration" else ".deps/resolved" commits = gitRepo.get_module_commits(folder, initial_commit, final_commit, time) first_commit = gitRepo.get_creation_commit_module(module) - gitRepo.checkout_commit(commits[-1]) + if final_commit and commits == []: + gitRepo.checkout_commit(final_commit) + elif commits != []: + gitRepo.checkout_commit(commits[-1]) if type == "dependency": valid_platforms = get_valid_platforms(gitRepo.repo_dir) if platform and platform not in valid_platforms: @@ -158,7 +171,7 @@ def timeline( app.display_error(f"No changes found for {type}: {module}") return if type == "dependency": - modules_plat: List[CommitEntryPlatformWithDelta] = [] + modules_plat: list[CommitEntryPlatformWithDelta] = [] multiple_plats_and_vers: Literal[True] = True progress.remove_task(task) dep_parameters: ParametersTimelineDependency @@ -182,6 +195,7 @@ def timeline( "show_gui": show_gui, "first_commit": None, } + modules_plat.extend( timeline_mode( gitRepo, @@ -191,6 +205,7 @@ def timeline( progress, ) ) + else: dep_parameters = { "app": app, @@ -215,12 +230,13 @@ def timeline( progress, ) ) + if csv: print_csv(app, modules_plat) elif json: print_json(app, modules_plat) else: - modules: List[CommitEntryWithDelta] = [] + 
modules: list[CommitEntryWithDelta] = [] multiple_plat_and_ver: Literal[False] = False int_parameters: ParametersTimelineIntegration = { "app": app, @@ -259,40 +275,40 @@ def timeline( @overload def timeline_mode( gitRepo: GitRepo, - commits: List[str], + commits: list[str], params: ParametersTimelineDependency, multiple_plats_and_vers: Literal[True], progress: Progress, -) -> List[CommitEntryPlatformWithDelta]: ... +) -> list[CommitEntryPlatformWithDelta]: ... @overload def timeline_mode( gitRepo: GitRepo, - commits: List[str], + commits: list[str], params: ParametersTimelineIntegration, multiple_plats_and_vers: Literal[False], progress: Progress, -) -> List[CommitEntryWithDelta]: ... +) -> list[CommitEntryWithDelta]: ... @overload def timeline_mode( gitRepo: GitRepo, - commits: List[str], + commits: list[str], params: ParametersTimelineDependency, multiple_plats_and_vers: Literal[False], progress: Progress, -) -> List[CommitEntryWithDelta]: ... +) -> list[CommitEntryWithDelta]: ... def timeline_mode( gitRepo: GitRepo, - commits: List[str], + commits: list[str], params: ParametersTimelineIntegration | ParametersTimelineDependency, multiple_plats_and_vers: bool, progress: Progress, -) -> List[CommitEntryWithDelta] | List[CommitEntryPlatformWithDelta]: +) -> list[CommitEntryWithDelta] | list[CommitEntryPlatformWithDelta]: if params["type"] == "integration": modules = get_repo_info( gitRepo, @@ -307,7 +323,6 @@ def timeline_mode( commits, progress, ) - trimmed_modules = trim_modules(modules, params["threshold"]) formatted_modules = format_modules(trimmed_modules, params["platform"], multiple_plats_and_vers) @@ -328,26 +343,26 @@ def timeline_mode( def get_repo_info( gitRepo: GitRepo, params: ParametersTimelineIntegration, - commits: List[str], + commits: list[str], progress: Progress, -) -> List[CommitEntry]: ... +) -> list[CommitEntry]: ... 
@overload def get_repo_info( gitRepo: GitRepo, params: ParametersTimelineDependency, - commits: List[str], + commits: list[str], progress: Progress, -) -> List[CommitEntry]: ... +) -> list[CommitEntry]: ... def get_repo_info( gitRepo: GitRepo, params: ParametersTimelineIntegration | ParametersTimelineDependency, - commits: List[str], + commits: list[str], progress: Progress, -) -> List[CommitEntry]: +) -> list[CommitEntry]: """ Retrieves size and metadata info for a module across multiple commits. @@ -366,37 +381,36 @@ def get_repo_info( file_data = process_commits(commits, params, gitRepo, progress, params["first_commit"]) else: file_data = process_commits(commits, params, gitRepo, progress, params["first_commit"]) - return file_data @overload def process_commits( - commits: List[str], + commits: list[str], params: ParametersTimelineIntegration, gitRepo: GitRepo, progress: Progress, first_commit: str, -) -> List[CommitEntry]: ... +) -> list[CommitEntry]: ... @overload def process_commits( - commits: List[str], + commits: list[str], params: ParametersTimelineDependency, gitRepo: GitRepo, progress: Progress, first_commit: None, -) -> List[CommitEntry]: ... +) -> list[CommitEntry]: ... def process_commits( - commits: List[str], + commits: list[str], params: ParametersTimelineIntegration | ParametersTimelineDependency, gitRepo: GitRepo, progress: Progress, first_commit: Optional[str], -) -> List[CommitEntry]: +) -> list[CommitEntry]: """ Processes a list of commits for a given integration or dependency. @@ -413,7 +427,7 @@ def process_commits( Returns: A list of CommitEntry objects with commit metadata and size information. 
""" - file_data: List[CommitEntry] = [] + file_data: list[CommitEntry] = [] task = progress.add_task("[cyan]Processing commits...", total=len(commits)) repo = gitRepo.repo_dir @@ -423,7 +437,6 @@ def process_commits( gitRepo.sparse_checkout_commit(commit, folder) date_str, author, message = gitRepo.get_commit_metadata(commit) date, message, commit = format_commit_data(date_str, message, commit, first_commit) - if params["type"] == "dependency" and date > MINIMUM_DATE_DEPENDENCIES: assert params["platform"] is not None result = get_dependencies( @@ -438,7 +451,6 @@ def process_commits( ) if result: file_data.append(result) - elif params["type"] == "integration": file_data = get_files( repo, @@ -464,9 +476,9 @@ def get_files( date: date, author: str, message: str, - file_data: List[CommitEntry], + file_data: list[CommitEntry], compressed: bool, -) -> List[CommitEntry]: +) -> list[CommitEntry]: """ Calculates integration file sizes and versions from a repository. @@ -576,7 +588,7 @@ def get_dependencies( return None -def get_dependency_data(file_path: str, module: str) -> Tuple[Optional[str], Optional[str]]: +def get_dependency_data(file_path: str, module: str) -> tuple[Optional[str], Optional[str]]: """ Parses a dependency file and extracts the dependency name, download URL, and version. @@ -658,7 +670,7 @@ def get_dependency_size( return commit_entry -def get_version(files: List[str], platform: str) -> str: +def get_version(files: list[str], platform: str) -> str: """ Returns the latest Python version for the given target platform based on .deps/resolved filenames. 
@@ -682,10 +694,10 @@ def get_version(files: List[str], platform: str) -> str: def format_modules( - modules: List[CommitEntryWithDelta], + modules: list[CommitEntryWithDelta], platform: Optional[str], multiple_plats_and_vers: bool, -) -> List[CommitEntryWithDelta] | List[CommitEntryPlatformWithDelta]: +) -> list[CommitEntryWithDelta] | list[CommitEntryPlatformWithDelta]: """ Formats the modules list, adding platform and Python version information if needed. @@ -727,16 +739,16 @@ def format_modules( } return [empty_module] elif multiple_plats_and_vers and platform: - new_modules: List[CommitEntryPlatformWithDelta] = [{**entry, "Platform": platform} for entry in modules] + new_modules: list[CommitEntryPlatformWithDelta] = [{**entry, "Platform": platform} for entry in modules] return new_modules else: return modules def trim_modules( - modules: List[CommitEntry], + modules: list[CommitEntry], threshold: Optional[int] = None, -) -> List[CommitEntryWithDelta]: +) -> list[CommitEntryWithDelta]: """ Filters a list of commit entries, keeping only those with significant size changes. @@ -752,12 +764,12 @@ def trim_modules( - Marks version transitions as 'X -> Y' when the version changes. """ if modules == []: - empty_modules: List[CommitEntryWithDelta] = [] + empty_modules: list[CommitEntryWithDelta] = [] return empty_modules threshold = threshold or 0 - trimmed_modules: List[CommitEntryWithDelta] = [] + trimmed_modules: list[CommitEntryWithDelta] = [] first: CommitEntryWithDelta = { **modules[0], @@ -790,7 +802,7 @@ def trim_modules( return trimmed_modules -def format_commit_data(date_str: str, message: str, commit: str, first_commit: Optional[str]) -> Tuple[date, str, str]: +def format_commit_data(date_str: str, message: str, commit: str, first_commit: Optional[str]) -> tuple[date, str, str]: """ Formats commit metadata by shortening the message, marking the first commit, and parsing the date. 
Args: @@ -803,13 +815,20 @@ def format_commit_data(date_str: str, message: str, commit: str, first_commit: O A tuple containing: - Parsed date object, - Shortened and possibly annotated message, - - Shortened commit SHA (first 7 characters). + - Shortened commit SHA . """ if commit == first_commit: message = "(NEW) " + message - message = message if len(message) <= 35 else message[:30].rsplit(" ", 1)[0] + "..." + message.split()[-1] + # Truncates the commit message if it's too long, keeping the first words and the PR number within the allowed length + MAX_LENGTH_COMMIT = 45 + PR_NUMBER_LENGTH = 8 + message = ( + message + if len(message) <= MAX_LENGTH_COMMIT + else message[: MAX_LENGTH_COMMIT - PR_NUMBER_LENGTH - 3].rsplit(" ", 1)[0] + "..." + message.split()[-1] + ) date = datetime.strptime(date_str, "%b %d %Y").date() - return date, message, commit[:7] + return date, message, commit[:MINIMUM_LENGTH_COMMIT] def module_exists(path: str, module: str) -> bool: @@ -819,7 +838,7 @@ def module_exists(path: str, module: str) -> bool: return os.path.exists(os.path.join(path, module)) -def get_dependency_list(path: str, platforms: Set[str]) -> Set[str]: +def get_dependency_list(path: str, platforms: set[str]) -> set[str]: """ Returns the set of dependencies from the .deps/resolved folder for the latest version of the given platform. 
""" @@ -839,7 +858,7 @@ def get_dependency_list(path: str, platforms: Set[str]) -> Set[str]: def plot_linegraph( - modules: List[CommitEntryWithDelta] | List[CommitEntryPlatformWithDelta], + modules: list[CommitEntryWithDelta] | list[CommitEntryPlatformWithDelta], module: str, platform: Optional[str], show: bool, diff --git a/ddev/tests/cli/size/test_diff.py b/ddev/tests/cli/size/test_diff.py index 1ce2d1c1db1c1..bd5db6def4c54 100644 --- a/ddev/tests/cli/size/test_diff.py +++ b/ddev/tests/cli/size/test_diff.py @@ -58,31 +58,67 @@ def get_compressed_dependencies_side_effect(_, __, ___, ____): patch("ddev.cli.size.diff.get_files", side_effect=get_compressed_files_side_effect), patch("ddev.cli.size.diff.get_dependencies", side_effect=get_compressed_dependencies_side_effect), patch("ddev.cli.size.diff.format_modules", side_effect=lambda m, *_: m), - patch("ddev.cli.size.common.print_csv"), - patch("ddev.cli.size.common.print_table"), - patch("ddev.cli.size.common.plot_treemap"), + patch("matplotlib.pyplot.show"), + patch("matplotlib.pyplot.savefig"), ): yield def test_diff_no_args(ddev, mock_size_diff_dependencies): - result = ddev("size", "diff", "commit1", "commit2", "--compressed") - assert result.exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--compressed").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--csv").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--markdown").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--json").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--save_to_png_path", "out.png").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--show_gui").exit_code == 0 def test_diff_with_platform_and_version(ddev, mock_size_diff_dependencies): - result = ddev( - "size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--compressed" + 
assert ddev("size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12").exit_code == 0 + assert ( + ddev( + "size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--compressed" + ).exit_code + == 0 ) - assert result.exit_code == 0 - - -def test_diff_csv(ddev, mock_size_diff_dependencies): - result = ddev( - "size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--compressed", "--csv" + assert ( + ddev("size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--csv").exit_code + == 0 + ) + assert ( + ddev( + "size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--markdown" + ).exit_code + == 0 + ) + assert ( + ddev( + "size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--json" + ).exit_code + == 0 + ) + assert ( + ddev( + "size", + "diff", + "commit1", + "commit2", + "--platform", + "linux-aarch64", + "--python", + "3.12", + "--save_to_png_path", + "out.png", + ).exit_code + == 0 + ) + assert ( + ddev( + "size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--show_gui" + ).exit_code + == 0 ) - print(result.output) - assert result.exit_code == 0 def test_diff_no_differences(ddev): @@ -121,15 +157,24 @@ def test_diff_no_differences(ddev): {"Name": "dep2.whl", "Version": "2.0.0", "Size_Bytes": 1000}, ], ), + patch("matplotlib.pyplot.show"), + patch("matplotlib.pyplot.savefig"), ): result = ddev( "size", "diff", "commit1", "commit2", "--platform", "linux-aarch64", "--python", "3.12", "--compressed" ) - print(result.output) assert result.exit_code == 0, result.output assert "No size differences were detected" in result.output + assert ddev("size", "diff", "commit1", "commit2").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--compressed").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", 
"--csv").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--markdown").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--json").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--save_to_png_path", "out.png").exit_code == 0 + assert ddev("size", "diff", "commit1", "commit2", "--show_gui").exit_code == 0 + def test_diff_invalid_platform(ddev): mock_git_repo = MagicMock() diff --git a/ddev/tests/cli/size/test_status.py b/ddev/tests/cli/size/test_status.py index 69656f2c46fbb..d60b09170bbef 100644 --- a/ddev/tests/cli/size/test_status.py +++ b/ddev/tests/cli/size/test_status.py @@ -62,25 +62,32 @@ def mock_size_status(): patch("os.walk", return_value=mock_walk), patch("os.listdir", return_value=["fake_dep.whl"]), patch("os.path.isfile", return_value=True), + patch("matplotlib.pyplot.show"), + patch("matplotlib.pyplot.savefig"), ): yield mock_app def test_status_no_args(ddev, mock_size_status): - result = ddev("size", "status", "--compressed") - assert result.exit_code == 0 + assert ddev("size", "status").exit_code == 0 + assert ddev("size", "status", "--compressed").exit_code == 0 + assert ddev("size", "status", "--csv").exit_code == 0 + assert ddev("size", "status", "--markdown").exit_code == 0 + assert ddev("size", "status", "--json").exit_code == 0 + assert ddev("size", "status", "--save_to_png_path", "out.png").exit_code == 0 + assert ddev("size", "status", "--show_gui").exit_code == 0 def test_status(ddev, mock_size_status): - result = ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12", "--compressed") - print(result.output) - assert result.exit_code == 0 - - -def test_status_csv(ddev, mock_size_status): - result = ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12", "--compressed", "--csv") - print(result.output) - assert result.exit_code == 0 + assert (ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12")).exit_code == 0 + assert 
(ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12", "--compressed")).exit_code == 0 + assert (ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12", "--csv")).exit_code == 0 + assert (ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12", "--markdown")).exit_code == 0 + assert (ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12", "--json")).exit_code == 0 + assert ( + ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12", "--save_to_png_path", "out.png") + ).exit_code == 0 + assert (ddev("size", "status", "--platform", "linux-aarch64", "--python", "3.12", "--show_gui")).exit_code == 0 def test_status_wrong_platform(ddev): diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 436547cf9ce86..c04b49c772777 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -1,3 +1,4 @@ +from datetime import date from pathlib import Path from unittest.mock import MagicMock, patch @@ -5,7 +6,7 @@ @pytest.fixture -def mock_timeline_gitrepo(): +def mock_timeline(): mock_git_repo = MagicMock() mock_git_repo.repo_dir = "fake_repo" mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] @@ -22,13 +23,12 @@ def mock_timeline_gitrepo(): patch("os.path.exists", return_value=True), patch("ddev.cli.size.timeline.format_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), - patch("ddev.cli.size.timeline.print_table"), - patch("ddev.cli.size.timeline.print_csv"), - patch("ddev.cli.size.timeline.plot_linegraph"), patch( "ddev.cli.size.timeline.get_valid_platforms", return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), ), + patch("matplotlib.pyplot.show"), + patch("matplotlib.pyplot.savefig"), ): yield @@ -40,10 +40,22 @@ def app(): return mock_app -def test_timeline_integration_compressed(ddev, 
mock_timeline_gitrepo, app): - result = ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--compressed", obj=app) - print(result.output) - assert result.exit_code == 0 +def test_timeline_integration_compressed(ddev, mock_timeline, app): + assert ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--compressed", obj=app).exit_code == 0 + assert ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--csv", obj=app).exit_code == 0 + assert ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--markdown", obj=app).exit_code == 0 + assert ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--json", obj=app).exit_code == 0 + assert ( + ddev( + "size", "timeline", "integration", "int1", "commit1", "commit2", "--save_to_png_path", "out.png", obj=app + ).exit_code + == 0 + ) + assert ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--show_gui", obj=app).exit_code == 0 + assert ( + ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--threshold", "1000", obj=app).exit_code + == 0 + ) @pytest.fixture @@ -52,7 +64,7 @@ def mock_timeline_dependencies(): mock_git_repo.repo_dir = "fake_repo" mock_git_repo.get_module_commits.return_value = ["commit1", "commit2"] mock_git_repo.get_commit_metadata.side_effect = lambda c: ("Apr 4 2025", "Fix dep", c) - + mock_git_repo.get_creation_commit_module.side_effect = "initial_commit" with ( patch("ddev.cli.size.timeline.GitRepo.__enter__", return_value=mock_git_repo), patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), @@ -67,36 +79,131 @@ def mock_timeline_dependencies(): patch("os.listdir", return_value=["linux-x86_64-3.12"]), patch("os.path.isfile", return_value=True), patch("ddev.cli.size.timeline.get_gitignore_files", return_value=set()), - patch("ddev.cli.size.timeline.get_dependency_data", return_value=("https://example.com/dep1.whl", '1.1.1)')), - 
patch("ddev.cli.size.timeline.requests.head") as mock_head, + patch( + "ddev.cli.size.timeline.get_dependencies", + return_value={ + "Size_Bytes": 12345, + "Version": "1.2.3", + "Date": date(2025, 4, 4), + "Author": "Mock User", + "Commit_Message": "Mock commit message", + "Commit_SHA": "abcdef123456", + }, + ), patch("ddev.cli.size.timeline.format_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), - patch("ddev.cli.size.timeline.print_table"), - patch("ddev.cli.size.timeline.plot_linegraph"), + patch("matplotlib.pyplot.show"), + patch("matplotlib.pyplot.savefig"), ): - mock_response = MagicMock() - mock_response.headers = {"Content-Length": "1024"} - mock_response.raise_for_status = lambda: None - mock_head.return_value = mock_response yield -def test_timeline_dependency_compressed(ddev, mock_timeline_dependencies, app): - result = ddev( - "size", - "timeline", - "dependency", - "dep1", - "commit1", - "commit2", - "--compressed", - "--platform", - "linux-x86_64", - obj=app, +def test_timeline_dependency(ddev, mock_timeline_dependencies, app): + assert ( + ddev( + "size", "timeline", "dependency", "dep1", "commit1", "commit2", "--platform", "linux-x86_64", obj=app + ).exit_code + == 0 + ) + assert ddev("size", "timeline", "dependency", "dep1", "commit1", "commit2", obj=app).exit_code == 0 + assert ( + ddev( + "size", + "timeline", + "dependency", + "dep1", + "commit1", + "commit2", + "--platform", + "linux-x86_64", + "--compressed", + obj=app, + ).exit_code + == 0 + ) + assert ( + ddev( + "size", + "timeline", + "dependency", + "dep1", + "commit1", + "commit2", + "--platform", + "linux-x86_64", + "--csv", + obj=app, + ).exit_code + == 0 + ) + assert ( + ddev( + "size", + "timeline", + "dependency", + "dep1", + "commit1", + "commit2", + "--platform", + "linux-x86_64", + "--markdown", + obj=app, + ).exit_code + == 0 + ) + assert ( + ddev( + "size", + "timeline", + "dependency", + "dep1", + "commit1", + 
"commit2", + "--platform", + "linux-x86_64", + "--json", + obj=app, + ).exit_code + == 0 + ) + assert ( + ddev( + "size", + "timeline", + "dependency", + "dep1", + "commit1", + "commit2", + "--platform", + "linux-x86_64", + "--save_to_png_path", + "out.png", + obj=app, + ).exit_code + == 0 + ) + assert ( + ddev( + "size", + "timeline", + "dependency", + "dep1", + "commit1", + "commit2", + "--platform", + "linux-x86_64", + "--show_gui", + obj=app, + ).exit_code + == 0 + ) + assert ( + ddev( + "size", "timeline", "dependency", "dep1", "--platform", "linux-x86_64", "--threshold", "1000", obj=app + ).exit_code + == 0 ) - - assert result.exit_code == 0 def test_timeline_invalid_platform(ddev): @@ -146,9 +253,59 @@ def test_timeline_integration_no_changes(ddev): return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), ), ): - result = ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--compressed") - assert result.exit_code == 0 - assert "No changes found" in result.output + assert ( + "No changes found" + in (result := ddev("size", "timeline", "integration", "int1", "commit1", "commit2")).output + and result.exit_code == 0 + ) + assert ( + "No changes found" + in (result := ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--compressed")).output + and result.exit_code == 0 + ) + assert ( + "No changes found" + in (result := ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--csv")).output + and result.exit_code == 0 + ) + assert ( + "No changes found" + in (result := ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--markdown")).output + and result.exit_code == 0 + ) + assert ( + "No changes found" + in (result := ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--json")).output + and result.exit_code == 0 + ) + assert ( + "No changes found" + in ( + result := ddev( + "size", "timeline", "integration", "int1", "commit1", "commit2", 
"--save_to_png_path", "out.png" + ) + ).output + and result.exit_code == 0 + ) + assert ( + "No changes found" + in (result := ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--show_gui")).output + and result.exit_code == 0 + ) + assert ( + "No changes found" + in ( + result := ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--time", "2025-04-01") + ).output + and result.exit_code == 0 + ) + assert ( + "No changes found" + in ( + result := ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--threshold", "1000") + ).output + and result.exit_code == 0 + ) def test_timeline_integration_not_found(ddev): @@ -167,6 +324,8 @@ def test_timeline_integration_not_found(ddev): return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), ), patch("ddev.cli.size.timeline.module_exists", return_value=False), + patch("matplotlib.pyplot.show"), + patch("matplotlib.pyplot.savefig"), ): result = ddev("size", "timeline", "integration", "missing_module", "c123456", "c2345667") assert result.exit_code != 0 diff --git a/ddev/tests/size/test_timeline.py b/ddev/tests/size/test_timeline.py index 0942efc3bb393..ea6f06a363514 100644 --- a/ddev/tests/size/test_timeline.py +++ b/ddev/tests/size/test_timeline.py @@ -77,7 +77,7 @@ def test_format_commit_data(): "Apr 4 2025", "this is a very long commit message that should be trimmed (#1234)", "abc1234def", "abc1234def" ) expected_date = datetime.strptime("Apr 4 2025", "%b %d %Y").date() - expected_message = "(NEW) this is a very long...(#1234)" + expected_message = "(NEW) this is a very long commit...(#1234)" expected_commit = "abc1234" assert date == expected_date assert message == expected_message From 40a0f8c8d2ec79b14cf03a48bde545351052d6da Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 12 May 2025 09:25:18 +0200 Subject: [PATCH 42/70] fix ddev windows --- ddev/src/ddev/cli/size/common.py | 17 ++++++++--------- 
ddev/tests/cli/size/test_timeline.py | 5 +++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 4f5994839bd24..32bd85fa94d94 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -14,7 +14,6 @@ from types import TracebackType from typing import Literal, Optional, Type, TypedDict -import matplotlib.cm as cm import matplotlib.pyplot as plt import requests import squarify @@ -511,8 +510,8 @@ def plot_status_treemap( rects = squarify.squarify(norm_sizes, 0, 0, 100, 100) # Define the colors for each type - cmap_int = cm.get_cmap("Purples") - cmap_dep = cm.get_cmap("Reds") + cmap_int = plt.get_cmap("Purples") + cmap_dep = plt.get_cmap("Reds") # Assign colors based on type and normalized size colors = [] @@ -527,8 +526,8 @@ def plot_status_treemap( colors.append("#999999") # Define the legend legend_handles = [ - Patch(color=cm.get_cmap("Purples")(0.6), label="Integration"), - Patch(color=cm.get_cmap("Reds")(0.6), label="Dependency"), + Patch(color=plt.get_cmap("Purples")(0.6), label="Integration"), + Patch(color=plt.get_cmap("Reds")(0.6), label="Dependency"), ] return rects, colors, legend_handles @@ -537,8 +536,8 @@ def plot_diff_treemap( modules: list[FileDataEntry] | list[FileDataEntryPlatformVersion], ) -> tuple[list[dict[str, float]], list[tuple[float, float, float, float]], list[Patch]]: # Define the colors for each type - cmap_pos = cm.get_cmap("Oranges") - cmap_neg = cm.get_cmap("Blues") + cmap_pos = plt.get_cmap("Oranges") + cmap_neg = plt.get_cmap("Blues") # Separate in negative and positive differences positives = [mod for mod in modules if mod["Size_Bytes"] > 0] @@ -582,8 +581,8 @@ def plot_diff_treemap( colors.append(cmap_pos(intensity)) legend_handles = [ - Patch(color=cm.get_cmap("Oranges")(0.7), label="Increase"), - Patch(color=cm.get_cmap("Blues")(0.7), label="Decrease"), + Patch(color=plt.get_cmap("Oranges")(0.7), label="Increase"), 
+ Patch(color=plt.get_cmap("Blues")(0.7), label="Decrease"), ] return rects, colors, legend_handles diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index c04b49c772777..af7be61ce12c2 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -92,8 +92,9 @@ def mock_timeline_dependencies(): ), patch("ddev.cli.size.timeline.format_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), - patch("matplotlib.pyplot.show"), - patch("matplotlib.pyplot.savefig"), + patch("ddev.cli.size.timeline.plt.show"), + patch("ddev.cli.size.timeline.plt.savefig") +, ): yield From e941a1ab93966685468cb3b8d142889a0fc09084 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 12 May 2025 09:39:57 +0200 Subject: [PATCH 43/70] fix lint --- ddev/tests/cli/size/test_timeline.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index af7be61ce12c2..76928f37a08d9 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -93,8 +93,7 @@ def mock_timeline_dependencies(): patch("ddev.cli.size.timeline.format_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.plt.show"), - patch("ddev.cli.size.timeline.plt.savefig") -, + patch("ddev.cli.size.timeline.plt.savefig"), ): yield From 5cd0a35dc2b771dac36045b43dfffeaad37211f6 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 12 May 2025 10:07:19 +0200 Subject: [PATCH 44/70] testing ddev tests on windows --- ddev/src/ddev/cli/size/common.py | 1 - ddev/tests/cli/size/test_timeline.py | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 32bd85fa94d94..f13ab5d141bec 100644 --- 
a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -588,7 +588,6 @@ def plot_diff_treemap( return rects, colors, legend_handles -# Map normalized values to color intensity def scale_colors_treemap(area: float, max_area: float) -> float: vmin = 0.3 vmax = 0.65 diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 76928f37a08d9..26e4eeb40e2af 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -1,3 +1,4 @@ +import os from datetime import date from pathlib import Path from unittest.mock import MagicMock, patch @@ -40,7 +41,7 @@ def app(): return mock_app -def test_timeline_integration_compressed(ddev, mock_timeline, app): +def test_timeline_integration(ddev, mock_timeline, app): assert ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--compressed", obj=app).exit_code == 0 assert ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--csv", obj=app).exit_code == 0 assert ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--markdown", obj=app).exit_code == 0 @@ -178,7 +179,7 @@ def test_timeline_dependency(ddev, mock_timeline_dependencies, app): "--platform", "linux-x86_64", "--save_to_png_path", - "out.png", + os.sep + "out.png", obj=app, ).exit_code == 0 From e3964d7f46be78ba4477d8ea6156cad6488d175d Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 12 May 2025 11:11:11 +0200 Subject: [PATCH 45/70] test --- ddev/tests/cli/size/test_timeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 26e4eeb40e2af..1bacade16885e 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -179,7 +179,7 @@ def test_timeline_dependency(ddev, mock_timeline_dependencies, app): "--platform", "linux-x86_64", "--save_to_png_path", - os.sep + "out.png", + "." 
+ os.sep + "out.png", obj=app, ).exit_code == 0 From f5c66f8c01f76e5b72c4a24cb9a8db850e8d97a6 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 12 May 2025 11:48:05 +0200 Subject: [PATCH 46/70] test --- ddev/tests/cli/size/test_timeline.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 1bacade16885e..dca52c1b676d3 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -93,8 +93,8 @@ def mock_timeline_dependencies(): ), patch("ddev.cli.size.timeline.format_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), - patch("ddev.cli.size.timeline.plt.show"), - patch("ddev.cli.size.timeline.plt.savefig"), + patch("matplotlib.pyplot.show"), + patch("matplotlib.pyplot.savefig"), ): yield @@ -179,7 +179,7 @@ def test_timeline_dependency(ddev, mock_timeline_dependencies, app): "--platform", "linux-x86_64", "--save_to_png_path", - "." 
+ os.sep + "out.png", + "out2.png", obj=app, ).exit_code == 0 From 3e5b26b4129bb741e2eb2948eb0b40ed5944b3de Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 12 May 2025 11:51:12 +0200 Subject: [PATCH 47/70] test --- ddev/tests/cli/size/test_timeline.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index dca52c1b676d3..59e935e9048e4 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -1,4 +1,3 @@ -import os from datetime import date from pathlib import Path from unittest.mock import MagicMock, patch From cf51e8ee0c401c5eb4abe0da1748a2a5188ca8a8 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 12 May 2025 12:02:31 +0200 Subject: [PATCH 48/70] test --- .github/workflows/measure-disk-usage.yml | 22 ++++++------ ddev/tests/cli/size/test_timeline.py | 46 ++++++++++++++++-------- 2 files changed, 42 insertions(+), 26 deletions(-) diff --git a/.github/workflows/measure-disk-usage.yml b/.github/workflows/measure-disk-usage.yml index 55d332586c5d9..ce6721034f276 100644 --- a/.github/workflows/measure-disk-usage.yml +++ b/.github/workflows/measure-disk-usage.yml @@ -11,11 +11,11 @@ jobs: measure-disk-usage: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 - name: Set up Python ${{ env.PYTHON_VERSION }} - uses: actions/setup-python@v5 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 with: python-version: ${{ env.PYTHON_VERSION }} - name: Install ddev @@ -31,7 +31,7 @@ jobs: run: | mkdir -p status_visualizations ddev size status --csv > size-uncompressed.csv - ddev size status --save_to_png_path status_visualizations/uncompressed.png > size-uncompressed.txt + ddev size status --markdown --save_to_png_path status_visualizations/uncompressed.png > size-uncompressed.txt cat size-uncompressed.txt 
echo "# Size (uncompressed)" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY @@ -42,7 +42,7 @@ jobs: run: | mkdir -p status_visualizations ddev size status --csv --compressed > size-compressed.csv - ddev size status --compressed --save_to_png_path status_visualizations/compressed.png > size-compressed.txt + ddev size status --compressed --markdown --save_to_png_path status_visualizations/compressed.png > size-compressed.txt cat size-compressed.txt echo "# Size (compressed)" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY @@ -57,7 +57,7 @@ jobs: BEFORE=$(git rev-parse HEAD^) AFTER=$(git rev-parse HEAD) ddev size diff $BEFORE $AFTER --csv > diff-uncompressed.csv - ddev size diff $BEFORE $AFTER --save_to_png_path diff_visualizations/diff-uncompressed-linux.png > diff-uncompressed.txt + ddev size diff $BEFORE $AFTER --markdown --save_to_png_path diff_visualizations/diff-uncompressed-linux.png > diff-uncompressed.txt cat diff-uncompressed.txt echo "# Size diff (uncompressed)" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY @@ -71,7 +71,7 @@ jobs: BEFORE=$(git rev-parse HEAD^) AFTER=$(git rev-parse HEAD) ddev size diff $BEFORE $AFTER --compressed --csv > diff-compressed.csv - ddev size diff $BEFORE $AFTER --compressed --save_to_png_path diff_visualizations/diff-compressed-linux.png > diff-compressed.txt + ddev size diff $BEFORE $AFTER --compressed --markdown --save_to_png_path diff_visualizations/diff-compressed-linux.png > diff-compressed.txt cat diff-compressed.txt echo "# Size diff (compressed)" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY @@ -79,14 +79,14 @@ jobs: echo '```' >> $GITHUB_STEP_SUMMARY - name: Upload file sizes (uncompressed) - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: size-uncompressed.csv path: size-uncompressed.csv if-no-files-found: error - name: Upload file sizes (compressed) - uses: actions/upload-artifact@v4 + uses: 
actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: size-compressed.csv path: size-compressed.csv @@ -94,7 +94,7 @@ jobs: - name: Upload file sizes diff (uncompressed) if: false # DDisabled for now: size difference can be misleading due to dependencies not being built in the same PR - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: diff-uncompressed.csv path: diff-uncompressed.csv @@ -102,14 +102,14 @@ jobs: - name: Upload file sizes diff (compressed) if: false # Disabled for now: size difference can be misleading due to dependencies not being built in the same PR - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: diff-compressed.csv path: diff-compressed.csv if-no-files-found: error - name: Upload status PNGs - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: size-visuals path: status_visualizations/ diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 59e935e9048e4..558d4ac36e609 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -167,22 +167,38 @@ def test_timeline_dependency(ddev, mock_timeline_dependencies, app): ).exit_code == 0 ) - assert ( - ddev( - "size", - "timeline", - "dependency", - "dep1", - "commit1", - "commit2", - "--platform", - "linux-x86_64", - "--save_to_png_path", - "out2.png", - obj=app, - ).exit_code - == 0 + # assert ( + # ddev( + # "size", + # "timeline", + # "dependency", + # "dep1", + # "commit1", + # "commit2", + # "--platform", + # "linux-x86_64", + # "--save_to_png_path", + # "out2.png", + # obj=app, + # ).exit_code + # == 0 + # ) + result = ddev( + "size", + "timeline", + "dependency", + "dep1", + "commit1", + "commit2", + "--platform", + "linux-x86_64", + "--save_to_png_path", + 
"out.png", + obj=app, ) + print(result.output) + assert result.exit_code == 0 + assert ( ddev( "size", From 06563cd2cc46f288a08dbc77a61c7cdb5f3f17e7 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 12 May 2025 12:16:12 +0200 Subject: [PATCH 49/70] test --- ddev/tests/cli/size/test_timeline.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 558d4ac36e609..389376db35612 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -27,8 +27,8 @@ def mock_timeline(): "ddev.cli.size.timeline.get_valid_platforms", return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), ), - patch("matplotlib.pyplot.show"), - patch("matplotlib.pyplot.savefig"), + patch("ddev.cli.size.timeline.plt.show"), + patch("ddev.cli.size.timeline.plt.savefig"), ): yield @@ -47,7 +47,7 @@ def test_timeline_integration(ddev, mock_timeline, app): assert ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--json", obj=app).exit_code == 0 assert ( ddev( - "size", "timeline", "integration", "int1", "commit1", "commit2", "--save_to_png_path", "out.png", obj=app + "size", "timeline", "integration", "int1", "commit1", "commit2", "--save_to_png_path", "out_int.png", obj=app ).exit_code == 0 ) @@ -92,8 +92,8 @@ def mock_timeline_dependencies(): ), patch("ddev.cli.size.timeline.format_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), - patch("matplotlib.pyplot.show"), - patch("matplotlib.pyplot.savefig"), + patch("ddev.cli.size.timeline.plt.show"), + patch("ddev.cli.size.timeline.plt.savefig"), ): yield @@ -193,7 +193,7 @@ def test_timeline_dependency(ddev, mock_timeline_dependencies, app): "--platform", "linux-x86_64", "--save_to_png_path", - "out.png", + "out_dep.png", obj=app, ) print(result.output) From 
da01a1ffb69ca500ea892c65a0ddab6dfa64ba58 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 12 May 2025 12:19:40 +0200 Subject: [PATCH 50/70] test --- ddev/tests/cli/size/test_timeline.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 389376db35612..6bec89fffa080 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -47,7 +47,15 @@ def test_timeline_integration(ddev, mock_timeline, app): assert ddev("size", "timeline", "integration", "int1", "commit1", "commit2", "--json", obj=app).exit_code == 0 assert ( ddev( - "size", "timeline", "integration", "int1", "commit1", "commit2", "--save_to_png_path", "out_int.png", obj=app + "size", + "timeline", + "integration", + "int1", + "commit1", + "commit2", + "--save_to_png_path", + "out_int.png", + obj=app, ).exit_code == 0 ) From 6a8df2259827f9b60ffe6f8d160f72258259d899 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 12 May 2025 12:37:35 +0200 Subject: [PATCH 51/70] test --- ddev/tests/cli/size/test_timeline.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 6bec89fffa080..5e7cc2d3c8f18 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -204,7 +204,9 @@ def test_timeline_dependency(ddev, mock_timeline_dependencies, app): "out_dep.png", obj=app, ) - print(result.output) + print(result.stderr) + print(result.exception) + assert result.exit_code == 0 assert ( From d863e627b315d0ae0104f443d9f2bf6bfb7e1143 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 12 May 2025 12:54:23 +0200 Subject: [PATCH 52/70] test --- ddev/tests/cli/size/test_timeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 
5e7cc2d3c8f18..b16e73f8142df 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -175,7 +175,7 @@ def test_timeline_dependency(ddev, mock_timeline_dependencies, app): ).exit_code == 0 ) - # assert ( + # assert ( a # ddev( # "size", # "timeline", From ea855e3fdf8b5246c78c3f4c198da10fef511704 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 12 May 2025 13:02:23 +0200 Subject: [PATCH 53/70] test --- ddev/tests/cli/size/test_timeline.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index b16e73f8142df..cd2256987385d 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -25,7 +25,7 @@ def mock_timeline(): patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), patch( "ddev.cli.size.timeline.get_valid_platforms", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}), ), patch("ddev.cli.size.timeline.plt.show"), patch("ddev.cli.size.timeline.plt.savefig"), @@ -79,7 +79,7 @@ def mock_timeline_dependencies(): patch("ddev.cli.size.timeline.GitRepo.sparse_checkout_commit"), patch( "ddev.cli.size.timeline.get_valid_platforms", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}), ), patch("ddev.cli.size.timeline.get_dependency_list", return_value={"dep1"}), patch("os.path.exists", return_value=True), @@ -103,7 +103,6 @@ def mock_timeline_dependencies(): patch("ddev.cli.size.timeline.plt.show"), patch("ddev.cli.size.timeline.plt.savefig"), ): - yield @@ -204,8 +203,9 @@ def test_timeline_dependency(ddev, mock_timeline_dependencies, app): "out_dep.png", obj=app, ) - print(result.stderr) - 
print(result.exception) + print("❌ Exception raised during CLI call:") + print(f"Type: {type(result.exception).__name__}") + print(f"Message: {result.exception}") assert result.exit_code == 0 @@ -243,10 +243,9 @@ def test_timeline_invalid_platform(ddev): patch("ddev.cli.size.timeline.GitRepo", return_value=mock_git_repo), patch( "ddev.cli.size.timeline.get_valid_platforms", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}), ), ): - result = ddev( "size", "timeline", @@ -276,7 +275,7 @@ def test_timeline_integration_no_changes(ddev): patch("os.listdir", return_value=[]), patch( "ddev.cli.size.timeline.get_valid_platforms", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}), ), ): assert ( @@ -347,7 +346,7 @@ def test_timeline_integration_not_found(ddev): patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), patch( "ddev.cli.size.timeline.get_valid_platforms", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}), ), patch("ddev.cli.size.timeline.module_exists", return_value=False), patch("matplotlib.pyplot.show"), @@ -371,7 +370,7 @@ def test_timeline_dependency_missing_no_platform(ddev): patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), patch( "ddev.cli.size.timeline.get_valid_platforms", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}), ), patch("ddev.cli.size.timeline.get_dependency_list", return_value=set()), ): @@ -393,11 +392,10 @@ def test_timeline_dependency_missing_for_platform(ddev, app): patch("ddev.cli.size.timeline.GitRepo.__exit__", 
return_value=None), patch( "ddev.cli.size.timeline.get_valid_platforms", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}), ), patch("ddev.cli.size.timeline.get_dependency_list", return_value=set()), ): - result = ddev( "size", "timeline", @@ -429,11 +427,10 @@ def test_timeline_dependency_no_changes(ddev, app): patch("ddev.cli.size.timeline.GitRepo.__exit__", return_value=None), patch( "ddev.cli.size.timeline.get_valid_platforms", - return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'windows-x86_64'}), + return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "windows-x86_64"}), ), patch("ddev.cli.size.timeline.get_dependency_list", return_value={"dep1"}), ): - result = ddev( "size", "timeline", From b7a145b5ea5580d722b782d491924c13e34ce8b1 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 12 May 2025 14:05:47 +0200 Subject: [PATCH 54/70] test --- ddev/tests/cli/size/test_timeline.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index cd2256987385d..2661d941a0664 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -1,3 +1,4 @@ +import traceback from datetime import date from pathlib import Path from unittest.mock import MagicMock, patch @@ -203,9 +204,12 @@ def test_timeline_dependency(ddev, mock_timeline_dependencies, app): "out_dep.png", obj=app, ) - print("❌ Exception raised during CLI call:") - print(f"Type: {type(result.exception).__name__}") - print(f"Message: {result.exception}") + if result.exception: + print("❌ Exception raised during CLI call:") + print(f"Type: {type(result.exception).__name__}") + print(f"Message: {result.exception}") + print("--- Traceback ---") + traceback.print_exception(type(result.exception), result.exception, 
result.exception.__traceback__) assert result.exit_code == 0 From c0c92d111a78d42c24017f455967499e5aba3aba Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 12 May 2025 14:21:01 +0200 Subject: [PATCH 55/70] test --- ddev/tests/cli/size/test_timeline.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 2661d941a0664..270bff47943ec 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -30,6 +30,7 @@ def mock_timeline(): ), patch("ddev.cli.size.timeline.plt.show"), patch("ddev.cli.size.timeline.plt.savefig"), + patch("ddev.cli.size.timeline.plt..figure"), ): yield @@ -103,6 +104,7 @@ def mock_timeline_dependencies(): patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.plt.show"), patch("ddev.cli.size.timeline.plt.savefig"), + patch("ddev.cli.size.timeline.plt..figure"), ): yield From 586450172cbbf387155b7a6cb87d241dfc6b391d Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 12 May 2025 14:23:56 +0200 Subject: [PATCH 56/70] test --- ddev/tests/cli/size/test_timeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 270bff47943ec..55a3ccb9d7147 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -30,7 +30,7 @@ def mock_timeline(): ), patch("ddev.cli.size.timeline.plt.show"), patch("ddev.cli.size.timeline.plt.savefig"), - patch("ddev.cli.size.timeline.plt..figure"), + patch("ddev.cli.size.timeline.plt.figure"), ): yield @@ -104,7 +104,7 @@ def mock_timeline_dependencies(): patch("ddev.cli.size.timeline.trim_modules", side_effect=lambda m, *_: m), patch("ddev.cli.size.timeline.plt.show"), patch("ddev.cli.size.timeline.plt.savefig"), - patch("ddev.cli.size.timeline.plt..figure"), + patch("ddev.cli.size.timeline.plt.figure"), ): 
yield From 2561a83cc1bc7ce5e5e05b455b0b83dadffc12cc Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Wed, 14 May 2025 10:35:08 +0200 Subject: [PATCH 57/70] fixes --- .github/workflows/measure-disk-usage.yml | 4 +- ddev/src/ddev/cli/size/common.py | 8 ++-- ddev/src/ddev/cli/size/diff.py | 12 +++--- ddev/src/ddev/cli/size/status.py | 12 +++--- ddev/src/ddev/cli/size/timeline.py | 30 +++++++------- ddev/tests/cli/size/test_timeline.py | 52 +++++++----------------- 6 files changed, 48 insertions(+), 70 deletions(-) diff --git a/.github/workflows/measure-disk-usage.yml b/.github/workflows/measure-disk-usage.yml index ce6721034f276..2e8e8046c2892 100644 --- a/.github/workflows/measure-disk-usage.yml +++ b/.github/workflows/measure-disk-usage.yml @@ -1,7 +1,7 @@ name: Measure Disk Usage on: - push: + pull_request: branches: - master env: @@ -93,7 +93,7 @@ jobs: if-no-files-found: error - name: Upload file sizes diff (uncompressed) - if: false # DDisabled for now: size difference can be misleading due to dependencies not being built in the same PR + if: false # Disabled for now: size difference can be misleading due to dependencies not being built in the same PR uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: diff-uncompressed.csv diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index f13ab5d141bec..7b6cb1238c360 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -53,7 +53,7 @@ class CommitEntryPlatformWithDelta(CommitEntryWithDelta): Platform: str # Target platform (e.g. 
linux-aarch64) -class Parameters(TypedDict): +class CLIParameters(TypedDict): app: Application platform: str version: str @@ -65,7 +65,7 @@ class Parameters(TypedDict): show_gui: bool -class ParametersTimeline(TypedDict): +class CLIParametersTimeline(TypedDict): app: Application module: str threshold: Optional[int] @@ -77,13 +77,13 @@ class ParametersTimeline(TypedDict): show_gui: bool -class ParametersTimelineIntegration(ParametersTimeline): +class CLIParametersTimelineIntegration(CLIParametersTimeline): type: Literal["integration"] first_commit: str platform: None -class ParametersTimelineDependency(ParametersTimeline): +class CLIParametersTimelineDependency(CLIParametersTimeline): type: Literal["dependency"] first_commit: None platform: str diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index 92a4bf70215e9..27574f26bca12 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -13,10 +13,10 @@ from ddev.cli.application import Application from .common import ( + CLIParameters, FileDataEntry, FileDataEntryPlatformVersion, GitRepo, - Parameters, convert_to_human_readable_size, format_modules, get_dependencies, @@ -115,7 +115,7 @@ def diff( if save_to_png_path: base, ext = os.path.splitext(save_to_png_path) path = f"{base}_{plat}_{ver}{ext}" - parameters: Parameters = { + parameters: CLIParameters = { "app": app, "platform": plat, "version": ver, @@ -145,7 +145,7 @@ def diff( progress.remove_task(task) modules: list[FileDataEntry] = [] multiple_plat_and_ver: Literal[False] = False - base_parameters: Parameters = { + base_parameters: CLIParameters = { "app": app, "platform": platform, "version": version, @@ -181,7 +181,7 @@ def diff_mode( gitRepo: GitRepo, first_commit: str, second_commit: str, - params: Parameters, + params: CLIParameters, progress: Progress, multiple_plats_and_vers: Literal[True], ) -> list[FileDataEntryPlatformVersion]: ... 
@@ -190,7 +190,7 @@ def diff_mode( gitRepo: GitRepo, first_commit: str, second_commit: str, - params: Parameters, + params: CLIParameters, progress: Progress, multiple_plats_and_vers: Literal[False], ) -> list[FileDataEntry]: ... @@ -198,7 +198,7 @@ def diff_mode( gitRepo: GitRepo, first_commit: str, second_commit: str, - params: Parameters, + params: CLIParameters, progress: Progress, multiple_plats_and_vers: bool, ) -> list[FileDataEntryPlatformVersion] | list[FileDataEntry]: diff --git a/ddev/src/ddev/cli/size/status.py b/ddev/src/ddev/cli/size/status.py index ad1272cc18cdb..ad09b45e89ed3 100644 --- a/ddev/src/ddev/cli/size/status.py +++ b/ddev/src/ddev/cli/size/status.py @@ -14,7 +14,7 @@ from .common import ( FileDataEntry, FileDataEntryPlatformVersion, - Parameters, + CLIParameters, format_modules, get_dependencies, get_files, @@ -82,7 +82,7 @@ def status( if save_to_png_path: base, ext = os.path.splitext(save_to_png_path) path = f"{base}_{plat}_{ver}{ext}" - parameters: Parameters = { + parameters: CLIParameters = { "app": app, "platform": plat, "version": ver, @@ -107,7 +107,7 @@ def status( else: modules: list[FileDataEntry] = [] multiple_plat_and_ver: Literal[False] = False - base_parameters: Parameters = { + base_parameters: CLIParameters = { "app": app, "platform": platform, "version": version, @@ -137,18 +137,18 @@ def status( @overload def status_mode( repo_path: Path, - params: Parameters, + params: CLIParameters, multiple_plats_and_vers: Literal[True], ) -> list[FileDataEntryPlatformVersion]: ... @overload def status_mode( repo_path: Path, - params: Parameters, + params: CLIParameters, multiple_plats_and_vers: Literal[False], ) -> list[FileDataEntry]: ... 
def status_mode( repo_path: Path, - params: Parameters, + params: CLIParameters, multiple_plats_and_vers: bool, ) -> list[FileDataEntryPlatformVersion] | list[FileDataEntry]: with console.status("[cyan]Calculating sizes...", spinner="dots"): diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index cbaa50d74d0b9..c7fcc18f9cd37 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -15,12 +15,12 @@ from ddev.cli.application import Application from .common import ( + CLIParametersTimelineDependency, + CLIParametersTimelineIntegration, CommitEntry, CommitEntryPlatformWithDelta, CommitEntryWithDelta, GitRepo, - ParametersTimelineDependency, - ParametersTimelineIntegration, WrongDependencyFormat, compress, convert_to_human_readable_size, @@ -174,7 +174,7 @@ def timeline( modules_plat: list[CommitEntryPlatformWithDelta] = [] multiple_plats_and_vers: Literal[True] = True progress.remove_task(task) - dep_parameters: ParametersTimelineDependency + dep_parameters: CLIParametersTimelineDependency if not platform: for plat in valid_platforms: path = None @@ -238,7 +238,7 @@ def timeline( else: modules: list[CommitEntryWithDelta] = [] multiple_plat_and_ver: Literal[False] = False - int_parameters: ParametersTimelineIntegration = { + int_parameters: CLIParametersTimelineIntegration = { "app": app, "type": "integration", "module": module, @@ -276,7 +276,7 @@ def timeline( def timeline_mode( gitRepo: GitRepo, commits: list[str], - params: ParametersTimelineDependency, + params: CLIParametersTimelineDependency, multiple_plats_and_vers: Literal[True], progress: Progress, ) -> list[CommitEntryPlatformWithDelta]: ... @@ -286,7 +286,7 @@ def timeline_mode( def timeline_mode( gitRepo: GitRepo, commits: list[str], - params: ParametersTimelineIntegration, + params: CLIParametersTimelineIntegration, multiple_plats_and_vers: Literal[False], progress: Progress, ) -> list[CommitEntryWithDelta]: ... 
@@ -296,7 +296,7 @@ def timeline_mode( def timeline_mode( gitRepo: GitRepo, commits: list[str], - params: ParametersTimelineDependency, + params: CLIParametersTimelineDependency, multiple_plats_and_vers: Literal[False], progress: Progress, ) -> list[CommitEntryWithDelta]: ... @@ -305,7 +305,7 @@ def timeline_mode( def timeline_mode( gitRepo: GitRepo, commits: list[str], - params: ParametersTimelineIntegration | ParametersTimelineDependency, + params: CLIParametersTimelineIntegration | CLIParametersTimelineDependency, multiple_plats_and_vers: bool, progress: Progress, ) -> list[CommitEntryWithDelta] | list[CommitEntryPlatformWithDelta]: @@ -342,7 +342,7 @@ def timeline_mode( @overload def get_repo_info( gitRepo: GitRepo, - params: ParametersTimelineIntegration, + params: CLIParametersTimelineIntegration, commits: list[str], progress: Progress, ) -> list[CommitEntry]: ... @@ -351,7 +351,7 @@ def get_repo_info( @overload def get_repo_info( gitRepo: GitRepo, - params: ParametersTimelineDependency, + params: CLIParametersTimelineDependency, commits: list[str], progress: Progress, ) -> list[CommitEntry]: ... 
@@ -359,7 +359,7 @@ def get_repo_info( def get_repo_info( gitRepo: GitRepo, - params: ParametersTimelineIntegration | ParametersTimelineDependency, + params: CLIParametersTimelineIntegration | CLIParametersTimelineDependency, commits: list[str], progress: Progress, ) -> list[CommitEntry]: @@ -387,7 +387,7 @@ def get_repo_info( @overload def process_commits( commits: list[str], - params: ParametersTimelineIntegration, + params: CLIParametersTimelineIntegration, gitRepo: GitRepo, progress: Progress, first_commit: str, @@ -397,7 +397,7 @@ def process_commits( @overload def process_commits( commits: list[str], - params: ParametersTimelineDependency, + params: CLIParametersTimelineDependency, gitRepo: GitRepo, progress: Progress, first_commit: None, @@ -406,7 +406,7 @@ def process_commits( def process_commits( commits: list[str], - params: ParametersTimelineIntegration | ParametersTimelineDependency, + params: CLIParametersTimelineIntegration | CLIParametersTimelineDependency, gitRepo: GitRepo, progress: Progress, first_commit: Optional[str], @@ -419,7 +419,7 @@ def process_commits( Args: commits: List of commit SHAs to process. - params: ParametersTimeline dict containing module name, type, platform, and other configuration options. + params: CLIParametersTimeline dict containing module name, type, platform, and other configuration options. gitRepo: GitRepo instance managing the repository. progress: Progress bar instance. first_commit: First commit hash where the given integration was introduced (only for integrations). 
diff --git a/ddev/tests/cli/size/test_timeline.py b/ddev/tests/cli/size/test_timeline.py index 55a3ccb9d7147..a07e72a9a0b4d 100644 --- a/ddev/tests/cli/size/test_timeline.py +++ b/ddev/tests/cli/size/test_timeline.py @@ -1,4 +1,3 @@ -import traceback from datetime import date from pathlib import Path from unittest.mock import MagicMock, patch @@ -177,43 +176,22 @@ def test_timeline_dependency(ddev, mock_timeline_dependencies, app): ).exit_code == 0 ) - # assert ( a - # ddev( - # "size", - # "timeline", - # "dependency", - # "dep1", - # "commit1", - # "commit2", - # "--platform", - # "linux-x86_64", - # "--save_to_png_path", - # "out2.png", - # obj=app, - # ).exit_code - # == 0 - # ) - result = ddev( - "size", - "timeline", - "dependency", - "dep1", - "commit1", - "commit2", - "--platform", - "linux-x86_64", - "--save_to_png_path", - "out_dep.png", - obj=app, + assert ( + ddev( + "size", + "timeline", + "dependency", + "dep1", + "commit1", + "commit2", + "--platform", + "linux-x86_64", + "--save_to_png_path", + "out2.png", + obj=app, + ).exit_code + == 0 ) - if result.exception: - print("❌ Exception raised during CLI call:") - print(f"Type: {type(result.exception).__name__}") - print(f"Message: {result.exception}") - print("--- Traceback ---") - traceback.print_exception(type(result.exception), result.exception, result.exception.__traceback__) - - assert result.exit_code == 0 assert ( ddev( From 3f6a0408e5edca2b2349455031354b5dbe667c2b Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Wed, 14 May 2025 10:43:30 +0200 Subject: [PATCH 58/70] test gha --- .github/workflows/measure-disk-usage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/measure-disk-usage.yml b/.github/workflows/measure-disk-usage.yml index 2e8e8046c2892..951a529c006ba 100644 --- a/.github/workflows/measure-disk-usage.yml +++ b/.github/workflows/measure-disk-usage.yml @@ -3,7 +3,7 @@ name: Measure Disk Usage on: pull_request: branches: - - master + - 
master env: PYTHON_VERSION: "3.12" From 1399bff1aed74b224d67f54272cb27de8dd72308 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Wed, 14 May 2025 10:45:55 +0200 Subject: [PATCH 59/70] test gha --- .github/workflows/measure-disk-usage.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/measure-disk-usage.yml b/.github/workflows/measure-disk-usage.yml index 951a529c006ba..cb6c22b4ec16b 100644 --- a/.github/workflows/measure-disk-usage.yml +++ b/.github/workflows/measure-disk-usage.yml @@ -117,6 +117,7 @@ jobs: - name: Upload diff PNGs if: false # Disabled for now: size difference can be misleading due to dependencies not being built in the same PR + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: diff-visuals path: diff_visualizations/ From b1990f771b82292f6929bff977f9ada127e29032 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Wed, 14 May 2025 10:49:46 +0200 Subject: [PATCH 60/70] test gha --- .github/workflows/measure-disk-usage.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/measure-disk-usage.yml b/.github/workflows/measure-disk-usage.yml index cb6c22b4ec16b..85a21589f19ce 100644 --- a/.github/workflows/measure-disk-usage.yml +++ b/.github/workflows/measure-disk-usage.yml @@ -31,7 +31,7 @@ jobs: run: | mkdir -p status_visualizations ddev size status --csv > size-uncompressed.csv - ddev size status --markdown --save_to_png_path status_visualizations/uncompressed.png > size-uncompressed.txt + ddev size status --save_to_png_path status_visualizations/uncompressed.png > size-uncompressed.txt cat size-uncompressed.txt echo "# Size (uncompressed)" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY @@ -42,7 +42,7 @@ jobs: run: | mkdir -p status_visualizations ddev size status --csv --compressed > size-compressed.csv - ddev size status --compressed --markdown --save_to_png_path status_visualizations/compressed.png > size-compressed.txt 
+ ddev size status --compressed --save_to_png_path status_visualizations/compressed.png > size-compressed.txt cat size-compressed.txt echo "# Size (compressed)" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY @@ -57,7 +57,7 @@ jobs: BEFORE=$(git rev-parse HEAD^) AFTER=$(git rev-parse HEAD) ddev size diff $BEFORE $AFTER --csv > diff-uncompressed.csv - ddev size diff $BEFORE $AFTER --markdown --save_to_png_path diff_visualizations/diff-uncompressed-linux.png > diff-uncompressed.txt + ddev size diff $BEFORE $AFTER --save_to_png_path diff_visualizations/diff-uncompressed-linux.png > diff-uncompressed.txt cat diff-uncompressed.txt echo "# Size diff (uncompressed)" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY @@ -71,7 +71,7 @@ jobs: BEFORE=$(git rev-parse HEAD^) AFTER=$(git rev-parse HEAD) ddev size diff $BEFORE $AFTER --compressed --csv > diff-compressed.csv - ddev size diff $BEFORE $AFTER --compressed --markdown --save_to_png_path diff_visualizations/diff-compressed-linux.png > diff-compressed.txt + ddev size diff $BEFORE $AFTER --compressed --save_to_png_path diff_visualizations/diff-compressed-linux.png > diff-compressed.txt cat diff-compressed.txt echo "# Size diff (compressed)" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY From d18e593ecf9d2556d06a6d118e13d17ccf996798 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Wed, 14 May 2025 11:43:37 +0200 Subject: [PATCH 61/70] fixes --- .github/workflows/measure-disk-usage.yml | 2 +- ddev/src/ddev/cli/size/common.py | 20 +++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.github/workflows/measure-disk-usage.yml b/.github/workflows/measure-disk-usage.yml index 85a21589f19ce..35c41c3fcb775 100644 --- a/.github/workflows/measure-disk-usage.yml +++ b/.github/workflows/measure-disk-usage.yml @@ -1,7 +1,7 @@ name: Measure Disk Usage on: - pull_request: + push: branches: - master env: diff --git a/ddev/src/ddev/cli/size/common.py 
b/ddev/src/ddev/cli/size/common.py index 7b6cb1238c360..03f96b71d059f 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -430,15 +430,21 @@ def print_markdown( | list[CommitEntryPlatformWithDelta] ), ) -> None: - if any(str(value).strip() not in ("", "0", "0001-01-01") for value in modules[0].values()): # table is not empty - headers = [k for k in modules[0].keys() if "Bytes" not in k] - app.display_markdown(f"### {title}") - app.display_markdown("| " + " | ".join(headers) + " |") - app.display_markdown("| " + " | ".join("---" for _ in headers) + " |") + if all(str(value).strip() in ("", "0", "0001-01-01") for value in modules[0].values()): + return # skip empty table - for row in modules: - app.display_markdown("| " + " | ".join(format(str(row.get(h, ""))) for h in headers) + " |") + headers = [k for k in modules[0].keys() if "Bytes" not in k] + lines = [] + lines.append(f"### {title}") + lines.append("") + lines.append("| " + " | ".join(headers) + " |") + lines.append("| " + " | ".join("---" for _ in headers) + " |") + for row in modules: + lines.append("| " + " | ".join(str(row.get(h, "")) for h in headers) + " |") + + markdown = "\n".join(lines) + app.display_markdown(markdown) def print_table( app: Application, From adcdca2954943ffac47f35ca4f538155b45e04f9 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Wed, 14 May 2025 11:54:48 +0200 Subject: [PATCH 62/70] fixes --- ddev/src/ddev/cli/size/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 03f96b71d059f..3662f5cf4ac4c 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -446,6 +446,7 @@ def print_markdown( markdown = "\n".join(lines) app.display_markdown(markdown) + def print_table( app: Application, mode: str, From c5794b6de6d132d2be5bc38378148822400ad77c Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Wed, 14 May 2025 12:28:42 +0200 
Subject: [PATCH 63/70] fix --- ddev/src/ddev/cli/size/common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 3662f5cf4ac4c..22204b30618b0 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -437,7 +437,6 @@ def print_markdown( lines = [] lines.append(f"### {title}") - lines.append("") lines.append("| " + " | ".join(headers) + " |") lines.append("| " + " | ".join("---" for _ in headers) + " |") for row in modules: From 47af6dfefb31f8a836fe71321548bc904a1dfe9e Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Fri, 16 May 2025 10:13:05 +0200 Subject: [PATCH 64/70] correct comments --- ddev/src/ddev/cli/size/common.py | 33 ++---- ddev/src/ddev/cli/size/timeline.py | 38 +++--- package_size_analyzer/cli.py | 38 ------ package_size_analyzer/modes.py | 184 ----------------------------- package_size_analyzer/test.py | 58 --------- 5 files changed, 21 insertions(+), 330 deletions(-) delete mode 100644 package_size_analyzer/cli.py delete mode 100644 package_size_analyzer/modes.py delete mode 100644 package_size_analyzer/test.py diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 22204b30618b0..ead8eb6b478b7 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -77,13 +77,13 @@ class CLIParametersTimeline(TypedDict): show_gui: bool -class CLIParametersTimelineIntegration(CLIParametersTimeline): +class InitialParametersTimelineIntegration(CLIParametersTimeline): type: Literal["integration"] first_commit: str platform: None -class CLIParametersTimelineDependency(CLIParametersTimeline): +class InitialParametersTimelineDependency(CLIParametersTimeline): type: Literal["dependency"] first_commit: None platform: str @@ -170,7 +170,7 @@ def get_files(repo_path: str | Path, compressed: bool) -> list[FileDataEntry]: """ ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} git_ignore 
= get_gitignore_files(repo_path) - included_folder = "datadog_checks/" + included_folder = "datadog_checks" integration_sizes: dict[str, int] = {} integration_versions: dict[str, str] = {} @@ -223,17 +223,8 @@ def extract_version_from_about_py(path: str) -> str: def get_dependencies(repo_path: str | Path, platform: str, version: str, compressed: bool) -> list[FileDataEntry]: """ - Gets the list of dependencies for a given platform and Python version. - Each FileDataEntry includes: Name, Version, Size_Bytes, Size, and Type. - - Args: - repo_path: Path to the repository. - platform: Target platform. - version: Target Python version. - compressed: If True, measure compressed file sizes. If False, measure uncompressed sizes. - - Returns: - A list of FileDataEntry dictionaries containing the dependency information. + Gets the list of dependencies for a given platform and Python version and returns a FileDataEntry that includes: + Name, Version, Size_Bytes, Size, and Type. """ resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved")) @@ -332,19 +323,9 @@ def format_modules( multiple_plats_and_vers: bool, ) -> list[FileDataEntryPlatformVersion] | list[FileDataEntry]: """ - Formats the modules list, adding platform and Python version information if needed. + Formats the modules list, adding platform and Python version information. - If the modules list is empty, returns a default empty entry (with or without platform information). - - Args: - modules: List of modules to format. - platform: Platform string to add to each entry if needed. - version: Python version string to add to each entry if needed. - i: Index of the current (platform, version) combination being processed. - If None, it means the data is being processed for only one combination of platform and version. - - Returns: - A list of formatted entries. + If the modules list is empty, returns a default empty entry. 
""" if modules == [] and not multiple_plats_and_vers: empty_entry: FileDataEntry = { diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index c7fcc18f9cd37..f32873655e41f 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -15,12 +15,12 @@ from ddev.cli.application import Application from .common import ( - CLIParametersTimelineDependency, - CLIParametersTimelineIntegration, CommitEntry, CommitEntryPlatformWithDelta, CommitEntryWithDelta, GitRepo, + InitialParametersTimelineDependency, + InitialParametersTimelineIntegration, WrongDependencyFormat, compress, convert_to_human_readable_size, @@ -174,7 +174,7 @@ def timeline( modules_plat: list[CommitEntryPlatformWithDelta] = [] multiple_plats_and_vers: Literal[True] = True progress.remove_task(task) - dep_parameters: CLIParametersTimelineDependency + dep_parameters: InitialParametersTimelineDependency if not platform: for plat in valid_platforms: path = None @@ -238,7 +238,7 @@ def timeline( else: modules: list[CommitEntryWithDelta] = [] multiple_plat_and_ver: Literal[False] = False - int_parameters: CLIParametersTimelineIntegration = { + int_parameters: InitialParametersTimelineIntegration = { "app": app, "type": "integration", "module": module, @@ -276,7 +276,7 @@ def timeline( def timeline_mode( gitRepo: GitRepo, commits: list[str], - params: CLIParametersTimelineDependency, + params: InitialParametersTimelineDependency, multiple_plats_and_vers: Literal[True], progress: Progress, ) -> list[CommitEntryPlatformWithDelta]: ... @@ -286,7 +286,7 @@ def timeline_mode( def timeline_mode( gitRepo: GitRepo, commits: list[str], - params: CLIParametersTimelineIntegration, + params: InitialParametersTimelineIntegration, multiple_plats_and_vers: Literal[False], progress: Progress, ) -> list[CommitEntryWithDelta]: ... 
@@ -296,7 +296,7 @@ def timeline_mode( def timeline_mode( gitRepo: GitRepo, commits: list[str], - params: CLIParametersTimelineDependency, + params: InitialParametersTimelineDependency, multiple_plats_and_vers: Literal[False], progress: Progress, ) -> list[CommitEntryWithDelta]: ... @@ -305,7 +305,7 @@ def timeline_mode( def timeline_mode( gitRepo: GitRepo, commits: list[str], - params: CLIParametersTimelineIntegration | CLIParametersTimelineDependency, + params: InitialParametersTimelineIntegration | InitialParametersTimelineDependency, multiple_plats_and_vers: bool, progress: Progress, ) -> list[CommitEntryWithDelta] | list[CommitEntryPlatformWithDelta]: @@ -342,7 +342,7 @@ def timeline_mode( @overload def get_repo_info( gitRepo: GitRepo, - params: CLIParametersTimelineIntegration, + params: InitialParametersTimelineIntegration, commits: list[str], progress: Progress, ) -> list[CommitEntry]: ... @@ -351,7 +351,7 @@ def get_repo_info( @overload def get_repo_info( gitRepo: GitRepo, - params: CLIParametersTimelineDependency, + params: InitialParametersTimelineDependency, commits: list[str], progress: Progress, ) -> list[CommitEntry]: ... 
@@ -359,7 +359,7 @@ def get_repo_info( def get_repo_info( gitRepo: GitRepo, - params: CLIParametersTimelineIntegration | CLIParametersTimelineDependency, + params: InitialParametersTimelineIntegration | InitialParametersTimelineDependency, commits: list[str], progress: Progress, ) -> list[CommitEntry]: @@ -387,7 +387,7 @@ def get_repo_info( @overload def process_commits( commits: list[str], - params: CLIParametersTimelineIntegration, + params: InitialParametersTimelineIntegration, gitRepo: GitRepo, progress: Progress, first_commit: str, @@ -397,7 +397,7 @@ def process_commits( @overload def process_commits( commits: list[str], - params: CLIParametersTimelineDependency, + params: InitialParametersTimelineDependency, gitRepo: GitRepo, progress: Progress, first_commit: None, @@ -406,7 +406,7 @@ def process_commits( def process_commits( commits: list[str], - params: CLIParametersTimelineIntegration | CLIParametersTimelineDependency, + params: InitialParametersTimelineIntegration | InitialParametersTimelineDependency, gitRepo: GitRepo, progress: Progress, first_commit: Optional[str], @@ -702,16 +702,6 @@ def format_modules( Formats the modules list, adding platform and Python version information if needed. If the modules list is empty, returns a default empty entry (with or without platform information). - - Args: - modules: List of modules to format. - platform: Platform string to add to each entry if needed. - version: Python version string to add to each entry if needed. - i: Index of the current platform, version) combination being processed. - If None, it means the data is being processed for only one platform. - - Returns: - A list of formatted entries. 
""" if modules == [] and multiple_plats_and_vers and platform: empty_module_platform: CommitEntryPlatformWithDelta = { diff --git a/package_size_analyzer/cli.py b/package_size_analyzer/cli.py deleted file mode 100644 index 23c9a15d0bc0c..0000000000000 --- a/package_size_analyzer/cli.py +++ /dev/null @@ -1,38 +0,0 @@ -import argparse -from modes import status_mode - - -def main(): - parser = argparse.ArgumentParser(description="Package Size Analyzer CLI") - - # Define allowed choices - valid_modes = ["status", "diff", "timeline"] - valid_platforms = ["linux-aarch64", "linux-x86_64", "macos-x86_64", "windows-x86_64"] - valid_python_versions = ["3.12"] - - # Arguments - parser.add_argument("mode", choices=valid_modes, help="Mode of operation") - parser.add_argument("--platform", choices=valid_platforms, required=False, help="Target platform") - parser.add_argument("--python", choices=valid_python_versions, required=False, help="Python version (MAJOR.MINOR)") - parser.add_argument("--compressed", action="store_true", help="Measure compressed size") - - args = parser.parse_args() - - # Execute the corresponding function based on the selected mode - if args.mode == "status": - # if an argument is not specified, all possibilities are executed - if args.platform is None and args.python is None: - for platform in valid_platforms: - for version in valid_python_versions: - status_mode(platform, version, args.compressed) - elif args.platform is None: - for platform in valid_platforms: - status_mode(platform, args.python, args.compressed) - elif args.python is None: - for version in valid_python_versions: - status_mode(args.platform, version, args.compressed) - else: - status_mode(args.platform, args.python, args.compressed) - -if __name__ == "__main__": - main() diff --git a/package_size_analyzer/modes.py b/package_size_analyzer/modes.py deleted file mode 100644 index b9e0f2f05b4ab..0000000000000 --- a/package_size_analyzer/modes.py +++ /dev/null @@ -1,184 +0,0 @@ -import 
requests -import pandas as pd -import re -import os -from tabulate import tabulate -import zlib -import io - - -def status_mode(platform, version, compressed): - if compressed: - df1 = pd.DataFrame(get_compressed_files()) - print("Compressed integrations done") - - df2 = pd.DataFrame(get_compressed_dependencies(platform,version)) - print("Compressed dependencies done") - - - df = pd.concat([df1, df2], ignore_index=True) - - # Calculate the size for the whole module - df_grouped = df.groupby(["Name", 'Type'], as_index=False).agg({"Size (Bytes)": "sum"}) - df_grouped = df_grouped.sort_values(by="Size (Bytes)", ascending=False).reset_index(drop=True) - - - df_grouped["Size"] = df_grouped["Size (Bytes)"].apply(convert_size) - df_grouped.to_csv("compressed_status_" + platform + "_" + version + ".csv", index=False) - df.to_csv("compressed_status_all_" + platform + "_" + version + ".csv", index=False) - df_grouped = df_grouped.drop(columns=['Size (Bytes)']) - print('--------------', platform,version,'--------------') - print(tabulate(df_grouped, headers='keys', tablefmt='grid')) - print("CSV exported") - - - - -def get_compressed_files(): - print("Getting compressed integrations") - - ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} - git_ignore = get_gitignore_files() - included_folder = "datadog_checks/" - - script_path = os.path.abspath(__file__) - parent_dir = os.path.dirname(script_path) - repo_path = os.path.dirname(parent_dir) - - file_data = [] - for root, _, files in os.walk(repo_path): - for file in files: - file_path = os.path.join(root, file) - - # Convert the path to a relative format within the repo - relative_path = os.path.relpath(file_path, repo_path) - - # Filter files - if is_valid_integration(relative_path, included_folder, ignored_files, git_ignore): - try: - # Compress the file - compressor = zlib.compressobj() - compressed_size = 0 - - # original_size = os.path.getsize(file_path) - with open(file_path, "rb") as f: - while chunk 
:= f.read(8192): # Read in 8KB chunks - compressed_chunk = compressor.compress(chunk) - compressed_size += len(compressed_chunk) - - compressed_size += len(compressor.flush()) # Flush the buffer - integration = relative_path.split("/")[0] - file_data.append({ - "File Path": relative_path, - "Type": "Integration", - "Name": integration, - "Size (Bytes)": compressed_size - }) - - except Exception as e: - print(f"Error processing {relative_path}: {e}") - - return file_data - - -def get_compressed_dependencies(platform=None, version=None): - print("Getting compressed dependencies") - - script_path = os.path.abspath(__file__) - parent_dir = os.path.dirname(script_path) - repo_path = os.path.dirname(parent_dir) - resolved_path = os.path.join(repo_path, ".deps/resolved") - - if not os.path.exists(resolved_path) or not os.path.isdir(resolved_path): - print(f"Error: Directory not found {resolved_path}") - return [] - - file_data = [] - - for filename in os.listdir(resolved_path): - file_path = os.path.join(resolved_path, filename) - - if os.path.isfile(file_path) and is_correct_dependency(platform, version, filename): - deps, download_urls = get_dependencies(file_path) - return get_dependencies_sizes(deps, download_urls) - - - - - -def is_correct_dependency(platform, version, name): - return platform in name and version in name - -def get_dependencies_sizes(deps, download_urls): - file_data = [] - for dep, url in zip(deps, download_urls): - dep_response = requests.head(url) - if dep_response.status_code != 200: - print(f"Error {response.status_code}: Unable to fetch the dependencies file") - else: - size = dep_response.headers.get("Content-Length", None) - file_data.append({"File Path": dep, "Type": "Dependency", "Name": dep, "Size (Bytes)": int(size)}) - - return file_data - - -def get_dependencies(file_path): - download_urls = [] - deps = [] - try: - with open(file_path, "r", encoding="utf-8") as file: - file_content = file.read() - for line in file_content.splitlines(): 
- match = re.search(r"([\w\-\d\.]+) @ (https?://[^\s#]+)", line) - if match: - deps.append(match.group(1)) - download_urls.append(match.group(2)) - except Exception as e: - print(f"Error reading file {file_path}: {e}") - - return deps, download_urls - -def is_valid_integration(path, included_folder, ignored_files, git_ignore): - # It is not an integration - if path.startswith('.'): - return False - # It is part of an integration and it is not in the datadog_checks folder - elif not (included_folder in path): - return False - # It is an irrelevant file - elif any(ignore in path for ignore in ignored_files): - return False - # This file is contained in .gitignore - elif any(ignore in path for ignore in git_ignore): - return False - else: - return True - - -def get_gitignore_files(): - script_path = os.path.abspath(__file__) - parent_dir = os.path.dirname(script_path) - repo_path = os.path.dirname(parent_dir) - gitignore_path = os.path.join(repo_path, ".gitignore") - if not os.path.exists(gitignore_path): - print(f"Error: .gitignore file not found at {gitignore_path}") - return [] - - try: - with open(gitignore_path, "r", encoding="utf-8") as file: - gitignore_content = file.read() - ignored_patterns = [line.strip() for line in gitignore_content.splitlines() if line.strip() and not line.startswith("#")] - return ignored_patterns - except Exception as e: - print(f"Error reading .gitignore file: {e}") - return [] - -def convert_size(size_bytes): - """Transforms bytes into a human-friendly format (KB, MB, GB) with 3 decimal places.""" - for unit in ['B', 'KB', 'MB', 'GB']: - if size_bytes < 1024: - return (str(round(size_bytes, 2)) + unit) - size_bytes /= 1024 - return (str(round(size_bytes, 2)) + "TB") - - diff --git a/package_size_analyzer/test.py b/package_size_analyzer/test.py deleted file mode 100644 index 89bc7c64a3c11..0000000000000 --- a/package_size_analyzer/test.py +++ /dev/null @@ -1,58 +0,0 @@ - -import pytest -import requests -from unittest.mock import 
patch, mock_open, MagicMock -from modes import ( - get_compressed_dependencies, - get_gitignore_files, - convert_size, - is_valid_integration, - is_correct_dependency, - get_dependencies, - get_dependencies_sizes -) - -def test_is_correct_dependency(): - assert is_correct_dependency("windows-x86_64", "3.12", "windows-x86_64-3.12") == True - assert is_correct_dependency("windows-x86_64", "3.12", "linux-x86_64-3.12") == False - assert is_correct_dependency("windows-x86_64", "3.13", "windows-x86_64-3.12") == False - - -def test_convert_size(): - assert convert_size(500) == "500B" - assert convert_size(1024) == "1.0KB" - assert convert_size(1048576) == "1.0MB" - assert convert_size(1073741824) == "1.0GB" - -def test_is_valid_integration(): - included_folder = "datadog_checks/" - ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} - git_ignore = [".git", "__pycache__"] - - assert is_valid_integration("datadog_checks/example.py", included_folder, ignored_files, git_ignore) == True - assert is_valid_integration("__pycache__/file.py", included_folder, ignored_files, git_ignore) == False - assert is_valid_integration("datadog_checks_dev/example.py", included_folder, ignored_files, git_ignore) == False - assert is_valid_integration(".git/config", included_folder, ignored_files, git_ignore) == False - -def test_get_dependencies(): - file_content = "dependency1 @ https://example.com/dependency1.whl\ndependency2 @ https://example.com/dependency2.whl" - mock_open_obj = mock_open(read_data=file_content) - with patch("builtins.open", mock_open_obj): - deps, urls = get_dependencies("fake_path") - assert deps == ["dependency1", "dependency2"] - assert urls == ["https://example.com/dependency1.whl", "https://example.com/dependency2.whl"] - -def test_get_gitignore_files(): - mock_gitignore = "__pycache__/\n*.log\n" # Sample .gitignore file - with patch("builtins.open", mock_open(read_data=mock_gitignore)): - with patch("os.path.exists", return_value=True): - 
ignored_patterns = get_gitignore_files() - assert ignored_patterns == ["__pycache__/", "*.log"] - -def test_get_dependencies_sizes(): - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.headers = {"Content-Length": "12345"} - with patch("requests.head", return_value=mock_response): - file_data = get_dependencies_sizes(["dependency1"], ["https://example.com/dependency1.whl"]) - assert file_data == [{"File Path": "dependency1", "Type": "Dependency", "Name": "dependency1", "Size (Bytes)": 12345}] From a695625311ea641a8655138e623a98017bc4623f Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Fri, 16 May 2025 15:08:26 +0200 Subject: [PATCH 65/70] correct comments --- ddev/src/ddev/cli/size/common.py | 2 +- ddev/src/ddev/cli/size/timeline.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index ead8eb6b478b7..6458186129093 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -170,7 +170,7 @@ def get_files(repo_path: str | Path, compressed: bool) -> list[FileDataEntry]: """ ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} git_ignore = get_gitignore_files(repo_path) - included_folder = "datadog_checks" + included_folder = "datadog_checks" + os.sep integration_sizes: dict[str, int] = {} integration_versions: dict[str, str] = {} diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index f32873655e41f..2c6882a5247c8 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -515,7 +515,7 @@ def get_files( ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"} git_ignore = get_gitignore_files(repo_path) - included_folder = "datadog_checks/" + included_folder = "datadog_checks" + os.sep total_size = 0 version = "" From 464d0e6c05c073c99e69ffcc7c3610c2e32b86e0 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Fri, 16 
May 2025 16:54:32 +0200 Subject: [PATCH 66/70] Change comments --- ddev/src/ddev/cli/size/timeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index 2c6882a5247c8..515d85a151829 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -480,7 +480,7 @@ def get_files( compressed: bool, ) -> list[CommitEntry]: """ - Calculates integration file sizes and versions from a repository. + Calculates integration file sizes and versions from a repository If the integration folder no longer exists, a 'Deleted' entry is added. Otherwise, it walks the module directory, sums file sizes, extracts the version, and appends a CommitEntry. From c22b8d730581e82db382803f82190ec19f0c6f4b Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 19 May 2025 11:57:15 +0200 Subject: [PATCH 67/70] Change number to constant --- ddev/src/ddev/cli/size/diff.py | 7 ++++--- ddev/src/ddev/cli/size/timeline.py | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index 27574f26bca12..e060feb9e7a31 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -32,6 +32,7 @@ console = Console(stderr=True) MINIMUM_DATE = datetime.strptime("Sep 17 2024", "%b %d %Y").date() +MINIMUM_LENGTH_COMMIT = 7 @click.command() @@ -79,11 +80,11 @@ def diff( task = progress.add_task("[cyan]Calculating differences...", total=None) if sum([csv, markdown, json]) > 1: raise click.BadParameter("Only one output format can be selected: --csv, --markdown, or --json") - if len(first_commit) < 7 and len(second_commit) < 7: + if len(first_commit) < MINIMUM_LENGTH_COMMIT and len(second_commit) < MINIMUM_LENGTH_COMMIT: raise click.BadParameter("Commit hashes must be at least 7 characters long") - elif len(first_commit) < 7: + elif len(first_commit) < MINIMUM_LENGTH_COMMIT: raise 
click.BadParameter("First commit hash must be at least 7 characters long.", param_hint="first_commit") - elif len(second_commit) < 7: + elif len(second_commit) < MINIMUM_LENGTH_COMMIT: raise click.BadParameter( "Second commit hash must be at least 7 characters long.", param_hint="second_commit" ) diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index 515d85a151829..7b10d9b981848 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -419,7 +419,8 @@ def process_commits( Args: commits: List of commit SHAs to process. - params: CLIParametersTimeline dict containing module name, type, platform, and other configuration options. + params: InitialParametersTimelineIntegration or InitialParametersTimelineDependency dict containing module name, + type, platform, and other configuration options. gitRepo: GitRepo instance managing the repository. progress: Progress bar instance. first_commit: First commit hash where the given integration was introduced (only for integrations). 
From 01e272aed4d6335936e484f6598df9ae34fbf0d8 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 19 May 2025 12:47:40 +0200 Subject: [PATCH 68/70] Fix a comment --- ddev/src/ddev/cli/size/diff.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ddev/src/ddev/cli/size/diff.py b/ddev/src/ddev/cli/size/diff.py index e060feb9e7a31..479e89e78742a 100644 --- a/ddev/src/ddev/cli/size/diff.py +++ b/ddev/src/ddev/cli/size/diff.py @@ -81,12 +81,16 @@ def diff( if sum([csv, markdown, json]) > 1: raise click.BadParameter("Only one output format can be selected: --csv, --markdown, or --json") if len(first_commit) < MINIMUM_LENGTH_COMMIT and len(second_commit) < MINIMUM_LENGTH_COMMIT: - raise click.BadParameter("Commit hashes must be at least 7 characters long") + raise click.BadParameter(f"Commit hashes must be at least {MINIMUM_LENGTH_COMMIT} characters long") elif len(first_commit) < MINIMUM_LENGTH_COMMIT: - raise click.BadParameter("First commit hash must be at least 7 characters long.", param_hint="first_commit") + raise click.BadParameter( + f"First commit hash must be at least {MINIMUM_LENGTH_COMMIT} characters long.", + param_hint="first_commit", + ) elif len(second_commit) < MINIMUM_LENGTH_COMMIT: raise click.BadParameter( - "Second commit hash must be at least 7 characters long.", param_hint="second_commit" + f"Second commit hash must be at least {MINIMUM_LENGTH_COMMIT} characters long.", + param_hint="second_commit", ) if first_commit == second_commit: raise click.BadParameter("Commit hashes must be different") From 1b58523a709ba35918f7c832b14357a9024bddd9 Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Mon, 19 May 2025 14:27:11 +0200 Subject: [PATCH 69/70] Rerun checks --- ddev/src/ddev/cli/size/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index 6458186129093..b56f36a7f0f39 100644 --- a/ddev/src/ddev/cli/size/common.py +++ 
b/ddev/src/ddev/cli/size/common.py @@ -693,7 +693,7 @@ def get_module_commits( time: Optional time filter (e.g. '2 weeks ago'). Returns: - List of commit SHAs (oldest to newest). + List of commit SHAs (oldest to newest) """ self._run("git fetch origin --quiet") self._run("git checkout origin/HEAD") From 61a70b10caaf9d1ce47d924a31511d46bc8ec98f Mon Sep 17 00:00:00 2001 From: Lucia Sanchez Bella Date: Wed, 21 May 2025 12:49:58 +0200 Subject: [PATCH 70/70] Fix versions --- ddev/src/ddev/cli/size/common.py | 4 +++- ddev/src/ddev/cli/size/timeline.py | 2 +- ddev/tests/size/test_common.py | 7 +++++-- ddev/tests/size/test_timeline.py | 6 +++--- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/ddev/src/ddev/cli/size/common.py b/ddev/src/ddev/cli/size/common.py index b56f36a7f0f39..74408f50115e9 100644 --- a/ddev/src/ddev/cli/size/common.py +++ b/ddev/src/ddev/cli/size/common.py @@ -255,9 +255,11 @@ def get_dependencies_list(file_path: str) -> tuple[list[str], list[str], list[st deps.append(name) download_urls.append(url) - version_match = re.search(rf"{re.escape(name)}-([0-9]+(?:\.[0-9]+)*)-", url) + version_match = re.search(rf"{re.escape(name)}/[^/]+?-([0-9]+(?:\.[0-9]+)*)-", url) if version_match: versions.append(version_match.group(1)) + else: + versions.append("") return deps, download_urls, versions diff --git a/ddev/src/ddev/cli/size/timeline.py b/ddev/src/ddev/cli/size/timeline.py index 7b10d9b981848..caec7c3efd992 100644 --- a/ddev/src/ddev/cli/size/timeline.py +++ b/ddev/src/ddev/cli/size/timeline.py @@ -610,7 +610,7 @@ def get_dependency_data(file_path: str, module: str) -> tuple[Optional[str], Opt raise WrongDependencyFormat("The dependency format 'name @ link' is no longer supported.") name, url = match.groups() if name == module: - version_match = re.search(rf"{re.escape(name)}-([0-9]+(?:\.[0-9]+)*)-", url) + version_match = re.search(rf"{re.escape(name)}/[^/]+?-([0-9]+(?:\.[0-9]+)*)-", url) version = version_match.group(1) if version_match else 
"" return url, version return None, None diff --git a/ddev/tests/size/test_common.py b/ddev/tests/size/test_common.py index b8ae30ba35cc1..00469e80f2ec8 100644 --- a/ddev/tests/size/test_common.py +++ b/ddev/tests/size/test_common.py @@ -96,12 +96,15 @@ def test_is_valid_integration(): def test_get_dependencies_list(): - file_content = "dependency1 @ https://example.com/dependency1-1.1.1-.whl\ndependency2 @ https://example.com/dependency2-1.1.1-.whl" + file_content = "dependency1 @ https://example.com/dependency1/dependency1-1.1.1-.whl\ndependency2 @ https://example.com/dependency2/dependency2-1.1.1-.whl" mock_open_obj = mock_open(read_data=file_content) with patch("builtins.open", mock_open_obj): deps, urls, versions = get_dependencies_list("fake_path") assert deps == ["dependency1", "dependency2"] - assert urls == ["https://example.com/dependency1-1.1.1-.whl", "https://example.com/dependency2-1.1.1-.whl"] + assert urls == [ + "https://example.com/dependency1/dependency1-1.1.1-.whl", + "https://example.com/dependency2/dependency2-1.1.1-.whl", + ] assert versions == ["1.1.1", "1.1.1"] diff --git a/ddev/tests/size/test_timeline.py b/ddev/tests/size/test_timeline.py index ea6f06a363514..331a9c62a0c04 100644 --- a/ddev/tests/size/test_timeline.py +++ b/ddev/tests/size/test_timeline.py @@ -99,11 +99,11 @@ def test_trim_modules_keep_some_remove_some(): def test_get_dependency(): - content = """dep1 @ https://example.com/dep1-1.1.1-.whl -dep2 @ https://example.com/dep2-1.1.2-.whl""" + content = """dep1 @ https://example.com/dep1/dep1-1.1.1-.whl +dep2 @ https://example.com/dep2/dep2-1.1.2-.whl""" with patch("builtins.open", mock_open(read_data=content)): url, version = get_dependency_data(Path("some") / "path" / "file.txt", "dep2") - assert (url, version) == ("https://example.com/dep2-1.1.2-.whl", "1.1.2") + assert (url, version) == ("https://example.com/dep2/dep2-1.1.2-.whl", "1.1.2") def make_mock_response(size):