diff --git a/.cursor/rules/overview.mdc b/.cursor/rules/overview.mdc index a417a59..0fe9e04 100644 --- a/.cursor/rules/overview.mdc +++ b/.cursor/rules/overview.mdc @@ -18,6 +18,8 @@ There is an existing project in `legacy/ios/sizeAnalysis/`. THIS IS CONSIDERED L The new project will be purely in python. Create a project structure that you think makes sense following the Sentry python guidelines and modern python practices. Keep in mind that there will be both iOS and Android size analysis projects, so consider that when organizing code and what might be shared between them. +The iOS analysis code ONLY has to work with `.xcarchive.zip` files as input. + # Python rules For the Python code make sure to follow all of Sentry's best practices, as well as modern Python best practices. Try to use types as much as possible. If standard repo setup is not present, feel free to configure it and add it to the repo since this is currently a bare setup. @@ -28,4 +30,6 @@ For the Mach-O handling, use the `lief` library and follow best practices for th # Testing -Included is a `test/artifacts` directory which contains sample "clean room" apps that can be used for writing integration tests and validating the output of this tool. Always write new tests to validate behavior and functionality. Prefer to write integration tests using the sample apps instead of writing smaller unit tests or using mocks. \ No newline at end of file +Included is a `test/artifacts` directory which contains sample "clean room" apps that can be used for writing integration tests and validating the output of this tool. Always write new tests to validate behavior and functionality. Prefer to write integration tests using the sample apps instead of writing smaller unit tests or using mocks. + +Make sure to write tests using `pytest`. 
\ No newline at end of file diff --git a/.gitignore b/.gitignore index 6dc5c26..7762583 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,34 @@ -*.egg-info -*.pyc -__pycache__\ +# Python *.pyc +__pycache__/ +*.egg-info/ +dist/ +build/ +.pytest_cache/ +.coverage +htmlcov/ + +# Virtual environments +venv/ +.venv/ +env/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db + +# Temporary files +*.tmp +*.temp +.temporary/ + +ios-analysis-report.json +android-analysis-report.json + + diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..08ce04e --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,36 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + - id: check-merge-conflict + - id: debug-statements + + - repo: https://github.com/psf/black + rev: 23.9.1 + hooks: + - id: black + language_version: python3.11 + args: [--line-length=100] + + - repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort + args: [--profile=black, --line-length=100] + + - repo: https://github.com/pycqa/flake8 + rev: 6.1.0 + hooks: + - id: flake8 + args: [--max-line-length=100, --extend-ignore=E203,W503] + + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.5.1 + hooks: + - id: mypy + additional_dependencies: [types-all] + args: [--strict, --ignore-missing-imports] \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..05b233c --- /dev/null +++ b/README.md @@ -0,0 +1,100 @@ +# App Size Analyzer + +A CLI tool for analyzing iOS and Android app bundle sizes, providing detailed insights into file composition, binary structure, and optimization opportunities. 
+ +## Installation + +### From Source + +```bash +# Clone the repository +git clone +cd app-size-analyzer + +# Install in development mode +pip install -e ".[dev]" +``` + +## Quick Start + +### Analyze an iOS App + +```bash +# Analyze a bundle +app-size-analyzer ios MyApp.xcarchive.zip + +# Analyze a bundle with custom output location +app-size-analyzer ios MyApp.xcarchive.zip -o detailed-report.json + +# Skip expensive operations for faster analysis +app-size-analyzer ios MyApp.xcarchive.zip --skip-swift-metadata --skip-symbols +``` + +### Command Line Options + +```bash +app-size-analyzer ios [OPTIONS] INPUT_PATH + +Options: + -o, --output PATH Output path for JSON report [default: analysis-report.json] + --working-dir PATH Working directory for temporary files + --platform [ios|android] Target platform (auto-detected if not specified) + --skip-swift-metadata Skip Swift metadata parsing + --skip-symbols Skip symbol extraction + --format [json|table] Output format [default: json] + -v, --verbose Enable verbose logging + -q, --quiet Suppress all output except errors + --help Show this message and exit +``` + +## Development + +### Setup + +```bash +# Clone and setup development environment +git clone +cd app-size-analyzer +pip install -e ".[dev]" + +# Install pre-commit hooks +pre-commit install +``` + +### Code Quality + +This project uses several tools to maintain code quality: + +- **Black**: Code formatting +- **isort**: Import sorting +- **mypy**: Static type checking +- **flake8**: Linting +- **pytest**: Testing + +Run all checks: + +```bash +# Format code +black src tests +isort src tests + +# Type checking +mypy src + +# Linting +flake8 src tests + +# Tests +pytest +``` + +### Testing + +```bash +# Run all tests +pytest + +# Run specific test categories +pytest tests/unit/ +pytest tests/integration/ +``` \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index e69de29..2a7284c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -0,0 
+1,102 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "app-size-analyzer" +version = "1.0.0" +description = "CLI tool for analyzing iOS and Android app bundle sizes" +readme = "README.md" +license = {file = "LICENSE"} +authors = [ + {name = "Sentry Team", email = "engineering@sentry.io"}, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: MacOS", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Build Tools", + "Topic :: System :: Archiving", +] +requires-python = ">=3.11" +dependencies = [ + "click>=8.1.0", + "lief>=0.14.0", + "pydantic>=2.0.0", + "rich>=13.0.0", + "typing-extensions>=4.8.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=7.4.0", + "pytest-mock>=3.11.0", + "black>=23.0.0", + "isort>=5.12.0", + "flake8>=6.0.0", + "mypy>=1.5.0", + "pre-commit>=3.4.0", +] + +[project.scripts] +app-size-analyzer = "app_size_analyzer.cli:main" + +[project.urls] +Repository = "https://github.com/getsentry/app-size-analyzer" +Issues = "https://github.com/getsentry/app-size-analyzer/issues" + +[tool.hatch.build.targets.wheel] +packages = ["src/app_size_analyzer"] + +[tool.hatch.build.targets.sdist] +include = [ + "/src", + "/tests", + "/README.md", + "/LICENSE", +] + +[tool.black] +line-length = 100 +target-version = ["py311"] +include = '\\.pyi?$' + +[tool.isort] +profile = "black" +line_length = 100 +known_first_party = ["app_size_analyzer"] + +[tool.mypy] +python_version = "3.11" +strict = true +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = true +disallow_incomplete_defs = true +check_untyped_defs = true +disallow_untyped_decorators = true +no_implicit_optional = true +warn_redundant_casts 
= true +warn_unused_ignores = true +warn_no_return = true +warn_unreachable = true + +[[tool.mypy.overrides]] +module = ["lief.*"] +ignore_missing_imports = true + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = "test_*.py" +python_classes = "Test*" +python_functions = "test_*" +addopts = [ + "--strict-markers", + "--strict-config", +] \ No newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index e69de29..0000000 diff --git a/src/app_size_analyzer/__init__.py b/src/app_size_analyzer/__init__.py new file mode 100644 index 0000000..a73844c --- /dev/null +++ b/src/app_size_analyzer/__init__.py @@ -0,0 +1,12 @@ +"""App Size Analyzer - CLI tool for analyzing iOS and Android app bundle sizes.""" + +__version__ = "0.0.1" + +from .models import AnalysisResults, AppInfo, FileAnalysis, BinaryAnalysis + +__all__ = [ + "AnalysisResults", + "AppInfo", + "FileAnalysis", + "BinaryAnalysis", +] diff --git a/src/app_size_analyzer/analyzers/__init__.py b/src/app_size_analyzer/analyzers/__init__.py new file mode 100644 index 0000000..af86bb4 --- /dev/null +++ b/src/app_size_analyzer/analyzers/__init__.py @@ -0,0 +1,7 @@ +"""Platform-specific analyzers for app bundles.""" + +from .ios import IOSAnalyzer + +__all__ = [ + "IOSAnalyzer", +] \ No newline at end of file diff --git a/src/app_size_analyzer/analyzers/ios.py b/src/app_size_analyzer/analyzers/ios.py new file mode 100644 index 0000000..2388270 --- /dev/null +++ b/src/app_size_analyzer/analyzers/ios.py @@ -0,0 +1,422 @@ +"""iOS app bundle analyzer using LIEF for Mach-O parsing.""" + +from __future__ import annotations + +import plistlib +from collections import defaultdict +from pathlib import Path +from typing import Dict, List, Optional, Set + +import lief + +from ..models import ( + AnalysisResults, + AppInfo, + BinaryAnalysis, + DuplicateFileGroup, + FileAnalysis, + FileInfo, + SwiftMetadata, + SymbolInfo, +) +from ..utils.file_utils import ( + 
from ..utils.file_utils import (
    calculate_file_hash,
    cleanup_directory,
    create_temp_directory,
    extract_archive,
    find_app_bundle,
    get_file_size,
)
from ..utils.logging import get_logger

logger = get_logger(__name__)


class IOSAnalyzer:
    """Analyzer for iOS app bundles shipped as ``.xcarchive.zip`` archives.

    The analyzer extracts the archive into a temporary directory, reads
    ``Info.plist``, walks every file in the ``.app`` bundle, and parses the
    main executable with LIEF.  Temporary directories are always removed,
    even when analysis fails.
    """

    # Mach-O CPU type constants mapped to human-readable architecture names.
    _CPU_TYPE_NAMES: Dict[int, str] = {
        0x0000000C: "arm",  # ARM
        0x0100000C: "arm64",  # ARM64
        0x00000007: "x86",  # i386
        0x01000007: "x86_64",  # x86_64
    }

    def __init__(
        self,
        working_dir: Optional[Path] = None,
        skip_swift_metadata: bool = False,
        skip_symbols: bool = False,
    ) -> None:
        """Initialize the iOS analyzer.

        Args:
            working_dir: Directory for temporary files (None for system temp)
            skip_swift_metadata: Skip Swift metadata extraction for faster analysis
            skip_symbols: Skip symbol extraction for faster analysis
        """
        self.working_dir = working_dir
        self.skip_swift_metadata = skip_swift_metadata
        self.skip_symbols = skip_symbols
        # Temp directories created during analysis; removed in _cleanup().
        self._temp_dirs: List[Path] = []

    def analyze(self, input_path: Path) -> AnalysisResults:
        """Analyze an iOS app bundle.

        Args:
            input_path: Path to a ``.xcarchive.zip`` archive

        Returns:
            Complete analysis results

        Raises:
            ValueError: If input is not a valid iOS app bundle
            RuntimeError: If analysis fails
        """
        logger.info("Starting iOS analysis of %s", input_path)

        try:
            # Prepare app bundle for analysis
            app_bundle_path = self._prepare_app_bundle(input_path)

            # Extract basic app information
            app_info = self._extract_app_info(app_bundle_path)
            logger.info("Analyzing app: %s v%s", app_info.name, app_info.version)

            # Analyze files in the bundle
            file_analysis = self._analyze_files(app_bundle_path)
            logger.info(
                "Found %d files, total size: %d bytes",
                file_analysis.file_count,
                file_analysis.total_size,
            )

            # Analyze the main executable binary
            binary_analysis = self._analyze_binary(app_bundle_path, app_info.executable)
            logger.info(
                "Binary analysis complete, executable size: %d bytes",
                binary_analysis.executable_size,
            )

            return AnalysisResults(
                app_info=app_info,
                file_analysis=file_analysis,
                binary_analysis=binary_analysis,
            )

        finally:
            # Temporary extraction directories are removed even on failure.
            self._cleanup()

    def _prepare_app_bundle(self, input_path: Path) -> Path:
        """Extract the input archive and locate the ``.app`` bundle inside it.

        Args:
            input_path: Path to the ``.xcarchive.zip`` archive

        Returns:
            Path to the extracted .app bundle directory
        """
        logger.debug("Extracting archive to temporary directory")
        # NOTE(review): self.working_dir is accepted by __init__ but never
        # passed to create_temp_directory — temp files always land in the
        # system temp dir.  Confirm whether create_temp_directory accepts a
        # parent-dir argument and wire working_dir through.
        temp_dir = create_temp_directory("ios-analysis-")
        self._temp_dirs.append(temp_dir)

        extract_archive(input_path, temp_dir)
        return find_app_bundle(temp_dir, platform="ios")

    def _extract_app_info(self, app_bundle_path: Path) -> AppInfo:
        """Extract basic app information from Info.plist.

        Args:
            app_bundle_path: Path to the .app bundle

        Returns:
            App information

        Raises:
            RuntimeError: If Info.plist is missing or cannot be parsed
        """
        info_plist_path = app_bundle_path / "Info.plist"

        if not info_plist_path.exists():
            raise RuntimeError(f"Info.plist not found in {app_bundle_path}")

        try:
            with open(info_plist_path, "rb") as f:
                plist_data = plistlib.load(f)

            return AppInfo(
                name=plist_data.get("CFBundleDisplayName")
                or plist_data.get("CFBundleName", "Unknown"),
                bundle_id=plist_data.get("CFBundleIdentifier", "unknown.bundle.id"),
                version=plist_data.get("CFBundleShortVersionString", "Unknown"),
                build=plist_data.get("CFBundleVersion", "Unknown"),
                executable=plist_data.get("CFBundleExecutable", "Unknown"),
                minimum_os_version=plist_data.get("MinimumOSVersion", "Unknown"),
                supported_platforms=plist_data.get("CFBundleSupportedPlatforms", []),
                sdk_version=plist_data.get("DTSDKName"),
            )

        except Exception as e:
            # Chain the cause so the original plistlib error is preserved.
            raise RuntimeError(f"Failed to parse Info.plist: {e}") from e

    def _analyze_files(self, app_bundle_path: Path) -> FileAnalysis:
        """Analyze all files in the app bundle.

        Args:
            app_bundle_path: Path to the .app bundle

        Returns:
            File analysis results (totals, per-type grouping, duplicates,
            and the 20 largest files)
        """
        logger.debug("Analyzing files in app bundle")

        files: List[FileInfo] = []
        files_by_type: Dict[str, List[FileInfo]] = defaultdict(list)
        files_by_hash: Dict[str, List[FileInfo]] = defaultdict(list)
        total_size = 0

        # Walk through all files in the bundle
        for file_path in app_bundle_path.rglob("*"):
            if not file_path.is_file():
                continue

            relative_path = file_path.relative_to(app_bundle_path)
            file_size = get_file_size(file_path)
            file_type = file_path.suffix.lower().lstrip(".")

            # MD5 is used purely for duplicate detection, not security.
            file_hash = calculate_file_hash(file_path, algorithm="md5")

            file_info = FileInfo(
                path=str(relative_path),
                size=file_size,
                file_type=file_type or "unknown",
                hash_md5=file_hash,
            )

            files.append(file_info)
            files_by_type[file_info.file_type].append(file_info)
            files_by_hash[file_hash].append(file_info)
            total_size += file_size

        # Find duplicate files: any hash that maps to more than one path.
        duplicate_groups = []
        for file_hash, file_list in files_by_hash.items():
            if len(file_list) > 1:
                # Potential savings = all copies except one kept original.
                total_file_size = sum(f.size for f in file_list)
                savings = total_file_size - file_list[0].size

                if savings > 0:  # Only include if there are actual savings
                    duplicate_groups.append(
                        DuplicateFileGroup(
                            files=file_list,
                            potential_savings=savings,
                        )
                    )

        # Sort files by size for largest files list
        largest_files = sorted(files, key=lambda f: f.size, reverse=True)[:20]

        # Sort duplicate groups by potential savings
        duplicate_groups.sort(key=lambda g: g.potential_savings, reverse=True)

        return FileAnalysis(
            total_size=total_size,
            file_count=len(files),
            files_by_type=dict(files_by_type),
            duplicate_files=duplicate_groups,
            largest_files=largest_files,
        )

    def _analyze_binary(self, app_bundle_path: Path, executable_name: str) -> BinaryAnalysis:
        """Analyze the main executable binary using LIEF.

        Args:
            app_bundle_path: Path to the .app bundle
            executable_name: Name of the main executable

        Returns:
            Binary analysis results; an empty result is returned (never a
            raise) when the executable is missing or LIEF fails, so file
            analysis is still usable.
        """
        executable_path = app_bundle_path / executable_name

        if not executable_path.exists():
            logger.warning("Executable not found: %s", executable_path)
            return BinaryAnalysis(
                executable_size=0,
                architectures=[],
                linked_libraries=[],
                symbols=[],
                swift_metadata=None,
                sections={},
            )

        logger.debug("Analyzing binary: %s", executable_path)

        try:
            binary = lief.parse(str(executable_path))

            if binary is None:
                raise RuntimeError("Failed to parse binary with LIEF")

            executable_size = get_file_size(executable_path)

            architectures = self._extract_architectures(binary)
            linked_libraries = self._extract_linked_libraries(binary)
            sections = self._extract_sections(binary)

            # Extract symbols if requested
            symbols = []
            if not self.skip_symbols:
                symbols = self._extract_symbols(binary)

            # Extract Swift metadata if requested
            swift_metadata = None
            if not self.skip_swift_metadata:
                swift_metadata = self._extract_swift_metadata(binary)

            return BinaryAnalysis(
                executable_size=executable_size,
                architectures=architectures,
                linked_libraries=linked_libraries,
                symbols=symbols,
                swift_metadata=swift_metadata,
                sections=sections,
            )

        except Exception as e:
            # Degrade gracefully: report size only, with empty binary details.
            logger.error("Failed to analyze binary: %s", e)
            return BinaryAnalysis(
                executable_size=get_file_size(executable_path),
                architectures=[],
                linked_libraries=[],
                symbols=[],
                swift_metadata=None,
                sections={},
            )

    def _extract_architectures(self, binary: lief.Binary) -> List[str]:
        """Extract CPU architectures from the binary (["unknown"] if none)."""
        architectures = []

        if hasattr(binary, "header") and hasattr(binary.header, "cpu_type"):
            # Single architecture binary
            arch = self._cpu_type_to_string(binary.header.cpu_type)
            if arch:
                architectures.append(arch)
        elif hasattr(binary, "fat_binaries"):
            # Fat binary with multiple architectures
            for fat_binary in binary.fat_binaries:
                arch = self._cpu_type_to_string(fat_binary.header.cpu_type)
                if arch:
                    architectures.append(arch)

        return architectures or ["unknown"]

    def _cpu_type_to_string(self, cpu_type: int) -> Optional[str]:
        """Convert a Mach-O CPU type constant to a string, or None if unknown."""
        return self._CPU_TYPE_NAMES.get(cpu_type)

    def _extract_linked_libraries(self, binary: lief.Binary) -> List[str]:
        """Extract linked dynamic library names from the binary."""
        libraries = []

        if hasattr(binary, "libraries"):
            for lib in binary.libraries:
                if hasattr(lib, "name"):
                    libraries.append(lib.name)

        return libraries

    def _extract_sections(self, binary: lief.Binary) -> Dict[str, int]:
        """Extract binary section names and their sizes in bytes."""
        sections = {}

        if hasattr(binary, "sections"):
            for section in binary.sections:
                section_name = getattr(section, "name", "unknown")
                section_size = getattr(section, "size", 0)
                sections[section_name] = section_size

        return sections

    def _extract_symbols(self, binary: lief.Binary) -> List[SymbolInfo]:
        """Extract symbol information from the binary.

        Returns:
            Up to the 1000 largest symbols, sorted by size descending
            (bounded to keep report output manageable).
        """
        symbols = []

        if not hasattr(binary, "symbols"):
            return symbols

        for symbol in binary.symbols:
            try:
                symbol_name = getattr(symbol, "name", "unknown")
                symbol_size = getattr(symbol, "size", 0)
                # Fix: the old default `lief.MachO.SYMBOL_TYPES.UNDEFINED` is
                # evaluated eagerly and that enum was removed in lief >= 0.15
                # (pyproject allows lief>=0.14), raising AttributeError for
                # every symbol.  Use None as the missing-value sentinel.
                symbol_type = getattr(symbol, "type", None)

                # Try to determine the section (n_sect is 1-based).
                section_name = "unknown"
                if hasattr(symbol, "numberof_sections") and symbol.numberof_sections > 0:
                    if hasattr(binary, "sections") and len(binary.sections) > 0:
                        section_index = min(symbol.numberof_sections - 1, len(binary.sections) - 1)
                        section = binary.sections[section_index]
                        section_name = getattr(section, "name", "unknown")

                symbols.append(
                    SymbolInfo(
                        name=symbol_name,
                        mangled_name=symbol_name,  # LIEF doesn't demangle automatically
                        size=symbol_size,
                        section=section_name,
                        symbol_type=str(symbol_type) if symbol_type is not None else "unknown",
                    )
                )

            except Exception as e:
                # A single malformed symbol should not abort the whole scan.
                logger.debug("Failed to process symbol: %s", e)
                continue

        # Sort symbols by size (largest first)
        symbols.sort(key=lambda s: s.size, reverse=True)
        return symbols[:1000]  # Limit to top 1000 symbols to avoid huge outputs

    def _extract_swift_metadata(self, binary: lief.Binary) -> Optional[SwiftMetadata]:
        """Extract Swift-specific metadata from the binary.

        This is a simplified implementation: it only sums the sizes of
        sections whose name contains "swift".  A full implementation would
        parse the __swift5_* metadata structures.
        """
        try:
            # Look for Swift-related sections
            swift_sections = []
            if hasattr(binary, "sections"):
                for section in binary.sections:
                    section_name = getattr(section, "name", "")
                    if "swift" in section_name.lower():
                        swift_sections.append(section)

            if not swift_sections:
                return None

            # Calculate total Swift metadata size
            total_metadata_size = sum(getattr(section, "size", 0) for section in swift_sections)

            # For now, return basic metadata
            # In a full implementation, you would parse the actual Swift metadata structures
            return SwiftMetadata(
                classes=[],  # Would be extracted from __swift5_types section
                protocols=[],  # Would be extracted from __swift5_protos section
                extensions=[],  # Would be extracted from various Swift sections
                total_metadata_size=total_metadata_size,
            )

        except Exception as e:
            logger.debug("Failed to extract Swift metadata: %s", e)
            return None

    def _cleanup(self) -> None:
        """Remove all temporary directories created during analysis."""
        for temp_dir in self._temp_dirs:
            try:
                cleanup_directory(temp_dir)
            except Exception as e:
                # Best-effort cleanup: log and keep removing the rest.
                logger.warning("Failed to cleanup %s: %s", temp_dir, e)
        self._temp_dirs.clear()
b/src/app_size_analyzer/cli.py @@ -0,0 +1,273 @@ +"""Command-line interface for app size analyzer.""" + +from __future__ import annotations + +import json +import time +from pathlib import Path +from typing import Optional + +import click +from rich.console import Console +from rich.progress import Progress, SpinnerColumn, TextColumn +from rich.table import Table + +from . import __version__ +from .analyzers.ios import IOSAnalyzer +from .models import AnalysisResults +from .utils.logging import setup_logging + + +console = Console() + + +@click.group(invoke_without_command=True) +@click.option("--version", is_flag=True, help="Show version information and exit.") +@click.pass_context +def cli(ctx: click.Context, version: bool) -> None: + """App Size Analyzer - Analyze iOS and Android app bundle sizes.""" + if version: + click.echo(f"App Size Analyzer v{__version__}") + ctx.exit() + + if ctx.invoked_subcommand is None: + click.echo(ctx.get_help()) + + +@cli.command() +@click.argument("input_path", type=click.Path(exists=True, path_type=Path), metavar="INPUT_PATH") +@click.option( + "-o", + "--output", + type=click.Path(path_type=Path), + default="ios-analysis-report.json", + help="Output path for the JSON analysis report.", + show_default=True, +) +@click.option( + "--working-dir", + type=click.Path(path_type=Path), + help="Working directory for temporary files (default: system temp).", +) +@click.option( + "--skip-swift-metadata", is_flag=True, help="Skip Swift metadata parsing for faster analysis." 
+) +@click.option("--skip-symbols", is_flag=True, help="Skip symbol extraction and analysis.") +@click.option("--verbose", "-v", is_flag=True, help="Enable verbose logging output.") +@click.option("--quiet", "-q", is_flag=True, help="Suppress all output except errors.") +@click.option( + "--format", + "output_format", + type=click.Choice(["json", "table"], case_sensitive=False), + default="json", + help="Output format for results.", + show_default=True, +) +def ios( + input_path: Path, + output: Path, + working_dir: Optional[Path], + skip_swift_metadata: bool, + skip_symbols: bool, + verbose: bool, + quiet: bool, + output_format: str, +) -> None: + """Analyze an iOS app bundle and generate a size report. + + INPUT_PATH can be: + - .xcarchive.zip file + """ + setup_logging(verbose=verbose, quiet=quiet) + + if verbose and quiet: + raise click.UsageError("Cannot specify both --verbose and --quiet") + + _validate_ios_input(input_path) + + if not quiet: + console.print(f"[bold blue]App Size Analyzer v{__version__}[/bold blue]") + console.print(f"Analyzing iOS app: [cyan]{input_path}[/cyan]") + console.print(f"Output: [cyan]{output}[/cyan]") + console.print() + + try: + start_time = time.time() + + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + console=console, + disable=quiet, + ) as progress: + task = progress.add_task("Analyzing iOS app bundle...", total=None) + + analyzer = IOSAnalyzer( + working_dir=working_dir, + skip_swift_metadata=skip_swift_metadata, + skip_symbols=skip_symbols, + ) + results = analyzer.analyze(input_path) + + progress.update(task, description="Analysis complete!") + + end_time = time.time() + duration = end_time - start_time + + results = results.model_copy(update={"analysis_duration": duration}) + + if output_format == "json": + _write_json_output(results, output, quiet) + else: + _print_table_output(results, quiet) + + if not quiet: + console.print(f"\n[bold green]✓[/bold green] Analysis 
completed in {duration:.2f}s") + _print_summary(results) + + except Exception as e: + if verbose: + console.print_exception() + else: + console.print(f"[bold red]Error:[/bold red] {e}") + raise click.Abort() + + +@cli.command() +@click.argument("input_path", type=click.Path(exists=True, path_type=Path), metavar="INPUT_PATH") +@click.option( + "-o", + "--output", + type=click.Path(path_type=Path), + default="android-analysis-report.json", + help="Output path for the JSON analysis report.", + show_default=True, +) +def android(input_path: Path, output: Path) -> None: + """Analyze an Android app bundle and generate a size report. + + INPUT_PATH can be: + - Android .apk file + - Android .aab file + + [Coming Soon - Android analysis is not yet implemented] + """ + console.print("[bold red]Android analysis is not yet implemented.[/bold red]") + console.print("This feature is coming soon!") + raise click.Abort() + + +def _validate_ios_input(input_path: Path) -> None: + """Validate that the input path looks like an iOS artifact.""" + suffix = input_path.suffix.lower() + valid_extensions = {".zip"} + + if suffix not in valid_extensions: + raise click.BadParameter( + f"'{input_path}' doesn't look like a typical iOS artifact. 
" + f"Expected one of: {', '.join(sorted(valid_extensions))}" + ) + + +def _write_json_output(results: AnalysisResults, output_path: Path, quiet: bool) -> None: + """Write results to JSON file.""" + # Ensure output directory exists + output_path.parent.mkdir(parents=True, exist_ok=True) + + # Write JSON with proper formatting + with open(output_path, "w", encoding="utf-8") as f: + json.dump(results.to_dict(), f, indent=2, ensure_ascii=False) + + if not quiet: + console.print(f"[bold green]✓[/bold green] Results written to: [cyan]{output_path}[/cyan]") + + +def _print_table_output(results: AnalysisResults, quiet: bool) -> None: + """Print results in table format to console.""" + if quiet: + return + + # App Info Table + app_table = Table(title="App Information", show_header=True, header_style="bold magenta") + app_table.add_column("Property", style="cyan") + app_table.add_column("Value", style="white") + + app_info = results.app_info + app_table.add_row("Name", app_info.name) + app_table.add_row("Bundle ID", app_info.bundle_id) + app_table.add_row("Version", f"{app_info.version} ({app_info.build})") + app_table.add_row("Min OS", app_info.minimum_os_version) + app_table.add_row("Platforms", ", ".join(app_info.supported_platforms)) + + console.print(app_table) + console.print() + + # File Analysis Table + file_table = Table(title="File Analysis", show_header=True, header_style="bold green") + file_table.add_column("Metric", style="cyan") + file_table.add_column("Value", style="white") + + file_analysis = results.file_analysis + file_table.add_row("Total Size", _format_bytes(file_analysis.total_size)) + file_table.add_row("File Count", str(file_analysis.file_count)) + file_table.add_row("Duplicate Files", str(len(file_analysis.duplicate_files))) + file_table.add_row("Potential Savings", _format_bytes(file_analysis.total_duplicate_savings)) + + console.print(file_table) + console.print() + + # File Types Table + if file_analysis.file_type_sizes: + type_table = 
Table(title="File Types", show_header=True, header_style="bold yellow") + type_table.add_column("Type", style="cyan") + type_table.add_column("Size", style="white") + type_table.add_column("Percentage", style="green") + + total_size = file_analysis.total_size + for file_type, size in sorted( + file_analysis.file_type_sizes.items(), key=lambda x: x[1], reverse=True + )[ + :10 + ]: # Top 10 file types + percentage = (size / total_size) * 100 if total_size > 0 else 0 + type_table.add_row(file_type or "unknown", _format_bytes(size), f"{percentage:.1f}%") + + console.print(type_table) + + +def _print_summary(results: AnalysisResults) -> None: + """Print a brief summary of the analysis.""" + file_analysis = results.file_analysis + binary_analysis = results.binary_analysis + + console.print("\n[bold]Summary:[/bold]") + console.print(f"• Total app size: [cyan]{_format_bytes(file_analysis.total_size)}[/cyan]") + console.print( + f"• Executable size: [cyan]{_format_bytes(binary_analysis.executable_size)}[/cyan]" + ) + console.print(f"• File count: [cyan]{file_analysis.file_count:,}[/cyan]") + console.print(f"• Architectures: [cyan]{', '.join(binary_analysis.architectures)}[/cyan]") + + if file_analysis.duplicate_files: + console.print( + f"• Potential savings from duplicates: " + f"[yellow]{_format_bytes(file_analysis.total_duplicate_savings)}[/yellow]" + ) + + +def _format_bytes(size: int) -> str: + """Format byte size in human-readable format.""" + for unit in ["B", "KB", "MB", "GB"]: + if size < 1024.0: + return f"{size:.1f} {unit}" + size /= 1024.0 + return f"{size:.1f} TB" + + +def main() -> None: + """Main entry point for the CLI.""" + cli() + + +if __name__ == "__main__": + main() diff --git a/src/app_size_analyzer/models/__init__.py b/src/app_size_analyzer/models/__init__.py new file mode 100644 index 0000000..0d71b15 --- /dev/null +++ b/src/app_size_analyzer/models/__init__.py @@ -0,0 +1,57 @@ +"""Data models for app size analysis results.""" + +# Import common 
models that are shared across platforms
+from .common import (
+    FileInfo,
+    DuplicateFileGroup,
+    SymbolInfo,
+    FileAnalysis,
+    BaseAppInfo,
+    BaseBinaryAnalysis,
+    BaseAnalysisResults,
+)
+
+# Import iOS-specific models
+from .ios import (
+    SwiftMetadata,
+    IOSAppInfo,
+    IOSBinaryAnalysis,
+    IOSAnalysisResults,
+    # Backwards compatibility aliases
+    AppInfo,
+    BinaryAnalysis,
+    AnalysisResults,
+)
+
+# Import Android models (placeholders for now)
+from .android import (
+    AndroidMetadata,
+    AndroidAppInfo,
+    AndroidBinaryAnalysis,
+    AndroidAnalysisResults,
+)
+
+# Explicit public API of the models package; keep in sync with the imports above.
+__all__ = [
+    # Common models
+    "FileInfo",
+    "DuplicateFileGroup",
+    "SymbolInfo",
+    "FileAnalysis",
+    "BaseAppInfo",
+    "BaseBinaryAnalysis",
+    "BaseAnalysisResults",
+    # iOS-specific models
+    "SwiftMetadata",
+    "IOSAppInfo",
+    "IOSBinaryAnalysis",
+    "IOSAnalysisResults",
+    # Backwards compatibility aliases (to be deprecated)
+    "AppInfo",
+    "BinaryAnalysis",
+    "AnalysisResults",
+    # Android models (placeholder)
+    "AndroidMetadata",
+    "AndroidAppInfo",
+    "AndroidBinaryAnalysis",
+    "AndroidAnalysisResults",
+]
diff --git a/src/app_size_analyzer/models/android.py b/src/app_size_analyzer/models/android.py
new file mode 100644
index 0000000..0761b71
--- /dev/null
+++ b/src/app_size_analyzer/models/android.py
@@ -0,0 +1,58 @@
+"""Android-specific models for analysis results (placeholder for future implementation)."""
+
+from __future__ import annotations
+
+# NOTE(review): List/Optional are currently unused here; they are kept for the
+# commented-out fields below that will use them when Android support lands.
+from typing import List, Optional
+
+from pydantic import BaseModel, Field, ConfigDict
+
+from .common import BaseAppInfo, BaseBinaryAnalysis, BaseAnalysisResults
+
+
+# TODO: Implement Android-specific models when Android support is added
+
+
+class AndroidMetadata(BaseModel):
+    """Android-specific metadata extracted from the APK/AAB."""
+
+    model_config = ConfigDict(frozen=True)
+
+    # Placeholder fields - to be implemented based on Android analysis needs
+    # permissions: List[str] = Field(default_factory=list, description="App permissions")
+    # activities: List[str] = Field(default_factory=list, description="Android activities")
+    # services: List[str] = Field(default_factory=list, description="Android services")
+    pass
+
+
+class AndroidAppInfo(BaseAppInfo):
+    """Android-specific app information (inherits name/version/build/executable)."""
+
+    model_config = ConfigDict(frozen=True)
+
+    # Android-specific fields to be added:
+    # package_name: str = Field(..., description="Android package name")
+    # min_sdk_version: int = Field(..., description="Minimum SDK version")
+    # target_sdk_version: int = Field(..., description="Target SDK version")
+    pass
+
+
+class AndroidBinaryAnalysis(BaseBinaryAnalysis):
+    """Android-specific binary analysis results."""
+
+    model_config = ConfigDict(frozen=True)
+
+    # Android-specific fields to be added:
+    # dex_analysis: Optional[DexAnalysis] = Field(None, description="DEX file analysis")
+    # native_libraries: List[str] = Field(default_factory=list, description="Native .so libraries")
+    pass
+
+
+class AndroidAnalysisResults(BaseAnalysisResults):
+    """Complete Android analysis results."""
+
+    model_config = ConfigDict(frozen=True)
+
+    # To be uncommented when Android models are implemented:
+    # app_info: AndroidAppInfo = Field(..., description="Android app information")
+    # binary_analysis: AndroidBinaryAnalysis = Field(..., description="Android binary analysis results")
+    pass
diff --git a/src/app_size_analyzer/models/common.py b/src/app_size_analyzer/models/common.py
new file mode 100644
index 0000000..a59dd83
--- /dev/null
+++ b/src/app_size_analyzer/models/common.py
@@ -0,0 +1,131 @@
+"""Common models shared across platforms."""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+
+from pydantic import BaseModel, Field, ConfigDict
+
+
+class FileInfo(BaseModel):
+    """Information about a single file in the app bundle."""
+
+    # frozen=True: instances are immutable and hashable, matching the
+    # read-only nature of analysis results.
+    model_config = ConfigDict(frozen=True)
+
+    path: str = Field(..., description="Relative path within the bundle")
+    size: int
= Field(..., ge=0, description="File size in bytes") + file_type: str = Field(..., description="File extension/type") + hash_md5: Optional[str] = Field(None, description="MD5 hash of file contents") + + +class DuplicateFileGroup(BaseModel): + """Group of duplicate files found in the bundle.""" + + model_config = ConfigDict(frozen=True) + + files: List[FileInfo] = Field(..., min_length=2, description="List of duplicate files") + potential_savings: int = Field(..., ge=0, description="Potential size savings in bytes") + + @property + def duplicate_count(self) -> int: + """Number of duplicate files (excluding the original).""" + return len(self.files) - 1 + + +class SymbolInfo(BaseModel): + """Information about a binary symbol.""" + + model_config = ConfigDict(frozen=True) + + name: str = Field(..., description="Symbol name") + mangled_name: Optional[str] = Field(None, description="Mangled symbol name") + size: int = Field(..., ge=0, description="Symbol size in bytes") + section: str = Field(..., description="Binary section containing the symbol") + symbol_type: str = Field(..., description="Type of symbol (function, data, etc.)") + + +class FileAnalysis(BaseModel): + """Analysis results for files in the app bundle.""" + + model_config = ConfigDict(frozen=True) + + total_size: int = Field(..., ge=0, description="Total bundle size in bytes") + file_count: int = Field(..., ge=0, description="Total number of files") + files_by_type: Dict[str, List[FileInfo]] = Field( + default_factory=dict, description="Files grouped by type/extension" + ) + duplicate_files: List[DuplicateFileGroup] = Field( + default_factory=list, description="Groups of duplicate files" + ) + largest_files: List[FileInfo] = Field( + default_factory=list, description="Largest files in the bundle" + ) + + @property + def total_duplicate_savings(self) -> int: + """Total potential savings from removing duplicates.""" + return sum(group.potential_savings for group in self.duplicate_files) + + @property + def 
file_type_sizes(self) -> Dict[str, int]: + """Total size by file type.""" + return { + file_type: sum(file.size for file in files) + for file_type, files in self.files_by_type.items() + } + + +class BaseAppInfo(BaseModel): + """Base app information that applies across platforms.""" + + model_config = ConfigDict(frozen=True) + + name: str = Field(..., description="App display name") + version: str = Field(..., description="App version") + build: str = Field(..., description="Build number") + executable: str = Field(..., description="Main executable name") + + +class BaseBinaryAnalysis(BaseModel): + """Base binary analysis that applies across platforms.""" + + model_config = ConfigDict(frozen=True) + + executable_size: int = Field(..., ge=0, description="Main executable size in bytes") + architectures: List[str] = Field(..., description="CPU architectures") + linked_libraries: List[str] = Field( + default_factory=list, description="Linked dynamic libraries" + ) + symbols: List[SymbolInfo] = Field(default_factory=list, description="Symbol information") + sections: Dict[str, int] = Field( + default_factory=dict, description="Binary sections and their sizes" + ) + + @property + def total_symbols_size(self) -> int: + """Total size of all symbols.""" + return sum(symbol.size for symbol in self.symbols) + + +class BaseAnalysisResults(BaseModel): + """Base analysis results structure.""" + + model_config = ConfigDict(frozen=True) + + file_analysis: FileAnalysis = Field(..., description="File-level analysis results") + generated_at: datetime = Field(default_factory=datetime.now, description="Analysis timestamp") + analysis_duration: Optional[float] = Field( + None, ge=0, description="Analysis duration in seconds" + ) + + @property + def total_size(self) -> int: + """Total app bundle size.""" + return self.file_analysis.total_size + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary with serializable datetime.""" + data = self.model_dump() + data["generated_at"] 
= self.generated_at.isoformat() + return data diff --git a/src/app_size_analyzer/models/ios.py b/src/app_size_analyzer/models/ios.py new file mode 100644 index 0000000..751bbf4 --- /dev/null +++ b/src/app_size_analyzer/models/ios.py @@ -0,0 +1,54 @@ +"""iOS-specific models for analysis results.""" + +from __future__ import annotations + +from typing import List, Optional + +from pydantic import BaseModel, Field, ConfigDict + +from .common import BaseAppInfo, BaseBinaryAnalysis, BaseAnalysisResults + + +class SwiftMetadata(BaseModel): + """Swift-specific metadata extracted from the binary.""" + + model_config = ConfigDict(frozen=True) + + classes: List[str] = Field(default_factory=list, description="Swift class names") + protocols: List[str] = Field(default_factory=list, description="Swift protocol names") + extensions: List[str] = Field(default_factory=list, description="Swift extension names") + total_metadata_size: int = Field(default=0, ge=0, description="Total Swift metadata size") + + +class IOSAppInfo(BaseAppInfo): + """iOS-specific app information.""" + + model_config = ConfigDict(frozen=True) + + bundle_id: str = Field(..., description="Bundle identifier") + minimum_os_version: str = Field(..., description="Minimum iOS version") + supported_platforms: List[str] = Field(default_factory=list, description="Supported platforms") + sdk_version: Optional[str] = Field(None, description="iOS SDK version used for build") + + +class IOSBinaryAnalysis(BaseBinaryAnalysis): + """iOS-specific binary analysis results.""" + + model_config = ConfigDict(frozen=True) + + swift_metadata: Optional[SwiftMetadata] = Field(None, description="Swift-specific metadata") + + +class IOSAnalysisResults(BaseAnalysisResults): + """Complete iOS analysis results.""" + + model_config = ConfigDict(frozen=True) + + app_info: IOSAppInfo = Field(..., description="iOS app information") + binary_analysis: IOSBinaryAnalysis = Field(..., description="iOS binary analysis results") + + +# Backwards 
compatibility aliases - can be removed once all references are updated +AppInfo = IOSAppInfo +BinaryAnalysis = IOSBinaryAnalysis +AnalysisResults = IOSAnalysisResults diff --git a/src/app_size_analyzer/utils/__init__.py b/src/app_size_analyzer/utils/__init__.py new file mode 100644 index 0000000..68028ac --- /dev/null +++ b/src/app_size_analyzer/utils/__init__.py @@ -0,0 +1,11 @@ +"""Utility modules for app size analyzer.""" + +from .logging import setup_logging +from .file_utils import extract_archive, find_app_bundle, calculate_file_hash + +__all__ = [ + "setup_logging", + "extract_archive", + "find_app_bundle", + "calculate_file_hash", +] \ No newline at end of file diff --git a/src/app_size_analyzer/utils/file_utils.py b/src/app_size_analyzer/utils/file_utils.py new file mode 100644 index 0000000..bedbcb8 --- /dev/null +++ b/src/app_size_analyzer/utils/file_utils.py @@ -0,0 +1,169 @@ +"""File utilities for app size analyzer.""" + +import hashlib +import shutil +import subprocess +import tempfile +import zipfile +from pathlib import Path +from typing import Optional + +from .logging import get_logger + +logger = get_logger(__name__) + + +def extract_archive(archive_path: Path, destination: Path) -> None: + """Extract an archive to the destination directory. 
+ + Args: + archive_path: Path to the archive file + destination: Destination directory for extraction + + Raises: + ValueError: If archive format is not supported + RuntimeError: If extraction fails + """ + destination.mkdir(parents=True, exist_ok=True) + + suffix = archive_path.suffix.lower() + + if suffix == ".zip" or suffix == ".ipa": + _extract_zip(archive_path, destination) + else: + raise ValueError(f"Unsupported archive format: {suffix}") + + +def _extract_zip(archive_path: Path, destination: Path) -> None: + """Extract a ZIP archive using Python's zipfile module.""" + try: + with zipfile.ZipFile(archive_path, "r") as zip_ref: + zip_ref.extractall(destination) + logger.debug(f"Extracted {archive_path} to {destination}") + except zipfile.BadZipFile as e: + raise RuntimeError(f"Invalid ZIP archive: {e}") + except Exception as e: + raise RuntimeError(f"Failed to extract archive: {e}") + + +def find_app_bundle(directory: Path, platform: str = "ios") -> Path: + """Find an app bundle in the given directory. 
+ + Args: + directory: Directory to search in + platform: Target platform ("ios" or "android") + + Returns: + Path to the found app bundle + + Raises: + FileNotFoundError: If no app bundle is found + """ + if platform == "ios": + return _find_ios_app_bundle(directory) + elif platform == "android": + return _find_android_app_bundle(directory) + else: + raise ValueError(f"Unsupported platform: {platform}") + + +def _find_ios_app_bundle(directory: Path) -> Path: + """Find an iOS .app bundle in the directory tree.""" + # Look for .app directories + for item in directory.rglob("*.app"): + if item.is_dir(): + logger.debug(f"Found iOS app bundle: {item}") + return item + + raise FileNotFoundError(f"No .app bundle found in {directory}") + + +def _find_android_app_bundle(directory: Path) -> Path: + """Find an Android .apk file in the directory tree.""" + # Look for .apk files + for item in directory.rglob("*.apk"): + if item.is_file(): + logger.debug(f"Found Android app bundle: {item}") + return item + + raise FileNotFoundError(f"No .apk file found in {directory}") + + +def calculate_file_hash(file_path: Path, algorithm: str = "md5") -> str: + """Calculate hash of a file. 
+ + Args: + file_path: Path to the file + algorithm: Hash algorithm to use ("md5", "sha1", "sha256") + + Returns: + Hexadecimal hash string + + Raises: + ValueError: If algorithm is not supported + FileNotFoundError: If file doesn't exist + """ + if not file_path.exists(): + raise FileNotFoundError(f"File not found: {file_path}") + + if algorithm == "md5": + hasher = hashlib.md5() + elif algorithm == "sha1": + hasher = hashlib.sha1() + elif algorithm == "sha256": + hasher = hashlib.sha256() + else: + raise ValueError(f"Unsupported hash algorithm: {algorithm}") + + try: + with open(file_path, "rb") as f: + # Read file in chunks to handle large files efficiently + for chunk in iter(lambda: f.read(8192), b""): + hasher.update(chunk) + + return hasher.hexdigest() + except Exception as e: + raise RuntimeError(f"Failed to calculate hash for {file_path}: {e}") + + +def get_file_size(file_path: Path) -> int: + """Get file size in bytes. + + Args: + file_path: Path to the file + + Returns: + File size in bytes + + Raises: + FileNotFoundError: If file doesn't exist + """ + if not file_path.exists(): + raise FileNotFoundError(f"File not found: {file_path}") + + return file_path.stat().st_size + + +def create_temp_directory(prefix: str = "app-analyzer-") -> Path: + """Create a temporary directory. + + Args: + prefix: Prefix for the temporary directory name + + Returns: + Path to the created temporary directory + """ + temp_dir = Path(tempfile.mkdtemp(prefix=prefix)) + logger.debug(f"Created temporary directory: {temp_dir}") + return temp_dir + + +def cleanup_directory(directory: Path) -> None: + """Remove a directory and all its contents. 
+ + Args: + directory: Directory to remove + """ + if directory.exists() and directory.is_dir(): + shutil.rmtree(directory) + logger.debug(f"Cleaned up directory: {directory}") \ No newline at end of file diff --git a/src/app_size_analyzer/utils/logging.py b/src/app_size_analyzer/utils/logging.py new file mode 100644 index 0000000..a6fde71 --- /dev/null +++ b/src/app_size_analyzer/utils/logging.py @@ -0,0 +1,38 @@ +"""Logging utilities for app size analyzer.""" + +import logging +import sys +from typing import Optional + + +def setup_logging(verbose: bool = False, quiet: bool = False) -> None: + """Setup logging configuration. + + Args: + verbose: Enable debug-level logging + quiet: Suppress all logging except errors + """ + if quiet: + level = logging.ERROR + elif verbose: + level = logging.DEBUG + else: + level = logging.INFO + + # Configure root logger + logging.basicConfig( + level=level, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + handlers=[ + logging.StreamHandler(sys.stderr) + ] + ) + + # Set levels for third-party libraries + if not verbose: + logging.getLogger("lief").setLevel(logging.WARNING) + + +def get_logger(name: str) -> logging.Logger: + """Get a logger with the specified name.""" + return logging.getLogger(name) \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..760e77f --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for app size analyzer.""" \ No newline at end of file diff --git "a/tests/artifacts/HackerNews 2-12-25, 6.06\342\200\257PM.xcarchive.zip" "b/tests/artifacts/HackerNews 2-12-25, 6.06\342\200\257PM.xcarchive.zip" new file mode 100644 index 0000000..4f8ee22 Binary files /dev/null and "b/tests/artifacts/HackerNews 2-12-25, 6.06\342\200\257PM.xcarchive.zip" differ diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py new file mode 100644 index 0000000..4681d2a --- /dev/null +++ b/tests/unit/test_cli.py @@ -0,0 +1,52 @@ +"""Unit tests 
for CLI functionality.""" + +import pytest +from click.testing import CliRunner + +from app_size_analyzer import __version__ +from app_size_analyzer.cli import cli + + +class TestCLI: + """Test cases for the CLI interface.""" + + def test_version_flag(self) -> None: + """Test --version flag displays correct version.""" + runner = CliRunner() + result = runner.invoke(cli, ["--version"]) + + assert result.exit_code == 0 + assert __version__ in result.output + + def test_help_output(self) -> None: + """Test help output is displayed when no command given.""" + runner = CliRunner() + result = runner.invoke(cli, []) + + assert result.exit_code == 0 + assert "App Size Analyzer" in result.output + assert "ios" in result.output + + def test_analyze_help(self) -> None: + """Test analyze command help.""" + runner = CliRunner() + result = runner.invoke(cli, ["ios", "--help"]) + + assert result.exit_code == 0 + assert "Analyze an iOS app bundle" in result.output + assert "INPUT_PATH" in result.output + + def test_analyze_missing_input(self) -> None: + """Test analyze command fails with missing input.""" + runner = CliRunner() + result = runner.invoke(cli, ["analyze"]) + + assert result.exit_code != 0 + assert "Missing argument" in result.output or "Error" in result.output + + def test_analyze_nonexistent_file(self) -> None: + """Test analyze command fails with nonexistent file.""" + runner = CliRunner() + result = runner.invoke(cli, ["analyze", "/nonexistent/file.app"]) + + assert result.exit_code != 0 diff --git a/tests/unit/test_models.py b/tests/unit/test_models.py new file mode 100644 index 0000000..1c16f4f --- /dev/null +++ b/tests/unit/test_models.py @@ -0,0 +1,159 @@ +"""Unit tests for data models.""" + +import pytest +from datetime import datetime + +from app_size_analyzer.models import ( + AnalysisResults, + AppInfo, + BinaryAnalysis, + DuplicateFileGroup, + FileAnalysis, + FileInfo, + SwiftMetadata, + SymbolInfo, +) + + +class TestFileInfo: + """Test cases for 
FileInfo model."""
+
+    def test_valid_file_info(self) -> None:
+        """Test creating valid FileInfo instance."""
+        file_info = FileInfo(path="test/file.txt", size=1024, file_type="txt", hash_md5="abcd1234")
+
+        assert file_info.path == "test/file.txt"
+        assert file_info.size == 1024
+        assert file_info.file_type == "txt"
+        assert file_info.hash_md5 == "abcd1234"
+
+    def test_negative_size_validation(self) -> None:
+        """Test that negative file size is rejected."""
+        # size has a ge=0 constraint; pydantic raises ValidationError, which
+        # is a ValueError subclass, so pytest.raises(ValueError) matches.
+        with pytest.raises(ValueError):
+            FileInfo(path="test/file.txt", size=-1, file_type="txt")
+
+
+class TestAppInfo:
+    """Test cases for AppInfo model."""
+
+    def test_valid_app_info(self) -> None:
+        """Test creating valid AppInfo instance."""
+        app_info = AppInfo(
+            name="Test App",
+            bundle_id="com.test.app",
+            version="1.0.0",
+            build="100",
+            executable="TestApp",
+            minimum_os_version="14.0",
+            supported_platforms=["iPhoneOS"],
+        )
+
+        assert app_info.name == "Test App"
+        assert app_info.bundle_id == "com.test.app"
+        assert app_info.version == "1.0.0"
+
+
+class TestDuplicateFileGroup:
+    """Test cases for DuplicateFileGroup model."""
+
+    def test_duplicate_count_property(self) -> None:
+        """Test duplicate_count property calculation."""
+        file1 = FileInfo(path="file1.txt", size=100, file_type="txt")
+        file2 = FileInfo(path="file2.txt", size=100, file_type="txt")
+        file3 = FileInfo(path="file3.txt", size=100, file_type="txt")
+
+        group = DuplicateFileGroup(files=[file1, file2, file3], potential_savings=200)
+
+        assert group.duplicate_count == 2  # 3 files - 1 original
+
+    def test_minimum_files_validation(self) -> None:
+        """Test that at least 2 files are required."""
+        file1 = FileInfo(path="file1.txt", size=100, file_type="txt")
+
+        # min_length=2 on DuplicateFileGroup.files rejects a single file.
+        with pytest.raises(ValueError):
+            DuplicateFileGroup(files=[file1], potential_savings=0)  # Only 1 file
+
+
+class TestFileAnalysis:
+    """Test cases for FileAnalysis model."""
+
+    def test_total_duplicate_savings_property(self) -> None:
+        """Test total_duplicate_savings property calculation."""
+        file1 = FileInfo(path="file1.txt", size=100, file_type="txt")
+        file2 = FileInfo(path="file2.txt", size=100, file_type="txt")
+
+        group1 = DuplicateFileGroup(files=[file1, file2], potential_savings=100)
+        group2 = DuplicateFileGroup(files=[file1, file2], potential_savings=200)
+
+        analysis = FileAnalysis(total_size=1000, file_count=10, duplicate_files=[group1, group2])
+
+        # Property sums potential_savings across all groups: 100 + 200.
+        assert analysis.total_duplicate_savings == 300
+
+    def test_file_type_sizes_property(self) -> None:
+        """Test file_type_sizes property calculation."""
+        txt_file1 = FileInfo(path="file1.txt", size=100, file_type="txt")
+        txt_file2 = FileInfo(path="file2.txt", size=200, file_type="txt")
+        jpg_file = FileInfo(path="image.jpg", size=500, file_type="jpg")
+
+        analysis = FileAnalysis(
+            total_size=800,
+            file_count=3,
+            files_by_type={"txt": [txt_file1, txt_file2], "jpg": [jpg_file]},
+        )
+
+        type_sizes = analysis.file_type_sizes
+        assert type_sizes["txt"] == 300  # 100 + 200
+        assert type_sizes["jpg"] == 500
+
+
+class TestAnalysisResults:
+    """Test cases for AnalysisResults model."""
+
+    def test_total_size_property(self) -> None:
+        """Test total_size property returns file analysis total."""
+        app_info = AppInfo(
+            name="Test",
+            bundle_id="com.test",
+            version="1.0",
+            build="1",
+            executable="test",
+            minimum_os_version="14.0",
+        )
+
+        file_analysis = FileAnalysis(total_size=2048, file_count=5)
+
+        binary_analysis = BinaryAnalysis(executable_size=1024, architectures=["arm64"])
+
+        results = AnalysisResults(
+            app_info=app_info, file_analysis=file_analysis, binary_analysis=binary_analysis
+        )
+
+        # total_size delegates to file_analysis.total_size.
+        assert results.total_size == 2048
+
+    def test_to_dict_serialization(self) -> None:
+        """Test to_dict method creates serializable dictionary."""
+        app_info = AppInfo(
+            name="Test",
+            bundle_id="com.test",
+            version="1.0",
+            build="1",
+            executable="test",
+            minimum_os_version="14.0",
+        )
+
+        file_analysis = FileAnalysis(total_size=1024, file_count=1)
+
+        binary_analysis = BinaryAnalysis(executable_size=512, architectures=["arm64"])
+
+        results = AnalysisResults(
+            app_info=app_info, file_analysis=file_analysis, binary_analysis=binary_analysis
+        )
+
+        data = results.to_dict()
+
+        assert isinstance(data, dict)
+        assert "app_info" in data
+        assert "file_analysis" in data
+        assert "binary_analysis" in data
+        assert "generated_at" in data
+        assert isinstance(data["generated_at"], str)  # Should be ISO format string