diff --git a/README.md b/README.md index dd49ca4..d8e8b6d 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ TwinTrim is a powerful and efficient tool designed to find and manage duplicate - **Multi-Threaded Processing**: Utilizes multi-threading to quickly scan and process large numbers of files concurrently. - **Deadlock Prevention**: Implements locks to prevent deadlocks during multi-threaded operations, ensuring smooth and safe execution. - **User-Friendly Interface**: Offers clear prompts and feedback via the command line, making the process straightforward and interactive. +- **Dry Run**: Use the --dry-run option to simulate the process without making any changes, allowing you to review what will happen before executing. ## How It Works @@ -46,7 +47,10 @@ TwinTrim is a powerful and efficient tool designed to find and manage duplicate - Duplicate files are identified by comparing their hashes. - Based on file modification time, the latest file is retained, and older duplicates are removed. -5. **Deadlock Prevention**: +5. **Dry Run Mode**: + - The --dry-run flag allows you to simulate the duplicate removal process without making any actual changes, giving you an opportunity to review potential actions before committing to them. + +6. **Deadlock Prevention**: - Uses locks within multi-threaded processes to ensure that resources are accessed safely, preventing deadlocks that could otherwise halt execution. ### Key Functions @@ -74,6 +78,7 @@ python -m twinTrim.main [OPTIONS] - `--exclude`: Exclude specific files by name. - `--label-color`: Set the font color of the output label of the progress bar. - `--bar-color`: Set the color of the progress bar. +- `--dry-run`: Simulate the duplicate removal process without making any changes. ### Examples @@ -92,6 +97,12 @@ python -m twinTrim.main [OPTIONS] python -m twinTrim.main /path/to/directory --min-size "50kb" --max-size "500mb" --file-type "txt" ``` +4. 
**Dry Run Simulation**: + + ```bash + python -m twinTrim.main /path/to/directory --dry-run + ``` + ## Dependencies - Python 3.6+ diff --git a/twinTrim/flagController.py b/twinTrim/flagController.py index 9aed5e2..25cbfbb 100644 --- a/twinTrim/flagController.py +++ b/twinTrim/flagController.py @@ -33,10 +33,6 @@ def handleAllFlag(directory,file_filter,pb_color,bar_color): # Update progress bar as files are processed for future in as_completed(futures): - try: - future.result() # Ensures exception handling for each future - except Exception as e: - click.echo(click.style(f"Error processing file {futures[future]}: {str(e)}", fg='red')) progress_bar.update(1) click.echo(click.style("All files scanned and duplicates handled.", fg='green')) @@ -50,7 +46,6 @@ def handleAllFlag(directory,file_filter,pb_color,bar_color): def find_duplicates(directory, file_filter, pb_color, bar_color): """Find duplicate files in the given directory and store them in normalStore.""" # Collect all file paths first and apply filters - start_time=time.time() all_files = [os.path.join(root, file_name) for root, _, files in os.walk(directory) for file_name in files] all_files = [f for f in all_files if file_filter.filter_files(f)] # Apply filters @@ -65,14 +60,8 @@ def process_file(file_path): futures = {executor.submit(process_file, file_path): file_path for file_path in all_files} for future in as_completed(futures): - try: - future.result() # Ensures exception handling for each future - except Exception as e: - click.echo(click.style(f"Error processing file {futures[future]}: {str(e)}", fg='red')) - progress_bar.update(1) + progress_bar.update(1) - end_time=time.time() - click.echo(click.style(f"Time taken to find all duplicate files: {end_time-start_time:.2f} seconds.", fg='green')) duplicates = [] for _, metadata in normalStore.items(): if len(metadata.filepaths) > 1: @@ -81,3 +70,5 @@ def process_file(file_path): duplicates.append((original_path, duplicate_path)) return duplicates + + diff --git 
a/twinTrim/flags.py b/twinTrim/flags.py index 30d7f61..7e24dfc 100644 --- a/twinTrim/flags.py +++ b/twinTrim/flags.py @@ -9,10 +9,10 @@ from twinTrim.dataStructures.fileFilter import FileFilter # Setting up logging configuration -logging.basicConfig ( - filename='duplicate_file_manager.log', - level = logging.INFO, - format = '%(asctime)s - %(levelname)s - %(message)s' +logging.basicConfig( + filename='duplicate_file_manager.log', + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s' ) @click.command() @@ -24,9 +24,10 @@ @click.option("--exclude", multiple=True, help="Files to exclude by name.") @click.option("--label-color", default="yellow", type=str, help="Color of the label of progress bar.") @click.option("--bar-color", default='#aaaaaa', type=str, help="Color of the progress bar.") -def cli(directory, all, min_size, max_size, file_type, exclude, label_color, bar_color): +@click.option("--dry-run", is_flag=True, help="Simulate the deletion without actually removing any files.") +def cli(directory, all, min_size, max_size, file_type, exclude, label_color, bar_color, dry_run): """Find and manage duplicate files in the specified DIRECTORY.""" - + # Initialize the FileFilter object file_filter = FileFilter() file_filter.setMinFileSize(parse_size(min_size)) @@ -37,7 +38,7 @@ def cli(directory, all, min_size, max_size, file_type, exclude, label_color, bar if all: logging.info("Deleting all duplicate files without asking.") - handleAllFlag(directory, file_filter, label_color, bar_color) + handleAllFlag(directory, file_filter, label_color, bar_color, dry_run) return start_time = time.time() @@ -75,33 +76,41 @@ def cli(directory, all, min_size, max_size, file_type, exclude, label_color, bar f"{idx + 1}) {duplicate} (Size: {os.path.getsize(duplicate)} bytes)" for idx, duplicate in enumerate(duplicates_list) ] - answers = inquirer.prompt( - [ - inquirer.Checkbox( - 'files', - message="Select files to delete (Use space to select, enter to confirm, or 
ctr + c to cancel, arrow key to navigate.)", - choices=file_options, - validate=lambda answer, current: len(answer) > 0 or "You must choose at least one file.", - ), - inquirer.Confirm( - 'confirm', - message="Are you sure you want to delete the selected files?", - default=True + if dry_run: + # Log and display which files would be deleted in dry-run mode + click.echo(click.style("Dry Run Mode: These files would be deleted:", fg='yellow')) + for option in file_options: + click.echo(click.style(option, fg='red')) + logging.info(f"[Dry Run] Would delete files: {file_options}") + else: + # If not dry run, prompt for deletion + answers = inquirer.prompt( + [ + inquirer.Checkbox( + 'files', + message="Select files to delete (Use space to select, enter to confirm, or ctrl+c to cancel, arrow key to navigate.)", + choices=file_options, + validate=lambda answer, current: len(answer) > 0 or "You must choose at least one file.", + ), + inquirer.Confirm( + 'confirm', + message="Are you sure you want to delete the selected files?", + default=True + ) + ] ) - ]) + if answers and answers['confirm']: + selected_files = answers['files'] + # Convert the selected options back to the original file paths + files_to_delete = [duplicates_list[int(option.split(")")[0]) - 1] for option in selected_files] - if answers and answers['confirm']: - selected_files = answers['files'] - # Convert the selected options back to the original file paths - files_to_delete = [duplicates_list[int(option.split(")")[0]) - 1] for option in selected_files] - - for file_path in files_to_delete: - handle_and_remove(file_path) - else: - click.echo(click.style("File deletion canceled.", fg='yellow')) + for file_path in files_to_delete: + handle_and_remove(file_path) + else: + click.echo(click.style("File deletion canceled.", fg='yellow')) end_time = time.time() time_taken = end_time - start_time click.echo(click.style(f"Time taken: {time_taken:.2f} seconds.", fg='green')) - logging.info(f"Total time taken: 
{time_taken:.2f} seconds.") \ No newline at end of file + logging.info(f"Total time taken: {time_taken:.2f} seconds.")