diff --git a/.filedust.conf.example b/.filedust.conf.example
new file mode 100644
index 0000000..67bd655
--- /dev/null
+++ b/.filedust.conf.example
@@ -0,0 +1,20 @@
+# filedust configuration file
+# Place at: ~/.filedust.conf
+#
+# Use this file to customize cleanup behavior.
+# List one pattern per line (keys only, no values). Paths are relative to $HOME.
+#
+# Patterns (globs) are allowed.
+
+[exclude]
+# Add directories or patterns you want filedust to ignore.
+# Examples:
+# Projects/important/*
+
+[include]
+# Add directories or patterns you want filedust to remove.
+# Examples:
+# node_modules
+# dist
+# *.tmp
+# *~
diff --git a/README.md b/README.md
index 0b82c7c..5862af9 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,11 @@ One interactive prompt at the end of the run (unless -y is used).
 Shows how much disk space can be freed.
 
 ### Safe by design
-Never touches dotfiles, configs, project files, or anything important.
+* It ONLY runs within the user's `$HOME`.
+
+* Puts the user in control by reading `~/.filedust.conf`.
+
+* Never touches dotfiles, configs, project files, or anything important unless you explicitly include it.
 
 ## Installation
 
@@ -50,6 +54,12 @@ cd filedust/
 poetry install
 ```
 
+### Custom config
+You can download the example config and add your own rules:
+```
+wget -O ~/.filedust.conf https://raw.githubusercontent.com/guardutils/filedust/main/.filedust.conf.example
+```
+
 ### TAB completion
 Add this to your `.bashrc`
 ```
diff --git a/pyproject.toml b/pyproject.toml
index e9ed90f..3b89d66 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "filedust"
-version = "0.2.0"
+version = "0.3.0"
 description = "Opinionated junk cleaner for dev machines (caches, build artifacts, editor backups)."
authors = ["Marco D'Aleo "] license = "GPL-3.0-or-later" diff --git a/src/filedust/cli.py b/src/filedust/cli.py index d05106a..f550f06 100644 --- a/src/filedust/cli.py +++ b/src/filedust/cli.py @@ -12,7 +12,7 @@ from rich.table import Table from rich.prompt import Confirm from rich import box -from .junk import Finding, iter_junk +from .junk import Finding, iter_junk, load_user_rules console = Console() @@ -168,10 +168,24 @@ def delete_all(findings: List[Finding]) -> int: def main(argv: list[str] | None = None) -> int: + print("Looking for junk ...") parser = build_parser() args = parser.parse_args(argv) root = Path(args.path).expanduser() + home = Path.home().resolve() + root_resolved = root.resolve() + + # Ensure root is inside the user's home directory + try: + root_resolved.relative_to(home) + except ValueError: + console.print( + f"[red]Error:[/] Refusing to operate outside the user's home directory.\n" + f"Requested: {root_resolved}\n" + f"Allowed: {home}" + ) + return 1 if not root.exists(): console.print(f"[red]Error:[/] Path not found: {root}") @@ -183,7 +197,8 @@ def main(argv: list[str] | None = None) -> int: "This may take a while and may require sudo for deletions.[/]" ) - findings = list(iter_junk(root)) + rules = load_user_rules() + findings = list(iter_junk(root, rules=rules)) total_size = compute_total_size(findings) if not findings: diff --git a/src/filedust/junk.py b/src/filedust/junk.py index 7b21c4a..19f572a 100644 --- a/src/filedust/junk.py +++ b/src/filedust/junk.py @@ -1,12 +1,41 @@ from __future__ import annotations import os +import configparser from dataclasses import dataclass from fnmatch import fnmatch from pathlib import Path from typing import Iterable, List +class UserRules: + def __init__(self): + self.include: list[str] = [] + self.exclude: list[str] = [] + + +def load_user_rules() -> UserRules: + rules = UserRules() + cfg_path = Path.home() / ".filedust.conf" + + if cfg_path.exists(): + parser = 
configparser.ConfigParser(allow_no_value=True) + parser.read(cfg_path) + + if parser.has_section("include"): + rules.include = list(parser["include"].keys()) + + if parser.has_section("exclude"): + rules.exclude = list(parser["exclude"].keys()) + + return rules + + +def matches_any(patterns: list[str], relpath: Path) -> bool: + posix = relpath.as_posix() + return any(fnmatch(posix, p) for p in patterns) + + @dataclass class Finding: path: Path @@ -23,7 +52,6 @@ JUNK_DIR_NAMES = { ".nox", ".tox", ".hypothesis", - ".cache", ".gradle", ".parcel-cache", ".turbo", @@ -31,7 +59,6 @@ JUNK_DIR_NAMES = { ".vite", ".sass-cache", ".sass-cache", - "build", "dist", } @@ -53,6 +80,9 @@ JUNK_FILE_PATTERNS = [ # VCS / system dirs SKIP_DIR_NAMES = { + ".cache", + "build", + ".gnupg", ".git", ".hg", ".svn", @@ -62,6 +92,34 @@ SKIP_DIR_NAMES = { } +HOME = Path.home().resolve() + + +def safe_exists(path: Path) -> bool | None: + """Return True/False if the path exists, or None if permission denied.""" + try: + return path.exists() + except Exception: + return None + + +def safe_resolve(path: Path, root: Path) -> Path | None: + """ + Resolve symlinks only if safe. + Return resolved path if it stays within root. + Return None if: + - resolution escapes the root + - resolution fails + - permission denied + """ + try: + resolved = path.resolve(strict=False) # NEVER strict + resolved.relative_to(root) # ensure containment + return resolved + except Exception: + return None + + def is_junk_dir_name(name: str) -> bool: return name in JUNK_DIR_NAMES @@ -70,37 +128,140 @@ def is_junk_file_name(name: str) -> bool: return any(fnmatch(name, pattern) for pattern in JUNK_FILE_PATTERNS) -def iter_junk(root: Path) -> Iterable[Finding]: +def iter_junk(root: Path, rules: UserRules | None = None) -> Iterable[Finding]: """ - Walk the tree under `root` and yield junk candidates. + Safe, fast junk scanner: + - Never follows symlinks. + - Broken symlinks are not automatically junk — they follow normal rules. 
+ - User include/exclude overrides all. + - Built-in junk rules applied only when safe. + - SKIP_DIR_NAMES protected unless user includes. + - Fully contained in $HOME. + - No crashes from PermissionError or unreadable paths. + """ + if rules is None: + rules = UserRules() - filedust: - - Skips known critical / config directories (SKIP_DIR_NAMES). - - Treats known "junk" directory names as removable as a whole. - - Treats known junk file patterns as removable. - """ root = root.resolve() + root_str = str(root) - for dirpath, dirnames, filenames in os.walk(root): + for dirpath, dirnames, filenames in os.walk(root, followlinks=False): dirpath_p = Path(dirpath) - # Prune dirs we never touch at all. - dirnames[:] = [d for d in dirnames if d not in SKIP_DIR_NAMES] + # Fast relative path computation + if dirpath == root_str: + rel_dir = Path(".") + else: + rel_dir = Path(dirpath[len(root_str) :].lstrip("/")) - # Detect junk directories (and skip walking inside them). + # USER EXCLUDE → skip entire subtree + if matches_any(rules.exclude, rel_dir): + dirnames[:] = [] + continue + + pruned = [] + + # Handling dirs + for d in dirnames: + child = dirpath_p / d + + try: + st = child.lstat() + except Exception: + continue # unreadable + + is_symlink = (st.st_mode & 0o170000) == 0o120000 + + if is_symlink: + # If broken symlink dir treat as file later via filenames (skip descent) + continue + + rel_child = rel_dir / d + + # User exclude wins + if matches_any(rules.exclude, rel_child): + continue + + # SKIP_DIR_NAMES unless user includes + if d in SKIP_DIR_NAMES and not matches_any( + rules.include, rel_child + ): + continue + + pruned.append(d) + + dirnames[:] = pruned + + # Detect JUNK dirs i = 0 while i < len(dirnames): name = dirnames[i] - if is_junk_dir_name(name): - junk_dir = dirpath_p / name - yield Finding(path=junk_dir, kind="dir", reason="junk_dir") - # Remove from walk so we don't descend into it. 
+ rel_child = rel_dir / name + + # User include directory + if matches_any(rules.include, rel_child): + yield Finding(dirpath_p / name, "dir", "user_include") del dirnames[i] continue + + # Built-in safe junk dirs + if is_junk_dir_name(name): + yield Finding(dirpath_p / name, "dir", "junk_dir") + del dirnames[i] + continue + i += 1 - # Now process files. + # Handling files (including symlinks) for fname in filenames: + fpath = dirpath_p / fname + rel_file = rel_dir / fname + + try: + st = fpath.lstat() + except Exception: + continue + + is_symlink = (st.st_mode & 0o170000) == 0o120000 + + # Handling broken symlinks + if is_symlink: + exists = safe_exists(fpath) + + # Permission denied → skip + if exists is None: + continue + + # User exclude wins + if matches_any(rules.exclude, rel_file): + continue + + # User include wins + if matches_any(rules.include, rel_file): + yield Finding(fpath, "file", "user_include") + continue + + # Broken symlink? + if exists is False: + # DO NOT auto-delete — classify like regular file + # Only built-in junk patterns apply + if is_junk_file_name(fname): + yield Finding(fpath, "file", "broken_symlink") + continue + + # Valid symlink — NEVER follow; only user-include counts + continue + + # Regular files + # User exclude wins + if matches_any(rules.exclude, rel_file): + continue + + # User include wins + if matches_any(rules.include, rel_file): + yield Finding(fpath, "file", "user_include") + continue + + # Built-in junk patterns (safe ones) if is_junk_file_name(fname): - fpath = dirpath_p / fname - yield Finding(path=fpath, kind="file", reason="junk_file") + yield Finding(fpath, "file", "junk_file")