# Globstar matcher added by the patch "Add globstar filtering"
# (src/filedust/junk.py). Only the two complete definitions carried by the
# patch are reproduced here; the partial hunks for load_user_rules() and
# iter_junk() are not complete definitions and are intentionally left out.
from fnmatch import fnmatchcase
from pathlib import Path


def matches_any(patterns: list[str], relpath: Path) -> bool:
    """Return True if *relpath* matches at least one globstar pattern.

    Rules:
      - ``*`` (and the other fnmatch wildcards) match within one path segment
      - ``**`` as a whole segment matches zero or more segments
      - leading/trailing/duplicate ``/`` in a pattern are ignored
      - matching is case-sensitive on every platform
      - an empty pattern matches nothing

    Args:
        patterns: Glob patterns using ``/`` as the segment separator.
        relpath: Path to test; it is compared segment by segment via
            ``relpath.parts``.

    Returns:
        True as soon as one pattern matches, otherwise False.
    """
    path_parts = relpath.parts

    for pattern in patterns:
        # Dropping empty segments subsumes strip("/") and makes "a//b"
        # behave like "a/b" instead of silently never matching.
        pat_parts = tuple(seg for seg in pattern.split("/") if seg)

        # A pattern with no segments must not match the empty path ("."),
        # otherwise a blank config entry would select everything.
        if pat_parts and _match_parts(pat_parts, path_parts):
            return True

    return False


def _match_parts(pat: tuple[str, ...], path: tuple[str, ...]) -> bool:
    """Match pattern segments against path segments with ``**`` support.

    Iterative replacement for naive recursive backtracking: the work is
    bounded by ``len(pat) * len(path)`` segment comparisons, so patterns with
    several ``**`` segments cannot blow up on deep paths.

    Args:
        pat: Pattern split into segments; a segment equal to ``"**"`` matches
            zero or more path segments, any other segment is an fnmatch glob
            for exactly one path segment.
        path: Path split into segments.

    Returns:
        True if the whole pattern consumes the whole path.
    """
    # reachable[j] is True when the pattern segments processed so far can
    # consume exactly the first j path segments.
    reachable = [True] + [False] * len(path)

    for segment in pat:
        if segment == "**":
            # "**" may absorb any number of further segments: once a prefix
            # length is reachable, every longer prefix is reachable as well.
            seen = False
            for idx, flag in enumerate(reachable):
                seen = seen or flag
                reachable[idx] = seen
        else:
            advanced = [False] * len(reachable)
            for idx, name in enumerate(path):
                # fnmatchcase (not fnmatch) keeps matching case-sensitive
                # regardless of the host OS.
                if reachable[idx] and fnmatchcase(name, segment):
                    advanced[idx + 1] = True
            reachable = advanced

        if not any(reachable):
            return False

    return reachable[-1]