10 Commits
0.2.0 ... 0.3.1

Author SHA1 Message Date
Marco D'Aleo
fca4c8defc Merge pull request #3 from guardutils/relax_dependencies
Change dependencies constraints
2025-11-29 17:02:59 +00:00
6cdfd2fc44 Change dependencies constraints, fix 'Looking for junk' print statement location 2025-11-29 17:01:22 +00:00
6c1d2dc430 Update badges URLs 2025-11-29 16:40:13 +00:00
Marco D'Aleo
fa8a194ccb Merge pull request #2 from guardutils/update_filedust_20251129
Improve safety and add config file
- Add .cache and build to the skip dir list, make filedust run ONLY in the user home directory
- Major rewrite of junk.py, adding user config file for custom rules, don't treat broken symlink as junk
- Add filedust config file, update README, version bump
2025-11-29 10:52:40 +00:00
677b14db26 Add filedust config file, update README, version bump 2025-11-29 10:23:05 +00:00
35f5f2674a Major rewrite of junk.py, adding user config file for custom rules, don't treat broken symlink as junk 2025-11-29 10:02:45 +00:00
c75a5246e3 Add .cache and build to the skip dir list, make filedust run ONLY in the user home directory 2025-11-29 08:29:14 +00:00
Marco D'Aleo
7f2b23b41b Merge pull request #1 from guardutils/update_filedust_20251127
Switch ownership from mdaleo404 to guardutils in README and pyproject
2025-11-27 17:44:01 +00:00
ae281624da Trim trailing whitespaces in .gitignore 2025-11-27 17:42:49 +00:00
1bebbcfa42 Switch ownership from mdaleo404 to guardutils in README and pyproject 2025-11-27 17:42:37 +00:00
7 changed files with 243 additions and 37 deletions

20
.filedust.conf.example Normal file
View File

@@ -0,0 +1,20 @@
# filedust configuration file
# Place at: ~/.filedust.conf
#
# Use this file to customize cleanup behavior.
# Only keys matter (no values). Paths are relative to the scanned directory (which must be inside $HOME).
#
# Patterns (globs) are allowed.
[exclude]
# Add directories or patterns you want filedust to ignore.
# Examples:
# Projects/important/*
[include]
# Add directories or patterns you want filedust to remove.
# Examples:
# node_modules
# dist
# *.tmp
# *~

View File

@@ -1,8 +1,7 @@
[![License](https://img.shields.io/github/license/mdaleo404/filedust?style=flat)](LICENCE) [![License](https://img.shields.io/github/license/guardutils/filedust?style=flat)](LICENCE)
[![Language](https://img.shields.io/github/languages/top/mdaleo404/filedust.svg)](https://github.com/mdaleo404/filedust/) [![Language](https://img.shields.io/github/languages/top/guardutils/filedust.svg)](https://github.com/guardutils/filedust/)
![GitHub Release](https://img.shields.io/github/v/release/mdaleo404/filedust?display_name=release&logo=github) [![GitHub Release](https://img.shields.io/github/v/release/guardutils/filedust?display_name=release&logo=github)](https://github.com/guardutils/filedust/releases)
![PyPI - Version](https://img.shields.io/pypi/v/filedust?logo=pypi) [![PyPI - Version](https://img.shields.io/pypi/v/filedust?logo=pypi)](https://pypi.org/project/filedust/#history)
[![Build Status](https://img.shields.io/github/actions/workflow/status/mdaleo404/filedust/.github/workflows/lint-and-security.yml)](https://github.com/mdaleo404/filedust/actions)
[![PyPI downloads](https://img.shields.io/pypi/dm/filedust.svg)](https://pypi.org/project/filedust/) [![PyPI downloads](https://img.shields.io/pypi/dm/filedust.svg)](https://pypi.org/project/filedust/)
# filedust # filedust
@@ -35,7 +34,11 @@ One interactive prompt at the end of the run (unless -y is used).
Shows how much disk space can be freed. Shows how much disk space can be freed.
### Safe by design ### Safe by design
Never touches dotfiles, configs, project files, or anything important. * It ONLY runs within the user's `$HOME`
* Puts the user in control by reading `~/.filedust.conf`
* Never touches dotfiles, configs, project files, or anything important unless you tell it to.
## Installation ## Installation
@@ -46,11 +49,17 @@ pip install filedust
### From this repository ### From this repository
``` ```
git clone https://github.com/mdaleo404/filedust.git git clone https://github.com/guardutils/filedust.git
cd filedust/ cd filedust/
poetry install poetry install
``` ```
### Custom config
You can download the example config and add your own custom rules:
```
wget -O ~/.filedust.conf https://raw.githubusercontent.com/guardutils/filedust/main/.filedust.conf.example
```
### TAB completion ### TAB completion
Add this to your `.bashrc` Add this to your `.bashrc`
``` ```

2
poetry.lock generated
View File

@@ -555,4 +555,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.10,<4.0" python-versions = ">=3.10,<4.0"
content-hash = "676393f654b241eb2ea6f983d589e83daff16e0b0ca2cb76228c0cd5ed447591" content-hash = "5ffc6940e33919ad5c8107dde30e6203d63a3bb64eaab81013cde2e773964657"

View File

@@ -1,18 +1,18 @@
[tool.poetry] [tool.poetry]
name = "filedust" name = "filedust"
version = "0.2.0" version = "0.3.1"
description = "Opinionated junk cleaner for dev machines (caches, build artifacts, editor backups)." description = "Opinionated junk cleaner for dev machines (caches, build artifacts, editor backups)."
authors = ["Marco D'Aleo <marco@marcodaleo.com>"] authors = ["Marco D'Aleo <marco@marcodaleo.com>"]
license = "GPL-3.0-or-later" license = "GPL-3.0-or-later"
readme = "README.md" readme = "README.md"
homepage = "https://github.com/mdaleo404/filedust" homepage = "https://github.com/guardutils/filedust"
repository = "https://github.com/mdaleo404/filedust" repository = "https://github.com/guardutils/filedust"
packages = [{ include = "filedust", from = "src" }] packages = [{ include = "filedust", from = "src" }]
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = ">=3.10,<4.0" python = ">=3.10,<4.0"
rich = "^13.0.0" rich = ">=12"
argcomplete = "^3.3.0" argcomplete = ">=2"
[tool.poetry.scripts] [tool.poetry.scripts]
filedust = "filedust.cli:main" filedust = "filedust.cli:main"

View File

@@ -12,7 +12,7 @@ from rich.table import Table
from rich.prompt import Confirm from rich.prompt import Confirm
from rich import box from rich import box
from .junk import Finding, iter_junk from .junk import Finding, iter_junk, load_user_rules
console = Console() console = Console()
@@ -172,18 +172,34 @@ def main(argv: list[str] | None = None) -> int:
args = parser.parse_args(argv) args = parser.parse_args(argv)
root = Path(args.path).expanduser() root = Path(args.path).expanduser()
home = Path.home().resolve()
root_resolved = root.resolve()
# Ensure root is inside the user's home directory
try:
root_resolved.relative_to(home)
except ValueError:
console.print(
f"[red]Error:[/] Refusing to operate outside the user's home directory.\n"
f"Requested: {root_resolved}\n"
f"Allowed: {home}"
)
return 1
if not root.exists(): if not root.exists():
console.print(f"[red]Error:[/] Path not found: {root}") console.print(f"[red]Error:[/] Path not found: {root}")
return 1 return 1
print("Looking for junk ...")
if root.resolve() == Path("/"): if root.resolve() == Path("/"):
console.print( console.print(
"[yellow]Running filedust on the entire filesystem (/). " "[yellow]Running filedust on the entire filesystem (/). "
"This may take a while and may require sudo for deletions.[/]" "This may take a while and may require sudo for deletions.[/]"
) )
findings = list(iter_junk(root)) rules = load_user_rules()
findings = list(iter_junk(root, rules=rules))
total_size = compute_total_size(findings) total_size = compute_total_size(findings)
if not findings: if not findings:

View File

@@ -1,12 +1,41 @@
from __future__ import annotations from __future__ import annotations
import os import os
import configparser
from dataclasses import dataclass from dataclasses import dataclass
from fnmatch import fnmatch from fnmatch import fnmatch
from pathlib import Path from pathlib import Path
from typing import Iterable, List from typing import Iterable, List
class UserRules:
    """Container for the user's include/exclude glob patterns."""

    def __init__(self):
        # Patterns read from the [include] section of the config file.
        self.include: list[str] = []
        # Patterns read from the [exclude] section of the config file.
        self.exclude: list[str] = []


def load_user_rules() -> UserRules:
    """
    Read ``~/.filedust.conf`` and return the patterns it declares.

    The file is parsed as an INI document whose options carry no values:
    every option name in ``[include]`` / ``[exclude]`` is taken as one
    pattern. A missing file or a missing section yields an empty list for
    that rule set.

    Returns:
        A ``UserRules`` instance; both lists are empty when no config exists.
    """
    rules = UserRules()
    cfg_path = Path.home() / ".filedust.conf"

    if cfg_path.exists():
        parser = configparser.ConfigParser(allow_no_value=True)
        # ConfigParser lower-cases option names by default. The option names
        # here are path patterns, so keep them verbatim: otherwise a rule such
        # as "Projects/important/*" would be stored as "projects/important/*"
        # and stop matching the real path on case-sensitive filesystems.
        parser.optionxform = str
        parser.read(cfg_path)

        if parser.has_section("include"):
            rules.include = list(parser["include"].keys())
        if parser.has_section("exclude"):
            rules.exclude = list(parser["exclude"].keys())

    return rules
def matches_any(patterns: list[str], relpath: Path) -> bool:
    """
    Tell whether ``relpath`` matches at least one glob in ``patterns``.

    The path is compared in its POSIX (forward-slash) form against each
    pattern with ``fnmatch``; an empty pattern list never matches.
    """
    candidate = relpath.as_posix()
    for glob in patterns:
        if fnmatch(candidate, glob):
            return True
    return False
@dataclass @dataclass
class Finding: class Finding:
path: Path path: Path
@@ -23,7 +52,6 @@ JUNK_DIR_NAMES = {
".nox", ".nox",
".tox", ".tox",
".hypothesis", ".hypothesis",
".cache",
".gradle", ".gradle",
".parcel-cache", ".parcel-cache",
".turbo", ".turbo",
@@ -31,7 +59,6 @@ JUNK_DIR_NAMES = {
".vite", ".vite",
".sass-cache", ".sass-cache",
".sass-cache", ".sass-cache",
"build",
"dist", "dist",
} }
@@ -53,6 +80,9 @@ JUNK_FILE_PATTERNS = [
# VCS / system dirs # VCS / system dirs
SKIP_DIR_NAMES = { SKIP_DIR_NAMES = {
".cache",
"build",
".gnupg",
".git", ".git",
".hg", ".hg",
".svn", ".svn",
@@ -62,6 +92,34 @@ SKIP_DIR_NAMES = {
} }
HOME = Path.home().resolve()
def safe_exists(path: Path) -> bool | None:
    """
    Existence check that never raises.

    Returns the result of ``path.exists()``, or ``None`` when the check
    itself raised (for example because permission was denied).
    """
    try:
        found = path.exists()
    except Exception:
        # Any failure while probing is reported as "unknown".
        return None
    return found
def safe_resolve(path: Path, root: Path) -> Path | None:
    """
    Resolve symlinks only if safe.

    Both ``path`` and ``root`` are resolved before the containment check, so
    a ``root`` that is itself a symlink (or otherwise not yet resolved) no
    longer makes every path under it look like an escape.

    Returns:
        The resolved path if it stays within ``root``.
        ``None`` if:
        - resolution escapes the root
        - resolution fails
        - permission denied
    """
    try:
        root_resolved = Path(root).resolve(strict=False)
        # NEVER strict: a dangling link must resolve to its target path
        # instead of raising.
        resolved = Path(path).resolve(strict=False)
        # relative_to raises ValueError when `resolved` is outside the root.
        resolved.relative_to(root_resolved)
    except Exception:
        # Deliberately best-effort: any failure means "not safe to use".
        return None
    return resolved
def is_junk_dir_name(name: str) -> bool: def is_junk_dir_name(name: str) -> bool:
return name in JUNK_DIR_NAMES return name in JUNK_DIR_NAMES
@@ -70,37 +128,140 @@ def is_junk_file_name(name: str) -> bool:
return any(fnmatch(name, pattern) for pattern in JUNK_FILE_PATTERNS) return any(fnmatch(name, pattern) for pattern in JUNK_FILE_PATTERNS)
def iter_junk(root: Path) -> Iterable[Finding]: def iter_junk(root: Path, rules: UserRules | None = None) -> Iterable[Finding]:
""" """
Walk the tree under `root` and yield junk candidates. Safe, fast junk scanner:
- Never follows symlinks.
- Broken symlinks are not automatically junk — they follow normal rules.
- User include/exclude overrides all.
- Built-in junk rules applied only when safe.
- SKIP_DIR_NAMES protected unless user includes.
- Fully contained in $HOME.
- No crashes from PermissionError or unreadable paths.
"""
if rules is None:
rules = UserRules()
filedust:
- Skips known critical / config directories (SKIP_DIR_NAMES).
- Treats known "junk" directory names as removable as a whole.
- Treats known junk file patterns as removable.
"""
root = root.resolve() root = root.resolve()
root_str = str(root)
for dirpath, dirnames, filenames in os.walk(root): for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
dirpath_p = Path(dirpath) dirpath_p = Path(dirpath)
# Prune dirs we never touch at all. # Fast relative path computation
dirnames[:] = [d for d in dirnames if d not in SKIP_DIR_NAMES] if dirpath == root_str:
rel_dir = Path(".")
else:
rel_dir = Path(dirpath[len(root_str) :].lstrip("/"))
# Detect junk directories (and skip walking inside them). # USER EXCLUDE → skip entire subtree
if matches_any(rules.exclude, rel_dir):
dirnames[:] = []
continue
pruned = []
# Handling dirs
for d in dirnames:
child = dirpath_p / d
try:
st = child.lstat()
except Exception:
continue # unreadable
is_symlink = (st.st_mode & 0o170000) == 0o120000
if is_symlink:
# Symlinked directories are dropped here: never descended into and never reported (they are not revisited in the file loop)
continue
rel_child = rel_dir / d
# User exclude wins
if matches_any(rules.exclude, rel_child):
continue
# SKIP_DIR_NAMES unless user includes
if d in SKIP_DIR_NAMES and not matches_any(
rules.include, rel_child
):
continue
pruned.append(d)
dirnames[:] = pruned
# Detect JUNK dirs
i = 0 i = 0
while i < len(dirnames): while i < len(dirnames):
name = dirnames[i] name = dirnames[i]
if is_junk_dir_name(name): rel_child = rel_dir / name
junk_dir = dirpath_p / name
yield Finding(path=junk_dir, kind="dir", reason="junk_dir") # User include directory
# Remove from walk so we don't descend into it. if matches_any(rules.include, rel_child):
yield Finding(dirpath_p / name, "dir", "user_include")
del dirnames[i] del dirnames[i]
continue continue
# Built-in safe junk dirs
if is_junk_dir_name(name):
yield Finding(dirpath_p / name, "dir", "junk_dir")
del dirnames[i]
continue
i += 1 i += 1
# Now process files. # Handling files (including symlinks)
for fname in filenames: for fname in filenames:
fpath = dirpath_p / fname
rel_file = rel_dir / fname
try:
st = fpath.lstat()
except Exception:
continue
is_symlink = (st.st_mode & 0o170000) == 0o120000
# Handling broken symlinks
if is_symlink:
exists = safe_exists(fpath)
# Permission denied → skip
if exists is None:
continue
# User exclude wins
if matches_any(rules.exclude, rel_file):
continue
# User include wins
if matches_any(rules.include, rel_file):
yield Finding(fpath, "file", "user_include")
continue
# Broken symlink?
if exists is False:
# DO NOT auto-delete — classify like regular file
# Only built-in junk patterns apply
if is_junk_file_name(fname):
yield Finding(fpath, "file", "broken_symlink")
continue
# Valid symlink — NEVER follow; only user-include counts
continue
# Regular files
# User exclude wins
if matches_any(rules.exclude, rel_file):
continue
# User include wins
if matches_any(rules.include, rel_file):
yield Finding(fpath, "file", "user_include")
continue
# Built-in junk patterns (safe ones)
if is_junk_file_name(fname): if is_junk_file_name(fname):
fpath = dirpath_p / fname yield Finding(fpath, "file", "junk_file")
yield Finding(path=fpath, kind="file", reason="junk_file")