9 Commits

7 changed files with 337 additions and 33 deletions

20
.filedust.conf.example Normal file
View File

@@ -0,0 +1,20 @@
# filedust configuration file
# Place at: ~/.filedust.conf
#
# Use this file to customize cleanup behavior.
# Only keys matter (no values). Paths are relative to $HOME.
#
# Patterns (globs) are allowed.
[exclude]
# Add directories or patterns you want filedust to ignore.
# Examples:
# Projects/important/*
[include]
# Add directories or patterns you want filedust to remove.
# Examples:
# node_modules
# dist
# *.tmp
# *~

4
.gitignore vendored
View File

@@ -182,9 +182,9 @@ cython_debug/
.abstra/ .abstra/
# Visual Studio Code # Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer, # and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder # you could uncomment the following to ignore the entire vscode folder
# .vscode/ # .vscode/

View File

@@ -1,8 +1,7 @@
![License](https://img.shields.io/github/license/mdaleo404/filedust) [![License](https://img.shields.io/github/license/guardutils/filedust?style=flat)](LICENCE)
[![Language](https://img.shields.io/github/languages/top/mdaleo404/filedust.svg)](https://github.com/mdaleo404/filedust/) [![Language](https://img.shields.io/github/languages/top/guardutils/filedust.svg)](https://github.com/guardutils/filedust/)
![GitHub Release](https://img.shields.io/github/v/release/mdaleo404/filedust?display_name=release&logo=github) ![GitHub Release](https://img.shields.io/github/v/release/guardutils/filedust?display_name=release&logo=github)
![PyPI - Version](https://img.shields.io/pypi/v/filedust?logo=pypi) ![PyPI - Version](https://img.shields.io/pypi/v/filedust?logo=pypi)
[![Build Status](https://img.shields.io/github/actions/workflow/status/mdaleo404/filedust/.github/workflows/lint-and-security.yml)](https://github.com/mdaleo404/filedust/actions)
[![PyPI downloads](https://img.shields.io/pypi/dm/filedust.svg)](https://pypi.org/project/filedust/) [![PyPI downloads](https://img.shields.io/pypi/dm/filedust.svg)](https://pypi.org/project/filedust/)
# filedust # filedust
@@ -35,4 +34,38 @@ One interactive prompt at the end of the run (unless -y is used).
Shows how much disk space can be freed. Shows how much disk space can be freed.
### Safe by design ### Safe by design
Never touches dotfiles, configs, project files, or anything important. * It ONLY runs within the user's `$HOME`
* Puts the user in control by reading `~/.filedust.conf`
* Never touches dotfiles, configs, project files, or anything important unless you explicitly tell it to.
## Installation
### From PyPI
```
pip install filedust
```
### From this repository
```
git clone https://github.com/guardutils/filedust.git
cd filedust/
poetry install
```
### Custom config
You can download the example config and add your own custom rules:
```
wget -O ~/.filedust.conf https://raw.githubusercontent.com/guardutils/filedust/main/.filedust.conf.example
```
### TAB completion
Add this to your `.bashrc`
```
eval "$(register-python-argcomplete filedust)"
```
And then
```
source ~/.bashrc
```

69
poetry.lock generated
View File

@@ -1,5 +1,19 @@
# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. # This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
[[package]]
name = "argcomplete"
version = "3.6.3"
description = "Bash tab completion for argparse"
optional = false
python-versions = ">=3.8"
files = [
{file = "argcomplete-3.6.3-py3-none-any.whl", hash = "sha256:f5007b3a600ccac5d25bbce33089211dfd49eab4a7718da3f10e3082525a92ce"},
{file = "argcomplete-3.6.3.tar.gz", hash = "sha256:62e8ed4fd6a45864acc8235409461b72c9a28ee785a2011cc5eb78318786c89c"},
]
[package.extras]
test = ["coverage", "mypy", "pexpect", "ruff", "wheel"]
[[package]] [[package]]
name = "cfgv" name = "cfgv"
version = "3.5.0" version = "3.5.0"
@@ -193,6 +207,40 @@ files = [
{file = "iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730"}, {file = "iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730"},
] ]
[[package]]
name = "markdown-it-py"
version = "4.0.0"
description = "Python port of markdown-it. Markdown parsing, done right!"
optional = false
python-versions = ">=3.10"
files = [
{file = "markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147"},
{file = "markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3"},
]
[package.dependencies]
mdurl = ">=0.1,<1.0"
[package.extras]
benchmarking = ["psutil", "pytest", "pytest-benchmark"]
compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "markdown-it-pyrs", "mistletoe (>=1.0,<2.0)", "mistune (>=3.0,<4.0)", "panflute (>=2.3,<3.0)"]
linkify = ["linkify-it-py (>=1,<3)"]
plugins = ["mdit-py-plugins (>=0.5.0)"]
profiling = ["gprof2dot"]
rtd = ["ipykernel", "jupyter_sphinx", "mdit-py-plugins (>=0.5.0)", "myst-parser", "pyyaml", "sphinx", "sphinx-book-theme (>=1.0,<2.0)", "sphinx-copybutton", "sphinx-design"]
testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions", "requests"]
[[package]]
name = "mdurl"
version = "0.1.2"
description = "Markdown URL utilities"
optional = false
python-versions = ">=3.7"
files = [
{file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"},
{file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
]
[[package]] [[package]]
name = "nodeenv" name = "nodeenv"
version = "1.9.1" version = "1.9.1"
@@ -402,6 +450,25 @@ files = [
{file = "pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f"}, {file = "pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f"},
] ]
[[package]]
name = "rich"
version = "13.9.4"
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
optional = false
python-versions = ">=3.8.0"
files = [
{file = "rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90"},
{file = "rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098"},
]
[package.dependencies]
markdown-it-py = ">=2.2.0"
pygments = ">=2.13.0,<3.0.0"
typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.11\""}
[package.extras]
jupyter = ["ipywidgets (>=7.5.1,<9)"]
[[package]] [[package]]
name = "tomli" name = "tomli"
version = "2.3.0" version = "2.3.0"
@@ -488,4 +555,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.10,<4.0" python-versions = ">=3.10,<4.0"
content-hash = "98acd9fd57ec90c98a407b83122fd9c8ed432383e095a47d44e201bf187d3107" content-hash = "676393f654b241eb2ea6f983d589e83daff16e0b0ca2cb76228c0cd5ed447591"

View File

@@ -1,16 +1,18 @@
[tool.poetry] [tool.poetry]
name = "filedust" name = "filedust"
version = "0.1.0" version = "0.3.0"
description = "Opinionated junk cleaner for dev machines (caches, build artifacts, editor backups)." description = "Opinionated junk cleaner for dev machines (caches, build artifacts, editor backups)."
authors = ["Marco D'Aleo <marco@marcodaleo.com>"] authors = ["Marco D'Aleo <marco@marcodaleo.com>"]
license = "GPL-3.0-or-later" license = "GPL-3.0-or-later"
readme = "README.md" readme = "README.md"
homepage = "https://github.com/mdaleo404/filedust" homepage = "https://github.com/guardutils/filedust"
repository = "https://github.com/mdaleo404/filedust" repository = "https://github.com/guardutils/filedust"
packages = [{ include = "filedust", from = "src" }] packages = [{ include = "filedust", from = "src" }]
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = ">=3.10,<4.0" python = ">=3.10,<4.0"
rich = "^13.0.0"
argcomplete = "^3.3.0"
[tool.poetry.scripts] [tool.poetry.scripts]
filedust = "filedust.cli:main" filedust = "filedust.cli:main"

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
import importlib.metadata import importlib.metadata
import argparse import argparse
import argcomplete
import shutil import shutil
from pathlib import Path from pathlib import Path
from typing import List from typing import List
@@ -11,7 +12,7 @@ from rich.table import Table
from rich.prompt import Confirm from rich.prompt import Confirm
from rich import box from rich import box
from .junk import Finding, iter_junk from .junk import Finding, iter_junk, load_user_rules
console = Console() console = Console()
@@ -96,6 +97,11 @@ def build_parser() -> argparse.ArgumentParser:
help="Delete without prompting for confirmation.", help="Delete without prompting for confirmation.",
) )
try:
argcomplete.autocomplete(parser)
except ImportError:
pass
return parser return parser
@@ -162,10 +168,24 @@ def delete_all(findings: List[Finding]) -> int:
def main(argv: list[str] | None = None) -> int: def main(argv: list[str] | None = None) -> int:
print("Looking for junk ...")
parser = build_parser() parser = build_parser()
args = parser.parse_args(argv) args = parser.parse_args(argv)
root = Path(args.path).expanduser() root = Path(args.path).expanduser()
home = Path.home().resolve()
root_resolved = root.resolve()
# Ensure root is inside the user's home directory
try:
root_resolved.relative_to(home)
except ValueError:
console.print(
f"[red]Error:[/] Refusing to operate outside the user's home directory.\n"
f"Requested: {root_resolved}\n"
f"Allowed: {home}"
)
return 1
if not root.exists(): if not root.exists():
console.print(f"[red]Error:[/] Path not found: {root}") console.print(f"[red]Error:[/] Path not found: {root}")
@@ -177,7 +197,8 @@ def main(argv: list[str] | None = None) -> int:
"This may take a while and may require sudo for deletions.[/]" "This may take a while and may require sudo for deletions.[/]"
) )
findings = list(iter_junk(root)) rules = load_user_rules()
findings = list(iter_junk(root, rules=rules))
total_size = compute_total_size(findings) total_size = compute_total_size(findings)
if not findings: if not findings:

View File

@@ -1,12 +1,41 @@
from __future__ import annotations from __future__ import annotations
import os import os
import configparser
from dataclasses import dataclass from dataclasses import dataclass
from fnmatch import fnmatch from fnmatch import fnmatch
from pathlib import Path from pathlib import Path
from typing import Iterable, List from typing import Iterable, List
class UserRules:
    """
    Container for user-supplied glob patterns.

    Attributes:
        include: patterns for paths the user wants reported as removable.
        exclude: patterns for paths the user wants left alone.
    """

    def __init__(self):
        # Every instance starts with its own pair of empty lists.
        self.include: list[str] = list()
        self.exclude: list[str] = list()
def load_user_rules() -> UserRules:
    """
    Load the user's include/exclude patterns from ``~/.filedust.conf``.

    The file is parsed as INI with value-less keys: every key found in the
    ``[include]`` and ``[exclude]`` sections is taken as one pattern.

    Returns:
        A UserRules instance. Both lists stay empty when the file does not
        exist; a list stays empty when its section is missing.

    Raises:
        configparser.Error: if the file exists but cannot be parsed. This is
            deliberately not swallowed, so a broken config never results in
            exclude rules being silently ignored.
    """
    rules = UserRules()
    cfg_path = Path.home() / ".filedust.conf"

    if not cfg_path.exists():
        return rules

    parser = configparser.ConfigParser(allow_no_value=True)
    # ConfigParser lower-cases option names by default. The keys here are
    # path patterns, and fnmatch is case-sensitive on POSIX, so a pattern
    # like "Projects/important/*" must keep its original case to match.
    parser.optionxform = str
    parser.read(cfg_path)

    if parser.has_section("include"):
        rules.include = list(parser["include"].keys())

    if parser.has_section("exclude"):
        rules.exclude = list(parser["exclude"].keys())

    return rules
def matches_any(patterns: list[str], relpath: Path) -> bool:
    """
    Tell whether `relpath` matches at least one glob in `patterns`.

    The path is compared in its POSIX (forward-slash) string form using
    fnmatch. An empty pattern list never matches.
    """
    candidate = relpath.as_posix()
    for pattern in patterns:
        if fnmatch(candidate, pattern):
            return True
    return False
@dataclass @dataclass
class Finding: class Finding:
path: Path path: Path
@@ -23,7 +52,6 @@ JUNK_DIR_NAMES = {
".nox", ".nox",
".tox", ".tox",
".hypothesis", ".hypothesis",
".cache",
".gradle", ".gradle",
".parcel-cache", ".parcel-cache",
".turbo", ".turbo",
@@ -31,7 +59,6 @@ JUNK_DIR_NAMES = {
".vite", ".vite",
".sass-cache", ".sass-cache",
".sass-cache", ".sass-cache",
"build",
"dist", "dist",
} }
@@ -53,6 +80,9 @@ JUNK_FILE_PATTERNS = [
# VCS / system dirs # VCS / system dirs
SKIP_DIR_NAMES = { SKIP_DIR_NAMES = {
".cache",
"build",
".gnupg",
".git", ".git",
".hg", ".hg",
".svn", ".svn",
@@ -62,6 +92,34 @@ SKIP_DIR_NAMES = {
} }
# Resolved home directory, computed once when this module is imported.
HOME = Path.home().resolve()
def safe_exists(path: Path) -> bool | None:
    """
    Probe whether `path` exists without ever raising.

    Returns:
        True or False as reported by `path.exists()`, or None when the
        check itself raised (for example because permission was denied).
    """
    try:
        present = path.exists()
    except Exception:
        # The existence check failed, so the answer is "unknown".
        return None
    return present
def safe_resolve(path: Path, root: Path) -> Path | None:
    """
    Resolve `path` and accept the result only if it lies under `root`.

    Resolution is always non-strict, so the target does not have to exist.

    Returns:
        The resolved path when it is located within `root`; None when the
        resolved path falls outside `root` or when resolving fails for any
        reason (including permission errors).
    """
    try:
        target = path.resolve(strict=False)  # never strict
        # relative_to raises if target is not inside root
        target.relative_to(root)
    except Exception:
        return None
    return target
def is_junk_dir_name(name: str) -> bool: def is_junk_dir_name(name: str) -> bool:
return name in JUNK_DIR_NAMES return name in JUNK_DIR_NAMES
@@ -70,37 +128,140 @@ def is_junk_file_name(name: str) -> bool:
return any(fnmatch(name, pattern) for pattern in JUNK_FILE_PATTERNS) return any(fnmatch(name, pattern) for pattern in JUNK_FILE_PATTERNS)
def iter_junk(root: Path) -> Iterable[Finding]: def iter_junk(root: Path, rules: UserRules | None = None) -> Iterable[Finding]:
""" """
Walk the tree under `root` and yield junk candidates. Safe, fast junk scanner:
- Never follows symlinks.
- Broken symlinks are not automatically junk — they follow normal rules.
- User include/exclude overrides all.
- Built-in junk rules applied only when safe.
- SKIP_DIR_NAMES protected unless user includes.
- Fully contained in $HOME.
- No crashes from PermissionError or unreadable paths.
"""
if rules is None:
rules = UserRules()
filedust:
- Skips known critical / config directories (SKIP_DIR_NAMES).
- Treats known "junk" directory names as removable as a whole.
- Treats known junk file patterns as removable.
"""
root = root.resolve() root = root.resolve()
root_str = str(root)
for dirpath, dirnames, filenames in os.walk(root): for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
dirpath_p = Path(dirpath) dirpath_p = Path(dirpath)
# Prune dirs we never touch at all. # Fast relative path computation
dirnames[:] = [d for d in dirnames if d not in SKIP_DIR_NAMES] if dirpath == root_str:
rel_dir = Path(".")
else:
rel_dir = Path(dirpath[len(root_str) :].lstrip("/"))
# Detect junk directories (and skip walking inside them). # USER EXCLUDE → skip entire subtree
if matches_any(rules.exclude, rel_dir):
dirnames[:] = []
continue
pruned = []
# Handling dirs
for d in dirnames:
child = dirpath_p / d
try:
st = child.lstat()
except Exception:
continue # unreadable
is_symlink = (st.st_mode & 0o170000) == 0o120000
if is_symlink:
# If broken symlink dir treat as file later via filenames (skip descent)
continue
rel_child = rel_dir / d
# User exclude wins
if matches_any(rules.exclude, rel_child):
continue
# SKIP_DIR_NAMES unless user includes
if d in SKIP_DIR_NAMES and not matches_any(
rules.include, rel_child
):
continue
pruned.append(d)
dirnames[:] = pruned
# Detect JUNK dirs
i = 0 i = 0
while i < len(dirnames): while i < len(dirnames):
name = dirnames[i] name = dirnames[i]
if is_junk_dir_name(name): rel_child = rel_dir / name
junk_dir = dirpath_p / name
yield Finding(path=junk_dir, kind="dir", reason="junk_dir") # User include directory
# Remove from walk so we don't descend into it. if matches_any(rules.include, rel_child):
yield Finding(dirpath_p / name, "dir", "user_include")
del dirnames[i] del dirnames[i]
continue continue
# Built-in safe junk dirs
if is_junk_dir_name(name):
yield Finding(dirpath_p / name, "dir", "junk_dir")
del dirnames[i]
continue
i += 1 i += 1
# Now process files. # Handling files (including symlinks)
for fname in filenames: for fname in filenames:
fpath = dirpath_p / fname
rel_file = rel_dir / fname
try:
st = fpath.lstat()
except Exception:
continue
is_symlink = (st.st_mode & 0o170000) == 0o120000
# Handling broken symlinks
if is_symlink:
exists = safe_exists(fpath)
# Permission denied → skip
if exists is None:
continue
# User exclude wins
if matches_any(rules.exclude, rel_file):
continue
# User include wins
if matches_any(rules.include, rel_file):
yield Finding(fpath, "file", "user_include")
continue
# Broken symlink?
if exists is False:
# DO NOT auto-delete — classify like regular file
# Only built-in junk patterns apply
if is_junk_file_name(fname):
yield Finding(fpath, "file", "broken_symlink")
continue
# Valid symlink — NEVER follow; only user-include counts
continue
# Regular files
# User exclude wins
if matches_any(rules.exclude, rel_file):
continue
# User include wins
if matches_any(rules.include, rel_file):
yield Finding(fpath, "file", "user_include")
continue
# Built-in junk patterns (safe ones)
if is_junk_file_name(fname): if is_junk_file_name(fname):
fpath = dirpath_p / fname yield Finding(fpath, "file", "junk_file")
yield Finding(path=fpath, kind="file", reason="junk_file")