Compare commits
26 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
c090320e47
|
|||
|
c7a07f8327
|
|||
| 31f91fcd28 | |||
|
5268e5834b
|
|||
|
532cc68fb3
|
|||
| e36228c308 | |||
|
c016a45b82
|
|||
| 860e8a668f | |||
|
5e9a55dcc6
|
|||
|
7daa2175e8
|
|||
|
67a6af2ddb
|
|||
| 25ea1fec63 | |||
|
dc66700f1e
|
|||
| 1eb082fc52 | |||
|
c2f52b8049
|
|||
|
6ebef8e058
|
|||
|
|
fca4c8defc | ||
| 6cdfd2fc44 | |||
|
6c1d2dc430
|
|||
|
|
fa8a194ccb | ||
|
677b14db26
|
|||
|
35f5f2674a
|
|||
|
c75a5246e3
|
|||
|
|
7f2b23b41b | ||
| ae281624da | |||
|
1bebbcfa42
|
23
.filedust.conf.example
Normal file
23
.filedust.conf.example
Normal file
@@ -0,0 +1,23 @@
|
||||
# filedust configuration file
|
||||
# Place at: ~/.filedust.conf
|
||||
#
|
||||
# Use this file to customize cleanup behavior.
|
||||
#
|
||||
# Patterns are matched against paths relative to $HOME
|
||||
# Supports:
|
||||
# * = one path segment
|
||||
# ** = zero or more path segments (recursive)
|
||||
# Matching is case-sensitive
|
||||
|
||||
[exclude]
|
||||
# Add directories or patterns you want filedust to ignore.
|
||||
# Examples:
|
||||
# Projects/important/*
|
||||
|
||||
[include]
|
||||
# Add directories or patterns you want filedust to remove.
|
||||
# Examples:
|
||||
# node_modules
|
||||
# dist
|
||||
# *.tmp
|
||||
# *~
|
||||
@@ -20,10 +20,17 @@ jobs:
|
||||
run: pip install pre-commit
|
||||
|
||||
- name: Run pre-commit hooks
|
||||
uses: pre-commit/action@v3.0.1
|
||||
run: pre-commit run --all-files --color always
|
||||
|
||||
- name: Install Poetry
|
||||
run: |
|
||||
pip install poetry
|
||||
poetry self add poetry-plugin-export
|
||||
|
||||
- name: Install pip-audit
|
||||
run: pip install pip-audit
|
||||
|
||||
- name: Run pip-audit
|
||||
run: pip-audit
|
||||
- name: Audit dependencies (Poetry lockfile)
|
||||
run: |
|
||||
poetry export -f requirements.txt --without-hashes \
|
||||
| pip-audit -r /dev/stdin
|
||||
61
.gitea/workflows/trivy-scan.yml
Normal file
61
.gitea/workflows/trivy-scan.yml
Normal file
@@ -0,0 +1,61 @@
|
||||
---
# Scheduled dependency vulnerability scan: runs Trivy against the checked-out
# repository and posts a JSON summary to a Node-RED webhook when the scan fails.
name: Trivy Scan
on:
  schedule:
    # Minute 17, hour 8, every day (timezone is whatever the scheduler uses).
    - cron: 17 8 * * *
  workflow_dispatch:

jobs:
  security-scan:
    runs-on: running-man
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Trivy scan via Docker
        id: trivy
        # Keep the job going so the notification step can inspect the outcome.
        continue-on-error: true
        # NOTE(review): --volumes-from "$HOSTNAME" presumably shares the job
        # container's workspace with the Trivy container; confirm on the runner.
        run: |
          docker run --rm \
            --volumes-from "$HOSTNAME" \
            aquasec/trivy:latest \
            fs /workspace/guardutils/filedust \
            --scanners vuln \
            --pkg-types library \
            --include-dev-deps \
            --severity MEDIUM,HIGH,CRITICAL \
            --ignore-unfixed \
            --format json \
            --output /workspace/guardutils/filedust/trivy.json \
            --exit-code 1

      - name: Notify Node-RED on vulnerabilities
        if: steps.trivy.outcome == 'failure'
        # Collect every finding into one array first. Using .Results[] directly
        # inside the object would emit one JSON document per result, and
        # iterating a result without a Vulnerabilities key would abort jq.
        run: |
          jq -r '
            [.Results[]?.Vulnerabilities[]?] as $vulns
            | {
                repo: "guardutils/filedust",
                summary: ("Total: " + ($vulns | length | tostring)),
                vulnerabilities: [
                  $vulns[] | {
                    library: .PkgName,
                    cve: .VulnerabilityID,
                    severity: .Severity,
                    installed: .InstalledVersion,
                    fixed: .FixedVersion,
                    title: .Title,
                    url: .PrimaryURL
                  }
                ]
              }
          ' trivy.json \
          | curl -s -X POST https://nodered.sysmd.uk/trivy-alert \
            -H "Content-Type: application/json" \
            --data-binary @-

      - name: Fail workflow if vulnerabilities found
        if: steps.trivy.outcome == 'failure'
        run: exit 1
|
||||
94
README.md
94
README.md
@@ -1,12 +1,13 @@
|
||||
[](LICENCE)
|
||||
[](https://github.com/mdaleo404/filedust/)
|
||||

|
||||

|
||||
[](https://github.com/mdaleo404/filedust/actions)
|
||||
[](https://pypi.org/project/filedust/)
|
||||
[](https://git.sysmd.uk/guardutils/filedust/src/branch/main/LICENCE)
|
||||
[](https://git.sysmd.uk/guardutils/filedust/releases)
|
||||
[](https://git.sysmd.uk/guardutils/filedust/src/branch/main/.pre-commit-config.yaml)
|
||||
|
||||
# filedust
|
||||
|
||||
<div align="center">
|
||||
<img src="filedust.png" alt="filedust logo" width="256" />
|
||||
</div>
|
||||
|
||||
**filedust** is a small, fast, and safe command-line tool that scans your filesystem for obvious junk — things like Python __pycache__ folders, build artifacts, editor backup files, and leftover temporary files — and cleans them up.
|
||||
|
||||
Think of it as “`autoremove` for files.”
|
||||
@@ -35,10 +36,71 @@ One interactive prompt at the end of the run (unless -y is used).
|
||||
Shows how much disk space can be freed.
|
||||
|
||||
### Safe by design
|
||||
Never touches dotfiles, configs, project files, or anything important.
|
||||
* It ONLY runs within the user's `$HOME`
|
||||
|
||||
* Puts the user in control by reading `~/.filedust.conf`
|
||||
|
||||
* Never touches dotfiles, configs, project files, or anything important unless you explicitly tell it to.
|
||||
|
||||
## Installation
|
||||
|
||||
### From GuardUtils package repo
|
||||
|
||||
This is the preferred method of installation.
|
||||
|
||||
### Debian/Ubuntu
|
||||
|
||||
#### 1) Import the GPG key
|
||||
|
||||
```bash
|
||||
sudo mkdir -p /usr/share/keyrings
|
||||
curl -fsSL https://repo.sysmd.uk/guardutils/guardutils.gpg | sudo gpg --dearmor -o /usr/share/keyrings/guardutils.gpg
|
||||
```
|
||||
|
||||
The GPG fingerprint is `0032C71FA6A11EF9567D4434C5C06BD4603C28B1`.
|
||||
|
||||
#### 2) Add the APT source
|
||||
|
||||
```bash
|
||||
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/guardutils.gpg] https://repo.sysmd.uk/guardutils/debian stable main" | sudo tee /etc/apt/sources.list.d/guardutils.list
|
||||
```
|
||||
|
||||
#### 3) Update and install
|
||||
|
||||
```
|
||||
sudo apt update
|
||||
sudo apt install filedust
|
||||
```
|
||||
|
||||
### Fedora/RHEL
|
||||
|
||||
#### 1) Import the GPG key
|
||||
|
||||
```
|
||||
sudo rpm --import https://repo.sysmd.uk/guardutils/guardutils.gpg
|
||||
```
|
||||
|
||||
#### 2) Add the repository configuration
|
||||
|
||||
```
|
||||
sudo tee /etc/yum.repos.d/guardutils.repo > /dev/null << 'EOF'
|
||||
[guardutils]
|
||||
name=GuardUtils Repository
|
||||
baseurl=https://repo.sysmd.uk/guardutils/rpm/$basearch
|
||||
enabled=1
|
||||
gpgcheck=1
|
||||
repo_gpgcheck=1
|
||||
gpgkey=https://repo.sysmd.uk/guardutils/guardutils.gpg
|
||||
EOF
|
||||
```
|
||||
|
||||
#### 3) Update and install
|
||||
|
||||
```
|
||||
sudo dnf upgrade --refresh
|
||||
sudo dnf install filedust
|
||||
```
|
||||
|
||||
### From PyPI
|
||||
```
|
||||
pip install filedust
|
||||
@@ -46,11 +108,17 @@ pip install filedust
|
||||
|
||||
### From this repository
|
||||
```
|
||||
git clone https://github.com/mdaleo404/filedust.git
|
||||
git clone https://git.sysmd.uk/guardutils/filedust.git
|
||||
cd filedust/
|
||||
poetry install
|
||||
```
|
||||
|
||||
### Custom config
|
||||
You can download the example config and add your own custom rules:
|
||||
```
|
||||
wget -O ~/.filedust.conf https://git.sysmd.uk/guardutils/filedust/raw/branch/main/.filedust.conf.example
|
||||
```
|
||||
|
||||
### TAB completion
|
||||
Add this to your `.bashrc`
|
||||
```
|
||||
@@ -60,3 +128,13 @@ And then
|
||||
```
|
||||
source ~/.bashrc
|
||||
```
|
||||
|
||||
## pre-commit
|
||||
This project uses [**pre-commit**](https://pre-commit.com/) to run automatic formatting and security checks before each commit (Black, Bandit, and various safety checks).
|
||||
|
||||
To enable it:
|
||||
```
|
||||
poetry install
|
||||
poetry run pre-commit install
|
||||
```
|
||||
This ensures consistent formatting, catches common issues early, and keeps the codebase clean.
|
||||
|
||||
BIN
filedust.png
Normal file
BIN
filedust.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 35 KiB |
16
poetry.lock
generated
16
poetry.lock
generated
@@ -173,13 +173,13 @@ test = ["pytest (>=6)"]
|
||||
|
||||
[[package]]
|
||||
name = "filelock"
|
||||
version = "3.20.0"
|
||||
version = "3.20.3"
|
||||
description = "A platform independent file lock."
|
||||
optional = false
|
||||
python-versions = ">=3.10"
|
||||
files = [
|
||||
{file = "filelock-3.20.0-py3-none-any.whl", hash = "sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2"},
|
||||
{file = "filelock-3.20.0.tar.gz", hash = "sha256:711e943b4ec6be42e1d4e6690b48dc175c822967466bb31c0c293f34334c13f4"},
|
||||
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
|
||||
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -533,18 +533,18 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "virtualenv"
|
||||
version = "20.35.4"
|
||||
version = "20.36.1"
|
||||
description = "Virtual Python Environment builder"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "virtualenv-20.35.4-py3-none-any.whl", hash = "sha256:c21c9cede36c9753eeade68ba7d523529f228a403463376cf821eaae2b650f1b"},
|
||||
{file = "virtualenv-20.35.4.tar.gz", hash = "sha256:643d3914d73d3eeb0c552cbb12d7e82adf0e504dbf86a3182f8771a153a1971c"},
|
||||
{file = "virtualenv-20.36.1-py3-none-any.whl", hash = "sha256:575a8d6b124ef88f6f51d56d656132389f961062a9177016a50e4f507bbcc19f"},
|
||||
{file = "virtualenv-20.36.1.tar.gz", hash = "sha256:8befb5c81842c641f8ee658481e42641c68b5eab3521d8e092d18320902466ba"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
distlib = ">=0.3.7,<1"
|
||||
filelock = ">=3.12.2,<4"
|
||||
filelock = {version = ">=3.20.1,<4", markers = "python_version >= \"3.10\""}
|
||||
platformdirs = ">=3.9.1,<5"
|
||||
typing-extensions = {version = ">=4.13.2", markers = "python_version < \"3.11\""}
|
||||
|
||||
@@ -555,4 +555,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.10,<4.0"
|
||||
content-hash = "676393f654b241eb2ea6f983d589e83daff16e0b0ca2cb76228c0cd5ed447591"
|
||||
content-hash = "5ffc6940e33919ad5c8107dde30e6203d63a3bb64eaab81013cde2e773964657"
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
[tool.poetry]
|
||||
name = "filedust"
|
||||
version = "0.2.0"
|
||||
version = "0.4.1"
|
||||
description = "Opinionated junk cleaner for dev machines (caches, build artifacts, editor backups)."
|
||||
authors = ["Marco D'Aleo <marco@marcodaleo.com>"]
|
||||
license = "GPL-3.0-or-later"
|
||||
readme = "README.md"
|
||||
homepage = "https://github.com/mdaleo404/filedust"
|
||||
repository = "https://github.com/mdaleo404/filedust"
|
||||
homepage = "https://git.sysmd.uk/guardutils/filedust"
|
||||
repository = "https://git.sysmd.uk/guardutils/filedust"
|
||||
packages = [{ include = "filedust", from = "src" }]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.10,<4.0"
|
||||
rich = "^13.0.0"
|
||||
argcomplete = "^3.3.0"
|
||||
rich = ">=12"
|
||||
argcomplete = ">=2"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
filedust = "filedust.cli:main"
|
||||
|
||||
@@ -12,7 +12,7 @@ from rich.table import Table
|
||||
from rich.prompt import Confirm
|
||||
from rich import box
|
||||
|
||||
from .junk import Finding, iter_junk
|
||||
from .junk import Finding, iter_junk, load_user_rules
|
||||
|
||||
|
||||
console = Console()
|
||||
@@ -172,18 +172,34 @@ def main(argv: list[str] | None = None) -> int:
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
root = Path(args.path).expanduser()
|
||||
home = Path.home().resolve()
|
||||
root_resolved = root.resolve()
|
||||
|
||||
# Ensure root is inside the user's home directory
|
||||
try:
|
||||
root_resolved.relative_to(home)
|
||||
except ValueError:
|
||||
console.print(
|
||||
f"[red]Error:[/] Refusing to operate outside the user's home directory.\n"
|
||||
f"Requested: {root_resolved}\n"
|
||||
f"Allowed: {home}"
|
||||
)
|
||||
return 1
|
||||
|
||||
if not root.exists():
|
||||
console.print(f"[red]Error:[/] Path not found: {root}")
|
||||
return 1
|
||||
|
||||
print("Looking for junk ...")
|
||||
|
||||
if root.resolve() == Path("/"):
|
||||
console.print(
|
||||
"[yellow]Running filedust on the entire filesystem (/). "
|
||||
"This may take a while and may require sudo for deletions.[/]"
|
||||
)
|
||||
|
||||
findings = list(iter_junk(root))
|
||||
rules = load_user_rules()
|
||||
findings = list(iter_junk(root, rules=rules))
|
||||
total_size = compute_total_size(findings)
|
||||
|
||||
if not findings:
|
||||
|
||||
@@ -1,12 +1,80 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import configparser
|
||||
from dataclasses import dataclass
|
||||
from fnmatch import fnmatch
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List
|
||||
|
||||
|
||||
class UserRules:
    """
    Mutable holder for the user's glob patterns.

    Attributes:
        include: patterns the user wants filedust to remove.
        exclude: patterns the user wants filedust to ignore.
    """

    def __init__(self) -> None:
        # Fresh lists per instance so two rule sets never share state.
        self.exclude: list[str] = []
        self.include: list[str] = []
|
||||
|
||||
|
||||
def load_user_rules() -> UserRules:
    """
    Load the user's include/exclude patterns from ``~/.filedust.conf``.

    The file is INI-like: inside ``[include]`` and ``[exclude]`` every
    non-comment line is read as one pattern. Only the option name is kept;
    anything after a ``=`` or ``:`` delimiter is discarded. Pattern case is
    preserved.

    Returns:
        A ``UserRules`` instance. Both lists are empty when the file does not
        exist or defines neither section.

    Raises:
        configparser.Error: if the file cannot be parsed as INI.
    """
    rules = UserRules()
    cfg_path = Path.home() / ".filedust.conf"

    if not cfg_path.exists():
        return rules

    # strict=False: a pattern listed twice (or a section declared twice) is
    # merged instead of aborting the whole run with a Duplicate*Error.
    parser = configparser.ConfigParser(allow_no_value=True, strict=False)
    # Keep option names verbatim; the default transform lower-cases them.
    parser.optionxform = str
    # Explicit encoding so parsing does not depend on the current locale.
    parser.read(cfg_path, encoding="utf-8")

    if parser.has_section("include"):
        rules.include = list(parser["include"])

    if parser.has_section("exclude"):
        rules.exclude = list(parser["exclude"])

    return rules
|
||||
|
||||
|
||||
def matches_any(patterns: list[str], relpath: Path) -> bool:
    """
    Return True if `relpath` matches at least one of `patterns`.

    Rules:
    - Patterns are split on "/" and compared segment by segment
    - * (and other fnmatch wildcards) match within a single path segment
    - ** matches zero or more whole segments
    - Callers in this module pass paths relative to $HOME

    Leading/trailing slashes, doubled slashes and "." segments in a pattern
    are ignored, so "./a//b/" is treated as "a/b". A pattern with no
    remaining segments never matches.
    """
    path_parts = relpath.parts

    for pat in patterns:
        # Path.parts never contains "" or ".", so such pattern segments could
        # never match; drop them instead of silently disabling the rule.
        pat_parts = tuple(
            seg for seg in pat.split("/") if seg not in ("", ".")
        )

        if not pat_parts:
            continue

        if _match_parts(pat_parts, path_parts):
            return True

    return False
|
||||
|
||||
|
||||
def _match_parts(pat: tuple[str, ...], path: tuple[str, ...]) -> bool:
|
||||
"""Recursive glob matcher with ** support."""
|
||||
if not pat:
|
||||
return not path
|
||||
|
||||
if pat[0] == "**":
|
||||
# ** matches zero or more segments
|
||||
return _match_parts(pat[1:], path) or (
|
||||
bool(path) and _match_parts(pat, path[1:])
|
||||
)
|
||||
|
||||
if not path:
|
||||
return False
|
||||
|
||||
if fnmatch(path[0], pat[0]):
|
||||
return _match_parts(pat[1:], path[1:])
|
||||
|
||||
return False
|
||||
|
||||
|
||||
@dataclass
|
||||
class Finding:
|
||||
path: Path
|
||||
@@ -23,7 +91,6 @@ JUNK_DIR_NAMES = {
|
||||
".nox",
|
||||
".tox",
|
||||
".hypothesis",
|
||||
".cache",
|
||||
".gradle",
|
||||
".parcel-cache",
|
||||
".turbo",
|
||||
@@ -31,7 +98,6 @@ JUNK_DIR_NAMES = {
|
||||
".vite",
|
||||
".sass-cache",
|
||||
".sass-cache",
|
||||
"build",
|
||||
"dist",
|
||||
}
|
||||
|
||||
@@ -53,6 +119,9 @@ JUNK_FILE_PATTERNS = [
|
||||
|
||||
# VCS / system dirs
|
||||
SKIP_DIR_NAMES = {
|
||||
".cache",
|
||||
"build",
|
||||
".gnupg",
|
||||
".git",
|
||||
".hg",
|
||||
".svn",
|
||||
@@ -62,6 +131,34 @@ SKIP_DIR_NAMES = {
|
||||
}
|
||||
|
||||
|
||||
HOME = Path.home().resolve()
|
||||
|
||||
|
||||
def safe_exists(path: Path) -> bool | None:
    """
    Check whether `path` exists without ever raising.

    Returns the result of ``path.exists()``, or None when that call raises
    (for example because of a permission error).
    """
    try:
        found = path.exists()
    except Exception:
        return None
    else:
        return found
|
||||
|
||||
|
||||
def safe_resolve(path: Path, root: Path) -> Path | None:
    """
    Resolve symlinks in `path` and confirm the result stays inside `root`.

    Both `path` and `root` are resolved non-strictly (missing components are
    allowed), so a `root` that is relative or itself reached through a
    symlink is compared on equal terms.

    Returns:
        The resolved path when it lies within the resolved root, or None if:
        - resolution escapes the root
        - resolution fails (e.g. a symlink loop)
        - permission is denied
    """
    try:
        resolved = path.resolve(strict=False)  # NEVER strict
        # relative_to() raises ValueError when `resolved` is outside root.
        resolved.relative_to(root.resolve(strict=False))
    except (OSError, RuntimeError, ValueError):
        return None
    return resolved
|
||||
|
||||
|
||||
def is_junk_dir_name(name: str) -> bool:
|
||||
return name in JUNK_DIR_NAMES
|
||||
|
||||
@@ -70,37 +167,140 @@ def is_junk_file_name(name: str) -> bool:
|
||||
return any(fnmatch(name, pattern) for pattern in JUNK_FILE_PATTERNS)
|
||||
|
||||
|
||||
def iter_junk(root: Path) -> Iterable[Finding]:
|
||||
def iter_junk(root: Path, rules: UserRules | None = None) -> Iterable[Finding]:
|
||||
"""
|
||||
Walk the tree under `root` and yield junk candidates.
|
||||
Safe, fast junk scanner:
|
||||
- Never follows symlinks.
|
||||
- Broken symlinks are not automatically junk — they follow normal rules.
|
||||
- User include/exclude overrides all.
|
||||
- Built-in junk rules applied only when safe.
|
||||
- SKIP_DIR_NAMES protected unless user includes.
|
||||
- Fully contained in $HOME.
|
||||
- No crashes from PermissionError or unreadable paths.
|
||||
"""
|
||||
if rules is None:
|
||||
rules = UserRules()
|
||||
|
||||
filedust:
|
||||
- Skips known critical / config directories (SKIP_DIR_NAMES).
|
||||
- Treats known "junk" directory names as removable as a whole.
|
||||
- Treats known junk file patterns as removable.
|
||||
"""
|
||||
root = root.resolve()
|
||||
root_str = str(root)
|
||||
|
||||
for dirpath, dirnames, filenames in os.walk(root):
|
||||
for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
|
||||
dirpath_p = Path(dirpath)
|
||||
|
||||
# Prune dirs we never touch at all.
|
||||
dirnames[:] = [d for d in dirnames if d not in SKIP_DIR_NAMES]
|
||||
try:
|
||||
rel_dir = dirpath_p.resolve().relative_to(HOME)
|
||||
except ValueError:
|
||||
# Should never happen due to earlier checks
|
||||
continue
|
||||
|
||||
# Detect junk directories (and skip walking inside them).
|
||||
# USER EXCLUDE → skip entire subtree
|
||||
if matches_any(rules.exclude, rel_dir):
|
||||
dirnames[:] = []
|
||||
continue
|
||||
|
||||
pruned = []
|
||||
|
||||
# Handling dirs
|
||||
for d in dirnames:
|
||||
child = dirpath_p / d
|
||||
|
||||
try:
|
||||
st = child.lstat()
|
||||
except Exception:
|
||||
continue # unreadable
|
||||
|
||||
is_symlink = (st.st_mode & 0o170000) == 0o120000
|
||||
|
||||
if is_symlink:
|
||||
# If broken symlink dir treat as file later via filenames (skip descent)
|
||||
continue
|
||||
|
||||
rel_child = rel_dir / d
|
||||
|
||||
# User exclude wins
|
||||
if matches_any(rules.exclude, rel_child):
|
||||
continue
|
||||
|
||||
# SKIP_DIR_NAMES unless user includes
|
||||
if d in SKIP_DIR_NAMES and not matches_any(
|
||||
rules.include, rel_child
|
||||
):
|
||||
continue
|
||||
|
||||
pruned.append(d)
|
||||
|
||||
dirnames[:] = pruned
|
||||
|
||||
# Detect JUNK dirs
|
||||
i = 0
|
||||
while i < len(dirnames):
|
||||
name = dirnames[i]
|
||||
if is_junk_dir_name(name):
|
||||
junk_dir = dirpath_p / name
|
||||
yield Finding(path=junk_dir, kind="dir", reason="junk_dir")
|
||||
# Remove from walk so we don't descend into it.
|
||||
rel_child = rel_dir / name
|
||||
|
||||
# User include directory
|
||||
if matches_any(rules.include, rel_child):
|
||||
yield Finding(dirpath_p / name, "dir", "user_include")
|
||||
del dirnames[i]
|
||||
continue
|
||||
|
||||
# Built-in safe junk dirs
|
||||
if is_junk_dir_name(name):
|
||||
yield Finding(dirpath_p / name, "dir", "junk_dir")
|
||||
del dirnames[i]
|
||||
continue
|
||||
|
||||
i += 1
|
||||
|
||||
# Now process files.
|
||||
# Handling files (including symlinks)
|
||||
for fname in filenames:
|
||||
if is_junk_file_name(fname):
|
||||
fpath = dirpath_p / fname
|
||||
yield Finding(path=fpath, kind="file", reason="junk_file")
|
||||
rel_file = rel_dir / fname
|
||||
|
||||
try:
|
||||
st = fpath.lstat()
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
is_symlink = (st.st_mode & 0o170000) == 0o120000
|
||||
|
||||
# Handling broken symlinks
|
||||
if is_symlink:
|
||||
exists = safe_exists(fpath)
|
||||
|
||||
# Permission denied → skip
|
||||
if exists is None:
|
||||
continue
|
||||
|
||||
# User exclude wins
|
||||
if matches_any(rules.exclude, rel_file):
|
||||
continue
|
||||
|
||||
# User include wins
|
||||
if matches_any(rules.include, rel_file):
|
||||
yield Finding(fpath, "file", "user_include")
|
||||
continue
|
||||
|
||||
# Broken symlink?
|
||||
if exists is False:
|
||||
# DO NOT auto-delete — classify like regular file
|
||||
# Only built-in junk patterns apply
|
||||
if is_junk_file_name(fname):
|
||||
yield Finding(fpath, "file", "broken_symlink")
|
||||
continue
|
||||
|
||||
# Valid symlink — NEVER follow; only user-include counts
|
||||
continue
|
||||
|
||||
# Regular files
|
||||
# User exclude wins
|
||||
if matches_any(rules.exclude, rel_file):
|
||||
continue
|
||||
|
||||
# User include wins
|
||||
if matches_any(rules.include, rel_file):
|
||||
yield Finding(fpath, "file", "user_include")
|
||||
continue
|
||||
|
||||
# Built-in junk patterns (safe ones)
|
||||
if is_junk_file_name(fname):
|
||||
yield Finding(fpath, "file", "junk_file")
|
||||
|
||||
Reference in New Issue
Block a user