Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
258 changes: 221 additions & 37 deletions .claude/hooks/pre_tool_use.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,49 +6,233 @@
import json
import sys
import re
import shlex
from pathlib import Path


def _parse_rm_tokens(tokens):
"""
Given a list of tokens starting after 'rm', determine whether the
combination of flags and operands constitutes a dangerous rm invocation.

Returns True if the command is dangerous, False otherwise.

Uses proper token-level parsing so that path components like
'-enrollment' are never mistaken for flags (fixes #28).
"""
has_recursive = False
has_force = False
operands = []
end_of_options = False

for tok in tokens:
if end_of_options or not tok.startswith('-'):
operands.append(tok)
continue
if tok == '--':
end_of_options = True
continue
if tok in ('--recursive', '-R'):
has_recursive = True
continue
if tok == '--force':
has_force = True
continue
# Collapsed short flags like -rf, -fr, -Rf, -rfi, etc.
if tok.startswith('-') and not tok.startswith('--'):
flag_chars = tok[1:]
if 'r' in flag_chars or 'R' in flag_chars:
has_recursive = True
if 'f' in flag_chars:
has_force = True
continue

# rm -rf (or -r -f) is always dangerous
if has_recursive and has_force:
return True

# rm -r targeting dangerous paths
if has_recursive:
dangerous_path_patterns = [
r'^/$', # Root
r'^/\*$', # Root wildcard
r'^~/?', # Home directory
r'^\$HOME', # $HOME
r'\.\.', # Parent directory traversal
r'^\*$', # Bare wildcard
r'^\.$', # Current directory
]
for operand in operands:
for pattern in dangerous_path_patterns:
if re.search(pattern, operand):
return True

return False


# Shells whose "-c <script>" / "<script file>" forms are inspected.
_SHELL_INTERPRETERS = frozenset({'bash', 'sh', 'zsh', 'dash'})

# Commands that merely run another command; the real command follows them.
_COMMAND_WRAPPERS = frozenset({
    'sudo', 'doas', 'env', 'command', 'exec', 'nohup', 'nice', 'time', 'xargs',
})

# Commands we look for behind a wrapper.
_WRAPPED_TARGETS = _SHELL_INTERPRETERS | {'rm'}

# Leading "NAME=value" environment assignment, e.g. "FOO=bar rm -rf x".
_ENV_ASSIGNMENT_RE = re.compile(r'[A-Za-z_][A-Za-z0-9_]*=')


def is_dangerous_rm_command(command):
    """
    Detect dangerous rm commands using proper shell tokenization.

    Handles:
    - Direct rm invocations (rm -rf /), including behind leading
      NAME=value assignments and wrappers such as sudo/env/xargs
    - Inline shell wrappers: bash -c "rm -rf /", sh -c "rm -rf /" (fixes #4)
    - Script execution: bash script.sh, source script.sh, . script.sh (fixes #4)

    Uses shlex.split instead of regex on the raw string so that path
    components like '-enrollment' are never misidentified as flags (fixes #28).

    Args:
        command: The shell command line to inspect.

    Returns:
        True if any sub-command is judged dangerous, False otherwise
        (including for empty or non-string input).
    """
    if not isinstance(command, str):
        return False

    # Split on shell operators so "echo hi && rm -rf /" is caught, then
    # tokenize each sub-command exactly once and run every check on it.
    for sub in _split_shell_commands(command):
        tokens = _unwrap_command(_tokenize_shell(sub))
        if not tokens:
            continue

        cmd = _basename(tokens[0])

        # Check 1: direct rm invocation
        if cmd == 'rm' and _parse_rm_tokens(tokens[1:]):
            return True

        # Check 2: inline shell -- bash/sh/zsh/dash -c "..."
        if cmd in _SHELL_INTERPRETERS:
            inline_script = _extract_inline_script(tokens[1:])
            if inline_script and is_dangerous_rm_command(inline_script):
                return True

        # Check 3: script file execution (bash script.sh, source script.sh,
        # . script.sh, ./script.sh, /path/to/script.sh)
        script_path = _detect_script_execution(tokens)
        if script_path and _script_contains_dangerous_rm(script_path):
            return True

    return False


def _tokenize_shell(text):
    """Tokenize with shlex; fall back to whitespace split on malformed quoting."""
    try:
        return shlex.split(text)
    except ValueError:
        return text.split()


def _unwrap_command(tokens):
    """
    Drop leading NAME=value assignments and, when the command is a known
    wrapper (sudo, env, xargs, ...), skip ahead to the wrapped rm or shell.

    Wrapper options are not parsed; the first later token whose basename is
    rm or a shell is taken as the real command.  This errs on the side of
    flagging.  If no such token is found the tokens are returned unchanged
    (minus the assignments).
    """
    start = 0
    while start < len(tokens) and _ENV_ASSIGNMENT_RE.match(tokens[start]):
        start += 1
    tokens = tokens[start:]

    if tokens and _basename(tokens[0]) in _COMMAND_WRAPPERS:
        for idx in range(1, len(tokens)):
            if _basename(tokens[idx]) in _WRAPPED_TARGETS:
                return tokens[idx:]
    return tokens


def _basename(path):
"""Return the basename of a command path, e.g. /usr/bin/rm -> rm."""
return path.rsplit('/', 1)[-1] if '/' in path else path


def _split_shell_commands(command):
"""
Naively split a shell command line on ;, &&, ||, and | boundaries.
This is intentionally simple — full shell parsing is not the goal.
"""
# Replace operators with a unique delimiter, then split
result = re.split(r'\s*(?:&&|\|\||[;|])\s*', command)
return [s.strip() for s in result if s.strip()]


def _extract_inline_script(tokens):
"""
Given tokens after bash/sh (e.g. ['-c', 'rm -rf /']), return the
inline script string if -c is present, else None.
"""
for i, tok in enumerate(tokens):
if tok == '-c' and i + 1 < len(tokens):
# Everything after -c is the script (may be one quoted arg
# or multiple args that the shell concatenates with spaces)
return ' '.join(tokens[i + 1:])
return None


def _detect_script_execution(tokens):
    """
    Detect if the command executes a script file.

    Patterns detected:
    - bash/sh/zsh/dash script.sh (without -c)
    - source script.sh
    - . script.sh
    - ./script.sh or /path/to/script.sh (any command containing a slash)

    Args:
        tokens: The tokenized sub-command, command name first.

    Returns:
        The script file path as written on the command line, or None when
        the command does not run a script file.
    """
    if not tokens:
        return None

    cmd = _basename(tokens[0])

    # source / dot-command
    if cmd in ('source', '.') and len(tokens) > 1:
        return tokens[1]

    # bash/sh/zsh/dash script.sh (no -c flag)
    if cmd in ('bash', 'sh', 'zsh', 'dash'):
        end_of_options = False
        skip_next = False
        for tok in tokens[1:]:
            if skip_next:
                # This token is the value of the preceding option.
                skip_next = False
                continue
            if end_of_options or not tok.startswith('-'):
                return tok  # First non-option argument is the script file
            if tok in ('-', '--'):
                end_of_options = True
                continue
            if tok.startswith('--'):
                # Long options that take a file argument of their own.
                if tok in ('--rcfile', '--init-file'):
                    skip_next = True
                continue
            flag_chars = tok[1:]
            if 'c' in flag_chars:
                return None  # Inline command (-c, -lc, ...), handled elsewhere
            if 'o' in flag_chars or 'O' in flag_chars:
                # -o / -O consume the following option name (e.g. pipefail).
                skip_next = True
        return None

    # Direct execution: ./script.sh or /abs/path/script.sh
    if '/' in tokens[0]:
        return tokens[0]

    return None


def _script_contains_dangerous_rm(script_path):
"""
Read a script file and check whether it contains a dangerous rm command.
Returns False if the file doesn't exist or can't be read — fail open
to avoid blocking legitimate commands against non-existent files.
"""
try:
path = Path(script_path).resolve()
if not path.is_file():
return False
# Only inspect reasonably sized files (< 1 MB) to avoid stalls
if path.stat().st_size > 1_000_000:
return False
content = path.read_text(errors='replace')
except (OSError, PermissionError):
return False

# Check each non-comment line
for line in content.splitlines():
stripped = line.strip()
if not stripped or stripped.startswith('#'):
continue
# Recursively use the same detection for each line
if is_dangerous_rm_command(stripped):
return True
return False

def is_env_file_access(tool_name, tool_input):
Expand Down