Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion actions/setup/md/cache_memory_prompt.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<cache-memory>
<cache-memory sanitized="true">
<path>__GH_AW_CACHE_DIR__</path>__GH_AW_CACHE_DESCRIPTION__
<properties>Persistent read/write storage across workflow runs via Actions cache. Last write wins.</properties>__GH_AW_ALLOWED_EXTENSIONS__
<cache-miss-guidance>If you look for data in the cache and do not find any, call the `missing_data` tool with `data_type: "cache_memory"` and `reason: "cache_memory_miss"` to signal that the cache does not contain the expected information.</cache-miss-guidance>
Expand Down
2 changes: 1 addition & 1 deletion actions/setup/md/cache_memory_prompt_multi.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<cache-memory>
<cache-memory sanitized="true">
__GH_AW_CACHE_LIST__
<properties>Persistent read/write storage across workflow runs via Actions cache. Last write wins.</properties>__GH_AW_ALLOWED_EXTENSIONS__
<examples>
Expand Down
2 changes: 1 addition & 1 deletion actions/setup/md/repo_memory_prompt.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<repo-memory>
<repo-memory sanitized="true">
## Repo Memory Available

You have access to a persistent repo memory folder at `__GH_AW_MEMORY_DIR__` where you can read and write files that are stored in a git branch.__GH_AW_MEMORY_DESCRIPTION____GH_AW_WIKI_NOTE__
Expand Down
2 changes: 1 addition & 1 deletion actions/setup/md/repo_memory_prompt_multi.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<repo-memory>
<repo-memory sanitized="true">
## Repo Memory Locations Available

You have access to persistent repo memory folders where you can read and write files that are stored in git branches:
Expand Down
5 changes: 5 additions & 0 deletions actions/setup/sh/clone_repo_memory_branch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,8 @@ fi
# Ensure memory directory exists
# (mkdir -p also succeeds when the directory is already present).
mkdir -p "$MEMORY_DIR"
echo "Repo memory directory ready at $MEMORY_DIR"

# Scan cloned files for prompt injection patterns (ASI-06: Memory & Context Poisoning).
# This runs after the clone so that any injected content is caught before the agent sees it.
# GH_AW_SCAN_DIR is set for this one command only; sanitize_memory.sh reads it as
# the directory to scan.
# NOTE(review): generateRepoMemorySteps also emits a separate workflow step that runs
# the same script against the same directory, so the directory is scanned twice per
# run — confirm that is intended.
GH_AW_SCAN_DIR="$MEMORY_DIR" \
bash "${RUNNER_TEMP}/gh-aw/actions/sanitize_memory.sh"
120 changes: 120 additions & 0 deletions actions/setup/sh/sanitize_memory.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/usr/bin/env bash

# sanitize_memory.sh
# Pre-agent content scanning for prompt injection in memory files.
#
# This script scans text files in a memory directory for known prompt injection
# patterns (system prompt overrides, role-play injections, instruction-ignoring
# directives) per OWASP Agentic Top 10 — ASI-06 (Memory & Context Poisoning).
Comment on lines +1 to +8
#
# Required environment variables:
# GH_AW_SCAN_DIR: Path to the memory directory to scan
#
# Optional environment variables:
# GH_AW_QUARANTINE_DIR: Path to move quarantined files (default: /tmp/gh-aw/quarantine)
#
# Exit codes:
# 0 - Completed (suspicious files were quarantined/reported, non-fatal)
# 1 - Invalid arguments

# Abort on command failure, on use of an unset variable, and on a failure
# anywhere in a pipeline.
set -euo pipefail

# Inputs come from the environment; only the quarantine location has a default.
SCAN_DIR="${GH_AW_SCAN_DIR:-}"
QUARANTINE_DIR="${GH_AW_QUARANTINE_DIR:-/tmp/gh-aw/quarantine}"

# A missing scan directory variable is a usage error (exit 1).
[[ -n "$SCAN_DIR" ]] || {
  echo "ERROR: GH_AW_SCAN_DIR environment variable is required" >&2
  exit 1
}

# A scan directory that does not exist is not an error: there is nothing to scan.
[[ -d "$SCAN_DIR" ]] || {
  echo "Memory scan directory does not exist, skipping: $SCAN_DIR"
  exit 0
}

# Create the quarantine area up front so later moves have a destination.
mkdir -p "$QUARANTINE_DIR"

# Patterns that indicate prompt injection attempts.
# Each pattern is a case-insensitive extended regex (ERE), matched per line.
# We deliberately use simple, high-confidence patterns to minimise false positives.
#
# Optional words are written as "(a |b )?" rather than with an empty alternative
# "(a |b |)". POSIX leaves an empty ERE alternative undefined, and a regex engine
# that rejects it makes grep exit with an error. The scan loop discards grep's
# stderr and treats any non-zero status as "no match", so such a pattern would
# silently never fire. Both spellings match exactly the same lines.
INJECTION_PATTERNS=(
  # System prompt overrides
  "ignore (all |the )?previous instructions"
  "disregard (all |your )?previous instructions"
  "forget (everything|all instructions|your instructions|previous instructions)"
  # The trailing optional group does not narrow these two patterns; it is kept
  # so the set of matched lines stays exactly what it was.
  "you are now (an? |a new )?"
  "act as (an? |a new )?"
  "your (new )?role is"
  "you must now"
  "new instructions:"
  "override (all )?instructions"
  # Role injection markers common in LLM prompt formats
  "^<\|system\|>"
  "^\\[INST\\]"
  "^\\[SYS\\]"
  "^### (System|Instruction|Override)"
  # Embedded XML/tag injection targeting the agent context
  "</?(instructions|system|context|rules)>"
  "<(instructions|system|rules)[ >]"
  # Jailbreak phrases
  "do anything now"
  "jailbreak"
  "developer mode"
  "god mode"
  # Credential / secret exfiltration instructions
  "exfiltrate (the |all |your )?secrets"
  "send (all |the |your )?secrets"
  "leak (the |all |your )?credentials"
)

quarantine_count=0
scan_count=0

# escape_workflow_data STRING
# Prints STRING with '%', CR and LF percent-encoded, the escaping GitHub Actions
# applies to the data part of a workflow command. File names in the memory
# directory are untrusted: without this, a name containing a newline could end
# the current log line and start a "::command::" line of its own.
escape_workflow_data() {
  local data=$1 cr=$'\r' lf=$'\n'
  data=${data//\%/%25}
  data=${data//"$cr"/%0D}
  data=${data//"$lf"/%0A}
  printf '%s' "$data"
}

echo "Content injection scan starting: $SCAN_DIR"

# Walk every regular file under SCAN_DIR. find already excludes .git/ contents,
# and entries are NUL-delimited so names with spaces or newlines survive intact.
while IFS= read -r -d '' file; do
  # Defensive re-check of the .git exclusion in case the find expression changes.
  case "$file" in
    */.git/*) continue ;;
  esac

  # Skip binary files using the 'file' command heuristic when it is available;
  # without it every file is scanned.
  if command -v file >/dev/null 2>&1; then
    mime_type="$(file --brief --mime-type "$file" 2>/dev/null || true)"
    case "$mime_type" in
      text/*) ;;            # text file — proceed
      application/json) ;;  # JSON is text
      application/xml) ;;   # XML is text
      *) continue ;;        # anything else is treated as binary — skip
    esac
  fi

  scan_count=$((scan_count + 1))
  matched_pattern=""

  # Stop at the first pattern that matches; one hit is enough to quarantine.
  for pattern in "${INJECTION_PATTERNS[@]}"; do
    if grep -qiE -e "$pattern" -- "$file" 2>/dev/null; then
      matched_pattern="$pattern"
      break
    fi
  done

  if [ -n "$matched_pattern" ]; then
    # Quote the prefix so glob characters in SCAN_DIR are taken literally.
    rel_path="${file#"$SCAN_DIR"/}"
    # Preserve the relative directory structure in the quarantine so that
    # the original location can be traced back easily.
    quarantine_target="$QUARANTINE_DIR/$rel_path"
    quarantine_target_dir="$(dirname "$quarantine_target")"
    mkdir -p "$quarantine_target_dir"
    # Append a timestamp to the filename to avoid collisions across runs
    # (falls back to whole seconds if the first date invocation fails).
    quarantine_target="${quarantine_target}.$(date +%s%N 2>/dev/null || date +%s)"
    # Only escaped names are written to the log; the real paths are used for mv.
    safe_rel_path="$(escape_workflow_data "$rel_path")"
    safe_target="$(escape_workflow_data "$quarantine_target")"
    echo "::warning::Memory file quarantined (injection pattern detected): $safe_rel_path (pattern: $matched_pattern)"
    echo "Quarantining suspicious file: $safe_rel_path -> $safe_target"
    mv -- "$file" "$quarantine_target"
    quarantine_count=$((quarantine_count + 1))
  fi
done < <(find "$SCAN_DIR" -not -path '*/.git/*' -type f -print0 2>/dev/null)

echo "Content injection scan complete: scanned=${scan_count} quarantined=${quarantine_count} dir=${SCAN_DIR}"
6 changes: 6 additions & 0 deletions actions/setup/sh/setup_cache_memory_git.sh
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,9 @@ if [ -n "${GH_AW_ALLOWED_EXTENSIONS:-}" ]; then
done < <(find . -not -path './.git/*' -type f -print0)
echo "Pre-agent sanitization complete: removed ${removed} file(s) with disallowed extensions"
fi

# 4. Scan remaining text files for prompt injection patterns (ASI-06).
# Any file whose content matches a known injection pattern is quarantined before
# the agent can read it, reducing exposure to Memory & Context Poisoning attacks.
# GH_AW_SCAN_DIR is set for this one command only and tells sanitize_memory.sh
# which directory to scan.
GH_AW_SCAN_DIR="$CACHE_DIR" \
bash "${RUNNER_TEMP}/gh-aw/actions/sanitize_memory.sh"
31 changes: 31 additions & 0 deletions pkg/workflow/memory_sanitizer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package workflow

import (
"fmt"
"strings"

"github.com/github/gh-aw/pkg/logger"
)

// memorySanitizerLog is the package-level logger used by the memory sanitizer
// code in this file; it is created with the name "workflow:memory_sanitizer".
var memorySanitizerLog = logger.New("workflow:memory_sanitizer")

// sanitizeMemoryScriptName is the filename of the runtime memory sanitization script.
// The script scans memory directories for prompt injection patterns per ASI-06.
const sanitizeMemoryScriptName = "sanitize_memory.sh"

// generateRepoMemorySanitizationStep appends to builder a workflow step that
// runs the memory sanitization script against memoryDir, so the repo-memory
// directory is scanned for prompt injection content after the clone step.
// This addresses OWASP Agentic Top 10 ASI-06 (Memory & Context Poisoning).
//
// The step name says "wiki-memory" when memory.Wiki is set and "repo-memory"
// otherwise, and always includes memory.ID. memoryDir is written verbatim as
// the value of the step's GH_AW_SCAN_DIR environment variable.
func generateRepoMemorySanitizationStep(builder *strings.Builder, memory RepoMemoryEntry, memoryDir string) {
	memorySanitizerLog.Printf("Generating repo-memory content scan step for memory id=%s dir=%s", memory.ID, memoryDir)

	kind := "repo-memory"
	if memory.Wiki {
		kind = "wiki-memory"
	}

	fmt.Fprintf(builder, " - name: Scan %s for prompt injection (%s)\n", kind, memory.ID)
	builder.WriteString(" env:\n")
	fmt.Fprintf(builder, " GH_AW_SCAN_DIR: %s\n", memoryDir)
	// Build the script path from the shared constant so the step and the
	// constant cannot drift apart.
	fmt.Fprintf(builder, " run: bash \"${RUNNER_TEMP}/gh-aw/actions/%s\"\n", sanitizeMemoryScriptName)
}
181 changes: 181 additions & 0 deletions pkg/workflow/memory_sanitizer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
//go:build !integration

package workflow

import (
"os"
"path/filepath"
"strings"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// promptFileDir is the relative path to the actions/setup/md directory where
// runtime prompt files live. It is resolved against the directory the tests
// run in.
const promptFileDir = "../../actions/setup/md"

// readPromptFile returns the contents of filename under promptFileDir as a
// string. The test fails immediately if the file cannot be read.
func readPromptFile(t *testing.T, filename string) string {
	t.Helper()

	data, err := os.ReadFile(filepath.Join(promptFileDir, filename))
	require.NoError(t, err, "Should be able to read prompt file %s", filename)

	return string(data)
}

// TestGenerateRepoMemorySanitizationStep_DefaultMemory checks the step emitted
// for a standard (non-wiki) memory with ID "default": the step name, the
// GH_AW_SCAN_DIR value, and the reference to the sanitization script.
func TestGenerateRepoMemorySanitizationStep_DefaultMemory(t *testing.T) {
	const dir = "/tmp/gh-aw/repo-memory/default"
	entry := RepoMemoryEntry{ID: "default", BranchName: "memory/test", Wiki: false}

	var sb strings.Builder
	generateRepoMemorySanitizationStep(&sb, entry, dir)
	got := sb.String()

	expectations := []struct{ fragment, msg string }{
		{"- name: Scan repo-memory for prompt injection (default)", "Should emit a named scan step for default memory"},
		{"GH_AW_SCAN_DIR: /tmp/gh-aw/repo-memory/default", "Should set GH_AW_SCAN_DIR to the memory directory"},
		{"sanitize_memory.sh", "Should invoke sanitize_memory.sh"},
	}
	for _, want := range expectations {
		assert.Contains(t, got, want.fragment, want.msg)
	}
}

// TestGenerateRepoMemorySanitizationStep_WikiMemory checks that a wiki memory
// produces a step named with the "wiki-memory" prefix and the expected
// GH_AW_SCAN_DIR value.
func TestGenerateRepoMemorySanitizationStep_WikiMemory(t *testing.T) {
	const dir = "/tmp/gh-aw/repo-memory/docs"
	entry := RepoMemoryEntry{ID: "docs", Wiki: true}

	var sb strings.Builder
	generateRepoMemorySanitizationStep(&sb, entry, dir)
	got := sb.String()

	expectations := []struct{ fragment, msg string }{
		{"- name: Scan wiki-memory for prompt injection (docs)", "Should use wiki-memory prefix for wiki memories"},
		{"GH_AW_SCAN_DIR: /tmp/gh-aw/repo-memory/docs", "Should set GH_AW_SCAN_DIR to the memory directory"},
	}
	for _, want := range expectations {
		assert.Contains(t, got, want.fragment, want.msg)
	}
}

// TestGenerateRepoMemorySanitizationStep_NamedMemory checks that a non-default
// memory ID appears in the step name and that GH_AW_SCAN_DIR points at the
// named memory directory.
func TestGenerateRepoMemorySanitizationStep_NamedMemory(t *testing.T) {
	const dir = "/tmp/gh-aw/repo-memory/research"
	entry := RepoMemoryEntry{ID: "research", Wiki: false}

	var sb strings.Builder
	generateRepoMemorySanitizationStep(&sb, entry, dir)
	got := sb.String()

	expectations := []struct{ fragment, msg string }{
		{"- name: Scan repo-memory for prompt injection (research)", "Should include memory ID in step name"},
		{"GH_AW_SCAN_DIR: /tmp/gh-aw/repo-memory/research", "Should set GH_AW_SCAN_DIR to the named memory directory"},
	}
	for _, want := range expectations {
		assert.Contains(t, got, want.fragment, want.msg)
	}
}

// TestSanitizeMemoryScriptNameConstant pins the value of the script name
// constant to the filename of the sanitization script.
func TestSanitizeMemoryScriptNameConstant(t *testing.T) {
	const want = "sanitize_memory.sh"

	assert.Equal(t, want, sanitizeMemoryScriptName,
		"Script name constant should match the deployed script filename")
}

// TestRepoMemoryPromptHasSanitizedAttribute verifies that the repo-memory prompt
// boundary markers include the sanitized="true" attribute per ASI-06.
func TestRepoMemoryPromptHasSanitizedAttribute(t *testing.T) {
t.Run("single repo memory prompt file has sanitized attribute", func(t *testing.T) {
content := readPromptFile(t, repoMemoryPromptFile)
assert.Contains(t, content, `<repo-memory sanitized="true">`,
"repo_memory_prompt.md should have sanitized=\"true\" attribute on boundary marker (ASI-06)")
})

t.Run("multi repo memory prompt file has sanitized attribute", func(t *testing.T) {
content := readPromptFile(t, repoMemoryPromptMultiFile)
assert.Contains(t, content, `<repo-memory sanitized="true">`,
"repo_memory_prompt_multi.md should have sanitized=\"true\" attribute on boundary marker (ASI-06)")
})

t.Run("single default repo memory prompt section references correct file", func(t *testing.T) {
config := &RepoMemoryConfig{
Memories: []RepoMemoryEntry{
{
ID: "default",
BranchName: "memory/test",
},
},
}

section := buildRepoMemoryPromptSection(config)
require.NotNil(t, section, "Should return a prompt section")
assert.Equal(t, repoMemoryPromptFile, section.Content,
"Should reference the repo memory prompt file")
})

t.Run("multi repo memory prompt section references correct file", func(t *testing.T) {
config := &RepoMemoryConfig{
Memories: []RepoMemoryEntry{
{ID: "default", BranchName: "memory/test"},
{ID: "extra", BranchName: "memory/extra"},
},
}

section := buildRepoMemoryPromptSection(config)
require.NotNil(t, section, "Should return a prompt section")
assert.Equal(t, repoMemoryPromptMultiFile, section.Content,
"Should reference the multi repo memory prompt file")
})
}

// TestCacheMemoryPromptHasSanitizedAttribute verifies that the cache-memory prompt
// boundary markers include the sanitized="true" attribute per ASI-06.
func TestCacheMemoryPromptHasSanitizedAttribute(t *testing.T) {
t.Run("single cache memory prompt file has sanitized attribute", func(t *testing.T) {
content := readPromptFile(t, cacheMemoryPromptFile)
assert.Contains(t, content, `<cache-memory sanitized="true">`,
"cache_memory_prompt.md should have sanitized=\"true\" attribute on boundary marker (ASI-06)")
})

t.Run("multi cache memory prompt file has sanitized attribute", func(t *testing.T) {
content := readPromptFile(t, cacheMemoryPromptMultiFile)
assert.Contains(t, content, `<cache-memory sanitized="true">`,
"cache_memory_prompt_multi.md should have sanitized=\"true\" attribute on boundary marker (ASI-06)")
})

t.Run("single default cache memory prompt section references correct file", func(t *testing.T) {
config := &CacheMemoryConfig{
Caches: []CacheMemoryEntry{
{ID: "default"},
},
}

section := buildCacheMemoryPromptSection(config)
require.NotNil(t, section, "Should return a prompt section")
assert.Equal(t, cacheMemoryPromptFile, section.Content,
"Should reference the cache memory prompt file")
})

t.Run("multi cache memory prompt section references correct file", func(t *testing.T) {
config := &CacheMemoryConfig{
Caches: []CacheMemoryEntry{
{ID: "default"},
{ID: "session"},
},
}

section := buildCacheMemoryPromptSection(config)
require.NotNil(t, section, "Should return a prompt section")
assert.Equal(t, cacheMemoryPromptMultiFile, section.Content,
"Should reference the multi cache memory prompt file")
})
}
6 changes: 6 additions & 0 deletions pkg/workflow/repo_memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,12 @@ func generateRepoMemorySteps(builder *strings.Builder, data *WorkflowData) {
fmt.Fprintf(builder, " MEMORY_DIR: %s\n", memoryDir)
fmt.Fprintf(builder, " CREATE_ORPHAN: %t\n", memory.CreateOrphan)
builder.WriteString(" run: bash \"${RUNNER_TEMP}/gh-aw/actions/clone_repo_memory_branch.sh\"\n")

// Step 2: Scan the cloned memory for prompt injection (ASI-06).
// The sanitize_memory.sh script is also invoked directly by the clone script,
// but we emit an explicit step here so that the scan appears in the workflow
// summary and its output is auditable independently of the clone step.
generateRepoMemorySanitizationStep(builder, memory, memoryDir)
Comment on lines +548 to +553
}
}

Expand Down
Loading