github · Copilot · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026
diff --git a/actions/setup/md/cache_memory_prompt.md b/actions/setup/md/cache_memory_prompt.md
@@ -1,4 +1,4 @@
-<cache-memory>
+<cache-memory sanitized="true">
 <path>__GH_AW_CACHE_DIR__</path>__GH_AW_CACHE_DESCRIPTION__
 <properties>Persistent read/write storage across workflow runs via Actions cache. Last write wins.</properties>__GH_AW_ALLOWED_EXTENSIONS__
 <cache-miss-guidance>If you look for data in the cache and do not find any, call the `missing_data` tool with `data_type: "cache_memory"` and `reason: "cache_memory_miss"` to signal that the cache does not contain the expected information.</cache-miss-guidance>

diff --git a/actions/setup/md/cache_memory_prompt_multi.md b/actions/setup/md/cache_memory_prompt_multi.md
@@ -1,4 +1,4 @@
-<cache-memory>
+<cache-memory sanitized="true">
 __GH_AW_CACHE_LIST__
 <properties>Persistent read/write storage across workflow runs via Actions cache. Last write wins.</properties>__GH_AW_ALLOWED_EXTENSIONS__
 <examples>

diff --git a/actions/setup/md/repo_memory_prompt.md b/actions/setup/md/repo_memory_prompt.md
@@ -1,4 +1,4 @@
-<repo-memory>
+<repo-memory sanitized="true">
 ## Repo Memory Available
 
 You have access to a persistent repo memory folder at `__GH_AW_MEMORY_DIR__` where you can read and write files that are stored in a git branch.__GH_AW_MEMORY_DESCRIPTION____GH_AW_WIKI_NOTE__

diff --git a/actions/setup/md/repo_memory_prompt_multi.md b/actions/setup/md/repo_memory_prompt_multi.md
@@ -1,4 +1,4 @@
-<repo-memory>
+<repo-memory sanitized="true">
 ## Repo Memory Locations Available
 
 You have access to persistent repo memory folders where you can read and write files that are stored in git branches:

diff --git a/actions/setup/sh/clone_repo_memory_branch.sh b/actions/setup/sh/clone_repo_memory_branch.sh
@@ -81,3 +81,8 @@ fi
 # Ensure memory directory exists
 mkdir -p "$MEMORY_DIR"
 echo "Repo memory directory ready at $MEMORY_DIR"
+
+# Prompt-injection scan (ASI-06: Memory & Context Poisoning) over the freshly cloned memory.
+# It has to run after the clone so that poisoned files are moved aside before the agent reads them.
+# The directory to scan is handed to the script through GH_AW_SCAN_DIR.
+GH_AW_SCAN_DIR="$MEMORY_DIR" bash "${RUNNER_TEMP}/gh-aw/actions/sanitize_memory.sh"
diff --git a/actions/setup/sh/sanitize_memory.sh b/actions/setup/sh/sanitize_memory.sh
@@ -0,0 +1,120 @@
+#!/usr/bin/env bash
+
+# sanitize_memory.sh
+# Pre-agent content scanning for prompt injection in memory files.
+#
+# This script scans text files in a memory directory for known prompt injection
+# patterns (system prompt overrides, role-play injections, instruction-ignoring
+# directives) per OWASP Agentic Top 10 — ASI-06 (Memory & Context Poisoning).
+#
+# Required environment variables:
+#   GH_AW_SCAN_DIR:  Path to the memory directory to scan
+#
+# Optional environment variables:
+#   GH_AW_QUARANTINE_DIR:  Path to move quarantined files (default: /tmp/gh-aw/quarantine)
+#
+# Exit codes:
+#   0 - Completed (suspicious files were quarantined/reported, non-fatal)
+#   1 - Invalid arguments
+
+set -euo pipefail
+
+SCAN_DIR="${GH_AW_SCAN_DIR:-}"
+QUARANTINE_DIR="${GH_AW_QUARANTINE_DIR:-/tmp/gh-aw/quarantine}"
+
+if [ -z "$SCAN_DIR" ]; then
+  echo "ERROR: GH_AW_SCAN_DIR environment variable is required" >&2
+  exit 1
+fi
+
+if [ ! -d "$SCAN_DIR" ]; then
+  echo "Memory scan directory does not exist, skipping: $SCAN_DIR"
+  exit 0
+fi
+
+mkdir -p "$QUARANTINE_DIR"
+
+# Case-insensitive extended regexes indicating prompt injection attempts; deliberately simple and
+# high-confidence to minimise false positives. Optional words are written "( ... )?" rather than as an
+# empty alternative "(a |)": POSIX leaves empty ERE alternatives undefined, so some greps reject them.
+INJECTION_PATTERNS=(
+  # System prompt overrides
+  "ignore (all |the )?previous instructions"
+  "disregard (all |your )?previous instructions"
+  "forget (everything|all instructions|your instructions|previous instructions)"
+  "you are now (an? |a new )?"
+  "act as (an? |a new )?"
+  "your (new )?role is"
+  "you must now"
+  "new instructions:"
+  "override (all )?instructions"
+  # Role injection markers common in LLM prompt formats
+  "^<\|system\|>"
+  "^\\[INST\\]"
+  "^\\[SYS\\]"
+  "^### (System|Instruction|Override)"
+  # Embedded XML/tag injection targeting the agent context
+  "</?(instructions|system|context|rules)>"
+  "<(instructions|system|rules)[ >]"
+  # Jailbreak phrases
+  "do anything now"
+  "jailbreak"
+  "developer mode"
+  "god mode"
+  # Credential / secret exfiltration instructions
+  "exfiltrate (the |all |your )?secrets"
+  "send (all |the |your )?secrets"
+  "leak (the |all |your )?credentials"
+)
+
+quarantine_count=0
+scan_count=0
+
+echo "Content injection scan starting: $SCAN_DIR"
+
+# Scan only text-like files (skip binary files and .git/)
+while IFS= read -r -d '' file; do
+  # Skip .git directory contents
+  case "$file" in
+    */.git/*) continue ;;
+    ./.git/*) continue ;;
+  esac
+
+  # Skip binary files using 'file' command heuristic: if mime type is not text/* skip it
+  if command -v file >/dev/null 2>&1; then
+    mime_type="$(file --brief --mime-type "$file" 2>/dev/null || true)"
+    case "$mime_type" in
+      text/*) ;;            # text file — proceed
+      application/json) ;;  # JSON is text
+      application/xml) ;;   # XML is text
+      *) continue ;;        # binary — skip
+    esac
+  fi
+
+  scan_count=$((scan_count + 1))
+  matched_pattern=""
+
+  for pattern in "${INJECTION_PATTERNS[@]}"; do
+    if grep -qiEe "$pattern" "$file" 2>/dev/null; then
+      matched_pattern="$pattern"
+      break
+    fi
+  done
+
+  if [ -n "$matched_pattern" ]; then
+    rel_path="${file#$SCAN_DIR/}"
+    # Preserve the relative directory structure in the quarantine so that
+    # the original location can be traced back easily.
+    quarantine_target="$QUARANTINE_DIR/$rel_path"
+    quarantine_target_dir="$(dirname "$quarantine_target")"
+    mkdir -p "$quarantine_target_dir"
+    # Append a nanosecond timestamp to the filename to avoid collisions across runs.
+    quarantine_target="${quarantine_target}.$(date +%s%N 2>/dev/null || date +%s)"
+    echo "::warning::Memory file quarantined (injection pattern detected): $rel_path (pattern: $matched_pattern)"
+    echo "Quarantining suspicious file: $rel_path -> $quarantine_target"
+    mv "$file" "$quarantine_target"
+    quarantine_count=$((quarantine_count + 1))
+  fi
+done < <(find "$SCAN_DIR" -not -path '*/.git/*' -type f -print0 2>/dev/null)
+
+echo "Content injection scan complete: scanned=${scan_count} quarantined=${quarantine_count} dir=${SCAN_DIR}"
diff --git a/actions/setup/sh/setup_cache_memory_git.sh b/actions/setup/sh/setup_cache_memory_git.sh
@@ -167,3 +167,9 @@ if [ -n "${GH_AW_ALLOWED_EXTENSIONS:-}" ]; then
   done < <(find . -not -path './.git/*' -type f -print0)
   echo "Pre-agent sanitization complete: removed ${removed} file(s) with disallowed extensions"
 fi
+
+# 4. Scan remaining text files for prompt injection patterns (ASI-06).
+# Files whose content matches a known injection pattern are moved to quarantine
+# before the agent can read them, guarding against Memory & Context Poisoning.
+# The directory to scan is passed to the script via GH_AW_SCAN_DIR.
+GH_AW_SCAN_DIR="$CACHE_DIR" bash "${RUNNER_TEMP}/gh-aw/actions/sanitize_memory.sh"
diff --git a/pkg/workflow/memory_sanitizer.go b/pkg/workflow/memory_sanitizer.go
@@ -0,0 +1,31 @@
+package workflow
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/github/gh-aw/pkg/logger"
+)
+
+// memorySanitizerLog is the package-level logger for the memory sanitizer code, created under the "workflow:memory_sanitizer" name.
+var memorySanitizerLog = logger.New("workflow:memory_sanitizer")
+
+// sanitizeMemoryScriptName is the filename of the runtime memory sanitization script,
+// which scans memory directories for prompt injection patterns (OWASP Agentic ASI-06).
+const sanitizeMemoryScriptName = "sanitize_memory.sh"
+
+// generateRepoMemorySanitizationStep appends one workflow step to builder that runs the
+// memory sanitization script against memoryDir, passed to it via GH_AW_SCAN_DIR. The step
+// name says "wiki-memory" or "repo-memory" depending on memory.Wiki (OWASP Agentic ASI-06).
+func generateRepoMemorySanitizationStep(builder *strings.Builder, memory RepoMemoryEntry, memoryDir string) {
+	memorySanitizerLog.Printf("Generating repo-memory content scan step for memory id=%s dir=%s", memory.ID, memoryDir)
+	kind := "repo-memory"
+	if memory.Wiki {
+		kind = "wiki-memory"
+	}
+	fmt.Fprintf(builder, "      - name: Scan %s for prompt injection (%s)\n", kind, memory.ID)
+	builder.WriteString("        env:\n")
+	fmt.Fprintf(builder, "          GH_AW_SCAN_DIR: %s\n", memoryDir)
+	// Build the command from the shared constant so it cannot drift from the script name.
+	fmt.Fprintf(builder, "        run: bash \"${RUNNER_TEMP}/gh-aw/actions/%s\"\n", sanitizeMemoryScriptName)
+}
diff --git a/pkg/workflow/memory_sanitizer_test.go b/pkg/workflow/memory_sanitizer_test.go
@@ -0,0 +1,181 @@
+//go:build !integration
+
+package workflow
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// promptFileDir locates the actions/setup/md directory, which holds the runtime
+// prompt files, relative to this test file.
+const promptFileDir = "../../actions/setup/md"
+
+// readPromptFile returns the contents of the named prompt file from promptFileDir,
+// failing the test immediately when the file cannot be read.
+func readPromptFile(t *testing.T, filename string) string {
+	t.Helper()
+	data, readErr := os.ReadFile(filepath.Join(promptFileDir, filename))
+	require.NoError(t, readErr, "Should be able to read prompt file %s", filename)
+	return string(data)
+}
+
+// TestGenerateRepoMemorySanitizationStep_DefaultMemory checks the step emitted for
+// a regular (non-wiki) memory whose ID is "default".
+func TestGenerateRepoMemorySanitizationStep_DefaultMemory(t *testing.T) {
+	const dir = "/tmp/gh-aw/repo-memory/default"
+	entry := RepoMemoryEntry{
+		ID:         "default",
+		BranchName: "memory/test",
+		Wiki:       false,
+	}
+
+	var sb strings.Builder
+	generateRepoMemorySanitizationStep(&sb, entry, dir)
+	got := sb.String()
+
+	assert.Contains(t, got, "- name: Scan repo-memory for prompt injection (default)",
+		"Should emit a named scan step for default memory")
+	assert.Contains(t, got, "GH_AW_SCAN_DIR: /tmp/gh-aw/repo-memory/default",
+		"Should set GH_AW_SCAN_DIR to the memory directory")
+	assert.Contains(t, got, "sanitize_memory.sh",
+		"Should invoke sanitize_memory.sh")
+}
+
+// TestGenerateRepoMemorySanitizationStep_WikiMemory checks that a wiki memory
+// yields a step whose name carries the wiki-memory prefix.
+func TestGenerateRepoMemorySanitizationStep_WikiMemory(t *testing.T) {
+	const dir = "/tmp/gh-aw/repo-memory/docs"
+	entry := RepoMemoryEntry{
+		ID:   "docs",
+		Wiki: true,
+	}
+
+	var sb strings.Builder
+	generateRepoMemorySanitizationStep(&sb, entry, dir)
+	got := sb.String()
+
+	assert.Contains(t, got, "- name: Scan wiki-memory for prompt injection (docs)",
+		"Should use wiki-memory prefix for wiki memories")
+	assert.Contains(t, got, "GH_AW_SCAN_DIR: /tmp/gh-aw/repo-memory/docs",
+		"Should set GH_AW_SCAN_DIR to the memory directory")
+}
+
+// TestGenerateRepoMemorySanitizationStep_NamedMemory checks that a memory ID other
+// than "default" shows up in the generated step name.
+func TestGenerateRepoMemorySanitizationStep_NamedMemory(t *testing.T) {
+	const dir = "/tmp/gh-aw/repo-memory/research"
+	entry := RepoMemoryEntry{
+		ID:   "research",
+		Wiki: false,
+	}
+
+	var sb strings.Builder
+	generateRepoMemorySanitizationStep(&sb, entry, dir)
+	got := sb.String()
+
+	assert.Contains(t, got, "- name: Scan repo-memory for prompt injection (research)",
+		"Should include memory ID in step name")
+	assert.Contains(t, got, "GH_AW_SCAN_DIR: /tmp/gh-aw/repo-memory/research",
+		"Should set GH_AW_SCAN_DIR to the named memory directory")
+}
+
+// TestSanitizeMemoryScriptNameConstant verifies the constant's value and that a script with that name exists in actions/setup/sh.
+func TestSanitizeMemoryScriptNameConstant(t *testing.T) {
+	assert.Equal(t, "sanitize_memory.sh", sanitizeMemoryScriptName, "Script name constant should match the deployed script filename")
+	assert.FileExists(t, filepath.Join("../../actions/setup/sh", sanitizeMemoryScriptName), "Script named by the constant should exist in actions/setup/sh")
+}
+
+// TestRepoMemoryPromptHasSanitizedAttribute verifies that the repo-memory prompt
+// boundary markers include the sanitized="true" attribute per ASI-06.
+func TestRepoMemoryPromptHasSanitizedAttribute(t *testing.T) {
+	t.Run("single repo memory prompt file has sanitized attribute", func(t *testing.T) {
+		content := readPromptFile(t, repoMemoryPromptFile)
+		assert.Contains(t, content, `<repo-memory sanitized="true">`,
+			"repo_memory_prompt.md should have sanitized=\"true\" attribute on boundary marker (ASI-06)")
+	})
+
+	t.Run("multi repo memory prompt file has sanitized attribute", func(t *testing.T) {
+		content := readPromptFile(t, repoMemoryPromptMultiFile)
+		assert.Contains(t, content, `<repo-memory sanitized="true">`,
+			"repo_memory_prompt_multi.md should have sanitized=\"true\" attribute on boundary marker (ASI-06)")
+	})
+
+	t.Run("single default repo memory prompt section references correct file", func(t *testing.T) {
+		config := &RepoMemoryConfig{
+			Memories: []RepoMemoryEntry{
+				{
+					ID:         "default",
+					BranchName: "memory/test",
+				},
+			},
+		}
+
+		section := buildRepoMemoryPromptSection(config)
+		require.NotNil(t, section, "Should return a prompt section")
+		assert.Equal(t, repoMemoryPromptFile, section.Content,
+			"Should reference the repo memory prompt file")
+	})
+
+	t.Run("multi repo memory prompt section references correct file", func(t *testing.T) {
+		config := &RepoMemoryConfig{
+			Memories: []RepoMemoryEntry{
+				{ID: "default", BranchName: "memory/test"},
+				{ID: "extra", BranchName: "memory/extra"},
+			},
+		}
+
+		section := buildRepoMemoryPromptSection(config)
+		require.NotNil(t, section, "Should return a prompt section")
+		assert.Equal(t, repoMemoryPromptMultiFile, section.Content,
+			"Should reference the multi repo memory prompt file")
+	})
+}
+
+// TestCacheMemoryPromptHasSanitizedAttribute verifies that the cache-memory prompt
+// boundary markers include the sanitized="true" attribute per ASI-06.
+func TestCacheMemoryPromptHasSanitizedAttribute(t *testing.T) {
+	t.Run("single cache memory prompt file has sanitized attribute", func(t *testing.T) {
+		content := readPromptFile(t, cacheMemoryPromptFile)
+		assert.Contains(t, content, `<cache-memory sanitized="true">`,
+			"cache_memory_prompt.md should have sanitized=\"true\" attribute on boundary marker (ASI-06)")
+	})
+
+	t.Run("multi cache memory prompt file has sanitized attribute", func(t *testing.T) {
+		content := readPromptFile(t, cacheMemoryPromptMultiFile)
+		assert.Contains(t, content, `<cache-memory sanitized="true">`,
+			"cache_memory_prompt_multi.md should have sanitized=\"true\" attribute on boundary marker (ASI-06)")
+	})
+
+	t.Run("single default cache memory prompt section references correct file", func(t *testing.T) {
+		config := &CacheMemoryConfig{
+			Caches: []CacheMemoryEntry{
+				{ID: "default"},
+			},
+		}
+
+		section := buildCacheMemoryPromptSection(config)
+		require.NotNil(t, section, "Should return a prompt section")
+		assert.Equal(t, cacheMemoryPromptFile, section.Content,
+			"Should reference the cache memory prompt file")
+	})
+
+	t.Run("multi cache memory prompt section references correct file", func(t *testing.T) {
+		config := &CacheMemoryConfig{
+			Caches: []CacheMemoryEntry{
+				{ID: "default"},
+				{ID: "session"},
+			},
+		}
+
+		section := buildCacheMemoryPromptSection(config)
+		require.NotNil(t, section, "Should return a prompt section")
+		assert.Equal(t, cacheMemoryPromptMultiFile, section.Content,
+			"Should reference the multi cache memory prompt file")
+	})
+}
diff --git a/pkg/workflow/repo_memory.go b/pkg/workflow/repo_memory.go
@@ -545,6 +545,12 @@ func generateRepoMemorySteps(builder *strings.Builder, data *WorkflowData) {
 		fmt.Fprintf(builder, "          MEMORY_DIR: %s\n", memoryDir)
 		fmt.Fprintf(builder, "          CREATE_ORPHAN: %t\n", memory.CreateOrphan)
 		builder.WriteString("        run: bash \"${RUNNER_TEMP}/gh-aw/actions/clone_repo_memory_branch.sh\"\n")
+
+		// Step 2: Scan the cloned memory for prompt injection (ASI-06).
+		// The sanitize_memory.sh script is also invoked directly by the clone script,
+		// but we emit an explicit step here so that the scan appears in the workflow
+		// summary and its output is auditable independently of the clone step.
+		generateRepoMemorySanitizationStep(builder, memory, memoryDir)
 	}
 }