Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions app/helpers/searches_helper.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
module SearchesHelper
# Use this method to ensure FTS5 search result output is safe to render,
# allowing only <mark> tags through.
#
# FTS5 highlight() and snippet() wrap matched terms in <mark> tags, but the
# surrounding content may contain unsanitized HTML from user input,
# particularly if it was indexed before explicit sanitization was added to
# the Searchable concern.
def sanitize_search_result(html)
sanitize(html, tags: %w[mark], attributes: [])
end

def highlight_searched_content(leaf, content, query)
if query.present?
terms = leaf.matches_for_highlight(query)
Expand Down
17 changes: 15 additions & 2 deletions app/models/leaf/searchable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,28 @@ def searchable?
searchable_content
end

# Strip tags from content before indexing to keep the FTS table clean.
# This is a hygiene measure, not a security boundary — display-time
# sanitization in sanitize_search_result is the primary defense.
# ActionText content (Pages) is already HTML-safe plain text via
# ERB::Util.html_escape, so skip it to avoid double-encoding.
def sanitize_for_index(text)
if text.html_safe?
text
else
Rails::Html::FullSanitizer.new.sanitize(text)
end
end

def create_in_search_index
execute_sql_with_binds "insert into leaf_search_index(rowid, title, content ) values (?, ?, ?)",
id, title, searchable_content
id, sanitize_for_index(title), sanitize_for_index(searchable_content)
end

def update_in_search_index
transaction do
updated = execute_sql_with_binds "update leaf_search_index set title = ?, content = ? where rowid = ?",
title, searchable_content, id
sanitize_for_index(title), sanitize_for_index(searchable_content), id

create_in_search_index unless updated
end
Expand Down
7 changes: 6 additions & 1 deletion app/models/page.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@ class Page < ApplicationRecord
has_markdown :body

def searchable_content
plain_text
# `to_plain_text` decodes HTML entities, so characters like `<` that were `&lt;`
# in the original HTML become literal `<`. Re-encode them with `html_escape` so
# they are safe in the FTS index. The `html_safe` return value signals to
# `Leaf::Searchable#sanitize_for_index` that this content should not be
# double-encoded.
ERB::Util.html_escape(plain_text)
end

def html_preview
Expand Down
4 changes: 2 additions & 2 deletions app/views/books/searches/_result.html.erb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<%= link_to edit_leafable_path(leaf, search: params[:search]), class: "search__result hide_from_reading_mode txt-ink", data: { turbo_frame: "_top" } do %>
<strong><%= leaf.title_match.html_safe %>:</strong> <%= leaf.content_match.html_safe %>
<strong><%= sanitize_search_result(leaf.title_match) %>:</strong> <%= sanitize_search_result(leaf.content_match) %>
<% end %>
<%= link_to leafable_slug_path(leaf, search: params[:search]), class: "search__result hide_from_edit_mode txt-ink", data: { turbo_frame: "_top" } do %>
<strong><%= leaf.title_match.html_safe %>:</strong> <%= leaf.content_match.html_safe %>
<strong><%= sanitize_search_result(leaf.title_match) %>:</strong> <%= sanitize_search_result(leaf.content_match) %>
<% end %>
192 changes: 192 additions & 0 deletions test/controllers/books/searches_controller_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,196 @@ class Books::SearchesControllerTest < ActionDispatch::IntegrationTest
assert_response :success
assert_select "p", text: /no matches/i
end

test "search results strip dangerous tags from section body" do
section = Section.new(body: 'findme <img src=x onerror="alert(1)">')
books(:handbook).press(section, title: "Safe Title")
section.leaf.reindex

post book_search_url(books(:handbook)), params: { search: "findme" }

assert_response :success
assert_select "a.search__result", count: 2 do |results|
results.each do |result|
assert_pattern {
result => {
elements: [
{ name: "strong", content: "Safe Title:" },
{ name: "mark", content: "findme" }
]
}
}
end
end
end

test "search results strip dangerous tags from section title" do
section = Section.new(body: "findme content")
books(:handbook).press(section, title: 'findme <img src=x onerror="alert(1)">')
section.leaf.reindex

post book_search_url(books(:handbook)), params: { search: "findme" }

assert_response :success
assert_select "a.search__result", count: 2 do |results|
results.each do |result|
assert_pattern {
result => {
elements: [
{ name: "strong", elements: [{ name: "mark", content: "findme" }] },
{ name: "mark", content: "findme" }
]
}
}
end
end
end

test "search results strip dangerous tags from page body" do
pages(:welcome).update! body: 'findme <b>bold</b>'
leaves(:welcome_page).reindex

post book_search_url(books(:handbook)), params: { search: "findme" }

assert_response :success
assert_select "a.search__result", count: 2 do |results|
results.each do |result|
assert_pattern {
result => {
elements: [
{ name: "strong", content: /Handbook/ },
{ name: "mark", content: "findme" }
]
}
}
end
end
end

test "search results strip dangerous tags from page title" do
leaf = leaves(:welcome_page)
leaf.update! title: 'findme <b>bold</b>'
leaf.reindex

post book_search_url(books(:handbook)), params: { search: "findme" }

assert_response :success
assert_select "a.search__result", count: 2 do |results|
results.each do |result|
assert_pattern {
result => {
elements: [
{ name: "strong", elements: [{ name: "mark", content: "findme" }], content: "findme bold:" }
]
}
}
end
end
end

test "search results encode entities in section body" do
section = Section.new(body: "findme Tom & Jerry")
books(:handbook).press(section, title: "Safe Title")
section.leaf.reindex

post book_search_url(books(:handbook)), params: { search: "findme" }

assert_response :success
assert_includes response.body, "Tom &amp; Jerry"
end

test "search results encode entities in section title" do
section = Section.new(body: "findme content")
books(:handbook).press(section, title: "findme Tom & Jerry")
section.leaf.reindex

post book_search_url(books(:handbook)), params: { search: "findme" }

assert_response :success
assert_includes response.body, "Tom &amp; Jerry"
end

test "search results encode entities in page body" do
pages(:welcome).update! body: "findme Tom & Jerry"
leaves(:welcome_page).reindex

post book_search_url(books(:handbook)), params: { search: "findme" }

assert_response :success
assert_includes response.body, "Tom &amp; Jerry"
end

test "search results encode entities in page title" do
leaf = leaves(:welcome_page)
leaf.update! title: "findme Tom & Jerry"
leaf.reindex

post book_search_url(books(:handbook)), params: { search: "findme" }

assert_response :success
assert_includes response.body, "Tom &amp; Jerry"
end

test "search results sanitize pre-existing poisoned body in the index" do
section = Section.new(body: "findme safe content")
books(:handbook).press(section, title: "Safe Title")

Leaf.connection.execute(
Leaf.sanitize_sql([
"update leaf_search_index set content = ? where rowid = ?",
'findme <img src=x onerror="alert(1)"> Tom & Jerry', section.leaf.id
])
)

post book_search_url(books(:handbook)), params: { search: "findme" }

assert_response :success
assert_select "a.search__result", count: 2 do |results|
results.each do |result|
assert_pattern {
result => {
elements: [
{ name: "strong", content: "Safe Title:" },
{ name: "mark", content: "findme" }
]
}
}
end
end
assert_includes response.body, "Tom &amp; Jerry"
end

test "search results sanitize pre-existing poisoned title in the index" do
section = Section.new(body: "findme safe content")
books(:handbook).press(section, title: "Safe Title")

Leaf.connection.execute(
Leaf.sanitize_sql([
"update leaf_search_index set title = ? where rowid = ?",
'findme <mark onclick="alert(1)">fake</mark> <img src=x onerror="alert(1)">', section.leaf.id
])
)

post book_search_url(books(:handbook)), params: { search: "findme" }

assert_response :success
assert_select "a.search__result", count: 2 do |results|
results.each do |result|
assert_pattern {
result => {
elements: [
{
name: "strong",
elements: [
{ name: "mark", content: "findme", attributes: [] },
{ name: "mark", content: "fake", attributes: [] }
]
},
{ name: "mark", content: "findme" }
]
}
}
end
end
end
end
19 changes: 19 additions & 0 deletions test/helpers/searches_helper_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
require "test_helper"

class SearchesHelperTest < ActionView::TestCase
test "sanitize_search_result preserves mark tags" do
assert_equal "<mark>findme</mark> text", sanitize_search_result("<mark>findme</mark> text")
end

test "sanitize_search_result strips non-mark tags" do
assert_equal "findme bold", sanitize_search_result("<b>findme</b> <b>bold</b>")
end

test "sanitize_search_result encodes entities" do
assert_equal "Tom &amp; Jerry", sanitize_search_result("Tom & Jerry")
end

test "sanitize_search_result strips attributes from mark tags" do
assert_equal "<mark>findme</mark> text", sanitize_search_result('<mark class="hidden">findme</mark> text')
end
end
44 changes: 44 additions & 0 deletions test/models/leaf/searchable_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,48 @@ class Leaf::SearchableTest < ActiveSupport::TestCase
markup = leaves(:welcome_page).matches_for_highlight("haggis")
assert_empty markup
end

test "indexing sanitizes section body" do
section = Section.new(body: 'findme Tom & Jerry <img src=x onerror="alert(1)">')
books(:handbook).press(section, title: "Safe Title")
section.leaf.reindex

leaves = Leaf.search("findme")
assert_equal "<mark>findme</mark> Tom &amp; Jerry ", leaves.first.content_match
end

test "indexing sanitizes section title" do
section = Section.new(body: "findme content")
books(:handbook).press(section, title: 'findme Tom & Jerry <img src=x onerror="alert(1)">')
section.leaf.reindex

leaves = Leaf.search("findme")
assert_equal "<mark>findme</mark> Tom &amp; Jerry ", leaves.first.title_match
end

test "indexing sanitizes page body" do
pages(:welcome).update! body: "findme Tom & Jerry <b>bold</b>"

leaves = Leaf.search("findme")
assert_equal "<mark>findme</mark> Tom &amp; Jerry bold", leaves.first.content_match
end


test "indexing sanitizes page title" do
leaf = leaves(:welcome_page)
leaf.update! title: 'findme Tom & Jerry <b>bold</b>'
leaf.reindex

leaves = Leaf.search("findme")
assert_equal "<mark>findme</mark> Tom &amp; Jerry bold", leaves.first.title_match
end

test "indexing strips injected mark tags from title" do
section = Section.new(body: "findme content")
books(:handbook).press(section, title: 'findme <mark>fake highlight</mark>')
section.leaf.reindex

leaves = Leaf.search("findme")
assert_equal "<mark>findme</mark> fake highlight", leaves.first.title_match
end
end
7 changes: 7 additions & 0 deletions test/models/page_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,11 @@ class PageTest < ActiveSupport::TestCase

assert_equal "", page.markable
end

test "searchable_content re-encodes HTML entities decoded by to_plain_text" do
page = Page.new(body: "5 < 10 & 10 > 5")

assert_includes page.searchable_content, "5 &lt; 10 &amp; 10 &gt; 5"
assert page.searchable_content.html_safe?
end
end
Loading