Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion marker/schema/blocks/listitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def replace_bullets(child_blocks):

if first_block is not None and first_block.id.block_type == BlockTypes.Line:
bullet_pattern = r"(^|[\n ]|<[^>]*>)[•●○ഠ ം◦■▪▫–—-]( )"
- first_block.html = re.sub(bullet_pattern, r"\1\2", first_block.html)
+ first_block.html = re.sub(bullet_pattern, r"\1\2", first_block.html, count=1)


class ListItem(Block):
Expand Down
18 changes: 18 additions & 0 deletions tests/schema/blocks/test_listitem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from types import SimpleNamespace

from marker.schema import BlockTypes
from marker.schema.blocks.listitem import replace_bullets


def test_replace_bullets_keeps_inner_dashes():
    """Regression test for #1024.

    Stripping the leading bullet from a line must leave a dash that
    appears later in the same text untouched.
    """
    # A lightweight stand-in for a Line block: only the attributes this
    # test sets (children, id.block_type, html) are provided.
    block_id = SimpleNamespace(block_type=BlockTypes.Line)
    line_block = SimpleNamespace(
        children=[],
        id=block_id,
        html="• He paused — then left.",
    )

    replace_bullets([line_block])

    # The result is read back from the same object passed in.
    cleaned_html = line_block.html
    assert "•" not in cleaned_html
    assert cleaned_html.strip() == "He paused — then left."
Loading