Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 64 additions & 49 deletions qubespdfconverter/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,62 @@ def recv_b():
return untrusted_data


class PdfRenderer:
    """Render PDF pages into image representations.

    Builds and runs the ``pdfinfo`` and ``pdftocairo`` command lines for a
    single PDF file, adding the password flags to both when a password is set.

    :param path: Path to the PDF file
    :param password: PDF password as bytes; an empty value means no password
        flags are passed to the tools
    :param resolution: Value passed to pdftocairo's ``-r`` option
    """

    def __init__(self, path, password=b"", resolution=RESOLUTION):
        self.path = path
        self.password = password
        # Only ever used as a command-line argument, so keep it as a string.
        self.resolution = str(resolution)


    def _password_args(self):
        """Return the ``-opw``/``-upw`` arguments, or ``[]`` without a password."""
        if not self.password:
            return []
        password = self.password.decode()
        # The same password is offered as both owner and user password.
        return ["-opw", password, "-upw", password]


    def page_count(self):
        """Return the number of pages in the PDF.

        :returns: Page count reported by ``pdfinfo``, or 0 if no ``Pages:``
            field is present in its output
        :raises subprocess.CalledProcessError: if ``pdfinfo`` exits non-zero
        """
        cmd = ["pdfinfo", *self._password_args(), str(self.path)]
        output = subprocess.run(cmd, capture_output=True, check=True)
        pages = 0

        for line in output.stdout.decode().splitlines():
            # Match the field name only at the start of the line: other
            # fields (e.g. the document title) may contain the text "Pages:"
            # and must not be parsed as the page count.
            if line.startswith("Pages:"):
                pages = int(line.split(":")[1])

        return pages


    async def create_page_image(self, page, output):
        """Render one PDF page into an image.

        :param page: Page number, used as both first (``-f``) and last
            (``-l``) page so that exactly one page is rendered
        :param output: Path of the image to create; only its parent directory
            and stem are passed to ``pdftocairo`` as the output file root
        """
        # The output argument is the path without its suffix.
        output_root = Path(output.parent, output.stem)
        cmd = [
            "pdftocairo",
            *self._password_args(),
            str(self.path),
            "-png",
            "-r",
            self.resolution,
            "-f",
            str(page),
            "-l",
            str(page),
            "-singlefile",
            str(output_root),
        ]

        proc = await asyncio.create_subprocess_exec(*cmd)
        await wait_proc(proc, cmd)


    async def render_page(self, page, prefix):
        """Create an intermediate page representation.

        :param page: Page number to render
        :param prefix: Path prefix from which the representation paths are
            derived
        :returns: The ``Representation`` whose initial (PNG) file was rendered
        """
        rep = Representation(prefix, "png", "rgb")
        await self.create_page_image(page, rep.initial)
        return rep


class Representation:
"""Umbrella object for a file's initial and final representations

Expand All @@ -103,21 +159,19 @@ class Representation:
from the final representation upon conversion. Generally, this makes the
final representation a relatively simple format (e.g., RGB bitmap).

:param path: Path to original, unsanitized file
:param prefix: Path prefix for representations
:param f_suffix: File extension of final representation (without .)
:param i_suffix: File extension of initial representation (without .)
"""

def __init__(self, path, prefix, i_suffix, f_suffix):
self.path = path
def __init__(self, prefix, i_suffix, f_suffix):
self.page = prefix.name
self.initial = prefix.with_suffix(f".{i_suffix}")
self.final = prefix.with_suffix(f".{f_suffix}")
self.dim = None


async def convert(self, password=b""):
async def convert(self):
"""Convert initial representation to final representation"""
cmd = [
"gm",
Expand All @@ -128,7 +182,6 @@ async def convert(self, password=b""):
f"rgb:{self.final}"
]

await self.create_irep(password)
self.dim = await self._dim()

proc = await asyncio.create_subprocess_exec(*cmd)
Expand All @@ -142,30 +195,6 @@ async def convert(self, password=b""):
)


async def create_irep(self, password=b""):
"""Create initial representation"""
cmd = ["pdftocairo"]

if password:
cmd += ["-opw", password.decode(), "-upw", password.decode()]

cmd += [
str(self.path),
"-png",
"-r",
args.resolution,
"-f",
str(self.page),
"-l",
str(self.page),
"-singlefile",
str(Path(self.initial.parent, self.initial.stem))
]

proc = await asyncio.create_subprocess_exec(*cmd)
await wait_proc(proc, cmd)


async def _dim(self):
"""Identify image dimensions of initial representation"""
cmd = ["gm", "identify", "-format", "%w %h", str(self.initial)]
Expand Down Expand Up @@ -194,6 +223,7 @@ class BaseFile:
def __init__(self, path, password=b""):
self.path = path
self.password = password
self.renderer = PdfRenderer(path, password, args.resolution)
self.pagenums = 0
self.batch = None

Expand Down Expand Up @@ -223,32 +253,17 @@ async def sanitize(self):

def _pagenums(self):
"""Return the number of pages in the suspect file"""
cmd = ["pdfinfo"]

if self.password:
cmd += ["-opw", self.password.decode(), "-upw", self.password.decode()]

cmd.append(str(self.path))
output = subprocess.run(cmd, capture_output=True, check=True)
pages = 0

for line in output.stdout.decode().splitlines():
if "Pages:" in line:
pages = int(line.split(":")[1])

return pages
return self.renderer.page_count()


async def _publish(self):
"""Extract initial representations and enqueue conversion tasks"""
for page in range(1, self.pagenums + 1):
rep = Representation(
self.path,
Path(self.path.parent, str(page)),
"png",
"rgb"
rep = await self.renderer.render_page(
page,
Path(self.path.parent, str(page))
)
task = asyncio.create_task(rep.convert(self.password))
task = asyncio.create_task(rep.convert())
batch_e = BatchEntry(task, rep)
await self.batch.join()

Expand Down
14 changes: 7 additions & 7 deletions qubespdfconverter/tests/test_password.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pathlib import Path
from unittest import mock

from qubespdfconverter.server import BaseFile, Representation
from qubespdfconverter.server import BaseFile, PdfRenderer


class TC_ServerPassword(unittest.IsolatedAsyncioTestCase):
Expand Down Expand Up @@ -48,12 +48,12 @@ def test_pagenums_omits_password_flags_when_empty(self):
self.assertNotIn("-opw", cmd)
self.assertNotIn("-upw", cmd)

async def test_create_irep_includes_password_flags(self):
async def test_create_page_image_includes_password_flags(self):
"""pdftocairo receives -opw/-upw when a password is provided."""
with tempfile.TemporaryDirectory() as tmpdir:
path = Path(tmpdir, "original.pdf")
path.touch()
rep = Representation(path, Path(tmpdir, "1"), "png", "rgb")
renderer = PdfRenderer(path, password=b"secret")

mock_proc = mock.AsyncMock()
mock_proc.returncode = 0
Expand All @@ -63,19 +63,19 @@ async def test_create_irep_includes_password_flags(self):
"asyncio.create_subprocess_exec",
return_value=mock_proc
) as exec_mock:
await rep.create_irep(password=b"secret")
await renderer.create_page_image(1, Path(tmpdir, "1.png"))

cmd = exec_mock.call_args[0]
self.assertIn("-opw", cmd)
self.assertIn("-upw", cmd)
self.assertIn("secret", cmd)

async def test_create_irep_omits_password_flags_when_empty(self):
async def test_create_page_image_omits_password_flags_when_empty(self):
"""pdftocairo does not receive password flags when password is empty."""
with tempfile.TemporaryDirectory() as tmpdir:
path = Path(tmpdir, "original.pdf")
path.touch()
rep = Representation(path, Path(tmpdir, "1"), "png", "rgb")
renderer = PdfRenderer(path, password=b"")

mock_proc = mock.AsyncMock()
mock_proc.returncode = 0
Expand All @@ -85,7 +85,7 @@ async def test_create_irep_omits_password_flags_when_empty(self):
"asyncio.create_subprocess_exec",
return_value=mock_proc
) as exec_mock:
await rep.create_irep(password=b"")
await renderer.create_page_image(1, Path(tmpdir, "1.png"))

cmd = exec_mock.call_args[0]
self.assertNotIn("-opw", cmd)
Expand Down