Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ t/g_void.t
t/get_base.t
t/handler_die.t
t/load_encoding.t
t/lwp_entity_limits.t
t/memory_leak_symtab.t
t/namespaces.t Test script
t/nolwp.t
Expand Down
44 changes: 44 additions & 0 deletions Parser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,24 @@ This option has no effect if the ExternEnt or ExternEntFin handlers are
directly set. Otherwise, if true, it forces the use of a file based external
entity handler.

=item * LWP_MaxEntitySize

Maximum size in bytes for content fetched via the LWP external entity handler.
If a response exceeds this limit, the entity is rejected and a parse error is
raised. Defaults to C<1_048_576> (1 MB). Set to C<0> to disable the size
limit entirely.

This option has no effect when C<NoLWP> is set or when a custom C<ExternEnt>
handler is provided.

=item * LWP_Timeout

Timeout in seconds for HTTP requests made by the LWP external entity handler.
Defaults to C<30>.

This option has no effect when C<NoLWP> is set or when a custom C<ExternEnt>
handler is provided.

=item * BillionLaughsAttackProtectionMaximumAmplification

Sets the maximum amplification factor for the Billion Laughs attack
Expand Down Expand Up @@ -1010,6 +1028,32 @@ For full details on each option, see L<XML::Parser::Expat/"new">.
BillionLaughsAttackProtectionActivationThreshold => 1024,
);

=head2 External Entity Size Limits

When using the LWP-based external entity handler (the default when LWP is
available), entities fetched over HTTP are subject to a size limit and a
request timeout. This mitigates denial-of-service attacks in which a malicious
DTD references an extremely large or slow remote resource.

=over 4

=item B<LWP_MaxEntitySize>

Maximum response size in bytes. Defaults to 1 MB (C<1_048_576>). Set to
C<0> to disable.

=item B<LWP_Timeout>

HTTP request timeout in seconds. Defaults to C<30>.

=back

    # Example: allow up to 5 MB entities with a 60-second timeout
    my $parser = XML::Parser->new(
        LWP_MaxEntitySize => 5_242_880,
        LWP_Timeout       => 60,
    );

=head1 LICENSE

This library is free software; you can redistribute it and/or modify it
Expand Down
14 changes: 14 additions & 0 deletions Parser/LWPExternEnt.pl
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ sub lwp_ext_ent_handler {
unless (defined $ua) {
$ua = $xp->{_lwpagent} = LWP::UserAgent->new();
$ua->env_proxy();

my $max_size = $xp->{LWP_MaxEntitySize};
$max_size = 1_048_576 unless defined $max_size;
$ua->max_size($max_size) if $max_size;

my $timeout = $xp->{LWP_Timeout};
$timeout = 30 unless defined $timeout;
$ua->timeout($timeout);
}

my $req = HTTP::Request->new('GET', $uri);
Expand All @@ -55,6 +63,12 @@ sub lwp_ext_ent_handler {
$xp->{ErrorMessage} .= "\n" . $res->status_line . " $uri";
return undef;
}

if ($res->header('Client-Aborted')) {
$xp->{ErrorMessage} .= "\nEntity too large (exceeds "
. $ua->max_size . " bytes): $uri";
return undef;
}

$xp->{_BaseStack} ||= [];
push(@{$xp->{_BaseStack}}, $base);
Expand Down
158 changes: 158 additions & 0 deletions t/lwp_entity_limits.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
#!/usr/bin/perl

use strict;
use warnings;

use Test::More;

BEGIN {
eval { require LWP::UserAgent; require HTTP::Response; 1 }
or plan skip_all => 'LWP::UserAgent required for this test';
}

plan tests => 9;

use XML::Parser;

my $xml_with_entity = <<'XML';
<!DOCTYPE foo [
<!ENTITY ext SYSTEM "http://example.com/entity.txt">
]>
<foo>&ext;</foo>
XML

# Swap out selected LWP::UserAgent methods so the tests never touch the
# network and can observe which settings get applied to the agent.
my %captured_ua_settings;    # setting name => last value passed to its setter
my $mock_response;           # canned response handed back by request()

{
    no warnings 'redefine';

    # Constructor: build the real agent, then forget anything recorded so
    # far so that each new agent starts with an empty capture.
    my $real_new = \&LWP::UserAgent::new;
    *LWP::UserAgent::new = sub {
        my $agent = $real_new->(@_);
        %captured_ua_settings = ();
        return $agent;
    };

    # Setters: note the value when called with an argument, then delegate to
    # the genuine accessor so the agent itself still holds the setting.
    for my $setting (qw(max_size timeout)) {
        no strict 'refs';
        my $real_accessor = \&{"LWP::UserAgent::$setting"};
        *{"LWP::UserAgent::$setting"} = sub {
            $captured_ua_settings{$setting} = $_[1] if @_ > 1;
            return $real_accessor->(@_);
        };
    }

    # request(): always answer with whatever response the test prepared.
    *LWP::UserAgent::request = sub { return $mock_response };
}

# Build an HTTP::Response for use as the mocked reply.
#
# Named arguments (all optional):
#   code           - status code, 200 when undefined
#   message        - status message, 'OK' when undefined
#   content        - response body, empty string when undefined
#   client_aborted - if true, stored as the Client-Aborted header value
#
# Returns the HTTP::Response object.
sub make_response {
    my %spec = @_;

    my $status = $spec{code}    // 200;
    my $reason = $spec{message} // 'OK';
    my $body   = $spec{content} // '';

    my $response = HTTP::Response->new($status, $reason);
    $response->content($body);

    if (my $aborted = $spec{client_aborted}) {
        $response->header('Client-Aborted' => $aborted);
    }

    return $response;
}

# Test 1-2: Default limits applied (1MB max_size, 30s timeout)
# With no LWP_* options supplied, the values recorded by the intercepted
# setters should be the documented defaults.
{
    %captured_ua_settings = ();
    $mock_response = make_response(content => 'hello');

    # Character data is irrelevant here; only the captured settings matter.
    my $p = XML::Parser->new(
        Handlers => { Char => sub {} },
    );
    eval { $p->parse($xml_with_entity) };

    # A parse failure is not expected; surface it so that a failing
    # assertion below is not reduced to an unexplained "got undef".
    diag("unexpected parse failure: $@") if $@;

    is($captured_ua_settings{max_size}, 1_048_576,
        'Default max_size is 1MB');
    is($captured_ua_settings{timeout}, 30,
        'Default timeout is 30 seconds');
}

# Test 3-4: Custom limits honored
# Values given to the constructor should be the ones recorded by the
# intercepted max_size and timeout setters.
{
    $mock_response        = make_response(content => 'hello');
    %captured_ua_settings = ();

    my $parser = XML::Parser->new(
        Handlers          => { Char => sub {} },
        LWP_Timeout       => 10,
        LWP_MaxEntitySize => 500_000,
    );
    eval { $parser->parse($xml_with_entity) };

    is($captured_ua_settings{max_size}, 500_000,
        'Custom max_size honored');
    is($captured_ua_settings{timeout}, 10,
        'Custom timeout honored');
}

# Test 5: Normal response parses successfully
# Verifies both that no error is raised and that the mocked entity text
# actually arrives at the Char handler.  The two checks live in a subtest so
# the top-level test count is unchanged.
{
    $mock_response = make_response(content => 'entity content');

    my $chardata = '';
    my $p = XML::Parser->new(
        Handlers => { Char => sub { $chardata .= $_[1] } },
    );
    eval { $p->parse($xml_with_entity) };
    my $error = $@;    # copy immediately; later code may clobber $@

    subtest 'Normal response parses without error' => sub {
        plan tests => 2;
        is($error, '', 'no parse error raised');
        is($chardata, 'entity content',
            'entity content delivered to Char handler');
    };
}

# Test 6-7: Truncated response (Client-Aborted) causes parse error
# LWP flags a response cut short by the size limit with
# "Client-Aborted: max_size" ("die" is what it uses when a handler dies),
# so the mock carries the value that matches the scenario under test.
{
    $mock_response = make_response(
        content        => 'x' x 100,
        client_aborted => 'max_size',
    );

    my $p = XML::Parser->new(
        LWP_MaxEntitySize => 1024,
        Handlers          => { Char => sub {} },
    );
    eval { $p->parse($xml_with_entity) };

    # Both assertions inspect the same exception text.
    like($@, qr/entity too large/i,
        'Truncated response causes parse error');
    like($@, qr/1024/,
        'Error message includes size limit');
}

# Test 8: Disable max_size with 0
# A limit of zero means "no limit": the max_size setter should never be
# invoked with a value, so nothing is recorded for it.
{
    $mock_response        = make_response(content => 'hello');
    %captured_ua_settings = ();

    my $unlimited = XML::Parser->new(
        Handlers          => { Char => sub {} },
        LWP_MaxEntitySize => 0,
    );
    eval { $unlimited->parse($xml_with_entity) };

    ok(!exists $captured_ua_settings{max_size},
        'max_size not set when LWP_MaxEntitySize is 0');
}

# Test 9: HTTP error still reported correctly
# A 404 reply from the mocked agent should surface as a parse error whose
# text mentions the status code.
{
    $mock_response = make_response(message => 'Not Found', code => 404);

    my $parser = XML::Parser->new(Handlers => { Char => sub {} });
    eval { $parser->parse($xml_with_entity) };

    like($@, qr/404/,
        'HTTP errors still reported');
}
Loading