Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ t/recognized_string.t
t/return_undef_list_context.t
t/security_api.t
t/skip.t Test script
t/ssrf_protection.t
t/stream.t Test script
t/stream_attr_escape.t
t/stream_delimiter.t
Expand Down
57 changes: 57 additions & 0 deletions Parser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,15 @@ This option has no effect if the ExternEnt or ExternEntFin handlers are
directly set. Otherwise, if true, it forces the use of a file based external
entity handler.

=item * NoNetwork

When set to a true value, the default LWP external entity handler will refuse
to fetch C<http://> and C<https://> URLs, only allowing local file access.
This prevents Server-Side Request Forgery (SSRF) attacks when parsing
untrusted XML. See L<"SECURITY"> below for details.

B<Recommended for any application parsing untrusted XML.>

=item * BillionLaughsAttackProtectionMaximumAmplification

Sets the maximum amplification factor for the Billion Laughs attack
Expand Down Expand Up @@ -708,6 +717,10 @@ after giving a warning on the first external entity reference.
The LWP external entity handler will use proxies defined in the environment
(http_proxy, ftp_proxy, etc.).

The LWP handler includes SSRF protections: a URL scheme whitelist (only
C<file>, C<http>, C<https>) and private IP blocking. See L<"SECURITY"> for
details and the C<NoNetwork> option.

Please note that the LWP external entity handler reads the entire
entity into a string and returns it, whereas the file handler opens a
filehandle.
Expand Down Expand Up @@ -1010,6 +1023,50 @@ For full details on each option, see L<XML::Parser::Expat/"new">.
BillionLaughsAttackProtectionActivationThreshold => 1024,
);

=head2 External Entity SSRF Protection

The default LWP-based external entity handler resolves C<SYSTEM> identifiers
in DTD entity declarations. Without safeguards, an attacker can craft XML
that forces the parser to fetch arbitrary URLs, enabling Server-Side Request
Forgery (SSRF) attacks — for example, reaching cloud metadata endpoints
(C<http://169.254.169.254/>) or scanning internal networks.

B<Built-in protections> (always active in the default LWP handler):

=over 4

=item * B<URL scheme whitelist> — Only C<file>, C<http>, and C<https> schemes
are permitted. Dangerous schemes like C<gopher>, C<ftp>, C<data>, and C<dict>
are rejected.

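=item * B<Private IP blocking> — HTTP/HTTPS requests whose host is a literal
IP address in a private or reserved range are blocked, including
C<10.0.0.0/8>, C<172.16.0.0/12>, C<192.168.0.0/16>, C<169.254.0.0/16>
(link-local/cloud metadata), C<127.0.0.0/8> (loopback), C<0.0.0.0/8>, and
IPv6 loopback (C<::1>). The check is applied to the host exactly as it is
written in the URL; hostnames are not resolved first, so a DNS name that
points at an internal address is B<not> caught by this check.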

=back

B<For untrusted XML>, use one or more of these additional measures:

=over 4

=item * Set C<< NoNetwork => 1 >> to block all HTTP/HTTPS fetches while
still allowing local file entities.

=item * Set C<< NoLWP => 1 >> to use the file-only handler (no network
capability at all).

=item * Install a custom C<ExternEnt> handler that returns C<undef> to
disable all external entity resolution.

=back

    # Recommended for untrusted input:
    my $parser = XML::Parser->new(
        NoNetwork => 1,
        Style     => 'Tree',
    );

=head1 LICENSE

This library is free software; you can redistribute it and/or modify it
Expand Down
93 changes: 83 additions & 10 deletions Parser/LWPExternEnt.pl
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,55 @@ package XML::Parser;
use URI::file;
use LWP::UserAgent;

##
## Note that this external entity handler reads the entire entity into
## memory, so it will choke on huge ones. It would be really nice if
## LWP::UserAgent optionally returned us an IO::Handle.
##
my %_ALLOWED_SCHEMES = map { $_ => 1 } qw(file http https);

# Private / reserved IPv4 networks as [network, netmask] pairs.  An address
# is in a network when (address & netmask) == network.
my @_PRIVATE_IPV4 = (
    [0x0A000000, 0xFF000000],    # 10.0.0.0/8      (RFC 1918)
    [0xAC100000, 0xFFF00000],    # 172.16.0.0/12   (RFC 1918)
    [0xC0A80000, 0xFFFF0000],    # 192.168.0.0/16  (RFC 1918)
    [0xA9FE0000, 0xFFFF0000],    # 169.254.0.0/16  (link-local, cloud metadata)
    [0x7F000000, 0xFF000000],    # 127.0.0.0/8     (loopback)
    [0x00000000, 0xFF000000],    # 0.0.0.0/8       ("this host")
    [0x64400000, 0xFFC00000],    # 100.64.0.0/10   (RFC 6598 shared space)
);

# _parse_ipv4_literal($host)
#
# Interpret $host as a numeric IPv4 address using the same rules as
# inet_aton(3), because that is how the system resolver will read it:
#   * one to four dot-separated parts ("127.1" means 127.0.0.1);
#   * each part is decimal, octal (leading 0) or hex (leading 0x);
#   * the last part fills all remaining bytes.
# Returns the address as a 32-bit integer, or nothing when $host is not a
# numeric IPv4 address (for example an ordinary hostname).
sub _parse_ipv4_literal {
    my ($host) = @_;

    $host =~ s/\.\z//;    # tolerate a single trailing (root) dot

    my @fields = split /\./, $host, -1;
    return if @fields < 1 || @fields > 4;

    my @values;
    for my $field (@fields) {
        my $value;

        # The digit-count limits keep every part within 32 bits, so that
        # hex()/oct() never overflow or emit "non-portable" warnings.
        if ($field =~ /\A0[xX]0*([0-9A-Fa-f]{1,8})\z/) {
            $value = hex $1;
        }
        elsif ($field =~ /\A0+([0-3]?[0-7]{0,10})\z/) {
            $value = length $1 ? oct $1 : 0;
        }
        elsif ($field =~ /\A([1-9][0-9]{0,9})\z/) {
            $value = $1 + 0;
        }
        else {
            return;
        }
        push @values, $value;
    }

    # Leading parts are one byte each; the final part covers the rest.
    my $last = pop @values;
    return if grep { $_ > 0xFF } @values;
    return if $last > 256**(4 - @values) - 1;

    my $ip    = $last;
    my $shift = 24;
    for my $byte (@values) {
        $ip |= $byte << $shift;
        $shift -= 8;
    }
    return $ip;
}

# _parse_ipv6_literal($host)
#
# Parse a textual IPv6 address (brackets already removed) into a reference
# to an array of eight 16-bit group values.  Handles "::" compression, an
# embedded dotted-quad tail ("::ffff:127.0.0.1") and a "%zone" suffix.
# Returns nothing when $host is not a well-formed IPv6 address.
sub _parse_ipv6_literal {
    my ($host) = @_;

    $host =~ s/%.*\z//s;    # drop any zone identifier

    # Rewrite a trailing dotted quad as the two hex groups it stands for.
    if ($host =~ /\A(.*:)(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})\z/s) {
        my ($head, @octets) = ($1, $2, $3, $4, $5);
        return if grep { $_ > 255 } @octets;
        $host = sprintf '%s%x:%x', $head,
            ($octets[0] << 8) | $octets[1],
            ($octets[2] << 8) | $octets[3];
    }

    my @halves = split /::/, $host, -1;
    return if @halves < 1 || @halves > 2;

    my @sides;
    for my $half (@halves) {
        my @hex = split /:/, $half, -1;
        return if grep { !/\A[0-9A-Fa-f]{1,4}\z/ } @hex;
        push @sides, [ map { hex } @hex ];
    }

    # Without "::" all eight groups must be spelled out.
    if (@sides == 1) {
        return unless @{ $sides[0] } == 8;
        return $sides[0];
    }

    # "::" stands for at least one all-zero group.
    my $missing = 8 - @{ $sides[0] } - @{ $sides[1] };
    return if $missing < 1;
    return [ @{ $sides[0] }, (0) x $missing, @{ $sides[1] } ];
}

# _ipv4_in_private_range($ip)
#
# True (1) when the 32-bit integer $ip falls inside any @_PRIVATE_IPV4
# network, false (0) otherwise.
sub _ipv4_in_private_range {
    my ($ip) = @_;
    for my $range (@_PRIVATE_IPV4) {
        return 1 if ($ip & $range->[1]) == $range->[0];
    }
    return 0;
}

# _is_private_ip($host)
#
# Decide whether the host part of a URL is a literal IP address in a
# private, loopback, link-local or otherwise reserved range.
#
#   $host - host string as returned by URI->host; may be undef or empty and
#           may or may not carry the [] brackets of an IPv6 literal.
#
# Returns 1 when the address must be refused, 0 otherwise.
#
# Hostnames are NOT resolved here, so a DNS name that points at an internal
# address yields 0; this function only recognises numeric addresses,
# including the alternative spellings a resolver accepts (octal, hex,
# shortened and single-integer IPv4; expanded or IPv4-mapped IPv6).
sub _is_private_ip {
    my ($host) = @_;

    return 0 unless defined $host && length $host;

    $host =~ s/\A\[(.*)\]\z/$1/s;    # strip IPv6 brackets

    if ($host =~ /:/) {
        my $groups = _parse_ipv6_literal($host);
        return 0 unless $groups;

        my @g = @{$groups};
        my $high_zero = !grep { $_ != 0 } @g[0 .. 4];

        # :: (unspecified) and ::1 (loopback)
        return 1 if $high_zero && $g[5] == 0 && $g[6] == 0 && $g[7] <= 1;

        # IPv4-mapped ::ffff:a.b.c.d - judge the embedded IPv4 address
        if ($high_zero && $g[5] == 0xFFFF) {
            return _ipv4_in_private_range(($g[6] << 16) | $g[7]);
        }

        return 1 if ($g[0] & 0xFE00) == 0xFC00;    # fc00::/7  unique local
        return 1 if ($g[0] & 0xFFC0) == 0xFE80;    # fe80::/10 link-local
        return 0;
    }

    my $ip = _parse_ipv4_literal($host);
    return 0 unless defined $ip;
    return _ipv4_in_private_range($ip);
}

# lwp_ext_ent_handler($xp, $base, $sys)
#
# External entity handler that resolves a system identifier to a URI and
# returns the entity's content.
#
#   $xp   - parser object; this sub reads $xp->{NoNetwork} and
#           $xp->{_lwpagent}, appends to $xp->{ErrorMessage}, pushes onto
#           $xp->{_BaseStack} and calls $xp->base.
#   $base - current base (may be undef); a file path or a URI.
#   $sys  - system identifier of the entity.
#
# Returns an IO::File handle for file: URIs and the response content string
# for http/https URIs.  Returns undef after appending an explanation to
# $xp->{ErrorMessage} when the scheme is not allowed, the file cannot be
# opened, NoNetwork is set, the host is a blocked IP literal, or the
# request fails.
#
# NOTE(review): two stretches of this sub are collapsed in this view (the
# "Expand All" lines below), so the construction of $uri, the user agent
# set-up and the request itself are not documented here.
sub lwp_ext_ent_handler {
my ($xp, $base, $sys) = @_; # We don't use public id

my $uri;

if (defined $base) {
# Base may have been set by parsefile, which is agnostic about
# whether it is a file or URI.
my $base_uri = URI->new($base);
unless (defined $base_uri->scheme) {
$base_uri = URI->new_abs($base_uri, URI::file->cwd);
Expand All @@ -41,7 +76,45 @@ sub lwp_ext_ent_handler {
$uri = URI->new_abs($uri, URI::file->cwd);
}
}


# Normalise the scheme to lower case; a missing scheme becomes '' and is
# therefore rejected by the whitelist below.
my $scheme = lc($uri->scheme || '');

# Scheme whitelist: only file, http, https permitted
unless ($_ALLOWED_SCHEMES{$scheme}) {
$xp->{ErrorMessage} .= "\nURI scheme '$scheme' is not permitted"
. " (allowed: file, http, https): $uri";
return undef;
}

# For file:// URIs, open the local file directly and hand back the handle;
# no user agent is involved on this path.
# NOTE(review): the base stack is pushed and the base changed BEFORE the
# open is attempted, so a failed open returns undef with the base already
# switched, whereas the http/https path below only does this after a
# successful fetch - confirm the stack is still unwound in that case.
if ($scheme eq 'file') {
my $path = $uri->file;
$xp->{_BaseStack} ||= [];
push(@{$xp->{_BaseStack}}, $base);
$xp->base($uri);

require IO::File;
my $fh = IO::File->new($path, '<');
unless (defined $fh) {
$xp->{ErrorMessage} .= "\nFailed to open $path:\n$!";
return undef;
}
return $fh;
}

# NoNetwork: block http/https requests
if ($xp->{NoNetwork}) {
$xp->{ErrorMessage} .= "\nNetwork requests disabled (NoNetwork option set): $uri";
return undef;
}

# Private IP / SSRF blocking for network requests
# NOTE(review): only the host string of the URL is inspected here; no
# re-check of hostnames after resolution or of redirect targets is visible
# in this sub - verify against the collapsed user-agent set-up.
my $host = $uri->host || '';
if (_is_private_ip($host)) {
$xp->{ErrorMessage} .= "\nRequest to private/reserved IP address blocked: $uri";
return undef;
}

# Reuse the user agent cached on the parser, creating it on first use.
my $ua = $xp->{_lwpagent};
unless (defined $ua) {
$ua = $xp->{_lwpagent} = LWP::UserAgent->new();
Expand All @@ -55,12 +128,12 @@ sub lwp_ext_ent_handler {
$xp->{ErrorMessage} .= "\n" . $res->status_line . " $uri";
return undef;
}

# Fetch succeeded: remember the previous base and make the entity's own
# URI the base for anything it references.
$xp->{_BaseStack} ||= [];
push(@{$xp->{_BaseStack}}, $base);

$xp->base($uri);

return $res->content;
} # End lwp_ext_ent_handler

Expand Down
127 changes: 127 additions & 0 deletions t/ssrf_protection.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#!/usr/bin/perl

# Test SSRF protection in the LWP external entity handler.
# Verifies URL scheme whitelist and private IP blocking.
# See GitHub issue #275.

use strict;
use warnings;

use Test::More;
use XML::Parser;

eval { require LWP::UserAgent; require URI; };
if ($@) {
plan skip_all => 'LWP::UserAgent not installed';
}

plan tests => 14;

# Helper: parse XML with an external entity pointing to $uri
# Returns error message on failure, empty string on success.
sub parse_with_entity {
    my ($uri, %opts) = @_;

    # Minimal document whose only content is a reference to an external
    # entity located at $uri.
    my $document = qq{<!DOCTYPE foo [\n <!ENTITY ext SYSTEM "$uri">\n]>\n<foo>&ext;</foo>};

    # Parser options (e.g. NoNetwork => 1) are passed straight through.
    my $parser = XML::Parser->new(%opts);

    eval { $parser->parse($document) };
    my $failure = $@;

    # Normalise "no error" to the empty string so callers can use like().
    return $failure ? $failure : '';
}

# --- URL scheme whitelist ---

# Dangerous schemes must be rejected
for my $case (
    [ 'gopher://evil.example.com/',  'gopher:// scheme rejected' ],
    [ 'ftp://evil.example.com/data', 'ftp:// scheme rejected' ],
    [ 'data:text/plain,hello',       'data: scheme rejected' ],
    [ 'dict://evil.example.com/',    'dict:// scheme rejected' ],
) {
    my ($target, $label) = @{$case};
    like(parse_with_entity($target),
        qr/scheme.*not allowed|not permitted/i, $label);
}

# --- Private IP blocking ---

for my $case (
    [ 'http://169.254.169.254/latest/meta-data/', 'link-local 169.254.x.x blocked' ],
    [ 'http://127.0.0.1/secret',                  'loopback 127.0.0.1 blocked' ],
    [ 'http://10.0.0.1/internal',                 '10.x.x.x private range blocked' ],
    [ 'http://192.168.1.1/router',                '192.168.x.x private range blocked' ],
    [ 'http://172.16.0.1/internal',               '172.16.x.x private range blocked' ],
    [ 'http://[::1]/secret',                      'IPv6 loopback [::1] blocked' ],
) {
    my ($target, $label) = @{$case};
    like(parse_with_entity($target),
        qr/private|blocked|not allowed/i, $label);
}

# --- NoNetwork option ---

for my $case (
    [ 'http://example.com/foo.xml',  'NoNetwork blocks http:// URLs' ],
    [ 'https://example.com/foo.xml', 'NoNetwork blocks https:// URLs' ],
) {
    my ($target, $label) = @{$case};
    like(parse_with_entity($target, NoNetwork => 1),
        qr/network.*disabled|not allowed|NoNetwork/i, $label);
}

# --- file:// still works with NoNetwork ---
# (a local entity must still be readable when network fetches are disabled)

use File::Temp qw(tempfile);
my ($ent_out, $entfile) = tempfile(UNLINK => 1, SUFFIX => '.ent');
print {$ent_out} "local content";
close $ent_out;

{
    # Collect every chunk of character data the parser reports.
    my @chunks;
    my $parser = XML::Parser->new(
        NoNetwork => 1,
        Handlers  => { Char => sub { push @chunks, $_[1] } },
    );
    my $document = qq{<!DOCTYPE foo [\n <!ENTITY ext SYSTEM "$entfile">\n]>\n<foo>&ext;</foo>};

    eval { $parser->parse($document) };
    is($@, '', 'NoNetwork allows local file entities');
    is(join('', @chunks), 'local content', 'NoNetwork: local file content correct');
}
Loading