| 1 |
package Plagger::Plugin::Filter::HTMLScrubber; |
|---|
| 2 |
use strict; |
|---|
| 3 |
use base qw( Plagger::Plugin ); |
|---|
| 4 |
|
|---|
| 5 |
use HTML::Scrubber; |
|---|
| 6 |
|
|---|
| 7 |
# Built-in per-element rules, used by initialize() unless the
# "no_default_configs" option is set.
#
# Returns a flat key/value list (element name => rule) that is later handed
# to HTML::Scrubber's rules() method.
sub rules {
    # Attribute rules for <img>: "src" must match the http:// pattern, "alt"
    # is kept, and the '*' wildcard entry is 0 for every other attribute.
    my %img_attributes = (
        src => qr{^http://},    # only URL with http://
        alt => 1,
        '*' => 0,
    );

    # <style> and <script> carry a false rule value.
    return (
        img    => \%img_attributes,
        style  => 0,
        script => 0,
    );
}
|---|
| 18 |
|
|---|
| 19 |
# Built-in default attribute rules, used by initialize() unless the
# "no_default_configs" option is set.
#
# Returns a flat key/value list (attribute name => rule) with unique keys.
# initialize() turns it into a hash and passes it to HTML::Scrubber's
# default() method.
#
# The original list contained 'src' twice (first a regexp, later 0). Once the
# list became a hash the later 0 always won, so the regexp entry was dead and
# misleading. Only the effective 'src' => 0 is kept here; <img src> is handled
# separately by rules().
sub default {
    # Matches values that do NOT start with "script" or "javascript".
    my $not_script_uri = qr{^(?!(?:java)?script)}i;

    # Inline event-handler attributes, all given a false rule value.
    my @event_handlers = qw(
        onblur      onchange    onclick     ondblclick  onerror
        onfocus     onkeydown   onkeypress  onkeyup     onload
        onmousedown onmousemove onmouseout  onmouseover onmouseup
        onreset     onselect    onsubmit    onunload
    );

    return (
        '*'        => 1,
        'href'     => $not_script_uri,
        'cite'     => '(?i-xsm:^(?!(?:java)?script))',
        'language' => 0,
        'name'     => 1,
        ( map { ( $_ => 0 ) } @event_handlers ),
        'src'      => 0,
        'type'     => 0,
        'style'    => 0,
    );
}
|---|
| 51 |
|
|---|
| 52 |
# Registers this plugin's callbacks with the given context.
#
#   $self    - the plugin instance
#   $context - object providing register_hook()
#
# Returns whatever register_hook() returns.
sub register {
    my ( $self, $context ) = @_;

    # Hook name => callback pairs, in the order they are handed over.
    my @hooks = (
        'update.entry.fixup' => \&update,
        'plugin.init'        => \&initialize,
    );

    $context->register_hook( $self, @hooks );
}
|---|
| 61 |
|
|---|
| 62 |
# 'plugin.init' hook: builds the HTML::Scrubber instance and stores it in
# $self->{scrubber}.
#
#   $self    - the plugin instance; provides conf(), rules() and default()
#   $context - object providing error(), used to report a failed method call
#
# Config handling:
#   no_default_configs - when true, the built-in rules()/default() are skipped
#   rules              - hash merged over the built-in rules
#   default            - hash merged over the built-in default attribute rules
#   anything else      - called as $scrubber->$key(...), with array and hash
#                        values flattened into the argument list
#
# Returns the scrubber object.
sub initialize {
    my ( $self, $context ) = @_;

    my $scrubber = HTML::Scrubber->new;

    # Work on a shallow copy: the special keys are removed below so they are
    # not mistaken for scrubber methods, and deleting them from the plugin's
    # own config would lose them for a second init or any later reader.
    my %config = %{ $self->conf || {} };

    my ( %rules, %default );
    unless ( delete $config{no_default_configs} ) {
        %rules   = $self->rules;
        %default = $self->default;
    }

    # User-supplied entries come last so they override the built-in ones.
    $scrubber->rules( %rules, %{ delete $config{rules} || {} } );
    $scrubber->default( 1, { %default, %{ delete $config{default} || {} } } );

    # Sorted so the order of method calls is the same on every run.
    for my $method ( sort keys %config ) {
        my $arg = $config{$method};
        my @call_args
            = ref $arg eq 'ARRAY' ? @{$arg}
            : ref $arg eq 'HASH'  ? %{$arg}
            :                       $arg;

        # Rely on the eval's return value; $@ alone is not a reliable
        # failure indicator.
        my $ok = eval { $scrubber->$method(@call_args); 1 };
        $context->error(qq/Invalid method call "$method": $@/) unless $ok;
    }

    return $self->{scrubber} = $scrubber;
}
|---|
| 90 |
|
|---|
| 91 |
# 'update.entry.fixup' hook: scrubs the entry body in place.
#
#   $self    - the plugin instance; $self->{scrubber} must already be set
#              (see initialize)
#   $context - object providing log()
#   $args    - hash ref whose 'entry' value provides body() and permalink()
#
# Only a defined body that reports is_html is touched; the scrubbed result
# is written back through the body() setter.
sub update {
    my ( $self, $context, $args ) = @_;

    my $entry = $args->{entry};

    if ( defined $entry->body && $entry->body->is_html ) {
        # The parentheses matter: "." binds tighter than "||", so without
        # them the '(no-link)' fallback could never be selected.
        $context->log(
            debug => "Scrubbing body for " . ( $entry->permalink || '(no-link)' ) );

        my $body = $self->{scrubber}->scrub( $entry->body );
        $entry->body($body);
    }
}
|---|
| 100 |
|
|---|
| 101 |
1; |
|---|
| 102 |
|
|---|
| 103 |
__END__ |
|---|
| 104 |
|
|---|
| 105 |
=head1 NAME |
|---|
| 106 |
|
|---|
| 107 |
Plagger::Plugin::Filter::HTMLScrubber - Scrub feed content |
|---|
| 108 |
|
|---|
| 109 |
=head1 SYNOPSIS |
|---|
| 110 |
|
|---|
| 111 |
- module: Filter::HTMLScrubber |
|---|
| 112 |
config: |
|---|
| 113 |
rules: |
|---|
| 114 |
style: 0 |
|---|
| 115 |
script: 0 |
|---|
| 116 |
|
|---|
| 117 |
=head1 DESCRIPTION |
|---|
| 118 |
|
|---|
| 119 |
This plugin scrubs feed content using L<HTML::Scrubber>. |
|---|
| 120 |
|
|---|
| 121 |
All config parameters (except 'no_default_configs') are mapped to |
|---|
| 122 |
HTML::Scrubber method calls. For example, if you write: |
|---|
| 123 |
|
|---|
| 124 |
method: value |
|---|
| 125 |
|
|---|
| 126 |
in the config: section, this plugin will automatically turn the config |
|---|
| 127 |
into the method call: |
|---|
| 128 |
|
|---|
| 129 |
$scrubber->method('value'); |
|---|
| 130 |
|
|---|
| 131 |
See the L<HTML::Scrubber> documentation for details. |
|---|
| 132 |
|
|---|
| 133 |
=head1 CONFIG |
|---|
| 134 |
|
|---|
| 135 |
=over 4 |
|---|
| 136 |
|
|---|
| 137 |
=item no_default_configs |
|---|
| 138 |
|
|---|
| 139 |
Some rules and default config parameters are set by default. See I<rules> |
|---|
| 140 |
and I<default> methods defined in this module code for details. |
|---|
| 141 |
|
|---|
| 142 |
If you don't need these settings, set C<no_default_configs>: |
|---|
| 143 |
|
|---|
| 144 |
no_default_configs: 1 |
|---|
| 145 |
|
|---|
| 146 |
Defaults to 0, which means it uses the default (somewhat secure) config. |
|---|
| 147 |
|
|---|
| 148 |
=back |
|---|
| 149 |
|
|---|
| 150 |
=head1 AUTHOR |
|---|
| 151 |
|
|---|
| 152 |
Daisuke Murase <typester@cpan.org> |
|---|
| 153 |
|
|---|
| 154 |
Tatsuhiko Miyagawa |
|---|
| 155 |
|
|---|
| 156 |
=head1 SEE ALSO |
|---|
| 157 |
|
|---|
| 158 |
L<Plagger>, L<HTML::Scrubber> |
|---|
| 159 |
|
|---|
| 160 |
=cut |
|---|