root/branches/feature-server/plagger/lib/Plagger/Plugin/Filter/HTMLScrubber.pm

Revision 1101 (checked in by typester, 4 years ago)

Filter::HTMLScrubber:

  • fixed config parser
  • added default config parameters (from Publish::Planet)
Line 
1 package Plagger::Plugin::Filter::HTMLScrubber;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use HTML::Scrubber;
6
# Built-in per-tag rules, used by register() unless 'no_default_configs'
# is set in the plugin config.
#
# Returns a flat key/value list (tag name => rule):
#   img    - attribute rules: 'alt' is kept, 'src' is kept only when it is
#            an absolute http:// or https:// URL, everything else is denied
#   style  - denied
#   script - denied
sub rules {
    return (
        img => {
            src => qr{^https?://},    # only absolute http:// or https:// URLs
            alt => 1,                 # alt attributes allowed
            '*' => 0,                 # deny all other attributes
        },
        style  => 0,
        script => 0,
    );
}
18
# Built-in default attribute rules, used by register() unless
# 'no_default_configs' is set in the plugin config.
#
# Returns a flat key/value list (attribute name => rule) in which every key
# appears exactly once, so the result is the same whether it is consumed as
# a list or assigned to a hash.  A rule is 1 (allow), 0 (deny) or a pattern
# the attribute value has to match.
sub default {
    # Inline event handlers are always denied.
    my @event_handlers = qw(
        onblur      onchange    onclick     ondblclick  onerror
        onfocus     onkeydown   onkeypress  onkeyup     onload
        onmousedown onmousemove onmouseout  onmouseover onmouseup
        onreset     onselect    onsubmit    onunload
    );

    return (
        '*'    => 1,                        # default rule, allow all attributes
        'href' => qr{^(?!(?:java)?script)}i,
        'cite' => '(?i-xsm:^(?!(?:java)?script))',
        'name' => 1,                        # could be sneaky, but hey ;)

        # 'src' is denied by default; a tag that needs it (such as img in
        # rules()) has to allow it through its own per-tag rule.
        'src'      => 0,
        'language' => 0,
        'type'     => 0,
        'style'    => 0,

        ( map { $_ => 0 } @event_handlers ),
    );
}
51
# Plugin entry point: hooks update() into 'update.entry.fixup' and builds the
# HTML::Scrubber instance stored in $self->{scrubber}.
#
# Config keys handled here (each is deleted from the hash returned by
# $self->conf once it has been read):
#   no_default_configs - when true, the built-in rules() and default() are
#                        not used
#   rules              - hash merged over rules() and passed to ->rules
#   default            - hash of attribute rules merged over default()
#
# Every remaining key is treated as the name of an HTML::Scrubber method and
# called with its value; array and hash values are flattened into the
# argument list.  A method call that dies is reported via $context->error.
#
# Returns the scrubber object.
sub register {
    my ( $self, $context ) = @_;

    $context->register_hook( $self, 'update.entry.fixup' => \&update );

    my $scrubber = HTML::Scrubber->new;
    my $config   = $self->conf;

    my ( %rules, %default );
    unless ( delete $config->{no_default_configs} ) {
        %rules   = $self->rules;
        %default = $self->default;
    }

    $scrubber->rules( %rules, %{ delete $config->{rules} || {} } );

    # HTML::Scrubber's default() takes ( $allow_tags, \%attribute_rules ),
    # not a flat key/value list, so the merged attribute rules are handed
    # over as one hash reference.  Tags without a rule of their own are
    # allowed; the built-in rules() only deny specific tags.  With no
    # attribute defaults at all the scrubber's own defaults are left alone.
    my %attr_default = ( %default, %{ delete $config->{default} || {} } );
    $scrubber->default( 1, \%attr_default ) if %attr_default;

    # Sorted so the order of the method calls does not depend on hash order.
    for my $method ( sort keys %$config ) {
        my $arg = $config->{$method};
        my @params
            = ref $arg eq 'ARRAY' ? @$arg
            : ref $arg eq 'HASH'  ? %$arg
            :                       ($arg);

        # Judge success by the eval's own return value rather than by $@.
        my $ok = eval { $scrubber->$method(@params); 1 };
        $context->error(qq/Invalid method call "$method": $@/) unless $ok;
    }

    $self->{scrubber} = $scrubber;
}
82
# Hook handler for 'update.entry.fixup': runs the entry body through the
# scrubber stored in $self->{scrubber} and writes the result back to the
# entry.  $context is accepted but not used.
sub update {
    my ( $self, $context, $args ) = @_;

    my $entry = $args->{entry};
    my $clean = $self->{scrubber}->scrub( $entry->body );

    return $entry->body($clean);
}
89
90 =head1 NAME
91
92 Plagger::Plugin::Filter::HTMLScrubber - Scrub feed content
93
94 =head1 SYNOPSIS
95
96   - module: Filter::HTMLScrubber
97     config:
98       rules:
99         style: 0
100         script: 0
101
102 =head1 DESCRIPTION
103
104 This plugin scrubs feed content using L<HTML::Scrubber>.
105
106 Apart from 'no_default_configs', and 'rules' and 'default' (which are merged with the built-in settings first), every config parameter is passed to the HTML::Scrubber method of the same name.
107 For example, if you write this in the config:
108
109     method: value
110
111 then
112
113     $scrubber->method('value');
114
115 is called.
116
117 So please see L<HTML::Scrubber> document for detail.
118
119 =head1 DEFAULT_CONFIGS
120
121 Some rules and default attribute settings are applied by default;
122 see the C<rules> and C<default> functions in this module.
123
124 If you don't need these settings, set no_default_configs:
125
126    config:
127      no_default_configs: 1
128
129 =head1 AUTHOR
130
131 Daisuke Murase <typester@cpan.org>
132
133 =head1 SEE ALSO
134
135 L<Plagger>, L<HTML::Scrubber>
136
137 =cut
138
139 1;
Note: See TracBrowser for help on using the browser.