| 1 |
package Plagger::Plugin::Filter::HTMLTidy; |
|---|
| 2 |
use strict; |
|---|
| 3 |
use base qw( Plagger::Plugin ); |
|---|
| 4 |
|
|---|
| 5 |
use HTML::Tidy; |
|---|
| 6 |
|
|---|
| 7 |
# Plugin registration entry point: subscribes this plugin's filter routine
# to the 'update.entry.fixup' hook on the supplied context object.
sub register {
    my $self    = shift;
    my $context = shift;

    # Hook name => handler pairs handed to the context in one call.
    my @hooks = ( 'update.entry.fixup' => \&filter );

    $context->register_hook($self, @hooks);
}
|---|
| 14 |
|
|---|
| 15 |
# Fallback HTML::Tidy options. filter() uses each of these only when the
# plugin configuration does not already provide a value for it.
our %defaults = (
    # Character encodings
    'input_encoding'  => 'utf8',
    'output_encoding' => 'utf8',

    # Document shape
    'doctype'         => 'omit',
    'output_xhtml'    => 1,
    'tidy_mark'       => 0,

    # Layout of the generated markup
    'wrap'            => 0,
    'break_before_br' => 0,
);
|---|
| 24 |
|
|---|
| 25 |
# Handler for the 'update.entry.fixup' hook: runs the entry's HTML body
# through HTML::Tidy and stores the cleaned markup back on the entry.
#
# Arguments:
#   $self    - the plugin instance; conf() supplies user-specified options
#   $context - the context object the hook was registered on (unused here)
#   $args    - hashref; $args->{entry} is the entry whose body is filtered
#
# Entries whose body is missing or is not HTML are left untouched.
sub filter {
    my($self, $context, $args) = @_;

    my $body = $args->{entry}->body;
    return unless $body && $body->is_html;

    # Merge into a private hash so the plugin's stored configuration is not
    # rewritten on every call. Start from the defaults and let every
    # user-supplied option override them.
    #
    # HTML::Tidy maps underscores in option names to hyphens, so
    # "output-xhtml" (the spelling shown in the SYNOPSIS) and "output_xhtml"
    # (the spelling used in %defaults) name the same option. Normalise user
    # keys to the underscore form first; otherwise both spellings would be
    # passed on and the default could override the user's setting.
    my %options   = %defaults;
    my $user_conf = $self->conf || {};
    for my $key (keys %$user_conf) {
        (my $normalised = $key) =~ tr/-/_/;
        $options{$normalised} = $user_conf->{$key};
    }

    # Pass the merged options, not the raw plugin configuration.
    my $tidy = HTML::Tidy->new(\%options);
    $tidy->ignore( type => TIDY_WARNING );
    my $new_body = $tidy->clean($body->data);

    # If clean() gave back undef, keep the existing body instead of
    # replacing it with nothing.
    return unless defined $new_body;

    # Keep only what sits between <body> and </body>; the surrounding
    # document wrapper is discarded.
    $new_body =~ s!^.*<body>\s*(.*?)\s*</body>\s*</html>\s*$!$1!s;

    $args->{entry}->body($new_body);
}
|---|
| 45 |
|
|---|
| 46 |
1; |
|---|
| 47 |
__END__ |
|---|
| 48 |
|
|---|
| 49 |
=for stopwords htmltidy |
|---|
| 50 |
|
|---|
| 51 |
=head1 NAME |
|---|
| 52 |
|
|---|
| 53 |
Plagger::Plugin::Filter::HTMLTidy - Filters body HTML using HTML::Tidy |
|---|
| 54 |
|
|---|
| 55 |
=head1 SYNOPSIS |
|---|
| 56 |
|
|---|
| 57 |
- module: Filter::HTMLTidy |
|---|
| 58 |
config: |
|---|
| 59 |
output-xhtml: yes |
|---|
| 60 |
char-encoding: utf-8 |
|---|
| 61 |
|
|---|
| 62 |
=head1 DESCRIPTION |
|---|
| 63 |
|
|---|
| 64 |
This plugin glues HTML::Tidy as an entry filter, so it scrubs HTML to |
|---|
| 65 |
make it tidy. Best used with Publish plugins like Planet. |
|---|
| 66 |
|
|---|
| 67 |
=head1 CONFIG |
|---|
| 68 |
|
|---|
| 69 |
This plugin accepts any config option that can be used in an htmltidy |
|---|
| 70 |
config file. See L<http://tidy.sourceforge.net/docs/quickref.html> for details. |
|---|
| 71 |
|
|---|
| 72 |
=head1 AUTHOR |
|---|
| 73 |
|
|---|
| 74 |
Tatsuhiko Miyagawa |
|---|
| 75 |
|
|---|
| 76 |
=head1 SEE ALSO |
|---|
| 77 |
|
|---|
| 78 |
L<Plagger>, L<HTML::Tidy>, L<http://tidy.sourceforge.net/docs/quickref.html> |
|---|
| 79 |
|
|---|
| 80 |
=cut |
|---|