root/branches/hackathon-summary/plagger/lib/Plagger/Feed.pm

Revision 1527 (checked in by miyagawa, 2 years ago)

renamed Plagger::Content to Plagger:Text, to make it easy to understand considering Atom RFC's "Text Construct"

  • Property svn:keywords set to Id Revision
Line 
1 package Plagger::Feed;
2 use strict;
3
4 use base qw( Plagger::Thing );
5 __PACKAGE__->mk_accessors(qw( link url image language tags meta type source_xml aggregator ));
6 __PACKAGE__->mk_text_accessors(qw( description author title ));
7 __PACKAGE__->mk_date_accessors(qw( updated ));
8
9 use Digest::MD5 qw(md5_hex);
10 use URI;
11 use Plagger::Util;
12 use Scalar::Util qw(blessed);
13
14 sub new {
15     my $class = shift;
16     bless {
17         meta  => {},
18         tags  => [],
19         entries => [],
20         type  => 'feed',
21     }, $class;
22 }
23
24 sub add_entry {
25     my($self, $entry) = @_;
26     push @{ $self->{entries} }, $entry;
27 }
28
29 sub delete_entry {
30     my($self, $entry) = @_;
31     my @entries = grep { $_ ne $entry } $self->entries;
32     $self->{entries} = \@entries;
33 }
34
35 sub entries {
36     my $self = shift;
37     wantarray ? @{ $self->{entries} } : $self->{entries};
38 }
39
40 sub count {
41     my $self = shift;
42     scalar @{ $self->{entries} };
43 }
44
45 sub id {
46     my $self = shift;
47     $self->{id} = shift if @_;
48     $self->{id} || Digest::MD5::md5_hex($self->url || $self->link);
49 }
50
51 sub id_safe {
52     my $self = shift;
53     my $id = $self->id;
54     $id =~ s![^\w\s]+!_!g;
55     $id =~ s!\s+!_!g;
56     $id;
57 }
58
59 sub title_text {
60     my $self = shift;
61     $self->title ? $self->title->plaintext : undef;
62 }
63
64 sub sort_entries {
65     my $self = shift;
66
67     # xxx reverse chron only, using Schwartzian transform
68     my @entries = map { $_->[1] }
69         sort { $b->[0] <=> $a->[0] }
70         map { [ $_->date || DateTime->from_epoch(epoch => 0), $_ ] } $self->entries;
71
72     $self->{entries} = \@entries;
73 }
74
75 sub clear_entries {
76     my $self = shift;
77     $self->{entries} = [];
78 }
79
80 sub dedupe_entries {
81     my $self = shift;
82
83     # this logic breaks ordering of entries, to be sorted using sort_entries
84
85     my(%seen, @entries);
86     for my $entry ($self->entries) {
87         push @{ $seen{$entry->permalink} }, $entry;
88     }
89
90     for my $permalink (keys %seen) {
91         my @sorted = _sort_prioritize($permalink, @{ $seen{$permalink} });
92         push @entries, $sorted[0];
93     }
94
95     $self->{entries} = \@entries;
96 }
97
98 sub _sort_prioritize {
99     my($permalink, @entries) = @_;
100
101     # use domain match, date and full-content-ness to prioritize source entry
102     # TODO: Date vs Full-content check should be user configurable
103
104     my $now = time;
105     return
106         map { $_->[0] }
107         sort { $b->[1] <=> $a->[1] || $b->[2] <=> $a->[2] || $b->[3] <=> $a->[3] || $b->[4] <=> $a->[4] }
108         map { [
109             $_,                                              # Plagger::Entry for Schwartzian
110             _is_same_domain($permalink, $_->source->url),    # permalink and $feed->url is the same domain
111             _is_same_domain($permalink, $_->source->link),   # permalink and $feed->link is the same domain
112             ($_->date ? ($now - $_->date->epoch) : 0),       # Older entry date is prioritized
113             length($_->body || ''),                          # Prioritize full content feed
114         ] } @entries;
115 }
116
117 sub _is_same_domain {
118     my $u1 = URI->new($_[0]);
119     my $u2 = URI->new($_[1]);
120
121     return 0 unless $u1->can('host') && $u2->can('host');
122     return lc($u1->host) eq lc($u2->host);
123 }
124
125 sub primary_author {
126     my $self = shift;
127     $self->author || do {
128         # if all entries are authored by the same person, use him/her as primary
129         my %authors = map { defined $_->author ? ($_->author => 1) : () } $self->entries;
130         my @authors = keys %authors;
131         @authors == 1 ? $authors[0] : undef;
132     };
133 }
134
135 1;
Note: See TracBrowser for help on using the browser.