root/trunk/plagger/lib/Plagger/Feed.pm

Revision 1741 (checked in by miyagawa, 2 years ago)

merge from hackathon-summary

  • Property svn:keywords set to Id Revision
Line 
package Plagger::Feed;
use strict;

# Compile-time dependencies, loaded in the same relative order as before.
use base qw( Plagger::Thing );
use Digest::MD5 qw(md5_hex);
use URI;
use Plagger::Util;
use Scalar::Util qw(blessed);

# Generated accessors.  The mk_* class methods are not defined in this file;
# presumably they are inherited through Plagger::Thing -- confirm there.

# Plain accessors.
__PACKAGE__->mk_accessors(qw(
    link url image language tags meta type source_xml aggregator
));

# Text accessors.
__PACKAGE__->mk_text_accessors(qw(
    description author title
));

# Date accessors.
__PACKAGE__->mk_date_accessors(qw(
    updated
));
13
# Constructor.  Returns a fresh hash-based object blessed into the invoking
# class, starting with empty meta, tags and entries and with type 'feed'.
sub new {
    my ($class) = @_;

    my %fields = (
        meta    => {},
        tags    => [],
        entries => [],
        type    => 'feed',
    );

    return bless \%fields, $class;
}
23
# Append one entry to the end of this feed's entry list.
# Returns whatever push returns (the new number of entries).
sub add_entry {
    my $self  = shift;
    my $entry = shift;

    return push @{ $self->{entries} }, $entry;
}
28
# Remove every entry that compares string-equal (ne is false) to $entry.
# A new array is stored in $self->{entries}; the reference to it is returned.
sub delete_entry {
    my $self   = shift;
    my $target = shift;

    my @kept;
    for my $candidate ($self->entries) {
        push @kept, $candidate if $candidate ne $target;
    }

    return $self->{entries} = \@kept;
}
34
# Context-sensitive accessor for the entry list: the entries themselves in
# list context, the underlying array reference otherwise.
sub entries {
    my ($self) = @_;

    return @{ $self->{entries} } if wantarray;
    return $self->{entries};
}
39
# Number of entries currently held by the feed.
sub count {
    my ($self) = @_;

    my $total = @{ $self->{entries} };    # array in scalar context => element count
    return $total;
}
44
# Combined getter/setter for the feed id.
# When called with an argument, that value is stored in $self->{id} first.
# Returns the stored id when it is true, otherwise the url, otherwise the link.
sub id {
    my ($self, @args) = @_;

    $self->{id} = $args[0] if @args;

    return $self->{id} || $self->url || $self->link;
}
50
# Returns the feed id after passing it through Plagger::Util::safe_id.
sub id_safe {
    my ($self) = @_;

    return Plagger::Util::safe_id($self->id);
}
55
# Returns the result of calling plaintext on the title, or undef when the
# title accessor returns a false value.
sub title_text {
    my ($self) = @_;

    return undef unless $self->title;
    return $self->title->plaintext;
}
60
# Re-order the feed's entries newest first (reverse chronological only) and
# store the sorted list back in $self->{entries}.
#
# Each entry's date is compared with <=>.  Entries whose date accessor returns
# a false value are compared as if dated at the Unix epoch (epoch => 0).
#
# Returns the array reference that was stored in $self->{entries}.
sub sort_entries {
    my $self = shift;

    # Fallback date for undated entries.  DateTime is loaded on demand because
    # this file never loads it itself, and the fallback object is built at most
    # once per call instead of once per undated entry.
    my $epoch_zero;
    my $fallback_date = sub {
        require DateTime;
        return $epoch_zero ||= DateTime->from_epoch(epoch => 0);
    };

    # xxx reverse chron only, using Schwartzian transform
    my @entries = map { $_->[1] }
        sort { $b->[0] <=> $a->[0] }
        map { [ $_->date || $fallback_date->(), $_ ] } $self->entries;

    $self->{entries} = \@entries;
}
71
# Drop all entries by pointing $self->{entries} at a brand-new empty array.
# The previous array is not modified.  Returns the new array reference.
sub clear_entries {
    my ($self) = @_;

    my @none;
    return $self->{entries} = \@none;
}
76
# Collapse entries that share a permalink down to a single entry each and
# store the result in $self->{entries} (the array reference is returned).
#
# Entries are grouped by the string value of their permalink.  For a group of
# several entries the winner is the first element returned by
# _sort_prioritize($permalink, @group).  A group of one is kept as-is without
# calling _sort_prioritize.
#
# Entries whose permalink is undefined or empty are never grouped: with no
# permalink there is nothing to show they are duplicates of each other, so
# each one is kept.
#
# The result is ordered by the first appearance of each permalink in the
# original list, so the outcome no longer depends on hash key order.  This is
# still not chronological order; use sort_entries for that.
sub dedupe_entries {
    my $self = shift;

    my (%group_for, @groups);
    for my $entry ($self->entries) {
        my $permalink = $entry->permalink;
        my $key = defined $permalink ? "$permalink" : '';

        # No permalink: keep the entry in a group of its own.
        if ($key eq '') {
            push @groups, { permalink => $key, entries => [ $entry ] };
            next;
        }

        my $group = $group_for{$key};
        unless ($group) {
            $group = $group_for{$key} = { permalink => $key, entries => [] };
            push @groups, $group;
        }
        push @{ $group->{entries} }, $entry;
    }

    my @entries;
    for my $group (@groups) {
        my @candidates = @{ $group->{entries} };
        if (@candidates == 1) {
            push @entries, $candidates[0];
            next;
        }

        my @sorted = _sort_prioritize($group->{permalink}, @candidates);
        push @entries, $sorted[0];
    }

    $self->{entries} = \@entries;
}
94
# Rank entries that share $permalink and return them best-first.
#
# use domain match, date and full-content-ness to prioritize source entry
# TODO: Date vs Full-content check should be user configurable
#
# Sort keys, compared in this order (larger value wins each time):
#   1. permalink is on the same domain as the entry's source url
#   2. permalink is on the same domain as the entry's source link
#   3. age in seconds relative to now (older wins; 0 when the entry has no date)
#   4. length of the entry body (longer wins; '' when the body is false)
sub _sort_prioritize {
    my ($permalink, @entries) = @_;

    my $now = time;

    # Decorate: [ entry, url-domain match, link-domain match, age, body length ]
    my @scored;
    for my $entry (@entries) {
        push @scored, [
            $entry,
            _is_same_domain($permalink, $entry->source->url),
            _is_same_domain($permalink, $entry->source->link),
            ($entry->date ? ($now - $entry->date->epoch) : 0),
            length($entry->body || ''),
        ];
    }

    # Sort descending on each key in turn.
    my @ranked = sort {
           $b->[1] <=> $a->[1]
        || $b->[2] <=> $a->[2]
        || $b->[3] <=> $a->[3]
        || $b->[4] <=> $a->[4]
    } @scored;

    # Undecorate.
    return map { $_->[0] } @ranked;
}
113
# Returns true when both arguments parse (via URI->new) to URIs whose hosts
# are equal, ignoring case.
#
# Returns 0 when either URI object has no host method, and also when either
# host is undefined.  Without the second check two host-less URIs would both
# lower-case to the empty string and be reported as the same domain.
sub _is_same_domain {
    my $u1 = URI->new($_[0]);
    my $u2 = URI->new($_[1]);

    return 0 unless $u1->can('host') && $u2->can('host');

    my $host1 = $u1->host;
    my $host2 = $u2->host;
    return 0 unless defined $host1 && defined $host2;

    return lc($host1) eq lc($host2);
}
121
# Returns the feed's own author when that is true.  Otherwise, if every entry
# with a defined author names the same one, returns that author (as the hash
# key it was collected under, i.e. its string form); otherwise returns undef.
sub primary_author {
    my ($self) = @_;

    my $feed_author = $self->author;
    return $feed_author if $feed_author;

    # if all entries are authored by the same person, use him/her as primary
    my %seen;
    for my $entry ($self->entries) {
        next unless defined $entry->author;
        $seen{ $entry->author } = 1;
    }

    my @names = keys %seen;
    return @names == 1 ? $names[0] : undef;
}
131
132 1;
Note: See TracBrowser for help on using the browser.