#!/usr/bin/env perl
# Public domain as in http://en.wikipedia.org/wiki/Public_domain
#
# Written initially by David Moreno Garza <damog@damog.net>.
#
# This script generates an RSS feed for each of the weblogs
# at www.dixo.com.
#
# If you feel this work is useful for you, please let me know
# at <damog@damog.net> and visit my blog: http://www.damog.net/
#
# Have a beautiful day.
use warnings;
use strict;
use LWP;
use XML::RSS;
use Date::Manip;
use Net::SCP;
# Weblogs hosted under http://www.dixo.com/<name>/ for which a feed is built.
my @blogs = qw(
    mariana-h
    pada
    cha
    angel-dehesa
    zamora
    alonso-arreola
    warpig
    sonika
    sputnik
    atomix
);

# Every generated feed is copied into this directory on the remote host.
my $scp = Net::SCP->new("puntodeb.com", "puntodeb");
$scp->cwd("/home/puntodeb/www/dixo");

for my $persona (@blogs) {
    # procesa() writes the feed to this exact path.
    my $feed_file = "/tmp/$persona.xml";

    procesa($persona);

    # put() returns false on failure; report it instead of dropping it
    # silently, then carry on with the remaining weblogs.
    $scp->put($feed_file)
        or warn "Could not upload $feed_file: "
            . ($scp->{errstr} || 'unknown error') . "\n";

    # The local copy is only a staging file, so it is removed regardless of
    # whether the upload worked.
    unlink $feed_file;
}
# procesa($persona)
#
# Downloads http://www.dixo.com/$persona/, extracts the posts found in the
# page markup and writes them as an RSS 2.0 feed to /tmp/$persona.xml.
#
# The page is scanned line by line with a small state machine:
#   title -> a <div class="ccblogger"> line opens a post and carries its title
#   meta  -> the next line holding an <a href=...> gives permalink and date
#   open  -> skip ahead to the <div class="contendtext"> line
#   body  -> collect lines until one ends in </div>, then emit the item
#
# If the page cannot be fetched a warning is printed and a feed without
# items is still written, so the output file always exists for the caller.
sub procesa {
    my $persona = shift;
    my $url     = 'http://www.dixo.com/' . $persona . '/';
    my $agent   = 'damog.net dixo-rss.perl|metiche 0.0.1';

    my $rss = XML::RSS->new(version => '2.0');
    $rss->channel(
        title       => "$persona weblog",
        link        => $url,
        language    => 'es',
        description => "$persona blog de dixo.com",
        copyright   => 'Dixo.com',
        generator   => $agent,
    );

    my $getter   = LWP::UserAgent->new;
    my $response = $getter->get($url, 'User-Agent' => $agent);

    # Only parse a successful response; decoded_content may also be undef
    # when the body cannot be decoded.
    my $html = $response->is_success ? $response->decoded_content : undef;
    if (!defined $html) {
        warn "Could not fetch or decode $url: " . $response->status_line . "\n";
        $html = '';
    }

    my $state = 'title';
    my ($item_title, $item_permalink, $item_pubdate, $item_content);

    # split /^/ yields the lines with their newlines kept. This avoids an
    # in-memory filehandle, which Perl refuses to open on a string holding
    # characters above 0xFF (possible in decoded page text).
    for my $line (split /^/, $html) {
        if ($state eq 'title') {
            next unless $line =~ /<div class="ccblogger">/;
            ($item_title) = $line =~ /<div class="ccblogger">(.*)<\/div>/;
            $state = 'meta';
        }
        elsif ($state eq 'meta') {
            next unless $line =~ /<a href=/;

            # [^"]* keeps the capture inside a single href attribute even
            # when the line holds more than one anchor.
            ($item_permalink) = $line =~ /<a href="([^"]*)">Escrito en/;

            my ($written_on) = $line =~ /Escrito en: (.*)<\/a> \| </;
            $item_pubdate = parseDate($written_on) if defined $written_on;

            $state = 'open';
        }
        elsif ($state eq 'open') {
            # The opening line itself is not part of the description.
            next unless $line =~ /<div class="contendtext">/;
            $state = 'body';
        }
        else {
            $item_content .= $line;

            # \s* tolerates trailing blanks and CRLF line endings.
            next unless $line =~ /<\/div>\s*$/;

            $rss->add_item(
                title       => $item_title,
                permaLink   => $item_permalink,
                pubDate     => $item_pubdate,
                description => $item_content,
            );

            # Reset so nothing leaks from one post into the next.
            ($item_title, $item_permalink, $item_pubdate, $item_content) = ();
            $state = 'title';
        }
    }

    $rss->save("/tmp/$persona.xml");
}
# parseDate($date)
#
# Converts a dixo.com timestamp such as "Enero 5, 2009 a las 09:30 PM" into
# the string produced by Date::Manip's UnixDate "%g" format.
#
# The Spanish month name is mapped to its English three-letter abbreviation
# before the string is handed to Date::Manip; the time is labelled as CST.
#
# Returns nothing (undef in scalar context) when $date is undefined or does
# not look like such a timestamp.
sub parseDate {
    my $date = shift;
    return unless defined $date;

    # A single match fills all fields at once. This replaces the
    # "my $x = $1 if ..." form, whose behaviour is undefined in Perl.
    my ($month, $day, $year, $time, $ampm) =
        $date =~ /(\S+)\s+(\d{1,2}),\s*(\d{4})\s+a\s+las\s+(\d{1,2}:\d\d)\s*([AP]M)/;
    return unless defined $ampm;

    my %mon_for = (
        Enero      => 'Jan',
        Febrero    => 'Feb',
        Marzo      => 'Mar',
        Abril      => 'Apr',
        Mayo       => 'May',
        Junio      => 'Jun',
        Julio      => 'Jul',
        Agosto     => 'Aug',
        Septiembre => 'Sep',
        Setiembre  => 'Sep',
        Octubre    => 'Oct',
        Noviembre  => 'Nov',
        Diciembre  => 'Dec',
    );

    # Look the name up regardless of capitalisation; an unknown name falls
    # back to its first three characters.
    my $key = ucfirst lc $month;
    my $mon = exists $mon_for{$key} ? $mon_for{$key} : substr($month, 0, 3);

    my $dm = ParseDate("$mon $day, $year $time $ampm CST");
    return UnixDate($dm, "%g");
}
# syntax highlighted by Code2HTML, v. 0.9.1