#!/usr/bin/env perl

# Public domain as in http://en.wikipedia.org/wiki/Public_domain
#
#	Written initially by David Moreno Garza <damog@damog.net>.
#
#	This script generates an RSS feed for each of the weblogs
#	at www.dixo.com.
#
#	If you feel this work is useful for you, please let me know
#	at <damog@damog.net> and visit my blog: http://www.damog.net/
#
#	Have a beautiful day.

use warnings;
use strict;

use LWP;
use XML::RSS;
use Date::Manip;
use Net::SCP;

# Short names of the dixo.com weblogs to mirror as RSS feeds.  Each name is
# both the path component of the blog's URL and the base name of the
# generated feed file.
my @blogs = qw(
	mariana-h
	pada
	cha
	angel-dehesa
	zamora
	alonso-arreola
	warpig
	sonika
	sputnik
	atomix
);

# SCP session used to publish the generated feeds; files put without an
# explicit remote name land in the directory set by cwd().
my $scp = Net::SCP->new("puntodeb.com", "puntodeb");
$scp->cwd("/home/puntodeb/www/dixo");

# Build each blog's feed in /tmp, upload it and remove the local copy.
# A failure for one blog is reported and does not stop the remaining ones.
for my $persona (@blogs) {
	my $feed = "/tmp/$persona.xml";

	procesa($persona);

	# Nothing to publish if the feed file was not produced.
	unless (-e $feed) {
		warn "No feed was generated for $persona; skipping upload\n";
		next;
	}

	# Net::SCP reports failure with a false return value and keeps the
	# error text in the object's errstr slot.
	unless ($scp->put($feed)) {
		my $why = defined $scp->{errstr} ? $scp->{errstr} : 'unknown error';
		warn "Could not upload $feed: $why\n";
	}

	# The temporary file is removed whether or not the upload worked.
	unlink $feed or warn "Could not remove $feed: $!\n";
}

# Scrape the front page of one dixo.com weblog and save it as an RSS 2.0
# feed in /tmp/<persona>.xml.
#
# Argument: the blog's short name, as used in its URL.
# Returns:  whatever XML::RSS's save() returns, or nothing (after a warning)
#           when the page could not be fetched or decoded.  In that case no
#           file is written, so a previously published feed is not replaced
#           by an empty one.
sub procesa {
	my $persona = shift;
	my $url = 'http://www.dixo.com/'.$persona.'/';
	my $agent = 'damog.net dixo-rss.perl|metiche 0.0.1';

	my $rss = XML::RSS->new(version => '2.0');
	$rss->channel(	title		=>	"$persona weblog",
			link		=>	$url,
			language	=>	'es',
			description	=>	"$persona blog de dixo.com",
			copyright	=>	'Dixo.com',
			generator	=>	$agent,
			);

	my $getter = LWP::UserAgent->new;
	my $response = $getter->get($url, 'User-Agent' => $agent);

	unless ($response->is_success) {
		warn "Could not fetch $url: " . $response->status_line . "\n";
		return;
	}

	my $html = $response->decoded_content;
	unless (defined $html) {
		warn "Could not decode the content of $url\n";
		return;
	}

	# Parser state while walking the page line by line:
	#   0  looking for the post title     (<div class="ccblogger">)
	#   1  looking for the permalink/date (<a href=... Escrito en: ...)
	#   2  looking for the start of the post body (<div class="contendtext">)
	#   3  collecting body lines until one ends with </div>
	my $inside = 0;

	my $item_title;
	my $item_pubdate;
	my $item_permalink;
	my $item_content;

	# The page is split into lines (line endings kept) rather than read
	# through an in-memory filehandle, because open() on a scalar fails
	# when the decoded text holds characters above 0xFF.
	for my $line (split /^/m, $html) {
		if($inside == 1) {
			next unless $line =~ /<a href=/;
			$item_permalink = $1 if $line =~ /<a href="(.*)">Escrito en/;
			$item_pubdate = parseDate($1) if $line =~ /Escrito en: (.*)<\/a> \| </;
			$inside = 2;
		} elsif($inside == 2) {
			next unless $line =~ /<div class="contendtext">/;
			$inside = 3;
		} elsif($inside == 3) {
			# The closing </div> line is part of the stored content.
			$item_content .= $line;
			next unless $line =~ /<\/div>$/;
			$rss->add_item(	title		=>	$item_title,
					permaLink	=>	$item_permalink,
					pubDate		=>	$item_pubdate,
					description	=>	$item_content,
					);
			# Reset everything so nothing leaks into the next post.
			undef $item_title;
			undef $item_permalink;
			undef $item_pubdate;
			undef $item_content;
			$inside = 0;
		} else {
			next unless $line =~ /<div class="ccblogger">/;
			$inside = 1;
			$item_title = $1 if $line =~ /<div class="ccblogger">(.*)<\/div>/;
		}
	}

	return $rss->save("/tmp/$persona.xml");
}

# Convert a timestamp as printed on dixo.com, e.g.
# "Enero 5, 2008 a las 10:30 AM", into the string produced by Date::Manip's
# UnixDate() with the "%g" format.
#
# Argument: the Spanish timestamp text.
# Returns:  the formatted date, or nothing when the text does not have the
#           expected "<month> <day>, <year> a las <hh:mm> AM|PM" shape or
#           Date::Manip cannot parse the rebuilt date.
sub parseDate {
	my $date = shift;
	return unless defined $date;

	# Pull every field out in one match; assigning captures inside
	# "my ... if ..." statements is undefined behaviour in Perl.
	my ($month, $day, $year, $time, $ampm) =
		$date =~ /^\s*(\S+) (\d+), (\d{4}) a las (\d{1,2}:\d\d) ([AP]M)/
		or return;

	# Spanish month names mapped to the English abbreviations handed to
	# Date::Manip.  Lookup is case-insensitive.
	my %mon_for = (
		enero		=>	'Jan',
		febrero		=>	'Feb',
		marzo		=>	'Mar',
		abril		=>	'Apr',
		mayo		=>	'May',
		junio		=>	'Jun',
		julio		=>	'Jul',
		agosto		=>	'Aug',
		septiembre	=>	'Sep',
		setiembre	=>	'Sep',
		octubre		=>	'Oct',
		noviembre	=>	'Nov',
		diciembre	=>	'Dec',
		);

	# Unknown names fall back to their first three letters.
	my $mon = $mon_for{lc $month};
	$mon = substr($month, 0, 3) unless defined $mon;

	# The site's times are treated as CST.
	my $dm = ParseDate("$mon $day, $year $time $ampm CST");
	return unless $dm;

	return UnixDate($dm, "%g");
}


# syntax highlighted by Code2HTML, v. 0.9.1