#!/usr/bin/perl

use LWP::Simple;
use HTML::Entities;

# Print the one-line command synopsis on the currently selected output
# handle (standard output unless the caller has changed it).
sub usage() {
    my $synopsis = "usage: ./ljdc in_file [out.xml]\n";
    return print($synopsis);
}

# ---------------------------------------------------------------------------
# Main script.
#
# Reads post URLs from in_file (one per line), downloads every page, scrapes
# the post title and the image URL out of the HTML, and writes one <entry>
# per post into an XML document (out.xml unless a second argument is given).
#
# Exit status: 0 on success or when the usage text is shown, 1 when a file
# cannot be opened/closed or when a page cannot be downloaded or scraped.
# ---------------------------------------------------------------------------
use strict;
use warnings;

my $size = scalar(@ARGV);

# Compare with `eq`: a pattern match would also treat any file whose name
# merely contains "-h" (for instance "my-hosts.txt") as a request for help.
if ($size == 0 || ($size == 1 && ($ARGV[0] eq "-h" || $ARGV[0] eq "--help"))) {
    usage();
    exit(0);
}

my $in_file  = $ARGV[0];
my $out_file = ($size == 2) ? $ARGV[1] : "out.xml";

# Characters that must be escaped inside XML text nodes; handed to
# encode_entities() so that only these are replaced (by &lt; &gt; &amp; &quot;).
my $xml_unsafe = '<>&"';

# Title patterns, tried in order until one yields a non-empty capture.
my @title_patterns = (
    qr{<title>Les joies du code  - (.*)</title>},
    qr{<title>The coding love  - (.*)</title>},
    qr{<meta property="og:title"\s+content="([^"]+)" />},
);

# Image patterns, tried in order; the first one that matches a line wins and
# ends the scan of the page. The order is significant and mirrors the page
# layouts the script has had to cope with, so do not sort or merge entries.
my @image_patterns = (
    qr{<p class="c1">.*<img.*src="([^"]+\.gif)".*/></p>},
    qr{<div class="bodytype"> <p class="centredimg"><img .*src="([^"]+\.gif)".*/>},
    qr{<div class="bodytype"> <p class="e"><img .*src="([^"]+\.gif)".*/>},
    qr{<div class="bodytype"> <p class="e"><img .*src="([^"]+\.gif)".*>},
    qr{<div class="bodytype"> <p class="e"><img .*src="([^"]+\.jpg)".*/>},
    qr{<p class="e"><img.*src="([^"]+\.jpg)">},
    qr{<p class="e"><img.*src="([^"]+\.gif)">},
    qr{</source><img src="([^"]+\.gif)">},
    qr{<p class="e"><img src="([^"]+\.gif)">},
    qr{<p class="c1">.*<img.*src="([^"]+\.gif)".*></p>},
    qr{<p class="centredimg"><img.*src="([^"]+\.gif)".*></p>},
    qr{^<p><img.*src="(http://ljdchost\.com/[^"]+\.gif)".*></p>$},
);

# Three-argument open keeps special characters in the file name from being
# interpreted as an open mode.
open(my $in_fh, '<', $in_file) or do {
    print STDERR "Cannot open $in_file for reading: $!\n";
    exit(1);
};

open(my $out_fh, '>:encoding(utf-8)', $out_file) or do {
    print STDERR "Cannot open $out_file for writing: $!\n";
    exit(1);
};

print {$out_fh} "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
print {$out_fh} "<ljdc>\n";

while (defined(my $address = <$in_fh>)) {

    # Strip the line terminator and any other trailing white space, so that
    # files with CRLF line endings work too.
    $address =~ s/\s+\z//;

    next if (length($address) == 0);

    # The numeric post id is embedded in the URL as .../post/<digits>/...
    my $id;
    if ($address =~ m{post/([0-9]+)/}) {
        $id = $1;
    }
    else {
        print "id not found for $address\n";
        next;
    }

    # get() returns undef when the download fails; stop here instead of
    # scraping (and printing) an undefined value.
    my $contents = get($address);
    if (!defined($contents)) {
        print "Error with $address\n";
        print "Page could not be downloaded\n";
        exit 1;
    }

    my $title = "";
    my $img   = "";

    LINE:
    foreach my $line (split /\n/, $contents) {

        # Title: keep looking until a non-empty one has been captured.
        if (length($title) == 0) {
            foreach my $pattern (@title_patterns) {
                if ($line =~ $pattern) {
                    $title = $1;
                    last if (length($title) > 0);
                }
            }
        }

        # Image: the first match ends the scan of this page, so a title
        # that only appears after the image line is never picked up.
        foreach my $pattern (@image_patterns) {
            if ($line =~ $pattern) {
                $img = $1;
                last LINE;
            }
        }
    }

    if (length($title) == 0 || length($img) == 0) {
        # Fail fast and dump the page so the unmatched markup can be
        # inspected. The XML written so far is left without its closing tag.
        print "Error with $address\n";
        print "Title not defined\n" if (length($title) == 0);
        print "IMG not defined\n" if (length($img) == 0);
        print {*STDERR} $contents;
        exit 1;
    }

    # Both values come out of HTML, so turn HTML entities into plain
    # characters first and then escape only what XML requires; writing them
    # verbatim produces malformed XML as soon as one contains '&' or '<'.
    $title = decode_entities($title);
    $img   = decode_entities($img);

    my $xml_address = encode_entities($address, $xml_unsafe);
    my $xml_title   = encode_entities($title,   $xml_unsafe);
    my $xml_img     = encode_entities($img,     $xml_unsafe);

    print {$out_fh} "    <entry>\n";
    print {$out_fh} "        <id>$id</id>\n";
    print {$out_fh} "        <address>$xml_address</address>\n";
    print {$out_fh} "        <title>$xml_title</title>\n";
    print {$out_fh} "        <img>$xml_img</img>\n";
    print {$out_fh} "    </entry>\n";
}

print {$out_fh} "</ljdc>\n";

close($in_fh);

# Buffered write errors (disk full, ...) only surface when the handle is
# closed, so the result has to be checked.
close($out_fh) or do {
    print STDERR "Cannot finish writing $out_file: $!\n";
    exit(1);
};

print "Done !\n";