96 lines
1.9 KiB
Plaintext
96 lines
1.9 KiB
Plaintext
|
#!/usr/bin/perl
|
||
|
|
||
|
use LWP::Simple;
|
||
|
|
||
|
# Print a one-line summary of the command-line syntax to the currently
# selected output handle (STDOUT unless the caller has changed it).
# Takes no arguments and returns nothing.
sub usage {
    print "usage: ./ljdc in_file [out.xml]\n";
    return;
}
|
||
|
|
||
|
# ---------------------------------------------------------------------------
# Main script.
#
# Reads post URLs (one per line) from the input file, downloads each page,
# extracts the post title and image URL from the HTML, and writes one
# <entry> per successfully parsed post to an XML file.
#
# Command line: in_file [out.xml]     (output defaults to "out.xml")
# Exit status:  0 on success or when usage is shown, 1 on file I/O failure.
# ---------------------------------------------------------------------------
use strict;
use warnings;

# Escape the characters that are significant in XML text content.
sub _xml_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

# Return the numeric post id embedded in a URL ("...post/<digits>/..."),
# or undef when the URL does not contain one.
sub _extract_post_id {
    my ($url) = @_;
    if ($url =~ m{post/([0-9]+)/}) {
        return $1;
    }
    return;
}

# Scan a downloaded page line by line and return (title, image URL).
# Either value is the empty string when it was not found. Scanning stops at
# the first line that carries an image, so a title appearing after the image
# is not picked up.
sub _scan_page {
    my ($html) = @_;

    # The two page layouts in which the post image is known to appear.
    my @img_patterns = (
        qr{<p class="c1"><img.*src="([^"]+)".*/></p>},
        qr{<div class="bodytype"> <p class="centredimg"><img .*src="([^"]+)".*/>},
    );

    my ($title, $img) = ('', '');

    LINE:
    for my $line (split /\n/, $html) {
        if (length($title) == 0
            && $line =~ m{<title>Les joies du code - (.*)</title>})
        {
            $title = $1;
        }

        for my $pattern (@img_patterns) {
            if ($line =~ $pattern) {
                $img = $1;
                last LINE;
            }
        }
    }

    return ($title, $img);
}

my $size = scalar(@ARGV);

# Compare exactly: a regex match here would treat any lone argument that
# merely contains "-h" (e.g. "posts-home.txt") as a request for help.
if ($size == 0
    || ($size == 1 && ($ARGV[0] eq '-h' || $ARGV[0] eq '--help')))
{
    usage();
    exit(0);
}

my $in_file  = $ARGV[0];
my $out_file = "out.xml";

# The output name is only overridden when exactly two arguments are given.
if ($size == 2) {
    $out_file = $ARGV[1];
}

# Three-argument open keeps characters in the file name from being
# interpreted as an open mode.
my $in_fh;
if (!open($in_fh, '<', $in_file)) {
    print STDERR "Cannot open $in_file for reading: $!\n";
    exit(1);
}

my $out_fh;
if (!open($out_fh, '>:encoding(utf-8)', $out_file)) {
    print STDERR "Cannot open $out_file for writing: $!\n";
    exit(1);
}

print {$out_fh} "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
# print {$out_fh} '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">';
print {$out_fh} "<ljdc>\n";

ADDRESS:
while (defined(my $address = <$in_fh>)) {
    chomp($address);
    $address =~ s/\r\z//;    # tolerate input files with CRLF line endings

    next ADDRESS if length($address) == 0;

    my $id = _extract_post_id($address);
    if (!defined $id) {
        print "id not found for $address\n";
        next ADDRESS;
    }

    # get() returns undef when the download fails; treat that as an empty
    # page so the failure is reported through the normal error path below.
    my $contents = get($address);
    $contents = '' if !defined $contents;

    my ($title, $img) = _scan_page($contents);

    if (length($title) == 0 || length($img) == 0) {
        print "Error with $address\n";
        print "Title not defined\n" if length($title) == 0;
        print "IMG not defined\n"   if length($img) == 0;
        next ADDRESS;
    }

    # The address comes straight from the input file, so it must be escaped
    # before being embedded in XML.
    # NOTE(review): $title and $img are copied verbatim from the page's HTML
    # and are presumably already entity-encoded there; they are left as-is to
    # avoid double escaping -- confirm against real pages.
    my $xml_address = _xml_escape($address);

    print {$out_fh} " <entry>\n";
    print {$out_fh} " <id>$id</id>\n";
    print {$out_fh} " <address>$xml_address</address>\n";
    print {$out_fh} " <title>$title</title>\n";
    print {$out_fh} " <img>$img</img>\n";
    print {$out_fh} " </entry>\n";
}

print {$out_fh} "</ljdc>\n";

close($in_fh);

# Buffered write errors only surface when the handle is closed.
if (!close($out_fh)) {
    print STDERR "Error while writing $out_file: $!\n";
    exit(1);
}

print "Done !\n";
|