Load raw html in articles and fix a bug in createRecents
This commit is contained in:
parent
af3c792450
commit
a5c5e7edc8
|
@ -37,7 +37,7 @@ class Archive(Index):
|
||||||
#print 'Generate ' + filename
|
#print 'Generate ' + filename
|
||||||
nodes = dom.getElementsByTagName("*")
|
nodes = dom.getElementsByTagName("*")
|
||||||
nodes[0] = self.parse(src, hooks, articles, dom, nodes[0])
|
nodes[0] = self.parse(src, hooks, articles, dom, nodes[0])
|
||||||
self.writeIfNotTheSame(output + self.dirname + '/' + filename, nodes[0].toxml('utf8'))
|
self.writeIfNotTheSame(output + self.dirname + '/' + filename, nodes[0])
|
||||||
self.cur_page = self.cur_page + 1
|
self.cur_page = self.cur_page + 1
|
||||||
filename = self.filename + str(self.cur_page) + '.html'
|
filename = self.filename + str(self.cur_page) + '.html'
|
||||||
dom = parse(src + '/_archive.html')
|
dom = parse(src + '/_archive.html')
|
||||||
|
|
|
@ -41,7 +41,7 @@ class Article(Index):
|
||||||
if not os.path.exists(filename):
|
if not os.path.exists(filename):
|
||||||
os.makedirs(filename)
|
os.makedirs(filename)
|
||||||
filename = filename + article.title_slug + '.html'
|
filename = filename + article.title_slug + '.html'
|
||||||
self.writeIfNotTheSame(filename, nodes[0].toxml('utf8'))
|
self.writeIfNotTheSame(filename, nodes[0])
|
||||||
dom = parse(src + '/_article.html')
|
dom = parse(src + '/_article.html')
|
||||||
|
|
||||||
if not self.somethingWrote:
|
if not self.somethingWrote:
|
||||||
|
|
|
@ -73,7 +73,7 @@ class Category(Index):
|
||||||
#print 'Generate ' + filename
|
#print 'Generate ' + filename
|
||||||
nodes = dom.getElementsByTagName("*")
|
nodes = dom.getElementsByTagName("*")
|
||||||
nodes[0] = self.parse(src, hooks, articles, dom, nodes[0])
|
nodes[0] = self.parse(src, hooks, articles, dom, nodes[0])
|
||||||
self.writeIfNotTheSame(output + self.dirname + '/' + filename, nodes[0].toxml('utf8'))
|
self.writeIfNotTheSame(output + self.dirname + '/' + filename, nodes[0])
|
||||||
self.cur_page = self.cur_page + 1
|
self.cur_page = self.cur_page + 1
|
||||||
filename = self.filename + str(self.cur_page) + '.html'
|
filename = self.filename + str(self.cur_page) + '.html'
|
||||||
dom = parse(src + '/_category.html')
|
dom = parse(src + '/_category.html')
|
||||||
|
|
|
@ -2,10 +2,42 @@ import os
|
||||||
import hashlib
|
import hashlib
|
||||||
import gzip
|
import gzip
|
||||||
import math
|
import math
|
||||||
|
import codecs
|
||||||
from xml.dom import *
|
from xml.dom import *
|
||||||
from xml.dom.minidom import parse
|
from xml.dom.minidom import parse
|
||||||
from xml.parsers.expat import *
|
from xml.parsers.expat import *
|
||||||
|
|
||||||
|
class StrictUTF8Writer(codecs.StreamWriter):
|
||||||
|
'''A StreamWriter for utf8 that requires written objects be unicode'''
|
||||||
|
encode = codecs.utf_8_encode
|
||||||
|
value = ''
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.value = u''
|
||||||
|
pass
|
||||||
|
|
||||||
|
def write(self, object):
|
||||||
|
object = object.replace('&lt;', '<')
|
||||||
|
object = object.replace('&gt;', '>')
|
||||||
|
object = object.replace('&quot;', '"')
|
||||||
|
object = object.replace('&apos;', "'")
|
||||||
|
|
||||||
|
if not type(object) == unicode:
|
||||||
|
self.value = self.value + unicode(object, "utf-8")
|
||||||
|
else:
|
||||||
|
self.value = self.value + object
|
||||||
|
return self.value
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
self.value = u''
|
||||||
|
|
||||||
|
def getvalue(self):
|
||||||
|
return self.value
|
||||||
|
#self.stream.write(object)
|
||||||
|
# if not isinstance(object, unicode):
|
||||||
|
# raise ValueError('write() requires unicode object')
|
||||||
|
# return codecs.StreamWriter.write(self, object)
|
||||||
|
|
||||||
class DynastieGenerator:
|
class DynastieGenerator:
|
||||||
|
|
||||||
URI = "http://indefero.soutade.fr/p/dynastie"
|
URI = "http://indefero.soutade.fr/p/dynastie"
|
||||||
|
@ -38,14 +70,10 @@ class DynastieGenerator:
|
||||||
res = math.ceil((nb_articles*1.0)/(nb_articles_per_page*1.0))
|
res = math.ceil((nb_articles*1.0)/(nb_articles_per_page*1.0))
|
||||||
return int(res)
|
return int(res)
|
||||||
|
|
||||||
def removeCDATA(self, content):
|
def writeIfNotTheSame(self, filename, node):
|
||||||
content = content.replace('<pre><![CDATA[', '<pre>')
|
writer = StrictUTF8Writer()
|
||||||
content = content.replace(']]></pre>', '</pre>')
|
node.writexml(writer)
|
||||||
|
content = writer.getvalue().encode('utf-8')
|
||||||
return content
|
|
||||||
|
|
||||||
def writeIfNotTheSame(self, filename, content):
|
|
||||||
content = self.removeCDATA(content)
|
|
||||||
if os.path.exists(filename):
|
if os.path.exists(filename):
|
||||||
src_md5 = hashlib.md5()
|
src_md5 = hashlib.md5()
|
||||||
f = open(filename,'rb')
|
f = open(filename,'rb')
|
||||||
|
|
|
@ -77,16 +77,8 @@ class Index(DynastieGenerator):
|
||||||
return
|
return
|
||||||
|
|
||||||
f = open(filename, 'rb')
|
f = open(filename, 'rb')
|
||||||
content = '<div id="123">' + f.read() + '</div>'
|
article_content = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
dom2 = None
|
|
||||||
try:
|
|
||||||
dom2 = parseString(content)
|
|
||||||
except ExpatError, e:
|
|
||||||
self.addError('Error parsing ' + filename)
|
|
||||||
print filename
|
|
||||||
print e
|
|
||||||
pass
|
|
||||||
|
|
||||||
self.simpleTransform(values, dom, article_elem, root)
|
self.simpleTransform(values, dom, article_elem, root)
|
||||||
|
|
||||||
|
@ -96,9 +88,9 @@ class Index(DynastieGenerator):
|
||||||
the_class = content_node.getAttribute('class')
|
the_class = content_node.getAttribute('class')
|
||||||
if not the_class in post_transform:
|
if not the_class in post_transform:
|
||||||
continue
|
continue
|
||||||
if the_class == 'article_content' and dom2 != None:
|
if the_class == 'article_content':
|
||||||
for article_node in dom2.firstChild.childNodes:
|
new_node = dom.createTextNode(article_content)
|
||||||
content_node.appendChild(article_node)
|
content_node.appendChild(new_node)
|
||||||
|
|
||||||
def createArticles(self, articles, dom, root, node):
|
def createArticles(self, articles, dom, root, node):
|
||||||
articles_elem = self.createElement(dom, 'articles')
|
articles_elem = self.createElement(dom, 'articles')
|
||||||
|
@ -116,6 +108,10 @@ class Index(DynastieGenerator):
|
||||||
root.replaceChild(articles_elem, node)
|
root.replaceChild(articles_elem, node)
|
||||||
|
|
||||||
def createRecents(self, articles, dom, root, node):
|
def createRecents(self, articles, dom, root, node):
|
||||||
|
if self.cur_article == len(articles):
|
||||||
|
root.removeChild(node)
|
||||||
|
return
|
||||||
|
|
||||||
if node.hasAttribute("limit"):
|
if node.hasAttribute("limit"):
|
||||||
nb_recents = int(node.getAttribute("limit"))
|
nb_recents = int(node.getAttribute("limit"))
|
||||||
else:
|
else:
|
||||||
|
@ -179,7 +175,7 @@ class Index(DynastieGenerator):
|
||||||
#print 'Generate ' + filename
|
#print 'Generate ' + filename
|
||||||
nodes = dom.getElementsByTagName("*")
|
nodes = dom.getElementsByTagName("*")
|
||||||
nodes[0] = self.parse(src, hooks, articles, dom, nodes[0])
|
nodes[0] = self.parse(src, hooks, articles, dom, nodes[0])
|
||||||
self.writeIfNotTheSame(output + '/' + filename, nodes[0].toxml(encoding='utf-8'))
|
self.writeIfNotTheSame(output + '/' + filename, nodes[0])
|
||||||
self.cur_page = self.cur_page + 1
|
self.cur_page = self.cur_page + 1
|
||||||
filename = 'index' + str(self.cur_page) + '.html'
|
filename = 'index' + str(self.cur_page) + '.html'
|
||||||
dom = parse(src + '/_index.html')
|
dom = parse(src + '/_index.html')
|
||||||
|
|
Loading…
Reference in New Issue
Block a user