diff --git a/generators/archive.py b/generators/archive.py index 6f65cbe..5b8d3b0 100644 --- a/generators/archive.py +++ b/generators/archive.py @@ -37,7 +37,7 @@ class Archive(Index): #print 'Generate ' + filename nodes = dom.getElementsByTagName("*") nodes[0] = self.parse(src, hooks, articles, dom, nodes[0]) - self.writeIfNotTheSame(output + self.dirname + '/' + filename, nodes[0].toxml('utf8')) + self.writeIfNotTheSame(output + self.dirname + '/' + filename, nodes[0]) self.cur_page = self.cur_page + 1 filename = self.filename + str(self.cur_page) + '.html' dom = parse(src + '/_archive.html') diff --git a/generators/article.py b/generators/article.py index dd226d7..1eb59d3 100644 --- a/generators/article.py +++ b/generators/article.py @@ -41,7 +41,7 @@ class Article(Index): if not os.path.exists(filename): os.makedirs(filename) filename = filename + article.title_slug + '.html' - self.writeIfNotTheSame(filename, nodes[0].toxml('utf8')) + self.writeIfNotTheSame(filename, nodes[0]) dom = parse(src + '/_article.html') if not self.somethingWrote: diff --git a/generators/category.py b/generators/category.py index a7e25c0..cd61bb6 100644 --- a/generators/category.py +++ b/generators/category.py @@ -73,7 +73,7 @@ class Category(Index): #print 'Generate ' + filename nodes = dom.getElementsByTagName("*") nodes[0] = self.parse(src, hooks, articles, dom, nodes[0]) - self.writeIfNotTheSame(output + self.dirname + '/' + filename, nodes[0].toxml('utf8')) + self.writeIfNotTheSame(output + self.dirname + '/' + filename, nodes[0]) self.cur_page = self.cur_page + 1 filename = self.filename + str(self.cur_page) + '.html' dom = parse(src + '/_category.html') diff --git a/generators/generator.py b/generators/generator.py index 65da4b7..cc4d91b 100644 --- a/generators/generator.py +++ b/generators/generator.py @@ -2,10 +2,42 @@ import os import hashlib import gzip import math +import codecs from xml.dom import * from xml.dom.minidom import parse from xml.parsers.expat import * +class StrictUTF8Writer(codecs.StreamWriter): + '''A StreamWriter for utf8 that requires written objects be unicode''' + encode = codecs.utf_8_encode + value = '' + + def __init__(self): + self.value = u'' + pass + + def write(self, object): + object = object.replace('<', '<') + object = object.replace('>', '>') + object = object.replace('"', '"') + object = object.replace(''', "'") + + if not type(object) == unicode: + self.value = self.value + unicode(object, "utf-8") + else: + self.value = self.value + object + return self.value + + def reset(self): + self.value = u'' + + def getvalue(self): + return self.value + #self.stream.write(object) + # if not isinstance(object, unicode): + # raise ValueError('write() requires unicode object') + # return codecs.StreamWriter.write(self, object) + class DynastieGenerator: URI = "http://indefero.soutade.fr/p/dynastie" @@ -38,14 +70,10 @@ class DynastieGenerator: res = math.ceil((nb_articles*1.0)/(nb_articles_per_page*1.0)) return int(res) - def removeCDATA(self, content): - content = content.replace('
')
-        content = content.replace(']]>
', '') - - return content - - def writeIfNotTheSame(self, filename, content): - content = self.removeCDATA(content) + def writeIfNotTheSame(self, filename, node): + writer = StrictUTF8Writer() + node.writexml(writer) + content = writer.getvalue().encode('utf-8') if os.path.exists(filename): src_md5 = hashlib.md5() f = open(filename,'rb') diff --git a/generators/index.py b/generators/index.py index 52a5058..77ad2b5 100644 --- a/generators/index.py +++ b/generators/index.py @@ -77,16 +77,8 @@ class Index(DynastieGenerator): return f = open(filename, 'rb') - content = '
' + f.read() + '
' + article_content = f.read() f.close() - dom2 = None - try: - dom2 = parseString(content) - except ExpatError, e: - self.addError('Error parsing ' + filename) - print filename - print e - pass self.simpleTransform(values, dom, article_elem, root) @@ -96,9 +88,9 @@ class Index(DynastieGenerator): the_class = content_node.getAttribute('class') if not the_class in post_transform: continue - if the_class == 'article_content' and dom2 != None: - for article_node in dom2.firstChild.childNodes: - content_node.appendChild(article_node) + if the_class == 'article_content': + new_node = dom.createTextNode(article_content) + content_node.appendChild(new_node) def createArticles(self, articles, dom, root, node): articles_elem = self.createElement(dom, 'articles') @@ -116,6 +108,10 @@ class Index(DynastieGenerator): root.replaceChild(articles_elem, node) def createRecents(self, articles, dom, root, node): + if self.cur_article == len(articles): + root.removeChild(node) + return + if node.hasAttribute("limit"): nb_recents = int(node.getAttribute("limit")) else: @@ -179,7 +175,7 @@ class Index(DynastieGenerator): #print 'Generate ' + filename nodes = dom.getElementsByTagName("*") nodes[0] = self.parse(src, hooks, articles, dom, nodes[0]) - self.writeIfNotTheSame(output + '/' + filename, nodes[0].toxml(encoding='utf-8')) + self.writeIfNotTheSame(output + '/' + filename, nodes[0]) self.cur_page = self.cur_page + 1 filename = 'index' + str(self.cur_page) + '.html' dom = parse(src + '/_index.html')