Load raw html in articles and fix a bug in createRecents
This commit is contained in:
		| @@ -37,7 +37,7 @@ class Archive(Index): | ||||
|             #print 'Generate ' + filename | ||||
|             nodes = dom.getElementsByTagName("*") | ||||
|             nodes[0] = self.parse(src, hooks, articles, dom, nodes[0]) | ||||
|             self.writeIfNotTheSame(output + self.dirname + '/' + filename, nodes[0].toxml('utf8')) | ||||
|             self.writeIfNotTheSame(output + self.dirname + '/' + filename, nodes[0]) | ||||
|             self.cur_page = self.cur_page + 1 | ||||
|             filename = self.filename + str(self.cur_page) + '.html' | ||||
|             dom = parse(src + '/_archive.html') | ||||
|   | ||||
| @@ -41,7 +41,7 @@ class Article(Index): | ||||
|             if not os.path.exists(filename): | ||||
|                 os.makedirs(filename) | ||||
|             filename = filename + article.title_slug + '.html' | ||||
|             self.writeIfNotTheSame(filename, nodes[0].toxml('utf8')) | ||||
|             self.writeIfNotTheSame(filename, nodes[0]) | ||||
|             dom = parse(src + '/_article.html') | ||||
|  | ||||
|         if not self.somethingWrote: | ||||
|   | ||||
| @@ -73,7 +73,7 @@ class Category(Index): | ||||
|                 #print 'Generate ' + filename | ||||
|                 nodes = dom.getElementsByTagName("*") | ||||
|                 nodes[0] = self.parse(src, hooks, articles, dom, nodes[0]) | ||||
|                 self.writeIfNotTheSame(output + self.dirname + '/' + filename, nodes[0].toxml('utf8')) | ||||
|                 self.writeIfNotTheSame(output + self.dirname + '/' + filename, nodes[0]) | ||||
|                 self.cur_page = self.cur_page + 1 | ||||
|                 filename = self.filename + str(self.cur_page) + '.html' | ||||
|                 dom = parse(src + '/_category.html') | ||||
|   | ||||
| @@ -2,10 +2,42 @@ import os | ||||
| import hashlib | ||||
| import gzip | ||||
| import math | ||||
| import codecs | ||||
| from xml.dom import * | ||||
| from xml.dom.minidom import parse | ||||
| from xml.parsers.expat import * | ||||
|  | ||||
| class StrictUTF8Writer(codecs.StreamWriter): | ||||
|     '''A StreamWriter for utf8 that requires written objects be unicode''' | ||||
|     encode = codecs.utf_8_encode | ||||
|     value = '' | ||||
|  | ||||
|     def __init__(self): | ||||
|         self.value = u'' | ||||
|         pass | ||||
|  | ||||
|     def write(self, object): | ||||
|         object = object.replace('&lt;', '<') | ||||
|         object = object.replace('&gt;', '>') | ||||
|         object = object.replace('&quot;', '"') | ||||
|         object = object.replace('&apos;', "'") | ||||
|  | ||||
|         if not type(object) == unicode: | ||||
|             self.value = self.value + unicode(object, "utf-8") | ||||
|         else: | ||||
|             self.value = self.value + object | ||||
|         return self.value | ||||
|  | ||||
|     def reset(self): | ||||
|         self.value = u'' | ||||
|  | ||||
|     def getvalue(self): | ||||
|         return self.value | ||||
|         #self.stream.write(object) | ||||
|         # if not isinstance(object, unicode): | ||||
|         #     raise ValueError('write() requires unicode object') | ||||
|         # return codecs.StreamWriter.write(self, object) | ||||
|  | ||||
| class DynastieGenerator: | ||||
|  | ||||
|     URI = "http://indefero.soutade.fr/p/dynastie" | ||||
| @@ -38,14 +70,10 @@ class DynastieGenerator: | ||||
|         res = math.ceil((nb_articles*1.0)/(nb_articles_per_page*1.0)) | ||||
|         return int(res) | ||||
|  | ||||
|     def removeCDATA(self, content): | ||||
|         content = content.replace('<pre><![CDATA[', '<pre>') | ||||
|         content = content.replace(']]></pre>', '</pre>') | ||||
|  | ||||
|         return content | ||||
|  | ||||
|     def writeIfNotTheSame(self, filename, content): | ||||
|         content = self.removeCDATA(content) | ||||
|     def writeIfNotTheSame(self, filename, node): | ||||
|         writer = StrictUTF8Writer() | ||||
|         node.writexml(writer) | ||||
|         content = writer.getvalue().encode('utf-8') | ||||
|         if os.path.exists(filename): | ||||
|             src_md5 = hashlib.md5() | ||||
|             f = open(filename,'rb')  | ||||
|   | ||||
| @@ -77,16 +77,8 @@ class Index(DynastieGenerator): | ||||
|             return | ||||
|  | ||||
|         f = open(filename, 'rb') | ||||
|         content = '<div id="123">' + f.read() + '</div>' | ||||
|         article_content = f.read() | ||||
|         f.close() | ||||
|         dom2 = None | ||||
|         try: | ||||
|             dom2 = parseString(content) | ||||
|         except ExpatError, e: | ||||
|             self.addError('Error parsing ' + filename) | ||||
|             print filename | ||||
|             print e | ||||
|             pass | ||||
|  | ||||
|         self.simpleTransform(values, dom, article_elem, root) | ||||
|  | ||||
| @@ -96,9 +88,9 @@ class Index(DynastieGenerator): | ||||
|             the_class = content_node.getAttribute('class') | ||||
|             if not the_class in post_transform: | ||||
|                 continue | ||||
|             if the_class == 'article_content' and dom2 != None: | ||||
|                 for article_node in dom2.firstChild.childNodes: | ||||
|                     content_node.appendChild(article_node) | ||||
|             if the_class == 'article_content': | ||||
|                 new_node = dom.createTextNode(article_content) | ||||
|                 content_node.appendChild(new_node) | ||||
|  | ||||
|     def createArticles(self, articles, dom, root, node): | ||||
|         articles_elem = self.createElement(dom, 'articles') | ||||
| @@ -116,6 +108,10 @@ class Index(DynastieGenerator): | ||||
|         root.replaceChild(articles_elem, node) | ||||
|  | ||||
|     def createRecents(self, articles, dom, root, node): | ||||
|         if self.cur_article == len(articles): | ||||
|             root.removeChild(node) | ||||
|             return | ||||
|  | ||||
|         if node.hasAttribute("limit"): | ||||
|             nb_recents = int(node.getAttribute("limit")) | ||||
|         else: | ||||
| @@ -179,7 +175,7 @@ class Index(DynastieGenerator): | ||||
|             #print 'Generate ' + filename | ||||
|             nodes = dom.getElementsByTagName("*") | ||||
|             nodes[0] = self.parse(src, hooks, articles, dom, nodes[0]) | ||||
|             self.writeIfNotTheSame(output + '/' + filename, nodes[0].toxml(encoding='utf-8')) | ||||
|             self.writeIfNotTheSame(output + '/' + filename, nodes[0]) | ||||
|             self.cur_page = self.cur_page + 1 | ||||
|             filename = 'index' + str(self.cur_page) + '.html' | ||||
|             dom = parse(src + '/_index.html') | ||||
|   | ||||
		Reference in New Issue
	
	Block a user