Load raw html in articles and fix a bug in createRecents
This commit is contained in:
parent
af3c792450
commit
a5c5e7edc8
|
@ -37,7 +37,7 @@ class Archive(Index):
|
|||
#print 'Generate ' + filename
|
||||
nodes = dom.getElementsByTagName("*")
|
||||
nodes[0] = self.parse(src, hooks, articles, dom, nodes[0])
|
||||
self.writeIfNotTheSame(output + self.dirname + '/' + filename, nodes[0].toxml('utf8'))
|
||||
self.writeIfNotTheSame(output + self.dirname + '/' + filename, nodes[0])
|
||||
self.cur_page = self.cur_page + 1
|
||||
filename = self.filename + str(self.cur_page) + '.html'
|
||||
dom = parse(src + '/_archive.html')
|
||||
|
|
|
@ -41,7 +41,7 @@ class Article(Index):
|
|||
if not os.path.exists(filename):
|
||||
os.makedirs(filename)
|
||||
filename = filename + article.title_slug + '.html'
|
||||
self.writeIfNotTheSame(filename, nodes[0].toxml('utf8'))
|
||||
self.writeIfNotTheSame(filename, nodes[0])
|
||||
dom = parse(src + '/_article.html')
|
||||
|
||||
if not self.somethingWrote:
|
||||
|
|
|
@ -73,7 +73,7 @@ class Category(Index):
|
|||
#print 'Generate ' + filename
|
||||
nodes = dom.getElementsByTagName("*")
|
||||
nodes[0] = self.parse(src, hooks, articles, dom, nodes[0])
|
||||
self.writeIfNotTheSame(output + self.dirname + '/' + filename, nodes[0].toxml('utf8'))
|
||||
self.writeIfNotTheSame(output + self.dirname + '/' + filename, nodes[0])
|
||||
self.cur_page = self.cur_page + 1
|
||||
filename = self.filename + str(self.cur_page) + '.html'
|
||||
dom = parse(src + '/_category.html')
|
||||
|
|
|
@ -2,10 +2,42 @@ import os
|
|||
import hashlib
|
||||
import gzip
|
||||
import math
|
||||
import codecs
|
||||
from xml.dom import *
|
||||
from xml.dom.minidom import parse
|
||||
from xml.parsers.expat import *
|
||||
|
||||
class StrictUTF8Writer(codecs.StreamWriter):
    '''Accumulate written XML fragments in memory as one unicode string.

    An instance is handed to ``writexml()`` in place of a file object.  Each
    fragment is decoded to unicode when it arrives as a byte string, the
    markup entities listed in ``_ENTITIES`` are turned back into their
    literal characters, and the result is appended to ``value``.  The
    ``codecs.StreamWriter`` constructor is not called, so no underlying
    stream is attached to the writer.
    '''
    encode = codecs.utf_8_encode
    value = ''

    # (entity, literal character) pairs undone by write().  '&amp;' is not
    # in the list, so ampersands stay escaped in the collected text.
    # NOTE(review): the entity spellings were reconstructed from a rendered
    # listing in which they had been decoded to the bare characters;
    # '&apos;' is the presumed spelling of the apostrophe entity -- confirm
    # against the repository.
    _ENTITIES = (
        (u'&lt;', u'<'),
        (u'&gt;', u'>'),
        (u'&quot;', u'"'),
        (u'&apos;', u"'"),
    )

    def __init__(self):
        '''Start with an empty buffer.'''
        self.value = u''

    def write(self, object):
        '''Append ``object`` to the buffer and return the whole buffer.

        ``object`` may be unicode text or a UTF-8 encoded byte string.
        '''
        # Decode first so that the replacements below always operate on
        # text; the entities are pure ASCII, so decoding before or after
        # the replacement yields the same result.
        if isinstance(object, bytes):
            object = object.decode('utf-8')

        for entity, char in self._ENTITIES:
            object = object.replace(entity, char)

        self.value = self.value + object
        return self.value

    def reset(self):
        '''Discard everything written so far.'''
        self.value = u''

    def getvalue(self):
        '''Return the text accumulated since creation or the last reset.'''
        return self.value
|
||||
#self.stream.write(object)
|
||||
# if not isinstance(object, unicode):
|
||||
# raise ValueError('write() requires unicode object')
|
||||
# return codecs.StreamWriter.write(self, object)
|
||||
|
||||
class DynastieGenerator:
|
||||
|
||||
URI = "http://indefero.soutade.fr/p/dynastie"
|
||||
|
@ -38,14 +70,10 @@ class DynastieGenerator:
|
|||
res = math.ceil((nb_articles*1.0)/(nb_articles_per_page*1.0))
|
||||
return int(res)
|
||||
|
||||
def removeCDATA(self, content):
    '''Return content with the CDATA wrapper stripped from <pre> blocks.

    Only the exact sequences '<pre><![CDATA[' and ']]></pre>' are
    rewritten; any other CDATA section is left as it is.
    '''
    substitutions = (
        ('<pre><![CDATA[', '<pre>'),
        (']]></pre>', '</pre>'),
    )
    for wrapped, bare in substitutions:
        content = content.replace(wrapped, bare)
    return content
|
||||
|
||||
def writeIfNotTheSame(self, filename, content):
|
||||
content = self.removeCDATA(content)
|
||||
def writeIfNotTheSame(self, filename, node):
|
||||
writer = StrictUTF8Writer()
|
||||
node.writexml(writer)
|
||||
content = writer.getvalue().encode('utf-8')
|
||||
if os.path.exists(filename):
|
||||
src_md5 = hashlib.md5()
|
||||
f = open(filename,'rb')
|
||||
|
|
|
@ -77,16 +77,8 @@ class Index(DynastieGenerator):
|
|||
return
|
||||
|
||||
f = open(filename, 'rb')
|
||||
content = '<div id="123">' + f.read() + '</div>'
|
||||
article_content = f.read()
|
||||
f.close()
|
||||
dom2 = None
|
||||
try:
|
||||
dom2 = parseString(content)
|
||||
except ExpatError, e:
|
||||
self.addError('Error parsing ' + filename)
|
||||
print filename
|
||||
print e
|
||||
pass
|
||||
|
||||
self.simpleTransform(values, dom, article_elem, root)
|
||||
|
||||
|
@ -96,9 +88,9 @@ class Index(DynastieGenerator):
|
|||
the_class = content_node.getAttribute('class')
|
||||
if not the_class in post_transform:
|
||||
continue
|
||||
if the_class == 'article_content' and dom2 != None:
|
||||
for article_node in dom2.firstChild.childNodes:
|
||||
content_node.appendChild(article_node)
|
||||
if the_class == 'article_content':
|
||||
new_node = dom.createTextNode(article_content)
|
||||
content_node.appendChild(new_node)
|
||||
|
||||
def createArticles(self, articles, dom, root, node):
|
||||
articles_elem = self.createElement(dom, 'articles')
|
||||
|
@ -116,6 +108,10 @@ class Index(DynastieGenerator):
|
|||
root.replaceChild(articles_elem, node)
|
||||
|
||||
def createRecents(self, articles, dom, root, node):
|
||||
if self.cur_article == len(articles):
|
||||
root.removeChild(node)
|
||||
return
|
||||
|
||||
if node.hasAttribute("limit"):
|
||||
nb_recents = int(node.getAttribute("limit"))
|
||||
else:
|
||||
|
@ -179,7 +175,7 @@ class Index(DynastieGenerator):
|
|||
#print 'Generate ' + filename
|
||||
nodes = dom.getElementsByTagName("*")
|
||||
nodes[0] = self.parse(src, hooks, articles, dom, nodes[0])
|
||||
self.writeIfNotTheSame(output + '/' + filename, nodes[0].toxml(encoding='utf-8'))
|
||||
self.writeIfNotTheSame(output + '/' + filename, nodes[0])
|
||||
self.cur_page = self.cur_page + 1
|
||||
filename = 'index' + str(self.cur_page) + '.html'
|
||||
dom = parse(src + '/_index.html')
|
||||
|
|
Loading…
Reference in New Issue
Block a user