import os
import hashlib
import gzip
import math
import codecs
from xml.dom import *
from xml.dom.minidom import parse
from xml.parsers.expat import *

# Text type shim so the module loads on both Python 2 and Python 3.
try:
    _text_type = unicode  # Python 2
except NameError:
    _text_type = str  # Python 3: every str is already text


class StrictUTF8Writer(codecs.StreamWriter):
    """In-memory writer that accumulates text and undoes XML escaping.

    Everything passed to write() is appended to ``value`` as text.  Byte
    strings are decoded as UTF-8 first.  The five predefined XML entities
    are turned back into their literal characters, so markup stored inside
    DOM text nodes is emitted verbatim by ``node.writexml(writer)``.
    """

    encode = codecs.utf_8_encode
    value = ''

    # Order matters: '&amp;' is decoded last so that '&amp;lt;' becomes the
    # literal text '&lt;' instead of being decoded twice into '<'.
    _ENTITIES = (
        ('&lt;', '<'),
        ('&gt;', '>'),
        ('&quot;', '"'),
        ('&apos;', "'"),
        ('&amp;', '&'),
    )

    def __init__(self):
        # StreamWriter.__init__ is deliberately not called: there is no
        # underlying stream, the text is only kept in memory.
        self.value = u''

    def write(self, object):
        """Append ``object`` to the buffer and return the whole buffer.

        ``object`` may be text or a UTF-8 encoded byte string.
        """
        text = object
        if not isinstance(text, _text_type):
            text = text.decode('utf-8')
        for entity, char in self._ENTITIES:
            text = text.replace(entity, char)
        self.value = self.value + text
        return self.value

    def reset(self):
        """Discard everything written so far."""
        self.value = u''

    def getvalue(self):
        """Return the accumulated text."""
        return self.value


class DynastieGenerator:
    """Base class for generators: reporting, DOM helpers, template parsing.

    Attributes:
        report: HTML log accumulated by addReport/addWarning/addError.
        somethingWrote: True once writeIfNotTheSame has (re)written a file.
    """

    # Namespace URI of the 'dyn' template elements (base, block, ...).
    URI = "http://indefero.soutade.fr/p/dynastie"

    report = ''
    somethingWrote = False

    def __init__(self):
        self.report = ''
        self.somethingWrote = False

    def addReport(self, string, color=''):
        """Append one line to the HTML report.

        The line is prefixed with the concrete class name in bold and is
        wrapped in a colored <span> when ``color`` is not empty.
        """
        line = '<b>' + self.__class__.__name__ + '</b> : ' + string
        if color != '':
            line = '<span style="color:' + color + '">' + line + '</span>'
        self.report = self.report + line + '<br/>\n'

    def addWarning(self, string):
        """Append a warning (yellow) line to the report."""
        self.addReport(string, 'yellow')

    def addError(self, string):
        """Append an error (red) line to the report."""
        self.addReport(string, 'red')

    def generate(self, blog, src, output):
        """Do nothing; this base implementation returns None."""
        return

    def computeNbPages(self, nb_post, nb_post_per_page):
        """Return how many pages are needed to show ``nb_post`` posts.

        Raises ZeroDivisionError when ``nb_post_per_page`` is 0.
        """
        # Force float division so the result is also correct on Python 2.
        res = math.ceil((nb_post * 1.0) / (nb_post_per_page * 1.0))
        return int(res)

    def writeIfNotTheSame(self, filename, node):
        """Serialize ``node`` to ``filename`` (and ``filename``.gz) if changed.

        When the file already exists with identical content nothing is
        rewritten; only a missing .gz companion is created, and neither the
        report nor ``somethingWrote`` is touched.  Otherwise both files are
        written, a report line is added and ``somethingWrote`` is set.
        """
        writer = StrictUTF8Writer()
        node.writexml(writer)
        content = writer.getvalue().encode('utf-8')
        gz_filename = filename + '.gz'

        if os.path.exists(filename):
            with open(filename, 'rb') as current:
                unchanged = current.read() == content
            if unchanged:
                if not os.path.exists(gz_filename):
                    with gzip.open(gz_filename, 'wb') as gz:
                        gz.write(content)
                return
            os.unlink(filename)

        self.addReport('Write (and compress) ' + filename)
        with open(filename, 'wb') as out:
            out.write(content)
        with gzip.open(gz_filename, 'wb') as gz:
            gz.write(content)

        self.somethingWrote = True

    def createLinkElem(self, dom, path, title):
        """Return a new <a href="path">title</a> element created by ``dom``."""
        link_elem = dom.createElement('a')
        link_elem.setAttribute('href', path)
        link_elem.appendChild(dom.createTextNode(title))
        return link_elem

    def createElement(self, dom, name='', content=''):
        """Return a new <div>, with optional class ``name`` and text ``content``."""
        div = dom.createElement('div')
        if name != '':
            div.setAttribute('class', name)
        if content != '':
            div.appendChild(dom.createTextNode(content))
        return div

    def createMeta(self, dom, name='', content=''):
        """Return a new <meta> element with optional name/content attributes."""
        meta = dom.createElement('meta')
        if name != '':
            meta.setAttribute('name', name)
        if content != '':
            meta.setAttribute('content', content)
        return meta

    def simpleTransform(self, values, dom, elem, root):
        """Copy the children of ``root`` under ``elem``, substituting values.

        A child with the 'dyn' prefix whose local name is a key of
        ``values`` is replaced by a <div class="localName"> holding the
        value: as a text node when the value is a string, otherwise the
        value is appended as a child node.  Every other child is shallow
        cloned and processed recursively.
        """
        for node in root.childNodes:
            if node.prefix == 'dyn' and node.localName in values:
                content = values[node.localName]
                if isinstance(content, (_text_type, str)):
                    new_elem = self.createElement(dom, node.localName, content)
                else:
                    new_elem = self.createElement(dom, node.localName)
                    new_elem.appendChild(content)
            else:
                new_elem = node.cloneNode(False)
                self.simpleTransform(values, dom, new_elem, node)
            elem.appendChild(new_elem)

    def replaceByText(self, dom, root, node, content):
        """Replace ``node``, a child of ``root``, by a text node."""
        new_node = dom.createTextNode(content)
        root.replaceChild(new_node, node)

    def _parse(self, hooks, posts, dom, root):
        """Walk ``root`` depth-first and run the hooks on 'dyn' elements.

        A hook is called as ``hook(posts, dom, root, node)``; the walk then
        descends into whatever it returns (nothing when it returns None).
        """
        for node in root.childNodes:
            if node.prefix == 'dyn' and node.localName in hooks:
                node = hooks[node.localName](posts, dom, root, node)
            if node is not None and node.hasChildNodes():
                self._parse(hooks, posts, dom, node)

    def parse(self, src, hooks, posts, dom, root):
        """Resolve template inheritance, then apply ``hooks``.

        Without a 'base' element in ``dom`` the hooks are simply run on
        ``root``.  With exactly one, the file named by its 'file' attribute
        is loaded from ``src``, the children of ``root`` are copied in
        place of every 'block' element whose 'name' equals the 'block'
        attribute of the base, and the merged document is parsed in turn.

        Returns the root of the final tree.  On any error a line is added
        to the report and the ``root`` received is returned unchanged.
        """
        bases = dom.getElementsByTagNameNS(self.URI, 'base')

        if not bases:
            self._parse(hooks, posts, dom, root)
            return root

        if len(bases) != 1:
            self.addError('More than one base defined')
            return root

        base = bases[0]
        if not base.hasAttribute('file'):
            self.addError('No \'file\' attribute defined')
            return root
        filename = base.getAttribute('file')

        if not base.hasAttribute('block'):
            self.addError('No \'block\' attribute defined')
            return root
        target_block = base.getAttribute('block')

        base_path = src + '/' + filename
        if not os.path.exists(base_path):
            self.addError('Base ' + filename + ' doesn\'t exists')
            return root

        try:
            dom2 = parse(base_path)
        except ExpatError:
            self.addError('Error parsing ' + base_path)
            return root

        block_found = False
        for block in dom2.getElementsByTagNameNS(self.URI, 'block'):
            if not block.hasAttribute('name'):
                self.addError('block has no attribute \'name\' in ' + filename)
                return root
            if block.getAttribute('name') != target_block:
                continue

            # NOTE(review): the copies are appended at the end of the
            # block's parent, not at the position of the block itself.
            parent = block.parentNode
            for child in root.childNodes:
                parent.appendChild(child.cloneNode(True))
            parent.removeChild(block)
            block_found = True

        if not block_found:
            self.addError('Block ' + target_block + ' not found in ' + base_path)
            return root

        # The base template may itself extend another one: hand back the
        # root produced by the recursive call, not the intermediate tree.
        return self.parse(src, hooks, posts, dom2, dom2.firstChild)