From 2362fd1fd26460556dd06b11c55c179575d8eb30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9gory=20Soutad=C3=A9?= Date: Thu, 4 Dec 2014 21:04:41 +0100 Subject: [PATCH] Fix unicode problems Add generateHTMLLink() --- display.py | 160 ++++++++++++++++-------------- iwla.py | 2 +- plugins/display/referers.py | 10 +- plugins/display/top_downloads.py | 4 +- plugins/display/top_hits.py | 4 +- plugins/display/top_pages.py | 4 +- plugins/post_analysis/referers.py | 5 +- 7 files changed, 98 insertions(+), 91 deletions(-) diff --git a/display.py b/display.py index febb705..bb14313 100644 --- a/display.py +++ b/display.py @@ -1,8 +1,9 @@ import os +import codecs class DisplayHTMLRaw(object): - def __init__(self, html=''): + def __init__(self, html=u''): self.html = html def setRawHTML(self, html): @@ -23,31 +24,31 @@ class DisplayHTMLBlock(DisplayHTMLRaw): def __init__(self, title=''): super(DisplayHTMLBlock, self).__init__(html='') self.title = title - self.cssclass = 'iwla_block' - self.title_cssclass = 'iwla_block_title' - self.value_cssclass = 'iwla_block_value' + self.cssclass = u'iwla_block' + self.title_cssclass = u'iwla_block_title' + self.value_cssclass = u'iwla_block_value' def getTitle(self): return self.title def setTitle(self, value): - self.title = value + self.title = unicode(value) def setCSSClass(self, cssclass): - self.cssclass = cssclass + self.cssclass = unicode(cssclass) def setTitleCSSClass(self, cssclass): - self.title_cssclass = cssclass + self.title_cssclass = unicode(cssclass) def setValueCSSClass(self, cssclass): - self.value_cssclass = cssclass + self.value_cssclass = unicode(cssclass) def _buildHTML(self): - html = '
' % (self.cssclass) + html = u'
' % (self.cssclass) if self.title: - html += '
%s
' % (self.title_cssclass, self.title) - html += '
%s
' % (self.value_cssclass, self.html) - html += '
' + html += u'
%s
' % (self.title_cssclass, self.title) + html += u'
%s
' % (self.value_cssclass, self.html) + html += u'
' self.html = html @@ -55,15 +56,15 @@ class DisplayHTMLBlockTable(DisplayHTMLBlock): def __init__(self, title, cols): super(DisplayHTMLBlockTable, self).__init__(title=title) - self.cols = cols + self.cols = listToStr(cols) self.rows = [] - self.cols_cssclasses = [''] * len(cols) + self.cols_cssclasses = [u''] * len(cols) self.rows_cssclasses = [] - self.table_css = 'iwla_table' + self.table_css = u'iwla_table' def appendRow(self, row): self.rows.append(listToStr(row)) - self.rows_cssclasses.append([''] * len(row)) + self.rows_cssclasses.append([u''] * len(row)) def getCellValue(self, row, col): if row < 0 or col < 0 or\ @@ -77,14 +78,14 @@ class DisplayHTMLBlockTable(DisplayHTMLBlock): row >= len(self.rows) or col >= len(self.cols): raise ValueError('Invalid indices %d,%d' % (row, col)) - self.rows[row][col] = value + self.rows[row][col] = unicode(value) def setCellCSSClass(self, row, col, value): if row < 0 or col < 0 or\ row >= len(self.rows) or col >= len(self.cols): raise ValueError('Invalid indices %d,%d' % (row, col)) - self.rows_cssclasses[row][col] = value + self.rows_cssclasses[row][col] = unicode(value) def getCellCSSClass(self, row, col): if row < 0 or col < 0 or\ @@ -103,42 +104,42 @@ class DisplayHTMLBlockTable(DisplayHTMLBlock): if row < 0 or row >= len(self.rows): raise ValueError('Invalid indice %d' % (row)) - self.rows_cssclasses[row] = [value] * len(self.rows_cssclasses[row]) + self.rows_cssclasses[row] = [unicode(value)] * len(self.rows_cssclasses[row]) def setColCSSClass(self, col, value): if col < 0 or col >= len(self.cols): raise ValueError('Invalid indice %d' % (col)) - self.cols_cssclasses[col] = value + self.cols_cssclasses[col] = unicode(value) def setColsCSSClass(self, values): if len(values) != len(self.cols): raise ValueError('Invalid values size') - self.cols_cssclasses = values + self.cols_cssclasses = [unicode(values)] * len(self.cols) def _buildHTML(self): - style = '' - if self.table_css: style = ' class="%s"' % (self.table_css) - html = '' % (style) + style = u'' + if self.table_css: style = u' class="%s"' % (self.table_css) + html = u'' % (style) if self.cols: - html += '' + html += u'' for i in range (0, len(self.cols)): title = self.cols[i] style = self.getColCSSClass(i) - if style: style = ' class="%s"' % (style) - html += '%s' % (style, title) - html += '' + if style: style = u' class="%s"' % (style) + html += u'%s' % (style, title) + html += u'' for i in range(0, len(self.rows)): row = self.rows[i] - html += '' + html += u'' for j in range(0, len(row)): v = row[j] style = self.getCellCSSClass(i, j) - if style: style = ' class="%s"' % (style) - html += '%s' % (style, v) - html += '' - html += '' + if style: style = u' class="%s"' % (style) + html += u'%s' % (style, v) + html += u'' + html += u'' self.html += html @@ -149,14 +150,15 @@ class DisplayHTMLBlockTableWithGraph(DisplayHTMLBlockTable): def __init__(self, title, cols, short_titles=None, nb_valid_rows=0, graph_cols=None): super(DisplayHTMLBlockTableWithGraph, self).__init__(title=title, cols=cols) self.short_titles = short_titles or [] + self.short_titles = listToStr(self.short_titles) self.nb_valid_rows = nb_valid_rows # TOFIX - self.icon_path = 'resources/icon' + self.icon_path = u'resources/icon' # self.icon_path = self.iwla.getConfValue('icon_path', '/') self.raw_rows = [] self.maxes = [0] * len(cols) - self.table_graph_css = 'iwla_graph_table' - self.td_img_css = 'iwla_td_img' + self.table_graph_css = u'iwla_graph_table' + self.td_img_css = u'iwla_td_img' self.graph_cols = graph_cols or [] def appendRow(self, row): @@ -164,10 +166,10 @@ class DisplayHTMLBlockTableWithGraph(DisplayHTMLBlockTable): super(DisplayHTMLBlockTableWithGraph, self).appendRow(row) def appendShortTitle(self, short_title): - self.short_titles.append(short_title) + self.short_titles.append(unicode(short_title)) def setShortTitle(self, short_titles): - self.short_titles = short_titles + self.short_titles = listToStr(short_titles) def setNbValidRows(self, nb_valid_rows): self.nb_valid_rows = nb_valid_rows @@ -180,47 +182,47 @@ class DisplayHTMLBlockTableWithGraph(DisplayHTMLBlockTable): self.maxes[j] = row[j] def _getIconFromStyle(self, style): - if style.startswith('iwla_page'): icon = 'vp.png' - elif style.startswith('iwla_hit'): icon = 'vh.png' - elif style.startswith('iwla_bandwidth'): icon = 'vk.png' - elif style.startswith('iwla_visit'): icon = 'vv.png' - elif style.startswith('iwla_search'): icon = 'vu.png' + if style.startswith(u'iwla_page'): icon = u'vp.png' + elif style.startswith(u'iwla_hit'): icon = u'vh.png' + elif style.startswith(u'iwla_bandwidth'): icon = u'vk.png' + elif style.startswith(u'iwla_visitor'): icon = u'vu.png' + elif style.startswith(u'iwla_visit'): icon = u'vv.png' else: return '' - return '%s/%s' % (self.icon_path, icon) + return u'%s/%s' % (self.icon_path, icon) def _buildHTML(self): self._computeMax() - style = '' - if self.table_graph_css: style = ' class="%s"' % (self.table_graph_css) - html = '' % (style) - html += '' + style = u'' + if self.table_graph_css: style = u' class="%s"' % (self.table_graph_css) + html = u'' % (style) + html += u'' for i in range(0, self.nb_valid_rows): row = self.rows[i] - css = '' - if self.td_img_css: css=' class="%s"' % (self.td_img_css) - html += '' % (css) + css = u'' + if self.td_img_css: css=u' class="%s"' % (self.td_img_css) + html += u'' % (css) for j in self.graph_cols: style = self.getColCSSClass(j) icon = self._getIconFromStyle(style) if not icon: continue - if style: style = ' class="%s"' % (style) - alt = '%s: %s' % (row[j], self.cols[j]) + if style: style = u' class="%s"' % (style) + alt = u'%s: %s' % (row[j], self.cols[j]) if self.maxes[j]: height = int((self.raw_rows[i][j] * 100) / self.maxes[j]) or 1 else: height = 1 - html += '' % (style, icon, height, alt, alt) - html += '' - html += '' - html += '' + html += u'' % (style, icon, height, alt, alt) + html += u'' + html += u'' + html += u'' for i in range(0, len(self.short_titles)): style = self.getCellCSSClass(i, 0) - if style: style = ' class="%s"' % (style) - html += '%s' % (style, self.short_titles[i]) - html += '' - html += '' + if style: style = u' class="%s"' % (style) + html += u'%s' % (style, self.short_titles[i]) + html += u'' + html += u'' self.html += html @@ -229,10 +231,10 @@ class DisplayHTMLBlockTableWithGraph(DisplayHTMLBlockTable): class DisplayHTMLPage(object): def __init__(self, title, filename, css_path): - self.title = title + self.title = unicode(title) self.filename = filename self.blocks = [] - self.css_path = css_path + self.css_path = listToStr(css_path) def getFilename(self): return self.filename; @@ -253,19 +255,19 @@ class DisplayHTMLPage(object): if not os.path.exists(base): os.makedirs(base) - f = open(filename, 'w') - f.write('') - f.write('') - f.write('') - f.write('') + f = codecs.open(filename, 'w', 'utf-8') + f.write(u'') + f.write(u'') + f.write(u'') + f.write(u'') for css in self.css_path: - f.write('' % (css)) + f.write(u'' % (css)) if self.title: - f.write('%s' % (self.title)) - f.write('') + f.write(u'%s' % (self.title)) + f.write(u'') for block in self.blocks: block.build(f) - f.write('') + f.write(u'') f.close() class DisplayHTMLBuild(object): @@ -295,19 +297,25 @@ class DisplayHTMLBuild(object): page.build(root) def bytesToStr(bytes): - suffixes = ['', ' kB', ' MB', ' GB', ' TB'] + suffixes = [u'', u' kB', u' MB', u' GB', u' TB'] for i in range(0, len(suffixes)): if bytes < 1024: break bytes /= 1024.0 if i: - return '%.02f%s' % (bytes, suffixes[i]) + return u'%.02f%s' % (bytes, suffixes[i]) else: - return '%d%s' % (bytes, suffixes[i]) + return u'%d%s' % (bytes, suffixes[i]) def _toStr(v): - if type(v) != str: return str(v) + if type(v) != unicode: return unicode(v) else: return v def listToStr(l): return map(lambda(v) : _toStr(v), l) + +def generateHTMLLink(url, name=None, max_length=100, prefix=u'http'): + url = unicode(url) + if not name: name = unicode(url) + if not url.startswith(prefix): url = u'%s://%s' % (prefix, url) + return u'%s' % (url, name[:max_length]) diff --git a/iwla.py b/iwla.py index 66acaf2..634f0b0 100755 --- a/iwla.py +++ b/iwla.py @@ -301,7 +301,7 @@ class IWLA(object): cols = ['Month', 'Visitors', 'Pages', 'Hits', 'Bandwidth', 'Not viewed Bandwidth', 'Details'] graph_cols=range(1,6) months = DisplayHTMLBlockTableWithGraph(title, cols, nb_valid_rows=12, graph_cols=graph_cols) - months.setColsCSSClass(['', 'iwla_visit', 'iwla_page', 'iwla_hit', 'iwla_bandwidth', 'iwla_bandwidth', '']) + months.setColsCSSClass(['', 'iwla_visitor', 'iwla_page', 'iwla_hit', 'iwla_bandwidth', 'iwla_bandwidth', '']) total = [0] * len(cols) for i in range(1, 13): month = '%s
%d' % (months_name[i], year) diff --git a/plugins/display/referers.py b/plugins/display/referers.py index 36381bc..4e28bf1 100644 --- a/plugins/display/referers.py +++ b/plugins/display/referers.py @@ -49,12 +49,12 @@ class IWLADisplayReferers(IPlugin): table.appendRow(['External URL', '', '']) for r,_ in top_referers: - row = [r, referers[r]['pages'], referers[r]['hits']] + row = [generateHTMLLink(r), referers[r]['pages'], referers[r]['hits']] table.appendRow(row) table.appendRow(['External URL (robot)', '', '']) for r,_ in top_robots_referers: - row = [r, robots_referers[r]['pages'], robots_referers[r]['hits']] + row = [generateHTMLLink(r), robots_referers[r]['pages'], robots_referers[r]['hits']] table.appendRow(row) page.appendBlock(table) @@ -77,12 +77,12 @@ class IWLADisplayReferers(IPlugin): table.appendRow(['External URL', '', '']) for r,_ in top_referers[:10]: - row = [r, referers[r]['pages'], referers[r]['hits']] + row = [generateHTMLLink(r), referers[r]['pages'], referers[r]['hits']] table.appendRow(row) table.appendRow(['External URL (robot)', '', '']) for r,_ in top_robots_referers[:10]: - row = [r, robots_referers[r]['pages'], robots_referers[r]['hits']] + row = [generateHTMLLink(r), robots_referers[r]['pages'], robots_referers[r]['hits']] table.appendRow(row) index.appendBlock(table) @@ -99,7 +99,7 @@ class IWLADisplayReferers(IPlugin): for phrase in top_key_phrases: table.appendRow([phrase[0], phrase[1]]) page.appendBlock(table) - + display.addPage(page) link = 'All key phrases' % (filename) diff --git a/plugins/display/top_downloads.py b/plugins/display/top_downloads.py index baf6a1b..9629db2 100644 --- a/plugins/display/top_downloads.py +++ b/plugins/display/top_downloads.py @@ -23,7 +23,7 @@ class IWLADisplayTopDownloads(IPlugin): table = DisplayHTMLBlockTable('All Downloads', ['URI', 'Hit']) table.setColsCSSClass(['', 'iwla_hit']) for (uri, entrance) in top_downloads: - table.appendRow([uri, entrance]) + table.appendRow([generateHTMLLink(uri), entrance]) page.appendBlock(table) self.iwla.getDisplay().addPage(page) @@ -37,5 +37,5 @@ class IWLADisplayTopDownloads(IPlugin): table = DisplayHTMLBlockTable(title, ['URI', 'Hits']) table.setColsCSSClass(['', 'iwla_hit']) for (uri, entrance) in top_downloads[:10]: - table.appendRow([uri, entrance]) + table.appendRow([generateHTMLLink(uri), entrance]) index.appendBlock(table) diff --git a/plugins/display/top_hits.py b/plugins/display/top_hits.py index 05b74eb..107204d 100644 --- a/plugins/display/top_hits.py +++ b/plugins/display/top_hits.py @@ -23,7 +23,7 @@ class IWLADisplayTopHits(IPlugin): table = DisplayHTMLBlockTable('All Hits', ['URI', 'Entrance']) table.setColsCSSClass(['', 'iwla_hit']) for (uri, entrance) in top_hits: - table.appendRow([uri, entrance]) + table.appendRow([generateHTMLLink(uri), entrance]) page.appendBlock(table) self.iwla.getDisplay().addPage(page) @@ -37,5 +37,5 @@ class IWLADisplayTopHits(IPlugin): table = DisplayHTMLBlockTable(title, ['URI', 'Entrance']) table.setColsCSSClass(['', 'iwla_hit']) for (uri, entrance) in top_hits[:10]: - table.appendRow([uri, entrance]) + table.appendRow([generateHTMLLink(uri), entrance]) index.appendBlock(table) diff --git a/plugins/display/top_pages.py b/plugins/display/top_pages.py index 5533166..23f9dfe 100644 --- a/plugins/display/top_pages.py +++ b/plugins/display/top_pages.py @@ -23,7 +23,7 @@ class IWLADisplayTopPages(IPlugin): table = DisplayHTMLBlockTable('All Pages', ['URI', 'Entrance']) table.setColsCSSClass(['', 'iwla_hit']) for (uri, entrance) in top_pages: - table.appendRow([uri, entrance]) + table.appendRow([generateHTMLLink(uri), entrance]) page.appendBlock(table) self.iwla.getDisplay().addPage(page) @@ -37,5 +37,5 @@ class IWLADisplayTopPages(IPlugin): table = DisplayHTMLBlockTable(title, ['URI', 'Entrance']) table.setColsCSSClass(['', 'iwla_hit']) for (uri, entrance) in top_pages[:10]: - table.appendRow([uri, entrance]) + table.appendRow([generateHTMLLink(uri), entrance]) index.appendBlock(table) diff --git a/plugins/post_analysis/referers.py b/plugins/post_analysis/referers.py index f7dc714..eb6fe3e 100644 --- a/plugins/post_analysis/referers.py +++ b/plugins/post_analysis/referers.py @@ -1,5 +1,5 @@ import re -import xml.sax.saxutils as saxutils +import urllib from iwla import IWLA from iplugin import IPlugin @@ -57,8 +57,7 @@ class IWLAPostAnalysisReferers(IPlugin): groups = key_phrase_re.match(p) if groups: key_phrase = groups.groupdict()['key_phrase'] - key_phrase = key_phrase.replace('+', ' ').lower() - key_phrase = saxutils.unescape(key_phrase) + key_phrase = urllib.unquote_plus(key_phrase).decode('utf8') if not key_phrase in key_phrases.keys(): key_phrases[key_phrase] = 1 else: