diff --git a/plugins/display/referers.py b/plugins/display/referers.py index a279c4a..8c5d23d 100644 --- a/plugins/display/referers.py +++ b/plugins/display/referers.py @@ -1,6 +1,6 @@ import time import re -import HTMLParser +import xml.sax.saxutils as saxutils from iwla import IWLA from iplugin import IPlugin @@ -13,12 +13,13 @@ class IWLADisplayReferers(IPlugin): super(IWLADisplayReferers, self).__init__(iwla) self.API_VERSION = 1 - def _getSearchEngine(self, engine): + def _getSearchEngine(self, hashid): + #print 'Look for %s' % engine for (k, e) in self.search_engines.items(): - for hashid in e['hashid']: - if hashid.match(engine): + for (h,h_re) in e['hashid']: + if hashid == h: return k - print 'Not found %s' % (engine) + #print 'Not found %s' % (hashid) return None def load(self): @@ -28,45 +29,29 @@ class IWLADisplayReferers(IPlugin): print 'domain_name required in conf' return False - self.own_domain_re = re.compile('.*%s.*' % (domain_name)) + self.own_domain_re = re.compile(r'.*%s.*' % (domain_name)) self.search_engines = {} - for (engine, known_url) in awstats_data.search_engines_knwown_url.items(): - self.search_engines[engine] = { - 'known_url' : re.compile(known_url + '(?P.+)'), - 'hashid' : [] - } - - for (hashid, engine) in awstats_data.search_engines_hashid.items(): - hashid_re = re.compile('.*%s.*' % (hashid)) - if not engine in self.search_engines.keys(): - self.search_engines[engine] = { - 'hashid' : [hashid_re] + for (hashid, name) in awstats_data.search_engines_hashid.items(): + hashid_re = re.compile(r'.*%s.*' % (hashid)) + if not name in self.search_engines.keys(): + self.search_engines[name] = { + 'hashid' : [(hashid, hashid_re)] } else: - self.search_engines[engine]['hashid'].append(hashid_re) - print 'Hashid %s => %s' % (engine, hashid) + self.search_engines[name]['hashid'].append((hashid, hashid_re)) + #print 'Hashid %s => %s' % (name, hashid) + + for (name, known_url) in awstats_data.search_engines_knwown_url.items(): + self.search_engines[name]['known_url'] = re.compile(known_url + '(?P.+)') for (engine, not_engine) in awstats_data.not_search_engines_keys.items(): - not_engine_re = re.compile('.*%s.*' % (not_engine)) + not_engine_re = re.compile(r'.*%s.*' % (not_engine)) key = self._getSearchEngine(engine) if key: self.search_engines[key]['not_search_engine'] = not_engine_re - for engine in awstats_data.search_engines: - engine_re = re.compile('.*%s.*' % (engine), re.IGNORECASE) - key = self._getSearchEngine(engine) - if key: - self.search_engines[key]['re'] = not_engine_re - - for (k,e) in self.search_engines.items(): - if not 're' in e.keys(): - print 'Remove %s' % k - del self.search_engines[k] - - print self.search_engines - - self.html_parser = HTMLParser.HTMLParser() + #self.html_parser = html.parser.HTMLParser() return True @@ -76,12 +61,14 @@ class IWLADisplayReferers(IPlugin): for p in parameters.split('&'): groups = key_phrase_re.match(p) if groups: - print groups.groupddict() - key_phrase = self.html_parser.unescape(groups.groupddict()['key_phrase']).lower() + key_phrase = groups.groupdict()['key_phrase'] + key_phrase = key_phrase.replace('+', ' ').lower() + key_phrase = saxutils.unescape(key_phrase) if not key_phrase in key_phrases.keys(): key_phrases[key_phrase] = 1 else: key_phrases[key_phrase] += 1 + break def hook(self, iwla): stats = iwla.getCurrentVisists() @@ -99,22 +86,20 @@ class IWLADisplayReferers(IPlugin): if self.own_domain_re.match(uri): continue - for e in self.search_engines.values(): - if e['re'].match(uri): - not_engine = e.get('not_search_engine', None) + for (name, engine) in self.search_engines.items(): + for (hashid, hashid_re) in engine['hashid']: + if not hashid_re.match(uri): continue + + not_engine = engine.get('not_search_engine', None) # Try not engine if not_engine and not_engine.match(uri): break is_search_engine = True - uri = e['name'] + uri = name parameters = r['extract_referer'].get('extract_parameters', None) - key_phrase_re = e.get('known_url', None) - - # print parameters - # print key_phrase_re + key_phrase_re = engine.get('known_url', None) self._extractKeyPhrase(key_phrase_re, parameters, key_phrases) - break if is_search_engine: @@ -218,3 +203,7 @@ class IWLADisplayReferers(IPlugin): page.appendBlock(table) display.addPage(page) + + block = DisplayHTMLRawBlock() + block.setRawHTML('All key phrases' % (filename)) + index.appendBlock(block)