Fix key_phrases
This commit is contained in:
		| @@ -1,6 +1,6 @@ | ||||
| import time | ||||
| import re | ||||
| import HTMLParser | ||||
| import xml.sax.saxutils as saxutils | ||||
|  | ||||
| from iwla import IWLA | ||||
| from iplugin import IPlugin | ||||
| @@ -13,12 +13,13 @@ class IWLADisplayReferers(IPlugin): | ||||
|         super(IWLADisplayReferers, self).__init__(iwla) | ||||
|         self.API_VERSION = 1 | ||||
|  | ||||
|     def _getSearchEngine(self, engine): | ||||
|     def _getSearchEngine(self, hashid): | ||||
|         #print 'Look for %s' % engine | ||||
|         for (k, e) in self.search_engines.items(): | ||||
|             for hashid in e['hashid']: | ||||
|                 if hashid.match(engine): | ||||
|             for (h,h_re) in e['hashid']: | ||||
|                 if hashid == h: | ||||
|                     return k | ||||
|         print 'Not found %s' % (engine) | ||||
|         #print 'Not found %s' % (hashid) | ||||
|         return None | ||||
|  | ||||
|     def load(self): | ||||
| @@ -28,45 +29,29 @@ class IWLADisplayReferers(IPlugin): | ||||
|             print 'domain_name required in conf' | ||||
|             return False | ||||
|  | ||||
|         self.own_domain_re = re.compile('.*%s.*' % (domain_name)) | ||||
|         self.own_domain_re = re.compile(r'.*%s.*' % (domain_name)) | ||||
|         self.search_engines = {} | ||||
|  | ||||
|         for (engine, known_url) in awstats_data.search_engines_knwown_url.items(): | ||||
|             self.search_engines[engine] = { | ||||
|                 'known_url' : re.compile(known_url + '(?P<key_phrase>.+)'), | ||||
|                 'hashid' : [] | ||||
|                 } | ||||
|          | ||||
|         for (hashid, engine) in awstats_data.search_engines_hashid.items(): | ||||
|             hashid_re = re.compile('.*%s.*' % (hashid)) | ||||
|             if not engine in self.search_engines.keys(): | ||||
|                 self.search_engines[engine] = { | ||||
|                     'hashid' : [hashid_re] | ||||
|         for (hashid, name) in awstats_data.search_engines_hashid.items(): | ||||
|             hashid_re = re.compile(r'.*%s.*' % (hashid)) | ||||
|             if not name in self.search_engines.keys(): | ||||
|                 self.search_engines[name] = { | ||||
|                     'hashid' : [(hashid, hashid_re)] | ||||
|                     } | ||||
|             else: | ||||
|                 self.search_engines[engine]['hashid'].append(hashid_re) | ||||
|             print 'Hashid %s => %s' % (engine, hashid) | ||||
|                 self.search_engines[name]['hashid'].append((hashid, hashid_re)) | ||||
|             #print 'Hashid %s => %s' % (name, hashid) | ||||
|  | ||||
|         for (name, known_url) in awstats_data.search_engines_knwown_url.items(): | ||||
|             self.search_engines[name]['known_url'] = re.compile(known_url + '(?P<key_phrase>.+)') | ||||
|  | ||||
|         for (engine, not_engine) in awstats_data.not_search_engines_keys.items(): | ||||
|             not_engine_re = re.compile('.*%s.*' % (not_engine)) | ||||
|             not_engine_re = re.compile(r'.*%s.*' % (not_engine)) | ||||
|             key = self._getSearchEngine(engine) | ||||
|             if key: | ||||
|                 self.search_engines[key]['not_search_engine'] = not_engine_re | ||||
|  | ||||
|         for engine in awstats_data.search_engines: | ||||
|             engine_re = re.compile('.*%s.*' % (engine), re.IGNORECASE) | ||||
|             key = self._getSearchEngine(engine) | ||||
|             if key: | ||||
|                 self.search_engines[key]['re'] = not_engine_re | ||||
|  | ||||
|         for (k,e) in self.search_engines.items(): | ||||
|             if not 're' in e.keys(): | ||||
|                 print 'Remove %s' % k | ||||
|                 del self.search_engines[k] | ||||
|  | ||||
|         print self.search_engines | ||||
|  | ||||
|         self.html_parser = HTMLParser.HTMLParser() | ||||
|         #self.html_parser = html.parser.HTMLParser() | ||||
|  | ||||
|         return True | ||||
|  | ||||
| @@ -76,12 +61,14 @@ class IWLADisplayReferers(IPlugin): | ||||
|         for p in parameters.split('&'): | ||||
|             groups = key_phrase_re.match(p) | ||||
|             if groups: | ||||
|                 print groups.groupddict() | ||||
|                 key_phrase = self.html_parser.unescape(groups.groupddict()['key_phrase']).lower() | ||||
|                 key_phrase = groups.groupdict()['key_phrase'] | ||||
|                 key_phrase = key_phrase.replace('+', ' ').lower() | ||||
|                 key_phrase = saxutils.unescape(key_phrase) | ||||
|                 if not key_phrase in key_phrases.keys(): | ||||
|                     key_phrases[key_phrase] = 1 | ||||
|                 else: | ||||
|                     key_phrases[key_phrase] += 1 | ||||
|                 break | ||||
|  | ||||
|     def hook(self, iwla): | ||||
|         stats = iwla.getCurrentVisists() | ||||
| @@ -99,22 +86,20 @@ class IWLADisplayReferers(IPlugin): | ||||
|  | ||||
|                 if self.own_domain_re.match(uri): continue | ||||
|  | ||||
|                 for e in self.search_engines.values(): | ||||
|                     if e['re'].match(uri): | ||||
|                         not_engine = e.get('not_search_engine', None) | ||||
|                 for (name, engine) in self.search_engines.items(): | ||||
|                     for (hashid, hashid_re) in engine['hashid']: | ||||
|                         if not hashid_re.match(uri): continue | ||||
|  | ||||
|                         not_engine = engine.get('not_search_engine', None) | ||||
|                         # Try not engine | ||||
|                         if not_engine and not_engine.match(uri): break | ||||
|                         is_search_engine = True | ||||
|                         uri = e['name'] | ||||
|                         uri = name | ||||
|  | ||||
|                         parameters = r['extract_referer'].get('extract_parameters', None) | ||||
|                         key_phrase_re = e.get('known_url', None) | ||||
|                          | ||||
|                         # print parameters | ||||
|                         # print key_phrase_re | ||||
|                         key_phrase_re = engine.get('known_url', None) | ||||
|  | ||||
|                         self._extractKeyPhrase(key_phrase_re, parameters, key_phrases) | ||||
|  | ||||
|                         break | ||||
|  | ||||
|                 if is_search_engine: | ||||
| @@ -218,3 +203,7 @@ class IWLADisplayReferers(IPlugin): | ||||
|         page.appendBlock(table) | ||||
|                          | ||||
|         display.addPage(page) | ||||
|  | ||||
|         block = DisplayHTMLRawBlock() | ||||
|         block.setRawHTML('<a href=\'%s\'>All key phrases</a>' % (filename)) | ||||
|         index.appendBlock(block) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user