Fix key_phrases
This commit is contained in:
parent
e6b31fbf8a
commit
92533cc244
|
@ -1,6 +1,6 @@
|
||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
import HTMLParser
|
import xml.sax.saxutils as saxutils
|
||||||
|
|
||||||
from iwla import IWLA
|
from iwla import IWLA
|
||||||
from iplugin import IPlugin
|
from iplugin import IPlugin
|
||||||
|
@ -13,12 +13,13 @@ class IWLADisplayReferers(IPlugin):
|
||||||
super(IWLADisplayReferers, self).__init__(iwla)
|
super(IWLADisplayReferers, self).__init__(iwla)
|
||||||
self.API_VERSION = 1
|
self.API_VERSION = 1
|
||||||
|
|
||||||
def _getSearchEngine(self, engine):
|
def _getSearchEngine(self, hashid):
|
||||||
|
#print 'Look for %s' % engine
|
||||||
for (k, e) in self.search_engines.items():
|
for (k, e) in self.search_engines.items():
|
||||||
for hashid in e['hashid']:
|
for (h,h_re) in e['hashid']:
|
||||||
if hashid.match(engine):
|
if hashid == h:
|
||||||
return k
|
return k
|
||||||
print 'Not found %s' % (engine)
|
#print 'Not found %s' % (hashid)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def load(self):
|
def load(self):
|
||||||
|
@ -28,45 +29,29 @@ class IWLADisplayReferers(IPlugin):
|
||||||
print 'domain_name required in conf'
|
print 'domain_name required in conf'
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self.own_domain_re = re.compile('.*%s.*' % (domain_name))
|
self.own_domain_re = re.compile(r'.*%s.*' % (domain_name))
|
||||||
self.search_engines = {}
|
self.search_engines = {}
|
||||||
|
|
||||||
for (engine, known_url) in awstats_data.search_engines_knwown_url.items():
|
for (hashid, name) in awstats_data.search_engines_hashid.items():
|
||||||
self.search_engines[engine] = {
|
hashid_re = re.compile(r'.*%s.*' % (hashid))
|
||||||
'known_url' : re.compile(known_url + '(?P<key_phrase>.+)'),
|
if not name in self.search_engines.keys():
|
||||||
'hashid' : []
|
self.search_engines[name] = {
|
||||||
}
|
'hashid' : [(hashid, hashid_re)]
|
||||||
|
|
||||||
for (hashid, engine) in awstats_data.search_engines_hashid.items():
|
|
||||||
hashid_re = re.compile('.*%s.*' % (hashid))
|
|
||||||
if not engine in self.search_engines.keys():
|
|
||||||
self.search_engines[engine] = {
|
|
||||||
'hashid' : [hashid_re]
|
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
self.search_engines[engine]['hashid'].append(hashid_re)
|
self.search_engines[name]['hashid'].append((hashid, hashid_re))
|
||||||
print 'Hashid %s => %s' % (engine, hashid)
|
#print 'Hashid %s => %s' % (name, hashid)
|
||||||
|
|
||||||
|
for (name, known_url) in awstats_data.search_engines_knwown_url.items():
|
||||||
|
self.search_engines[name]['known_url'] = re.compile(known_url + '(?P<key_phrase>.+)')
|
||||||
|
|
||||||
for (engine, not_engine) in awstats_data.not_search_engines_keys.items():
|
for (engine, not_engine) in awstats_data.not_search_engines_keys.items():
|
||||||
not_engine_re = re.compile('.*%s.*' % (not_engine))
|
not_engine_re = re.compile(r'.*%s.*' % (not_engine))
|
||||||
key = self._getSearchEngine(engine)
|
key = self._getSearchEngine(engine)
|
||||||
if key:
|
if key:
|
||||||
self.search_engines[key]['not_search_engine'] = not_engine_re
|
self.search_engines[key]['not_search_engine'] = not_engine_re
|
||||||
|
|
||||||
for engine in awstats_data.search_engines:
|
#self.html_parser = html.parser.HTMLParser()
|
||||||
engine_re = re.compile('.*%s.*' % (engine), re.IGNORECASE)
|
|
||||||
key = self._getSearchEngine(engine)
|
|
||||||
if key:
|
|
||||||
self.search_engines[key]['re'] = not_engine_re
|
|
||||||
|
|
||||||
for (k,e) in self.search_engines.items():
|
|
||||||
if not 're' in e.keys():
|
|
||||||
print 'Remove %s' % k
|
|
||||||
del self.search_engines[k]
|
|
||||||
|
|
||||||
print self.search_engines
|
|
||||||
|
|
||||||
self.html_parser = HTMLParser.HTMLParser()
|
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -76,12 +61,14 @@ class IWLADisplayReferers(IPlugin):
|
||||||
for p in parameters.split('&'):
|
for p in parameters.split('&'):
|
||||||
groups = key_phrase_re.match(p)
|
groups = key_phrase_re.match(p)
|
||||||
if groups:
|
if groups:
|
||||||
print groups.groupddict()
|
key_phrase = groups.groupdict()['key_phrase']
|
||||||
key_phrase = self.html_parser.unescape(groups.groupddict()['key_phrase']).lower()
|
key_phrase = key_phrase.replace('+', ' ').lower()
|
||||||
|
key_phrase = saxutils.unescape(key_phrase)
|
||||||
if not key_phrase in key_phrases.keys():
|
if not key_phrase in key_phrases.keys():
|
||||||
key_phrases[key_phrase] = 1
|
key_phrases[key_phrase] = 1
|
||||||
else:
|
else:
|
||||||
key_phrases[key_phrase] += 1
|
key_phrases[key_phrase] += 1
|
||||||
|
break
|
||||||
|
|
||||||
def hook(self, iwla):
|
def hook(self, iwla):
|
||||||
stats = iwla.getCurrentVisists()
|
stats = iwla.getCurrentVisists()
|
||||||
|
@ -99,22 +86,20 @@ class IWLADisplayReferers(IPlugin):
|
||||||
|
|
||||||
if self.own_domain_re.match(uri): continue
|
if self.own_domain_re.match(uri): continue
|
||||||
|
|
||||||
for e in self.search_engines.values():
|
for (name, engine) in self.search_engines.items():
|
||||||
if e['re'].match(uri):
|
for (hashid, hashid_re) in engine['hashid']:
|
||||||
not_engine = e.get('not_search_engine', None)
|
if not hashid_re.match(uri): continue
|
||||||
|
|
||||||
|
not_engine = engine.get('not_search_engine', None)
|
||||||
# Try not engine
|
# Try not engine
|
||||||
if not_engine and not_engine.match(uri): break
|
if not_engine and not_engine.match(uri): break
|
||||||
is_search_engine = True
|
is_search_engine = True
|
||||||
uri = e['name']
|
uri = name
|
||||||
|
|
||||||
parameters = r['extract_referer'].get('extract_parameters', None)
|
parameters = r['extract_referer'].get('extract_parameters', None)
|
||||||
key_phrase_re = e.get('known_url', None)
|
key_phrase_re = engine.get('known_url', None)
|
||||||
|
|
||||||
# print parameters
|
|
||||||
# print key_phrase_re
|
|
||||||
|
|
||||||
self._extractKeyPhrase(key_phrase_re, parameters, key_phrases)
|
self._extractKeyPhrase(key_phrase_re, parameters, key_phrases)
|
||||||
|
|
||||||
break
|
break
|
||||||
|
|
||||||
if is_search_engine:
|
if is_search_engine:
|
||||||
|
@ -218,3 +203,7 @@ class IWLADisplayReferers(IPlugin):
|
||||||
page.appendBlock(table)
|
page.appendBlock(table)
|
||||||
|
|
||||||
display.addPage(page)
|
display.addPage(page)
|
||||||
|
|
||||||
|
block = DisplayHTMLRawBlock()
|
||||||
|
block.setRawHTML('<a href=\'%s\'>All key phrases</a>' % (filename))
|
||||||
|
index.appendBlock(block)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user