WIP
This commit is contained in:
parent
b11411f115
commit
e6b31fbf8a
File diff suppressed because one or more lines are too long
|
@ -24,7 +24,7 @@ sub dumpList {
|
||||||
{
|
{
|
||||||
$first = 0;
|
$first = 0;
|
||||||
}
|
}
|
||||||
print $FIC "'.*$r.*'";
|
print $FIC "'$r'";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,7 +44,7 @@ sub dumpHash {
|
||||||
{
|
{
|
||||||
$first = 0;
|
$first = 0;
|
||||||
}
|
}
|
||||||
print $FIC "'.*$k.*' : '$v'";
|
print $FIC "'$k' : '$v'";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,14 @@ class IWLADisplayReferers(IPlugin):
|
||||||
super(IWLADisplayReferers, self).__init__(iwla)
|
super(IWLADisplayReferers, self).__init__(iwla)
|
||||||
self.API_VERSION = 1
|
self.API_VERSION = 1
|
||||||
|
|
||||||
|
def _getSearchEngine(self, engine):
|
||||||
|
for (k, e) in self.search_engines.items():
|
||||||
|
for hashid in e['hashid']:
|
||||||
|
if hashid.match(engine):
|
||||||
|
return k
|
||||||
|
print 'Not found %s' % (engine)
|
||||||
|
return None
|
||||||
|
|
||||||
def load(self):
|
def load(self):
|
||||||
domain_name = self.iwla.getConfValue('domain_name', '')
|
domain_name = self.iwla.getConfValue('domain_name', '')
|
||||||
|
|
||||||
|
@ -23,26 +31,40 @@ class IWLADisplayReferers(IPlugin):
|
||||||
self.own_domain_re = re.compile('.*%s.*' % (domain_name))
|
self.own_domain_re = re.compile('.*%s.*' % (domain_name))
|
||||||
self.search_engines = {}
|
self.search_engines = {}
|
||||||
|
|
||||||
for engine in awstats_data.search_engines:
|
for (engine, known_url) in awstats_data.search_engines_knwown_url.items():
|
||||||
self.search_engines[engine] = {
|
self.search_engines[engine] = {
|
||||||
're' : re.compile(engine, re.IGNORECASE)
|
'known_url' : re.compile(known_url + '(?P<key_phrase>.+)'),
|
||||||
|
'hashid' : []
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (hashid, engine) in awstats_data.search_engines_hashid.items():
|
||||||
|
hashid_re = re.compile('.*%s.*' % (hashid))
|
||||||
|
if not engine in self.search_engines.keys():
|
||||||
|
self.search_engines[engine] = {
|
||||||
|
'hashid' : [hashid_re]
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
self.search_engines[engine]['hashid'].append(hashid_re)
|
||||||
|
print 'Hashid %s => %s' % (engine, hashid)
|
||||||
|
|
||||||
for (engine, not_engine) in awstats_data.not_search_engines_keys.items():
|
for (engine, not_engine) in awstats_data.not_search_engines_keys.items():
|
||||||
if not engine in self.search_engines: continue
|
not_engine_re = re.compile('.*%s.*' % (not_engine))
|
||||||
self.search_engines[engine]['not_search_engine'] = \
|
key = self._getSearchEngine(engine)
|
||||||
re.compile(not_engine, re.IGNORECASE)
|
if key:
|
||||||
|
self.search_engines[key]['not_search_engine'] = not_engine_re
|
||||||
|
|
||||||
for (engine, name) in awstats_data.search_engines_hashid.items():
|
for engine in awstats_data.search_engines:
|
||||||
if not engine in self.search_engines: continue
|
engine_re = re.compile('.*%s.*' % (engine), re.IGNORECASE)
|
||||||
self.search_engines[engine]['name'] = name
|
key = self._getSearchEngine(engine)
|
||||||
|
if key:
|
||||||
|
self.search_engines[key]['re'] = not_engine_re
|
||||||
|
|
||||||
for (engine, knwown_url) in awstats_data.search_engines_knwown_url.items():
|
for (k,e) in self.search_engines.items():
|
||||||
engine = engin[2:-2]
|
if not 're' in e.keys():
|
||||||
if not engine in self.search_engines: continue
|
print 'Remove %s' % k
|
||||||
print knwown_url
|
del self.search_engines[k]
|
||||||
self.search_engines[engine]['known_url'] = re.compile(known_url + '(?P<key_phrase>.+)')
|
|
||||||
|
|
||||||
|
print self.search_engines
|
||||||
|
|
||||||
self.html_parser = HTMLParser.HTMLParser()
|
self.html_parser = HTMLParser.HTMLParser()
|
||||||
|
|
||||||
|
@ -51,7 +73,6 @@ class IWLADisplayReferers(IPlugin):
|
||||||
def _extractKeyPhrase(self, key_phrase_re, parameters, key_phrases):
|
def _extractKeyPhrase(self, key_phrase_re, parameters, key_phrases):
|
||||||
if not parameters or not key_phrase_re: return
|
if not parameters or not key_phrase_re: return
|
||||||
|
|
||||||
|
|
||||||
for p in parameters.split('&'):
|
for p in parameters.split('&'):
|
||||||
groups = key_phrase_re.match(p)
|
groups = key_phrase_re.match(p)
|
||||||
if groups:
|
if groups:
|
||||||
|
@ -89,8 +110,8 @@ class IWLADisplayReferers(IPlugin):
|
||||||
parameters = r['extract_referer'].get('extract_parameters', None)
|
parameters = r['extract_referer'].get('extract_parameters', None)
|
||||||
key_phrase_re = e.get('known_url', None)
|
key_phrase_re = e.get('known_url', None)
|
||||||
|
|
||||||
print parameters
|
# print parameters
|
||||||
print key_phrase_re
|
# print key_phrase_re
|
||||||
|
|
||||||
self._extractKeyPhrase(key_phrase_re, parameters, key_phrases)
|
self._extractKeyPhrase(key_phrase_re, parameters, key_phrases)
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,7 @@ class IWLAPreAnalysisRobots(IPlugin):
|
||||||
self.API_VERSION = 1
|
self.API_VERSION = 1
|
||||||
|
|
||||||
def load(self):
|
def load(self):
|
||||||
self.awstats_robots = map(lambda (x) : re.compile(x, re.IGNORECASE), awstats_data.robots)
|
self.awstats_robots = map(lambda (x) : re.compile(('.*%s.*') % (x), re.IGNORECASE), awstats_data.robots)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user