Add hit_to_page_conf in addition to page_to_hit_conf
This commit is contained in:
@@ -12,10 +12,15 @@ class IWLAPreAnalysisPageToHit(IPlugin):
|
||||
self.API_VERSION = 1
|
||||
|
||||
def load(self):
|
||||
# Remove logo from indefero
|
||||
self.regexps = self.iwla.getConfValue('page_to_hit_conf', [])
|
||||
if not self.regexps: return False
|
||||
self.regexps = map(lambda(r): re.compile(r), self.regexps)
|
||||
# Page to hit
|
||||
self.ph_regexps = self.iwla.getConfValue('page_to_hit_conf', [])
|
||||
if not self.ph_regexps: return False
|
||||
self.ph_regexps = map(lambda(r): re.compile(r), self.ph_regexps)
|
||||
|
||||
# Hit to page
|
||||
self.hp_regexps = self.iwla.getConfValue('hit_to_page_conf', [])
|
||||
if not self.hp_regexps: return False
|
||||
self.hp_regexps = map(lambda(r): re.compile(r), self.hp_regexps)
|
||||
|
||||
return True
|
||||
|
||||
@@ -29,12 +34,24 @@ class IWLAPreAnalysisPageToHit(IPlugin):
|
||||
if not self.iwla.isValidForCurrentAnalysis(request) or\
|
||||
not self.iwla.hasBeenViewed(request):
|
||||
continue
|
||||
if not request['is_page']: continue
|
||||
|
||||
uri = request['extract_request']['extract_uri']
|
||||
for regexp in self.regexps:
|
||||
if regexp.match(uri):
|
||||
#print '%s is an hit' % uri
|
||||
request['is_page'] = False
|
||||
super_hit['viewed_pages'] -= 1
|
||||
super_hit['viewed_hits'] += 1
|
||||
break
|
||||
|
||||
if request['is_page']:
|
||||
# Page to hit
|
||||
for regexp in self.ph_regexps:
|
||||
if regexp.match(uri):
|
||||
#print '%s is a hit' % (uri )
|
||||
request['is_page'] = False
|
||||
super_hit['viewed_pages'] -= 1
|
||||
super_hit['viewed_hits'] += 1
|
||||
break
|
||||
else:
|
||||
# Hit to page
|
||||
for regexp in self.hp_regexps:
|
||||
if regexp.match(uri):
|
||||
#print '%s is a page' % (uri )
|
||||
request['is_page'] = True
|
||||
super_hit['viewed_pages'] += 1
|
||||
super_hit['viewed_hits'] -= 1
|
||||
break
|
||||
|
||||
Reference in New Issue
Block a user