Add hit_to_page_conf in addition to page_to_hit_conf
This commit is contained in:
parent
5ccc63c7ae
commit
c87ddfb1aa
7
conf.py
7
conf.py
|
@ -16,11 +16,12 @@ DB_ROOT = './output/'
|
||||||
DISPLAY_ROOT = './output/'
|
DISPLAY_ROOT = './output/'
|
||||||
|
|
||||||
pre_analysis_hooks = ['page_to_hit', 'robots']
|
pre_analysis_hooks = ['page_to_hit', 'robots']
|
||||||
post_analysis_hooks = ['referers', 'top_pages', 'top_downloads']
|
post_analysis_hooks = ['referers', 'top_pages', 'top_downloads', 'top_hits']
|
||||||
# post_analysis_hooks = ['top_visitors', 'reverse_dns']
|
# post_analysis_hooks = ['top_visitors', 'reverse_dns']
|
||||||
display_hooks = ['top_visitors', 'all_visits', 'referers', 'top_pages', 'top_downloads']
|
display_hooks = ['top_visitors', 'all_visits', 'referers', 'top_pages', 'top_downloads', 'top_hits']
|
||||||
|
|
||||||
reverse_dns_timeout = 0.2
|
reverse_dns_timeout = 0.2
|
||||||
page_to_hit_conf = [r'^.+/logo[/]?$', r'^.+/category/.+$', r'^.+/tag/.+$', r'^.+/archive/.+$', r'^.+/ljdc[/]?$']
|
page_to_hit_conf = [r'^.+/logo[/]?$']
|
||||||
|
hit_to_page_conf = [r'^.+/category/.+$', r'^.+/tag/.+$', r'^.+/archive/.+$', r'^.+/ljdc[/]?$']
|
||||||
|
|
||||||
count_hit_only_visitors = True
|
count_hit_only_visitors = True
|
||||||
|
|
|
@ -29,7 +29,7 @@ class IWLADisplayTopDownloads(IPlugin):
|
||||||
path = '%d/%s' % (cur_time.tm_year, filename)
|
path = '%d/%s' % (cur_time.tm_year, filename)
|
||||||
|
|
||||||
page = DisplayHTMLPage(title, path)
|
page = DisplayHTMLPage(title, path)
|
||||||
table = DisplayHTMLBlockTable('Top Downloads', ['URI', 'Hit'])
|
table = DisplayHTMLBlockTable('All Downloads', ['URI', 'Hit'])
|
||||||
for (uri, entrance) in top_downloads:
|
for (uri, entrance) in top_downloads:
|
||||||
table.appendRow([uri, entrance])
|
table.appendRow([uri, entrance])
|
||||||
page.appendBlock(table)
|
page.appendBlock(table)
|
||||||
|
|
|
@ -23,7 +23,7 @@ class IWLADisplayTopPages(IPlugin):
|
||||||
index.appendBlock(table)
|
index.appendBlock(table)
|
||||||
|
|
||||||
cur_time = self.iwla.getCurTime()
|
cur_time = self.iwla.getCurTime()
|
||||||
title = time.strftime('Top Pages - %B %Y', cur_time)
|
title = time.strftime('All Pages - %B %Y', cur_time)
|
||||||
|
|
||||||
filename = 'top_pages_%d.html' % (cur_time.tm_mon)
|
filename = 'top_pages_%d.html' % (cur_time.tm_mon)
|
||||||
path = '%d/%s' % (cur_time.tm_year, filename)
|
path = '%d/%s' % (cur_time.tm_year, filename)
|
||||||
|
|
|
@ -23,7 +23,9 @@ class IWLAPostAnalysisTopPages(IPlugin):
|
||||||
for r in super_hit['requests']:
|
for r in super_hit['requests']:
|
||||||
if not r['is_page']: continue
|
if not r['is_page']: continue
|
||||||
|
|
||||||
if not self.iwla.isValidForCurrentAnalysis(r): continue
|
if not self.iwla.isValidForCurrentAnalysis(r) or\
|
||||||
|
not self.iwla.hasBeenViewed(r):
|
||||||
|
continue
|
||||||
|
|
||||||
uri = r['extract_request']['extract_uri']
|
uri = r['extract_request']['extract_uri']
|
||||||
if self.index_re.match(uri):
|
if self.index_re.match(uri):
|
||||||
|
|
|
@ -12,10 +12,15 @@ class IWLAPreAnalysisPageToHit(IPlugin):
|
||||||
self.API_VERSION = 1
|
self.API_VERSION = 1
|
||||||
|
|
||||||
def load(self):
|
def load(self):
|
||||||
# Remove logo from indefero
|
# Page to hit
|
||||||
self.regexps = self.iwla.getConfValue('page_to_hit_conf', [])
|
self.ph_regexps = self.iwla.getConfValue('page_to_hit_conf', [])
|
||||||
if not self.regexps: return False
|
if not self.ph_regexps: return False
|
||||||
self.regexps = map(lambda(r): re.compile(r), self.regexps)
|
self.ph_regexps = map(lambda(r): re.compile(r), self.ph_regexps)
|
||||||
|
|
||||||
|
# Hit to page
|
||||||
|
self.hp_regexps = self.iwla.getConfValue('hit_to_page_conf', [])
|
||||||
|
if not self.hp_regexps: return False
|
||||||
|
self.hp_regexps = map(lambda(r): re.compile(r), self.hp_regexps)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -29,12 +34,24 @@ class IWLAPreAnalysisPageToHit(IPlugin):
|
||||||
if not self.iwla.isValidForCurrentAnalysis(request) or\
|
if not self.iwla.isValidForCurrentAnalysis(request) or\
|
||||||
not self.iwla.hasBeenViewed(request):
|
not self.iwla.hasBeenViewed(request):
|
||||||
continue
|
continue
|
||||||
if not request['is_page']: continue
|
|
||||||
uri = request['extract_request']['extract_uri']
|
uri = request['extract_request']['extract_uri']
|
||||||
for regexp in self.regexps:
|
|
||||||
if regexp.match(uri):
|
if request['is_page']:
|
||||||
#print '%s is an hit' % uri
|
# Page to hit
|
||||||
request['is_page'] = False
|
for regexp in self.ph_regexps:
|
||||||
super_hit['viewed_pages'] -= 1
|
if regexp.match(uri):
|
||||||
super_hit['viewed_hits'] += 1
|
#print '%s is a hit' % (uri )
|
||||||
break
|
request['is_page'] = False
|
||||||
|
super_hit['viewed_pages'] -= 1
|
||||||
|
super_hit['viewed_hits'] += 1
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
# Hit to page
|
||||||
|
for regexp in self.hp_regexps:
|
||||||
|
if regexp.match(uri):
|
||||||
|
#print '%s is a page' % (uri )
|
||||||
|
request['is_page'] = True
|
||||||
|
super_hit['viewed_pages'] += 1
|
||||||
|
super_hit['viewed_hits'] -= 1
|
||||||
|
break
|
||||||
|
|
Loading…
Reference in New Issue
Block a user