Optimize analysis using reverse loop
This commit is contained in:
parent
9da4eb3858
commit
3a246d5cd6
4
iwla.py
4
iwla.py
|
@ -256,7 +256,7 @@ class IWLA(object):
|
||||||
hit['is_page'] = self.isPage(uri)
|
hit['is_page'] = self.isPage(uri)
|
||||||
|
|
||||||
if super_hit['robot'] or\
|
if super_hit['robot'] or\
|
||||||
not int(hit['status']) in conf.viewed_http_codes:
|
not self.hasBeenViewed(hit):
|
||||||
page_key = 'not_viewed_pages'
|
page_key = 'not_viewed_pages'
|
||||||
hit_key = 'not_viewed_hits'
|
hit_key = 'not_viewed_hits'
|
||||||
else:
|
else:
|
||||||
|
@ -523,7 +523,7 @@ class IWLA(object):
|
||||||
if hit['time_decoded'].tm_mday != cur_time.tm_mday:
|
if hit['time_decoded'].tm_mday != cur_time.tm_mday:
|
||||||
break
|
break
|
||||||
if super_hit['robot'] or\
|
if super_hit['robot'] or\
|
||||||
not int(hit['status']) in conf.viewed_http_codes:
|
not self.hasBeenViewed(hit):
|
||||||
stats['not_viewed_bandwidth'] += int(hit['body_bytes_sent'])
|
stats['not_viewed_bandwidth'] += int(hit['body_bytes_sent'])
|
||||||
continue
|
continue
|
||||||
stats['viewed_bandwidth'] += int(hit['body_bytes_sent'])
|
stats['viewed_bandwidth'] += int(hit['body_bytes_sent'])
|
||||||
|
|
|
@ -109,15 +109,14 @@ class IWLAPostAnalysisReferers(IPlugin):
|
||||||
key_phrases = month_stats.get('key_phrases', {})
|
key_phrases = month_stats.get('key_phrases', {})
|
||||||
|
|
||||||
for (k, super_hit) in stats.items():
|
for (k, super_hit) in stats.items():
|
||||||
for r in super_hit['requests']:
|
for r in super_hit['requests'][::-1]:
|
||||||
if not self.iwla.isValidForCurrentAnalysis(r): continue
|
if not self.iwla.isValidForCurrentAnalysis(r): break
|
||||||
if not r['http_referer']: continue
|
if not r['http_referer']: continue
|
||||||
|
|
||||||
uri = r['extract_referer']['extract_uri']
|
uri = r['extract_referer']['extract_uri']
|
||||||
is_search_engine = False
|
|
||||||
|
|
||||||
if self.own_domain_re.match(uri): continue
|
if self.own_domain_re.match(uri): continue
|
||||||
|
|
||||||
|
is_search_engine = False
|
||||||
for (name, engine) in self.search_engines.items():
|
for (name, engine) in self.search_engines.items():
|
||||||
for (hashid, hashid_re) in engine['hashid']:
|
for (hashid, hashid_re) in engine['hashid']:
|
||||||
if not hashid_re.match(uri): continue
|
if not hashid_re.match(uri): continue
|
||||||
|
|
|
@ -46,14 +46,12 @@ class IWLAPostAnalysisTopDownloads(IPlugin):
|
||||||
|
|
||||||
for (k, super_hit) in stats.items():
|
for (k, super_hit) in stats.items():
|
||||||
if super_hit['robot']: continue
|
if super_hit['robot']: continue
|
||||||
for r in super_hit['requests']:
|
for r in super_hit['requests'][::-1]:
|
||||||
if not self.iwla.isValidForCurrentAnalysis(r) or\
|
if not self.iwla.isValidForCurrentAnalysis(r):
|
||||||
not self.iwla.hasBeenViewed(r):
|
break
|
||||||
|
if not self.iwla.hasBeenViewed(r) or\
|
||||||
|
r['is_page']:
|
||||||
continue
|
continue
|
||||||
if r['is_page']: continue
|
|
||||||
|
|
||||||
|
|
||||||
if not int(r['status']) in viewed_http_codes: continue
|
|
||||||
|
|
||||||
uri = r['extract_request']['extract_uri'].lower()
|
uri = r['extract_request']['extract_uri'].lower()
|
||||||
|
|
||||||
|
|
|
@ -40,15 +40,14 @@ class IWLAPostAnalysisTopHits(IPlugin):
|
||||||
|
|
||||||
for (k, super_hit) in stats.items():
|
for (k, super_hit) in stats.items():
|
||||||
if super_hit['robot']: continue
|
if super_hit['robot']: continue
|
||||||
for r in super_hit['requests']:
|
for r in super_hit['requests'][::-1]:
|
||||||
if r['is_page']: continue
|
if not self.iwla.isValidForCurrentAnalysis(r):
|
||||||
|
break
|
||||||
if not self.iwla.isValidForCurrentAnalysis(r) or\
|
if not self.iwla.hasBeenViewed(r) or\
|
||||||
not self.iwla.hasBeenViewed(r):
|
r['is_page']:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
uri = r['extract_request']['extract_uri']
|
uri = r['extract_request']['extract_uri'].lower()
|
||||||
|
|
||||||
uri = "%s%s" % (r.get('server_name', ''), uri)
|
uri = "%s%s" % (r.get('server_name', ''), uri)
|
||||||
|
|
||||||
if not uri in top_hits.keys():
|
if not uri in top_hits.keys():
|
||||||
|
|
|
@ -46,11 +46,11 @@ class IWLAPostAnalysisTopPages(IPlugin):
|
||||||
|
|
||||||
for (k, super_hit) in stats.items():
|
for (k, super_hit) in stats.items():
|
||||||
if super_hit['robot']: continue
|
if super_hit['robot']: continue
|
||||||
for r in super_hit['requests']:
|
for r in super_hit['requests'][::-1]:
|
||||||
if not r['is_page']: continue
|
if not self.iwla.isValidForCurrentAnalysis(r):
|
||||||
|
break
|
||||||
if not self.iwla.isValidForCurrentAnalysis(r) or\
|
if not self.iwla.hasBeenViewed(r) or\
|
||||||
not self.iwla.hasBeenViewed(r):
|
not r['is_page']:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
uri = r['extract_request']['extract_uri']
|
uri = r['extract_request']['extract_uri']
|
||||||
|
|
|
@ -54,9 +54,11 @@ class IWLAPreAnalysisPageToHit(IPlugin):
|
||||||
for (k, super_hit) in hits.items():
|
for (k, super_hit) in hits.items():
|
||||||
if super_hit['robot']: continue
|
if super_hit['robot']: continue
|
||||||
|
|
||||||
for request in super_hit['requests']:
|
for request in super_hit['requests'][::-1]:
|
||||||
if not self.iwla.isValidForCurrentAnalysis(request) or\
|
if not self.iwla.isValidForCurrentAnalysis(request):
|
||||||
not self.iwla.hasBeenViewed(request):
|
break
|
||||||
|
|
||||||
|
if not self.iwla.hasBeenViewed(request):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
uri = request['extract_request']['extract_uri']
|
uri = request['extract_request']['extract_uri']
|
||||||
|
|
Loading…
Reference in New Issue
Block a user