Optimize analysis using reverse loop

This commit is contained in:
Gregory Soutade
2014-12-14 15:10:13 +01:00
parent 9da4eb3858
commit 3a246d5cd6
6 changed files with 26 additions and 28 deletions

View File

@@ -109,15 +109,14 @@ class IWLAPostAnalysisReferers(IPlugin):
key_phrases = month_stats.get('key_phrases', {})
for (k, super_hit) in stats.items():
for r in super_hit['requests']:
if not self.iwla.isValidForCurrentAnalysis(r): continue
for r in super_hit['requests'][::-1]:
if not self.iwla.isValidForCurrentAnalysis(r): break
if not r['http_referer']: continue
uri = r['extract_referer']['extract_uri']
is_search_engine = False
if self.own_domain_re.match(uri): continue
is_search_engine = False
for (name, engine) in self.search_engines.items():
for (hashid, hashid_re) in engine['hashid']:
if not hashid_re.match(uri): continue

View File

@@ -46,14 +46,12 @@ class IWLAPostAnalysisTopDownloads(IPlugin):
for (k, super_hit) in stats.items():
if super_hit['robot']: continue
for r in super_hit['requests']:
if not self.iwla.isValidForCurrentAnalysis(r) or\
not self.iwla.hasBeenViewed(r):
for r in super_hit['requests'][::-1]:
if not self.iwla.isValidForCurrentAnalysis(r):
break
if not self.iwla.hasBeenViewed(r) or\
r['is_page']:
continue
if r['is_page']: continue
if not int(r['status']) in viewed_http_codes: continue
uri = r['extract_request']['extract_uri'].lower()

View File

@@ -40,15 +40,14 @@ class IWLAPostAnalysisTopHits(IPlugin):
for (k, super_hit) in stats.items():
if super_hit['robot']: continue
for r in super_hit['requests']:
if r['is_page']: continue
if not self.iwla.isValidForCurrentAnalysis(r) or\
not self.iwla.hasBeenViewed(r):
for r in super_hit['requests'][::-1]:
if not self.iwla.isValidForCurrentAnalysis(r):
break
if not self.iwla.hasBeenViewed(r) or\
r['is_page']:
continue
uri = r['extract_request']['extract_uri']
uri = r['extract_request']['extract_uri'].lower()
uri = "%s%s" % (r.get('server_name', ''), uri)
if not uri in top_hits.keys():

View File

@@ -46,11 +46,11 @@ class IWLAPostAnalysisTopPages(IPlugin):
for (k, super_hit) in stats.items():
if super_hit['robot']: continue
for r in super_hit['requests']:
if not r['is_page']: continue
if not self.iwla.isValidForCurrentAnalysis(r) or\
not self.iwla.hasBeenViewed(r):
for r in super_hit['requests'][::-1]:
if not self.iwla.isValidForCurrentAnalysis(r):
break
if not self.iwla.hasBeenViewed(r) or\
not r['is_page']:
continue
uri = r['extract_request']['extract_uri']

View File

@@ -54,9 +54,11 @@ class IWLAPreAnalysisPageToHit(IPlugin):
for (k, super_hit) in hits.items():
if super_hit['robot']: continue
for request in super_hit['requests']:
if not self.iwla.isValidForCurrentAnalysis(request) or\
not self.iwla.hasBeenViewed(request):
for request in super_hit['requests'][::-1]:
if not self.iwla.isValidForCurrentAnalysis(request):
break
if not self.iwla.hasBeenViewed(request):
continue
uri = request['extract_request']['extract_uri']