Basically seems to work

This commit is contained in:
Gregory Soutade
2014-11-20 14:09:01 +01:00
parent 7bd5a42962
commit f593cc78d9
3 changed files with 9 additions and 2 deletions

View File

@@ -13,6 +13,8 @@ def hook(hits):
for p in super_hit['pages']:
if not p['is_page']: continue
if int(p['status']) != 200: continue
if logo_re.match(p['extract_request']['extract_uri']):
p['is_page'] = False
if super_hit['viewed_pages']:

View File

@@ -19,7 +19,10 @@ def hook(hits):
if not super_hit['viewed_hits']:
super_hit['robot'] = 1
continue
elif not super_hit['viewed_pages']:
# Hit only
super_hit['hit_only'] = 1
for hit in super_hit['pages']:
# 3) /robots.txt read
if hit['extract_request']['http_uri'] == '/robots.txt':