New format for (not_)viewed pages/hits and bandwidth, which are now recorded per day (in a dictionary where only element 0 is initialized). Element 0 is the total. WARNING: not backward compatible with previous databases.
This commit is contained in:
parent
fffab335fa
commit
007be71ad6
63
iwla.py
63
iwla.py
|
@ -95,11 +95,11 @@ visits :
|
||||||
remote_addr =>
|
remote_addr =>
|
||||||
remote_addr
|
remote_addr
|
||||||
remote_ip
|
remote_ip
|
||||||
viewed_pages
|
viewed_pages{0..31} # 0 contains total
|
||||||
viewed_hits
|
viewed_hits{0..31} # 0 contains total
|
||||||
not_viewed_pages
|
not_viewed_pages{0..31}
|
||||||
not_viewed_hits
|
not_viewed_hits{0..31}
|
||||||
bandwidth
|
bandwidth{0..31}
|
||||||
last_access
|
last_access
|
||||||
requests =>
|
requests =>
|
||||||
[fields_from_format_log]
|
[fields_from_format_log]
|
||||||
|
@ -298,8 +298,7 @@ class IWLA(object):
|
||||||
|
|
||||||
def isValidVisitor(self, hit):
|
def isValidVisitor(self, hit):
|
||||||
if hit['robot']: return False
|
if hit['robot']: return False
|
||||||
if not (conf.count_hit_only_visitors or\
|
if not conf.count_hit_only_visitors and not hit['viewed_pages'][0]:
|
||||||
hit['viewed_pages']):
|
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -318,7 +317,11 @@ class IWLA(object):
|
||||||
# Don't keep all requests for robots
|
# Don't keep all requests for robots
|
||||||
if not super_hit['robot']:
|
if not super_hit['robot']:
|
||||||
super_hit['requests'].append(hit)
|
super_hit['requests'].append(hit)
|
||||||
super_hit['bandwidth'] += int(hit['body_bytes_sent'])
|
|
||||||
|
day = self.meta_infos['last_time'].tm_mday
|
||||||
|
if self.hasBeenViewed(hit):
|
||||||
|
super_hit['bandwidth'][day] = super_hit['bandwidth'].get(day, 0) + int(hit['body_bytes_sent'])
|
||||||
|
super_hit['bandwidth'][0] += int(hit['body_bytes_sent'])
|
||||||
super_hit['last_access'] = self.meta_infos['last_time']
|
super_hit['last_access'] = self.meta_infos['last_time']
|
||||||
|
|
||||||
request = hit['extract_request']
|
request = hit['extract_request']
|
||||||
|
@ -336,19 +339,21 @@ class IWLA(object):
|
||||||
hit_key = 'viewed_hits'
|
hit_key = 'viewed_hits'
|
||||||
|
|
||||||
if hit['is_page']:
|
if hit['is_page']:
|
||||||
super_hit[page_key] += 1
|
super_hit[page_key][day] = super_hit[page_key].get(day, 0) + 1
|
||||||
|
super_hit[page_key][0] += 1
|
||||||
else:
|
else:
|
||||||
super_hit[hit_key] += 1
|
super_hit[hit_key][day] = super_hit[hit_key].get(day, 0) + 1
|
||||||
|
super_hit[hit_key][0] += 1
|
||||||
|
|
||||||
def _createVisitor(self, hit):
|
def _createVisitor(self, hit):
|
||||||
super_hit = self.current_analysis['visits'][hit['remote_addr']] = {}
|
super_hit = self.current_analysis['visits'][hit['remote_addr']] = {}
|
||||||
super_hit['remote_addr'] = hit['remote_addr']
|
super_hit['remote_addr'] = hit['remote_addr']
|
||||||
super_hit['remote_ip'] = hit['remote_addr']
|
super_hit['remote_ip'] = hit['remote_addr']
|
||||||
super_hit['viewed_pages'] = 0
|
super_hit['viewed_pages'] = {0:0}
|
||||||
super_hit['viewed_hits'] = 0
|
super_hit['viewed_hits'] = {0:0}
|
||||||
super_hit['not_viewed_pages'] = 0
|
super_hit['not_viewed_pages'] = {0:0}
|
||||||
super_hit['not_viewed_hits'] = 0
|
super_hit['not_viewed_hits'] = {0:0}
|
||||||
super_hit['bandwidth'] = 0
|
super_hit['bandwidth'] = {0:0}
|
||||||
super_hit['last_access'] = self.meta_infos['last_time']
|
super_hit['last_access'] = self.meta_infos['last_time']
|
||||||
super_hit['requests'] = []
|
super_hit['requests'] = []
|
||||||
super_hit['robot'] = False
|
super_hit['robot'] = False
|
||||||
|
@ -659,26 +664,18 @@ class IWLA(object):
|
||||||
|
|
||||||
stats = self._createEmptyStats()
|
stats = self._createEmptyStats()
|
||||||
|
|
||||||
|
day = cur_time.tm_mday
|
||||||
for (k, super_hit) in visits.items():
|
for (k, super_hit) in visits.items():
|
||||||
if super_hit['last_access'].tm_mday != cur_time.tm_mday:
|
if super_hit['last_access'].tm_mday != day:
|
||||||
continue
|
continue
|
||||||
viewed_pages = False
|
if super_hit['robot']:
|
||||||
for hit in super_hit['requests'][::-1]:
|
stats['not_viewed_bandwidth'] += super_hit['bandwidth'].get(day, 0)
|
||||||
if hit['time_decoded'].tm_mday != cur_time.tm_mday:
|
continue
|
||||||
break
|
stats['viewed_bandwidth'] += super_hit['bandwidth'].get(day, 0)
|
||||||
if super_hit['robot'] or\
|
stats['viewed_hits'] += super_hit['viewed_hits'].get(day, 0)
|
||||||
not self.hasBeenViewed(hit):
|
stats['viewed_pages'] += super_hit['viewed_pages'].get(day, 0)
|
||||||
stats['not_viewed_bandwidth'] += int(hit['body_bytes_sent'])
|
if ((conf.count_hit_only_visitors and super_hit['viewed_hits'].get(day, 0)) or\
|
||||||
continue
|
super_hit['viewed_pages'].get(day, 0)):
|
||||||
stats['viewed_bandwidth'] += int(hit['body_bytes_sent'])
|
|
||||||
if hit['is_page']:
|
|
||||||
stats['viewed_pages'] += 1
|
|
||||||
viewed_pages = True
|
|
||||||
else:
|
|
||||||
stats['viewed_hits'] += 1
|
|
||||||
if (conf.count_hit_only_visitors or\
|
|
||||||
viewed_pages) and\
|
|
||||||
not super_hit['robot']:
|
|
||||||
stats['nb_visits'] += 1
|
stats['nb_visits'] += 1
|
||||||
|
|
||||||
self.logger.info("== Stats for %d/%02d/%02d ==" % (cur_time.tm_year, cur_time.tm_mon, cur_time.tm_mday))
|
self.logger.info("== Stats for %d/%02d/%02d ==" % (cur_time.tm_year, cur_time.tm_mon, cur_time.tm_mday))
|
||||||
|
|
|
@ -78,9 +78,9 @@ class IWLADisplayAllVisits(IPlugin):
|
||||||
|
|
||||||
row = [
|
row = [
|
||||||
address,
|
address,
|
||||||
super_hit['viewed_pages'],
|
super_hit['viewed_pages'][0],
|
||||||
super_hit['viewed_hits'],
|
super_hit['viewed_hits'][0],
|
||||||
bytesToStr(super_hit['bandwidth']),
|
bytesToStr(super_hit['bandwidth'][0]),
|
||||||
time.asctime(super_hit['last_access'])
|
time.asctime(super_hit['last_access'])
|
||||||
]
|
]
|
||||||
table.appendRow(row)
|
table.appendRow(row)
|
||||||
|
|
|
@ -87,9 +87,9 @@ class IWLADisplayFeeds(IPlugin):
|
||||||
if super_hit['feed_parser'] == IWLAPostAnalysisFeeds.MERGED_FEED_PARSER:
|
if super_hit['feed_parser'] == IWLAPostAnalysisFeeds.MERGED_FEED_PARSER:
|
||||||
address += '*'
|
address += '*'
|
||||||
if super_hit['robot']:
|
if super_hit['robot']:
|
||||||
table.appendRow([address, super_hit['not_viewed_pages'], super_hit['not_viewed_hits']])
|
table.appendRow([address, super_hit['not_viewed_pages'][0], super_hit['not_viewed_hits'][0]])
|
||||||
else:
|
else:
|
||||||
table.appendRow([address, super_hit['viewed_pages'], super_hit['viewed_hits']])
|
table.appendRow([address, super_hit['viewed_pages'][0], super_hit['viewed_hits'][0]])
|
||||||
page.appendBlock(table)
|
page.appendBlock(table)
|
||||||
note = DisplayHTMLRaw(self.iwla, ('<small>*%s</small>' % (self.iwla._(u'Merged feeds parsers'))))
|
note = DisplayHTMLRaw(self.iwla, ('<small>*%s</small>' % (self.iwla._(u'Merged feeds parsers'))))
|
||||||
page.appendBlock(note)
|
page.appendBlock(note)
|
||||||
|
|
|
@ -68,7 +68,7 @@ class IWLADisplayRobotBandwidth(IPlugin):
|
||||||
for (k, super_hit) in hits.items():
|
for (k, super_hit) in hits.items():
|
||||||
if not self.iwla.isRobot(super_hit):
|
if not self.iwla.isRobot(super_hit):
|
||||||
continue
|
continue
|
||||||
bandwidths.append((super_hit, super_hit['bandwidth']))
|
bandwidths.append((super_hit, super_hit['bandwidth'][0]))
|
||||||
bandwidths.sort(key=lambda tup: tup[1], reverse=True)
|
bandwidths.sort(key=lambda tup: tup[1], reverse=True)
|
||||||
|
|
||||||
# All in a page
|
# All in a page
|
||||||
|
|
|
@ -60,11 +60,11 @@ class IWLADisplayTopVisitors(IPlugin):
|
||||||
|
|
||||||
total = [0]*5
|
total = [0]*5
|
||||||
for super_hit in hits.values():
|
for super_hit in hits.values():
|
||||||
total[1] += super_hit['viewed_pages']
|
total[1] += super_hit['viewed_pages'][0]
|
||||||
total[2] += super_hit['viewed_hits']
|
total[2] += super_hit['viewed_hits'][0]
|
||||||
total[3] += super_hit['bandwidth']
|
total[3] += super_hit['bandwidth'][0]
|
||||||
|
|
||||||
top_bandwidth = [(k,v['bandwidth']) for (k,v) in hits.items()]
|
top_bandwidth = [(k,v['bandwidth'][0]) for (k,v) in hits.items()]
|
||||||
top_bandwidth = sorted(top_bandwidth, key=lambda t: t[1], reverse=True)
|
top_bandwidth = sorted(top_bandwidth, key=lambda t: t[1], reverse=True)
|
||||||
top_visitors = [hits[h[0]] for h in top_bandwidth[:10]]
|
top_visitors = [hits[h[0]] for h in top_bandwidth[:10]]
|
||||||
|
|
||||||
|
@ -79,14 +79,14 @@ class IWLADisplayTopVisitors(IPlugin):
|
||||||
|
|
||||||
row = [
|
row = [
|
||||||
address,
|
address,
|
||||||
super_hit['viewed_pages'],
|
super_hit['viewed_pages'][0],
|
||||||
super_hit['viewed_hits'],
|
super_hit['viewed_hits'][0],
|
||||||
bytesToStr(super_hit['bandwidth']),
|
bytesToStr(super_hit['bandwidth'][0]),
|
||||||
time.asctime(super_hit['last_access'])
|
time.asctime(super_hit['last_access'])
|
||||||
]
|
]
|
||||||
total[1] -= super_hit['viewed_pages']
|
total[1] -= super_hit['viewed_pages'][0]
|
||||||
total[2] -= super_hit['viewed_hits']
|
total[2] -= super_hit['viewed_hits'][0]
|
||||||
total[3] -= super_hit['bandwidth']
|
total[3] -= super_hit['bandwidth'][0]
|
||||||
table.appendRow(row)
|
table.appendRow(row)
|
||||||
if total[1] or total[2] or total[3]:
|
if total[1] or total[2] or total[3]:
|
||||||
total[0] = self.iwla._(u'Others')
|
total[0] = self.iwla._(u'Others')
|
||||||
|
|
|
@ -78,7 +78,7 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit):
|
def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit):
|
||||||
if isFeedParser and (hit['viewed_hits'] + hit['not_viewed_hits']) == 1:
|
if isFeedParser and (hit['viewed_hits'][0] + hit['not_viewed_hits'][0]) == 1:
|
||||||
user_agent = hit['requests'][0]['http_user_agent'].lower()
|
user_agent = hit['requests'][0]['http_user_agent'].lower()
|
||||||
if one_hit_only.get(user_agent, None) is None:
|
if one_hit_only.get(user_agent, None) is None:
|
||||||
# Merged
|
# Merged
|
||||||
|
@ -117,7 +117,7 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
||||||
isFeedParser = self.FEED_PARSER
|
isFeedParser = self.FEED_PARSER
|
||||||
# Robot that views pages -> bot
|
# Robot that views pages -> bot
|
||||||
if hit['robot']:
|
if hit['robot']:
|
||||||
if hit['not_viewed_pages']:
|
if hit['not_viewed_pages'][0]:
|
||||||
isFeedParser = self.NOT_A_FEED_PARSER
|
isFeedParser = self.NOT_A_FEED_PARSER
|
||||||
break
|
break
|
||||||
if self.merge_one_hit_only_feeds_parsers:
|
if self.merge_one_hit_only_feeds_parsers:
|
||||||
|
|
|
@ -83,14 +83,17 @@ class IWLAPreAnalysisPageToHit(IPlugin):
|
||||||
|
|
||||||
uri = request['extract_request']['extract_uri']
|
uri = request['extract_request']['extract_uri']
|
||||||
|
|
||||||
|
day = request['time_decoded'].tm_mday
|
||||||
if request['is_page']:
|
if request['is_page']:
|
||||||
# Page to hit
|
# Page to hit
|
||||||
for regexp in self.ph_regexps:
|
for regexp in self.ph_regexps:
|
||||||
if regexp.match(uri):
|
if regexp.match(uri):
|
||||||
self.logger.debug('%s changed from page to hit' % (uri))
|
self.logger.debug('%s changed from page to hit' % (uri))
|
||||||
request['is_page'] = False
|
request['is_page'] = False
|
||||||
super_hit['viewed_pages'] -= 1
|
super_hit['viewed_pages'][day] -= 1
|
||||||
super_hit['viewed_hits'] += 1
|
super_hit['viewed_hits'][day] = super_hit['viewed_hits'].get(day, 0) + 1
|
||||||
|
super_hit['viewed_pages'][0] -= 1
|
||||||
|
super_hit['viewed_hits'][0] += 1
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
# Hit to page
|
# Hit to page
|
||||||
|
@ -98,6 +101,8 @@ class IWLAPreAnalysisPageToHit(IPlugin):
|
||||||
if regexp.match(uri):
|
if regexp.match(uri):
|
||||||
self.logger.debug('%s changed from hit to page' % (uri))
|
self.logger.debug('%s changed from hit to page' % (uri))
|
||||||
request['is_page'] = True
|
request['is_page'] = True
|
||||||
super_hit['viewed_pages'] += 1
|
super_hit['viewed_pages'][day] = super_hit['viewed_pages'].get(day, 0) + 1
|
||||||
super_hit['viewed_hits'] -= 1
|
super_hit['viewed_hits'][day] -= 1
|
||||||
|
super_hit['viewed_pages'][0] += 1
|
||||||
|
super_hit['viewed_hits'][0] -= 1
|
||||||
break
|
break
|
||||||
|
|
|
@ -104,12 +104,12 @@ class IWLAPreAnalysisRobots(IPlugin):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 1) no pages view --> robot
|
# 1) no pages view --> robot
|
||||||
# if not super_hit['viewed_pages']:
|
# if not super_hit['viewed_pages'][0]:
|
||||||
# super_hit['robot'] = 1
|
# super_hit['robot'] = 1
|
||||||
# continue
|
# continue
|
||||||
|
|
||||||
# 2) pages without hit --> robot
|
# 2) pages without hit --> robot
|
||||||
if not super_hit['viewed_hits']:
|
if not super_hit['viewed_hits'][0] and super_hit['viewed_pages'][0]:
|
||||||
self.logger.debug(super_hit)
|
self.logger.debug(super_hit)
|
||||||
self._setRobot(k, super_hit)
|
self._setRobot(k, super_hit)
|
||||||
continue
|
continue
|
||||||
|
@ -137,7 +137,7 @@ class IWLAPreAnalysisRobots(IPlugin):
|
||||||
self._setRobot(k, super_hit)
|
self._setRobot(k, super_hit)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if not super_hit['viewed_pages'] and \
|
if not super_hit['viewed_pages'][0] and \
|
||||||
(super_hit['viewed_hits'] and not referers):
|
(super_hit['viewed_hits'][0] and not referers):
|
||||||
self._setRobot(k, super_hit)
|
self._setRobot(k, super_hit)
|
||||||
continue
|
continue
|
||||||
|
|
Loading…
Reference in New Issue
Block a user