diff --git a/iwla.py b/iwla.py index a2ed3de..e0c4eff 100755 --- a/iwla.py +++ b/iwla.py @@ -95,11 +95,11 @@ visits : remote_addr => remote_addr remote_ip - viewed_pages - viewed_hits - not_viewed_pages - not_viewed_hits - bandwidth + viewed_pages{0..31} # 0 contains total + viewed_hits{0..31} # 0 contains total + not_viewed_pages{0..31} + not_viewed_hits{0..31} + bandwidth{0..31} last_access requests => [fields_from_format_log] @@ -298,8 +298,7 @@ class IWLA(object): def isValidVisitor(self, hit): if hit['robot']: return False - if not (conf.count_hit_only_visitors or\ - hit['viewed_pages']): + if not conf.count_hit_only_visitors and not hit['viewed_pages'][0]: return False return True @@ -318,7 +317,11 @@ class IWLA(object): # Don't keep all requests for robots if not super_hit['robot']: super_hit['requests'].append(hit) - super_hit['bandwidth'] += int(hit['body_bytes_sent']) + + day = self.meta_infos['last_time'].tm_mday + if self.hasBeenViewed(hit): + super_hit['bandwidth'][day] = super_hit['bandwidth'].get(day, 0) + int(hit['body_bytes_sent']) + super_hit['bandwidth'][0] += int(hit['body_bytes_sent']) super_hit['last_access'] = self.meta_infos['last_time'] request = hit['extract_request'] @@ -336,19 +339,21 @@ class IWLA(object): hit_key = 'viewed_hits' if hit['is_page']: - super_hit[page_key] += 1 + super_hit[page_key][day] = super_hit[page_key].get(day, 0) + 1 + super_hit[page_key][0] += 1 else: - super_hit[hit_key] += 1 + super_hit[hit_key][day] = super_hit[hit_key].get(day, 0) + 1 + super_hit[hit_key][0] += 1 def _createVisitor(self, hit): super_hit = self.current_analysis['visits'][hit['remote_addr']] = {} super_hit['remote_addr'] = hit['remote_addr'] super_hit['remote_ip'] = hit['remote_addr'] - super_hit['viewed_pages'] = 0 - super_hit['viewed_hits'] = 0 - super_hit['not_viewed_pages'] = 0 - super_hit['not_viewed_hits'] = 0 - super_hit['bandwidth'] = 0 + super_hit['viewed_pages'] = {0:0} + super_hit['viewed_hits'] = {0:0} + super_hit['not_viewed_pages'] = {0:0} + super_hit['not_viewed_hits'] = {0:0} + super_hit['bandwidth'] = {0:0} super_hit['last_access'] = self.meta_infos['last_time'] super_hit['requests'] = [] super_hit['robot'] = False @@ -659,26 +664,18 @@ class IWLA(object): stats = self._createEmptyStats() + day = cur_time.tm_mday for (k, super_hit) in visits.items(): - if super_hit['last_access'].tm_mday != cur_time.tm_mday: + if super_hit['last_access'].tm_mday != day: continue - viewed_pages = False - for hit in super_hit['requests'][::-1]: - if hit['time_decoded'].tm_mday != cur_time.tm_mday: - break - if super_hit['robot'] or\ - not self.hasBeenViewed(hit): - stats['not_viewed_bandwidth'] += int(hit['body_bytes_sent']) - continue - stats['viewed_bandwidth'] += int(hit['body_bytes_sent']) - if hit['is_page']: - stats['viewed_pages'] += 1 - viewed_pages = True - else: - stats['viewed_hits'] += 1 - if (conf.count_hit_only_visitors or\ - viewed_pages) and\ - not super_hit['robot']: + if super_hit['robot']: + stats['not_viewed_bandwidth'] += super_hit['bandwidth'].get(day, 0) + continue + stats['viewed_bandwidth'] += super_hit['bandwidth'].get(day, 0) + stats['viewed_hits'] += super_hit['viewed_hits'].get(day, 0) + stats['viewed_pages'] += super_hit['viewed_pages'].get(day, 0) + if ((conf.count_hit_only_visitors and super_hit['viewed_hits'].get(day, 0)) or\ + super_hit['viewed_pages'].get(day, 0)): stats['nb_visits'] += 1 self.logger.info("== Stats for %d/%02d/%02d ==" % (cur_time.tm_year, cur_time.tm_mon, cur_time.tm_mday)) diff --git a/plugins/display/all_visits.py b/plugins/display/all_visits.py index 4dbc92c..d1b5b72 100644 --- a/plugins/display/all_visits.py +++ b/plugins/display/all_visits.py @@ -78,9 +78,9 @@ class IWLADisplayAllVisits(IPlugin): row = [ address, - super_hit['viewed_pages'], - super_hit['viewed_hits'], - bytesToStr(super_hit['bandwidth']), + super_hit['viewed_pages'][0], + super_hit['viewed_hits'][0], + bytesToStr(super_hit['bandwidth'][0]), time.asctime(super_hit['last_access']) ] table.appendRow(row) diff --git a/plugins/display/feeds.py b/plugins/display/feeds.py index c211bfe..e94671e 100644 --- a/plugins/display/feeds.py +++ b/plugins/display/feeds.py @@ -87,9 +87,9 @@ class IWLADisplayFeeds(IPlugin): if super_hit['feed_parser'] == IWLAPostAnalysisFeeds.MERGED_FEED_PARSER: address += '*' if super_hit['robot']: - table.appendRow([address, super_hit['not_viewed_pages'], super_hit['not_viewed_hits']]) + table.appendRow([address, super_hit['not_viewed_pages'][0], super_hit['not_viewed_hits'][0]]) else: - table.appendRow([address, super_hit['viewed_pages'], super_hit['viewed_hits']]) + table.appendRow([address, super_hit['viewed_pages'][0], super_hit['viewed_hits'][0]]) page.appendBlock(table) note = DisplayHTMLRaw(self.iwla, ('*%s' % (self.iwla._(u'Merged feeds parsers')))) page.appendBlock(note) diff --git a/plugins/display/robot_bandwidth.py b/plugins/display/robot_bandwidth.py index 59bf048..3e10f1c 100644 --- a/plugins/display/robot_bandwidth.py +++ b/plugins/display/robot_bandwidth.py @@ -68,7 +68,7 @@ class IWLADisplayRobotBandwidth(IPlugin): for (k, super_hit) in hits.items(): if not self.iwla.isRobot(super_hit): continue - bandwidths.append((super_hit, super_hit['bandwidth'])) + bandwidths.append((super_hit, super_hit['bandwidth'][0])) bandwidths.sort(key=lambda tup: tup[1], reverse=True) # All in a page diff --git a/plugins/display/top_visitors.py b/plugins/display/top_visitors.py index b7d3b66..06760d7 100644 --- a/plugins/display/top_visitors.py +++ b/plugins/display/top_visitors.py @@ -60,11 +60,11 @@ class IWLADisplayTopVisitors(IPlugin): total = [0]*5 for super_hit in hits.values(): - total[1] += super_hit['viewed_pages'] - total[2] += super_hit['viewed_hits'] - total[3] += super_hit['bandwidth'] + total[1] += super_hit['viewed_pages'][0] + total[2] += super_hit['viewed_hits'][0] + total[3] += super_hit['bandwidth'][0] - top_bandwidth = [(k,v['bandwidth']) for (k,v) in hits.items()] + top_bandwidth = [(k,v['bandwidth'][0]) for (k,v) in hits.items()] top_bandwidth = sorted(top_bandwidth, key=lambda t: t[1], reverse=True) top_visitors = [hits[h[0]] for h in top_bandwidth[:10]] @@ -79,14 +79,14 @@ class IWLADisplayTopVisitors(IPlugin): row = [ address, - super_hit['viewed_pages'], - super_hit['viewed_hits'], - bytesToStr(super_hit['bandwidth']), + super_hit['viewed_pages'][0], + super_hit['viewed_hits'][0], + bytesToStr(super_hit['bandwidth'][0]), time.asctime(super_hit['last_access']) ] - total[1] -= super_hit['viewed_pages'] - total[2] -= super_hit['viewed_hits'] - total[3] -= super_hit['bandwidth'] + total[1] -= super_hit['viewed_pages'][0] + total[2] -= super_hit['viewed_hits'][0] + total[3] -= super_hit['bandwidth'][0] table.appendRow(row) if total[1] or total[2] or total[3]: total[0] = self.iwla._(u'Others') diff --git a/plugins/post_analysis/feeds.py b/plugins/post_analysis/feeds.py index d054aaf..76814df 100644 --- a/plugins/post_analysis/feeds.py +++ b/plugins/post_analysis/feeds.py @@ -78,7 +78,7 @@ class IWLAPostAnalysisFeeds(IPlugin): return True def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit): - if isFeedParser and (hit['viewed_hits'] + hit['not_viewed_hits']) == 1: + if isFeedParser and (hit['viewed_hits'][0] + hit['not_viewed_hits'][0]) == 1: user_agent = hit['requests'][0]['http_user_agent'].lower() if one_hit_only.get(user_agent, None) is None: # Merged @@ -117,7 +117,7 @@ class IWLAPostAnalysisFeeds(IPlugin): isFeedParser = self.FEED_PARSER # Robot that views pages -> bot if hit['robot']: - if hit['not_viewed_pages']: + if hit['not_viewed_pages'][0]: isFeedParser = self.NOT_A_FEED_PARSER break if self.merge_one_hit_only_feeds_parsers: diff --git a/plugins/pre_analysis/page_to_hit.py b/plugins/pre_analysis/page_to_hit.py index 9d8765b..1f82771 100644 --- a/plugins/pre_analysis/page_to_hit.py +++ b/plugins/pre_analysis/page_to_hit.py @@ -83,14 +83,17 @@ class IWLAPreAnalysisPageToHit(IPlugin): uri = request['extract_request']['extract_uri'] + day = request['time_decoded'].tm_mday if request['is_page']: # Page to hit for regexp in self.ph_regexps: if regexp.match(uri): self.logger.debug('%s changed from page to hit' % (uri)) request['is_page'] = False - super_hit['viewed_pages'] -= 1 - super_hit['viewed_hits'] += 1 + super_hit['viewed_pages'][day] -= 1 + super_hit['viewed_hits'][day] = super_hit['viewed_hits'].get(day, 0) + 1 + super_hit['viewed_pages'][0] -= 1 + super_hit['viewed_hits'][0] += 1 break else: # Hit to page @@ -98,6 +101,8 @@ class IWLAPreAnalysisPageToHit(IPlugin): if regexp.match(uri): self.logger.debug('%s changed from hit to page' % (uri)) request['is_page'] = True - super_hit['viewed_pages'] += 1 - super_hit['viewed_hits'] -= 1 + super_hit['viewed_pages'][day] = super_hit['viewed_pages'].get(day, 0) + 1 + super_hit['viewed_hits'][day] -= 1 + super_hit['viewed_pages'][0] += 1 + super_hit['viewed_hits'][0] -= 1 break diff --git a/plugins/pre_analysis/robots.py b/plugins/pre_analysis/robots.py index 4bad943..375590e 100644 --- a/plugins/pre_analysis/robots.py +++ b/plugins/pre_analysis/robots.py @@ -104,12 +104,12 @@ class IWLAPreAnalysisRobots(IPlugin): continue # 1) no pages view --> robot - # if not super_hit['viewed_pages']: + # if not super_hit['viewed_pages'][0]: # super_hit['robot'] = 1 # continue # 2) pages without hit --> robot - if not super_hit['viewed_hits']: + if not super_hit['viewed_hits'][0] and super_hit['viewed_pages'][0]: self.logger.debug(super_hit) self._setRobot(k, super_hit) continue @@ -137,7 +137,7 @@ class IWLAPreAnalysisRobots(IPlugin): self._setRobot(k, super_hit) continue - if not super_hit['viewed_pages'] and \ - (super_hit['viewed_hits'] and not referers): + if not super_hit['viewed_pages'][0] and \ + (super_hit['viewed_hits'][0] and not referers): self._setRobot(k, super_hit) continue