import re

from iwla import IWLA
from iplugin import IPlugin

#
# Post analysis hook
#
# Count TOP downloads
#
# Plugin requirements :
#     None
#
# Conf values needed :
#     multimedia_files
#     viewed_http_codes
#
# Output files :
#     None
#
# Statistics creation :
#     None
#
# Statistics update :
# month_stats:
#     top_downloads =>
#         uri
#
# Statistics deletion :
#     None
#


class IWLAPostAnalysisTopDownloads(IPlugin):
    """Post analysis plugin that counts hits per downloaded URI.

    The counters live in ``month_stats['top_downloads']``, a dict mapping
    ``"<server_name><uri>"`` to the number of matching requests.
    """

    def __init__(self, iwla):
        """Register the plugin and declare the conf values it reads.

        :param iwla: analyzer object handed to ``IPlugin`` and later queried
            by :meth:`hook` through ``self.iwla``.
        """
        super(IWLAPostAnalysisTopDownloads, self).__init__(iwla)
        self.API_VERSION = 1
        self.conf_requires = ['multimedia_files', 'viewed_http_codes']

    def hook(self):
        """Update ``month_stats['top_downloads']`` from the current visits.

        A request is counted when all of the following hold:

        * its visit is not flagged ``robot``;
        * ``isValidForCurrentAnalysis`` and ``hasBeenViewed`` both accept it;
        * it is not a page (``is_page`` is falsy);
        * its integer status is listed in ``viewed_http_codes``;
        * its lower-cased URI does not end with any ``multimedia_files``
          suffix.

        Existing counters found in the month statistics are incremented, so
        the totals accumulate across calls.
        """
        visits = self.iwla.getCurrentVisists()
        month_stats = self.iwla.getMonthStats()

        # str.endswith() accepts a tuple of suffixes, so build it once here
        # instead of looping over the extensions for every request.
        multimedia_exts = tuple(self.iwla.getConfValue('multimedia_files'))
        viewed_http_codes = self.iwla.getConfValue('viewed_http_codes')

        top_downloads = month_stats.get('top_downloads', {})

        for super_hit in visits.values():
            if super_hit['robot']:
                continue

            for r in super_hit['requests']:
                if not self.iwla.isValidForCurrentAnalysis(r) or \
                        not self.iwla.hasBeenViewed(r):
                    continue
                if r['is_page']:
                    continue
                if int(r['status']) not in viewed_http_codes:
                    continue

                raw_uri = r['extract_request']['extract_uri']

                # Extension matching is case-insensitive, but the counter key
                # below keeps the URI's original case.
                if raw_uri.lower().endswith(multimedia_exts):
                    continue

                uri = "%s%s" % (r.get('server_name', ''), raw_uri)
                top_downloads[uri] = top_downloads.get(uri, 0) + 1

        # Store back explicitly: when the key was missing, the dict above is
        # a fresh one that month_stats does not reference yet.
        month_stats['top_downloads'] = top_downloads