From adc04bf753a138913eb946608a0b0894a826eb13 Mon Sep 17 00:00:00 2001 From: Gregory Soutade Date: Sat, 11 Mar 2023 20:51:44 +0100 Subject: [PATCH] Update iwla : * Rework arg variable management * Manage dry run at top level * 'robot' property is now None by default (allow to do analysis only once) * Add --disable-display option --- iwla.py | 37 +++++++++++++++------------ plugins/post_analysis/filter_users.py | 4 --- plugins/post_analysis/reverse_dns.py | 6 ++--- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/iwla.py b/iwla.py index 704cd8f..5cf0c5c 100755 --- a/iwla.py +++ b/iwla.py @@ -134,7 +134,7 @@ class IWLA(object): API_VERSION = 1 IWLA_VERSION = '0.7' - def __init__(self, logLevel, dry_run): + def __init__(self, logLevel, args): self.meta_infos = {} self.analyse_started = False self.current_analysis = {} @@ -142,7 +142,7 @@ class IWLA(object): self.cache_plugins = {} self.display = DisplayHTMLBuild(self) self.valid_visitors = None - self.dry_run = dry_run + self.args = args self.log_format_extracted = re.sub(r'([^\$?\w])', r'\\\g<1>', conf.log_format) self.log_format_extracted = re.sub(r'\$(\w+)', '(?P<\g<1>>.+)', self.log_format_extracted) @@ -161,7 +161,7 @@ class IWLA(object): logging.basicConfig(format='%(name)s %(message)s', level=logLevel) self.logger = logging.getLogger(self.__class__.__name__) - if self.dry_run: + if self.args.dry_run: self.logger.info('==> Start (DRY RUN)') else: self.logger.info('==> Start') @@ -256,7 +256,8 @@ class IWLA(object): return gzip.open(filename, prot) def _serialize(self, obj, filename): - if self.dry_run: return + if self.args.dry_run: return + self.logger.info("==> Serialize to %s" % (filename)) base = os.path.dirname(filename) if not os.path.exists(base): os.makedirs(base) @@ -318,7 +319,8 @@ class IWLA(object): return True def isRobot(self, hit): - return hit['robot'] + # By default robot is None + return hit['robot'] == True def _appendHit(self, hit): remote_addr = hit['remote_addr'] @@ -379,7 +381,7 @@ class IWLA(object): super_hit['bandwidth'] = {0:0} super_hit['last_access'] = self.meta_infos['last_time'] super_hit['requests'] = [] - super_hit['robot'] = False + super_hit['robot'] = None super_hit['hit_only'] = 0 def _normalizeURI(self, uri, removeFileSlash=False): @@ -578,7 +580,7 @@ class IWLA(object): if not os.path.exists(gz_path) or\ os.stat(path).st_mtime > os.stat(gz_path).st_mtime: - if self.dry_run: return + if self.args.dry_run: return with open(path, 'rb') as f_in, gzip.open(gz_path, 'wb') as f_out: f_out.write(f_in.read()) @@ -592,6 +594,8 @@ class IWLA(object): break def _generateDisplay(self): + if self.args.dry_run or\ + self.args.disable_display: return self._generateDisplayDaysStats() self._callPlugins(conf.DISPLAY_HOOK_DIRECTORY) self._generateDisplayWholeMonthStats() @@ -639,7 +643,7 @@ class IWLA(object): self._callPlugins(conf.POST_HOOK_DIRECTORY) - if args.display_only: + if self.args.display_only: if not 'stats' in self.meta_infos.keys(): self.meta_infos['stats'] = {} self._generateDisplay() @@ -653,7 +657,6 @@ class IWLA(object): path = self.getDBFilename(cur_time) - self.logger.info("==> Serialize to %s" % (path)) self._serialize(self.current_analysis, path) # Save month stats @@ -666,7 +669,6 @@ class IWLA(object): self.meta_infos['stats'][year][month] = duplicated_stats meta_path = os.path.join(conf.DB_ROOT, conf.META_FILENAME) - self.logger.info("==> Serialize to %s" % (meta_path)) self._serialize(self.meta_infos, meta_path) self._generateDisplay() @@ -766,8 +768,7 @@ class IWLA(object): if os.path.exists(output_path): shutil.rmtree(output_path) month += 1 - def start(self, _file, args): - self.args = args + def start(self, _file): self.start_time = datetime.now() meta_path = os.path.join(conf.DB_ROOT, conf.META_FILENAME) @@ -876,7 +877,11 @@ if __name__ == '__main__': parser.add_argument('-p', '--display-only', dest='display_only', action='store_true', default=False, - help='Only generate display') + help='Only generate display (don\'t write database)') + + parser.add_argument('-P', '--disable-display', dest='disable_display', action='store_true', + default=False, + help='Don\'t generate display') parser.add_argument('-D', '--dry-run', dest='dry_run', action='store_true', default=False, @@ -914,14 +919,14 @@ if __name__ == '__main__': if not isinstance(loglevel, int): raise ValueError('Invalid log level: %s' % (args.loglevel)) - iwla = IWLA(loglevel, args.dry_run) + iwla = IWLA(loglevel, args) required_conf = ['analyzed_filename', 'domain_name'] if not validConfRequirements(required_conf, iwla, 'Main Conf'): sys.exit(0) if args.stdin: - iwla.start(sys.stdin, args) + iwla.start(sys.stdin) else: filename = args.file or conf.analyzed_filename - iwla.start(FileIter(filename), args) + iwla.start(FileIter(filename)) diff --git a/plugins/post_analysis/filter_users.py b/plugins/post_analysis/filter_users.py index 3b16407..e5e0138 100644 --- a/plugins/post_analysis/filter_users.py +++ b/plugins/post_analysis/filter_users.py @@ -80,10 +80,6 @@ Statistics deletion : """ class IWLAPostAnalysisFilterUsers(IPlugin): - def __init__(self, iwla): - super(IWLAPostAnalysisFilterUsers, self).__init__(iwla) - self.API_VERSION = 1 - def _check_filter(self, _filter): if len(_filter) != 3: raise Exception('Bad filter ' + ' '.join(_filter)) diff --git a/plugins/post_analysis/reverse_dns.py b/plugins/post_analysis/reverse_dns.py index 37ae232..b1779c4 100644 --- a/plugins/post_analysis/reverse_dns.py +++ b/plugins/post_analysis/reverse_dns.py @@ -53,10 +53,6 @@ Statistics deletion : class IWLAPostAnalysisReverseDNS(IPlugin): DEFAULT_DNS_TIMEOUT = 0.5 - def __init__(self, iwla): - super(IWLAPostAnalysisReverseDNS, self).__init__(iwla) - self.API_VERSION = 1 - def load(self): timeout = self.iwla.getConfValue('reverse_dns_timeout', IWLAPostAnalysisReverseDNS.DEFAULT_DNS_TIMEOUT) @@ -67,6 +63,8 @@ class IWLAPostAnalysisReverseDNS(IPlugin): hits = self.iwla.getCurrentVisits() for (k, hit) in hits.items(): if hit.get('dns_analysed', False): continue + # Do reverse for feed parser even if they're not + # valid visitors if not hit.get('feed_parser', False) and\ not self.iwla.isValidVisitor(hit): continue