Update iwla:
* Rework arg variable management
* Manage dry run at top level
* 'robot' property is now None by default (allows the analysis to be done only once)
* Add --disable-display option
This commit is contained in:
parent
6500d98bdd
commit
adc04bf753
37
iwla.py
37
iwla.py
|
@ -134,7 +134,7 @@ class IWLA(object):
|
|||
API_VERSION = 1
|
||||
IWLA_VERSION = '0.7'
|
||||
|
||||
def __init__(self, logLevel, dry_run):
|
||||
def __init__(self, logLevel, args):
|
||||
self.meta_infos = {}
|
||||
self.analyse_started = False
|
||||
self.current_analysis = {}
|
||||
|
@ -142,7 +142,7 @@ class IWLA(object):
|
|||
self.cache_plugins = {}
|
||||
self.display = DisplayHTMLBuild(self)
|
||||
self.valid_visitors = None
|
||||
self.dry_run = dry_run
|
||||
self.args = args
|
||||
|
||||
self.log_format_extracted = re.sub(r'([^\$?\w])', r'\\\g<1>', conf.log_format)
|
||||
self.log_format_extracted = re.sub(r'\$(\w+)', '(?P<\g<1>>.+)', self.log_format_extracted)
|
||||
|
@ -161,7 +161,7 @@ class IWLA(object):
|
|||
|
||||
logging.basicConfig(format='%(name)s %(message)s', level=logLevel)
|
||||
self.logger = logging.getLogger(self.__class__.__name__)
|
||||
if self.dry_run:
|
||||
if self.args.dry_run:
|
||||
self.logger.info('==> Start (DRY RUN)')
|
||||
else:
|
||||
self.logger.info('==> Start')
|
||||
|
@ -256,7 +256,8 @@ class IWLA(object):
|
|||
return gzip.open(filename, prot)
|
||||
|
||||
def _serialize(self, obj, filename):
|
||||
if self.dry_run: return
|
||||
if self.args.dry_run: return
|
||||
self.logger.info("==> Serialize to %s" % (filename))
|
||||
base = os.path.dirname(filename)
|
||||
if not os.path.exists(base):
|
||||
os.makedirs(base)
|
||||
|
@ -318,7 +319,8 @@ class IWLA(object):
|
|||
return True
|
||||
|
||||
def isRobot(self, hit):
|
||||
return hit['robot']
|
||||
# By default robot is None
|
||||
return hit['robot'] == True
|
||||
|
||||
def _appendHit(self, hit):
|
||||
remote_addr = hit['remote_addr']
|
||||
|
@ -379,7 +381,7 @@ class IWLA(object):
|
|||
super_hit['bandwidth'] = {0:0}
|
||||
super_hit['last_access'] = self.meta_infos['last_time']
|
||||
super_hit['requests'] = []
|
||||
super_hit['robot'] = False
|
||||
super_hit['robot'] = None
|
||||
super_hit['hit_only'] = 0
|
||||
|
||||
def _normalizeURI(self, uri, removeFileSlash=False):
|
||||
|
@ -578,7 +580,7 @@ class IWLA(object):
|
|||
|
||||
if not os.path.exists(gz_path) or\
|
||||
os.stat(path).st_mtime > os.stat(gz_path).st_mtime:
|
||||
if self.dry_run: return
|
||||
if self.args.dry_run: return
|
||||
with open(path, 'rb') as f_in, gzip.open(gz_path, 'wb') as f_out:
|
||||
f_out.write(f_in.read())
|
||||
|
||||
|
@ -592,6 +594,8 @@ class IWLA(object):
|
|||
break
|
||||
|
||||
def _generateDisplay(self):
|
||||
if self.args.dry_run or\
|
||||
self.args.disable_display: return
|
||||
self._generateDisplayDaysStats()
|
||||
self._callPlugins(conf.DISPLAY_HOOK_DIRECTORY)
|
||||
self._generateDisplayWholeMonthStats()
|
||||
|
@ -639,7 +643,7 @@ class IWLA(object):
|
|||
|
||||
self._callPlugins(conf.POST_HOOK_DIRECTORY)
|
||||
|
||||
if args.display_only:
|
||||
if self.args.display_only:
|
||||
if not 'stats' in self.meta_infos.keys():
|
||||
self.meta_infos['stats'] = {}
|
||||
self._generateDisplay()
|
||||
|
@ -653,7 +657,6 @@ class IWLA(object):
|
|||
|
||||
path = self.getDBFilename(cur_time)
|
||||
|
||||
self.logger.info("==> Serialize to %s" % (path))
|
||||
self._serialize(self.current_analysis, path)
|
||||
|
||||
# Save month stats
|
||||
|
@ -666,7 +669,6 @@ class IWLA(object):
|
|||
self.meta_infos['stats'][year][month] = duplicated_stats
|
||||
|
||||
meta_path = os.path.join(conf.DB_ROOT, conf.META_FILENAME)
|
||||
self.logger.info("==> Serialize to %s" % (meta_path))
|
||||
self._serialize(self.meta_infos, meta_path)
|
||||
|
||||
self._generateDisplay()
|
||||
|
@ -766,8 +768,7 @@ class IWLA(object):
|
|||
if os.path.exists(output_path): shutil.rmtree(output_path)
|
||||
month += 1
|
||||
|
||||
def start(self, _file, args):
|
||||
self.args = args
|
||||
def start(self, _file):
|
||||
self.start_time = datetime.now()
|
||||
|
||||
meta_path = os.path.join(conf.DB_ROOT, conf.META_FILENAME)
|
||||
|
@ -876,7 +877,11 @@ if __name__ == '__main__':
|
|||
|
||||
parser.add_argument('-p', '--display-only', dest='display_only', action='store_true',
|
||||
default=False,
|
||||
help='Only generate display')
|
||||
help='Only generate display (don\'t write database)')
|
||||
|
||||
parser.add_argument('-P', '--disable-display', dest='disable_display', action='store_true',
|
||||
default=False,
|
||||
help='Don\'t generate display')
|
||||
|
||||
parser.add_argument('-D', '--dry-run', dest='dry_run', action='store_true',
|
||||
default=False,
|
||||
|
@ -914,14 +919,14 @@ if __name__ == '__main__':
|
|||
if not isinstance(loglevel, int):
|
||||
raise ValueError('Invalid log level: %s' % (args.loglevel))
|
||||
|
||||
iwla = IWLA(loglevel, args.dry_run)
|
||||
iwla = IWLA(loglevel, args)
|
||||
|
||||
required_conf = ['analyzed_filename', 'domain_name']
|
||||
if not validConfRequirements(required_conf, iwla, 'Main Conf'):
|
||||
sys.exit(0)
|
||||
|
||||
if args.stdin:
|
||||
iwla.start(sys.stdin, args)
|
||||
iwla.start(sys.stdin)
|
||||
else:
|
||||
filename = args.file or conf.analyzed_filename
|
||||
iwla.start(FileIter(filename), args)
|
||||
iwla.start(FileIter(filename))
|
||||
|
|
|
@ -80,10 +80,6 @@ Statistics deletion :
|
|||
"""
|
||||
|
||||
class IWLAPostAnalysisFilterUsers(IPlugin):
|
||||
def __init__(self, iwla):
|
||||
super(IWLAPostAnalysisFilterUsers, self).__init__(iwla)
|
||||
self.API_VERSION = 1
|
||||
|
||||
def _check_filter(self, _filter):
|
||||
if len(_filter) != 3:
|
||||
raise Exception('Bad filter ' + ' '.join(_filter))
|
||||
|
|
|
@ -53,10 +53,6 @@ Statistics deletion :
|
|||
class IWLAPostAnalysisReverseDNS(IPlugin):
|
||||
DEFAULT_DNS_TIMEOUT = 0.5
|
||||
|
||||
def __init__(self, iwla):
|
||||
super(IWLAPostAnalysisReverseDNS, self).__init__(iwla)
|
||||
self.API_VERSION = 1
|
||||
|
||||
def load(self):
|
||||
timeout = self.iwla.getConfValue('reverse_dns_timeout',
|
||||
IWLAPostAnalysisReverseDNS.DEFAULT_DNS_TIMEOUT)
|
||||
|
@ -67,6 +63,8 @@ class IWLAPostAnalysisReverseDNS(IPlugin):
|
|||
hits = self.iwla.getCurrentVisits()
|
||||
for (k, hit) in hits.items():
|
||||
if hit.get('dns_analysed', False): continue
|
||||
# Do reverse DNS lookup for feed parsers even if they're not
|
||||
# valid visitors
|
||||
if not hit.get('feed_parser', False) and\
|
||||
not self.iwla.isValidVisitor(hit):
|
||||
continue
|
||||
|
|
Loading…
Reference in New Issue
Block a user