#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé 2015

# This file is part of iwla

# iwla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# iwla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with iwla.  If not, see <http://www.gnu.org/licenses/>.
#

import os
import shutil
import sys
import re
import time
import pickle
import gzip
import importlib 
import argparse
import logging
import gettext
from calendar import monthrange
from datetime import date, datetime

import default_conf as conf

from iplugin import *
from display import *

"""
Main class IWLA
Parse Log, compute them, call plugins and produce output
For now, only HTTP log are valid

Plugin requirements :
    None

Conf values needed :
    analyzed_filename
    domain_name
    locales_path
    compress_output_files
    excluded_ip

Output files :
    DB_ROOT/meta.db
    DB_ROOT/year/month/iwla.db
    OUTPUT_ROOT/index.html
    OUTPUT_ROOT/year/_stats.html
    OUTPUT_ROOT/year/month/index.html

Statistics creation :

meta :
    last_time
    start_analysis_time
    stats =>
        year =>
            month =>
                viewed_bandwidth
                not_viewed_bandwidth
                viewed_pages
                viewed_hits
                nb_visits
                nb_visitors

month_stats :
    viewed_bandwidth
    not_viewed_bandwidth
    viewed_pages
    viewed_hits
    nb_visits

days_stats :
    day =>
        viewed_bandwidth
        not_viewed_bandwidth
        viewed_pages
        viewed_hits
        nb_visits
        nb_visitors

visits :
   remote_addr =>
       remote_addr
       remote_ip
       viewed_pages{0..31} # 0 contains total
       viewed_hits{0..31} # 0 contains total
       not_viewed_pages{0..31}
       not_viewed_hits{0..31}
       bandwidth{0..31}
       last_access
       requests =>
           [fields_from_format_log]
           extract_request =>
               http_method
               http_uri
               http_version
               extract_uri
               extract_parameters*
           extract_referer* =>
               extract_uri
               extract_parameters*
       robot
       hit_only
       is_page
       keep_requests

valid_visitors:
    month_stats without robot and hit only visitors (if not conf.count_hit_only_visitors)

Statistics update :
    None

Statistics deletion :
    None
"""


class IWLA(object):

    ANALYSIS_CLASS = 'HTTP'
    API_VERSION = 1
    IWLA_VERSION = '0.7'

    def __init__(self, logLevel, dry_run):
        self.meta_infos = {}
        self.analyse_started = False
        self.current_analysis = {}
        self.start_time = 0
        self.cache_plugins = {}
        self.display = DisplayHTMLBuild(self)
        self.valid_visitors = None
        self.dry_run = dry_run

        self.log_format_extracted = re.sub(r'([^\$?\w])', r'\\\g<1>', conf.log_format)
        self.log_format_extracted = re.sub(r'\$(\w+)', '(?P<\g<1>>.+)', self.log_format_extracted)
        self.http_request_extracted = re.compile(r'(?P<http_method>\S+) (?P<http_uri>\S+) (?P<http_version>\S+)')
        self.log_re = re.compile(self.log_format_extracted)
        self.uri_re = re.compile(r'(?P<extract_uri>[^\?#]+)(\?(?P<extract_parameters>[^#]+))?(#.*)?')
        self.domain_name_re = re.compile(r'.*%s' % conf.domain_name)
        self.slash_re = re.compile(r'//')
        self.protocol_re = re.compile(r'^.*://')
        self.excluded_ip = []
        for ip in conf.excluded_ip:
            self.excluded_ip += [re.compile(ip)]
        self.plugins = [(conf.PRE_HOOK_DIRECTORY     , conf.pre_analysis_hooks),
                        (conf.POST_HOOK_DIRECTORY    , conf.post_analysis_hooks),
                        (conf.DISPLAY_HOOK_DIRECTORY , conf.display_hooks)]

        logging.basicConfig(format='%(name)s %(message)s', level=logLevel)
        self.logger = logging.getLogger(self.__class__.__name__)
        if self.dry_run:
            self.logger.info('==> Start (DRY RUN)')
        else:
            self.logger.info('==> Start')
        try:
            t = gettext.translation('iwla', localedir=conf.locales_path, languages=[conf.locale])
            self.logger.info('\tUsing locale %s' % (conf.locale))
        except IOError:
            t = gettext.NullTranslations()
            self.logger.info('\tUsing default locale en_EN')
        self._ = t.gettext

    def getVersion(self):
        return IWLA.IWLA_VERSION

    def getConfValue(self, key, default=None):
        if not key in dir(conf):
            return default
        else:
            return conf.__dict__[key]

    def _clearVisits(self):
        self.current_analysis = {
            'days_stats' : {},
            'month_stats' : {},
            'visits' : {}
            }
        self.valid_visitors = None
        return self.current_analysis

    def getDaysStats(self):
        return self.current_analysis['days_stats']

    def getMonthStats(self):
        return self.current_analysis['month_stats']

    def getCurrentVisits(self):
        return self.current_analysis['visits']

    def getSortedCurrentVisits(self):
        visits = self.current_analysis['visits'].values()
        return sorted(visits, key=lambda hit: hit['last_access'])

    def getValidVisitors(self):
        return self.valid_visitors

    def getDisplay(self):
        return self.display

    def getCurTime(self):
        return self.meta_infos['last_time']

    def getStartAnalysisTime(self):
        return self.meta_infos['start_analysis_time']

    def isValidForCurrentAnalysis(self, request):
        cur_time = self.meta_infos['start_analysis_time']
        # Analyse not started
        if not cur_time: return False
        return (time.mktime(cur_time) < time.mktime(request['time_decoded']))

    def hasBeenViewed(self, request):
        return int(request['status']) in conf.viewed_http_codes

    def getCurDisplayPath(self, filename):
        cur_time = self.meta_infos['last_time']
        return os.path.join(str(cur_time.tm_year), '%02d' % (cur_time.tm_mon), filename)

    def getResourcesPath(self):
        return conf.resources_path

    def getCSSPath(self):
        return conf.css_path

    def _clearMeta(self):
        self.meta_infos = {
            'last_time' : None,
            'start_analysis_time' : None
            }
        return self.meta_infos

    def _clearDisplay(self):
        self.display.clear()
        return self.display

    def getDBFilename(self, time):
        return os.path.join(conf.DB_ROOT, str(time.tm_year), '%02d' % (time.tm_mon), conf.DB_FILENAME)

    def _openDB(self, filename, prot='r'):
        if self.args.dont_compress:
            return open(filename, prot)
        else:
            return gzip.open(filename, prot)

    def _serialize(self, obj, filename):
        if self.dry_run: return
        base = os.path.dirname(filename)
        if not os.path.exists(base):
            os.makedirs(base)

        # Make a backup in case of something fails
        if os.path.exists(filename):
            shutil.copy(filename, filename + '.bak')
            
        with open(filename + '.tmp', 'wb+') as f, self._openDB(filename, 'w') as fzip:
            pickle.dump(obj, f)
            f.seek(0)
            fzip.write(f.read())
            os.fsync(fzip)
        os.remove(filename + '.tmp')
        if os.path.exists(filename + '.bak'):
            os.remove(filename + '.bak')

    def _deserialize(self, filename):
        if not os.path.exists(filename):
            return None

        res = None
        with self._openDB(filename) as f:
            res = pickle.load(f)
        return res

    def _callPlugins(self, target_root, *args):
        self.logger.info('==> Call plugins (%s)' % (target_root))
        for (root, plugins) in self.plugins:
            if root != target_root: continue
            for p in plugins:
                mod = self.cache_plugins.get(root + '.' + p, None)
                if mod:
                    self.logger.info('\t%s' % (p))
                    mod.hook(*args)

    def isPage(self, request):
        self.logger.debug("Is page %s" % (request))
        for e in conf.pages_extensions:
            if request.endswith(e):
                self.logger.debug("True")
                return True
        self.logger.debug("False")
        return False

    def isMultimediaFile(self, request):
        self.logger.debug("Is multimedia %s" % (request))
        for e in conf.multimedia_files:
            if request.lower().endswith(e):
                self.logger.debug("True")
                return True
        self.logger.debug("False")
        return False

    def isValidVisitor(self, hit):
        if hit['robot']: return False
        if not conf.count_hit_only_visitors and not hit['viewed_pages'][0]:
            return False
        return True

    def isRobot(self, hit):
        return hit['robot']

    def _appendHit(self, hit):
        remote_addr = hit['remote_addr']
	
        if not remote_addr: return

        for ip in self.excluded_ip:
            if ip.match(remote_addr):
                return

        # Redirected page/hit
        if int(hit['status']) in (301, 302, 307, 308):
            return

        if not remote_addr in self.current_analysis['visits'].keys():
            self._createVisitor(hit)
	    
        super_hit = self.current_analysis['visits'][remote_addr]
        # Don't keep all requests for robots
        if not super_hit['robot']:
            super_hit['requests'].append(hit)

        day = self.meta_infos['last_time'].tm_mday
        if self.hasBeenViewed(hit):
            super_hit['bandwidth'][day] = super_hit['bandwidth'].get(day, 0) + int(hit['body_bytes_sent'])
            super_hit['bandwidth'][0] += int(hit['body_bytes_sent'])
            super_hit['last_access'] = self.meta_infos['last_time']

        request = hit['extract_request']

        uri = request.get('extract_uri', request['http_uri'])

        hit['is_page'] = self.isPage(uri)

        if super_hit['robot'] or\
           not self.hasBeenViewed(hit):
            page_key = 'not_viewed_pages'
            hit_key  = 'not_viewed_hits'
        else:
            page_key = 'viewed_pages'
            hit_key  = 'viewed_hits'

        if hit['is_page']:
            super_hit[page_key][day] = super_hit[page_key].get(day, 0) + 1
            super_hit[page_key][0] += 1
        else:
            super_hit[hit_key][day] = super_hit[hit_key].get(day, 0) + 1
            super_hit[hit_key][0] += 1

    def _createVisitor(self, hit):
        super_hit = self.current_analysis['visits'][hit['remote_addr']] = {}
        super_hit['remote_addr'] = hit['remote_addr']
        super_hit['remote_ip'] = hit['remote_addr']
        super_hit['viewed_pages'] = {0:0}
        super_hit['viewed_hits'] = {0:0}
        super_hit['not_viewed_pages'] = {0:0}
        super_hit['not_viewed_hits'] = {0:0}
        super_hit['bandwidth'] = {0:0}
        super_hit['last_access'] = self.meta_infos['last_time']
        super_hit['requests'] = []
        super_hit['robot'] = False
        super_hit['hit_only'] = 0

    def _normalizeURI(self, uri, removeFileSlash=False):
        if uri == '/': return uri
        # Remove protocol
        uri = self.protocol_re.sub('', uri)
        # Remove double /
        uri = self.slash_re.sub('/', uri)
        if removeFileSlash and uri[-1] == '/':
            uri = uri[:-1]
        return uri

    def _normalizeParameters(self, parameters):
        # No parameters
        if parameters == '?': return None
        return parameters

    def _decodeHTTPRequest(self, hit):
        if not 'request' in hit.keys(): return False

        groups = self.http_request_extracted.match(hit['request'])

        if groups:
            hit['extract_request'] = groups.groupdict("")
            uri_groups = self.uri_re.match(hit['extract_request']['http_uri'])
            if uri_groups:
                d = uri_groups.groupdict("")
                hit['extract_request']['extract_uri'] = self._normalizeURI(d['extract_uri'])
                if 'extract_parameters' in d.keys():
                    parameters = self._normalizeParameters(d['extract_parameters'])
                    if parameters:
                        hit['extract_request']['extract_parameters'] = parameters
        else:
            self.logger.warning("Bad request extraction %s" % (hit['request']))
            return False

        if hit['http_referer']:
            referer_groups = self.uri_re.match(hit['http_referer'])
            if referer_groups:
                hit['extract_referer'] = referer_groups.groupdict("")
                hit['extract_referer']['extract_uri'] = self._normalizeURI(hit['extract_referer']['extract_uri'], True)
                hit['extract_referer']['extract_parameters'] = self._normalizeParameters(hit['extract_referer']['extract_parameters'])
        return True

    def _decodeTime(self, hit):
        try:
            hit['time_decoded'] = time.strptime(hit['time_local'], conf.time_format)
        except ValueError as e:
            if sys.version_info < (3, 2):
                # Try without UTC value at the end (%z not recognized)
                gmt_offset_str = hit['time_local'][-5:]
                gmt_offset_hours = int(gmt_offset_str[1:3])*60*60
                gmt_offset_minutes = int(gmt_offset_str[3:5])*60
                gmt_offset = gmt_offset_hours + gmt_offset_minutes
                hit['time_decoded'] = time.strptime(hit['time_local'][:-6], conf.time_format[:-3])
                # if gmt_offset_str[0] == '-':
                #     hit['time_decoded'] = time.localtime(time.mktime(hit['time_decoded'])+gmt_offset)
                # else:
                #     hit['time_decoded'] = time.localtime(time.mktime(hit['time_decoded'])-gmt_offset)
            else:
                raise e
        return hit['time_decoded']

    def getDisplayIndex(self):
        cur_time = self.meta_infos['last_time']
        filename = self.getCurDisplayPath('index.html')

        return self.display.getPage(filename)

    def _generateDisplayDaysStats(self):
        cur_time = self.meta_infos['last_time']
        title = createCurTitle(self, self._('Statistics'))
        filename = self.getCurDisplayPath('index.html')
        self.logger.info('==> Generate display (%s)' % (filename))
        page = self.display.createPage(title, filename, conf.css_path)
        link = DisplayHTMLRaw(self, '<iframe src="../_stats.html"></iframe>')
        page.appendBlock(link)

        months_name = ['', self._('Jan'), self._('Feb'), self._('Mar'), self._('Apr'), self._('May'), self._('June'), self._('Jul'), self._('Aug'), self._('Sep'), self._('Oct'), self._('Nov'), self._('Dec')]
        _, nb_month_days = monthrange(cur_time.tm_year, cur_time.tm_mon)
        days = self.display.createBlock(DisplayHTMLBlockTableWithGraph, self._('By day'), [self._('Day'), self._('Visits'), self._('Pages'), self._('Hits'), self._('Bandwidth'), self._('Not viewed Bandwidth')], None, nb_month_days, range(1,6), [4, 5])
        days.setColsCSSClass(['', 'iwla_visit', 'iwla_page', 'iwla_hit', 'iwla_bandwidth', 'iwla_bandwidth'])
        nb_visits = 0
        nb_days = 0
        for i in range(1, nb_month_days+1):
            month = months_name[int(time.strftime('%m', cur_time), 10)]
            day = '%d<br/>%s' % (i, month)
            full_day = '%02d %s %d' % (i, month, cur_time.tm_year)
            if i in self.current_analysis['days_stats'].keys():
                stats = self.current_analysis['days_stats'][i]
                row = [full_day, stats['nb_visits'], stats['viewed_pages'], stats['viewed_hits'],
                       stats['viewed_bandwidth'], stats['not_viewed_bandwidth']]
                nb_visits += stats['nb_visits']
                nb_days += 1
            else:
                row = [full_day, 0, 0, 0, 0, 0]
            days.appendRow(row)
            viewed_bandwidth = row[4]
            not_viewed_bandwidth = row[5]
            days.setCellValue(i-1, 4, viewed_bandwidth)
            days.setCellValue(i-1, 5, not_viewed_bandwidth)
            days.appendShortTitle(day)
            adate = date(cur_time.tm_year, cur_time.tm_mon, i)
            week_day = adate.weekday()
            if week_day == 5 or week_day == 6:
                days.setRowCSSClass(i-1, 'iwla_weekend')
            if adate == date.today():
                css = days.getCellCSSClass(i-1, 0)
                if css: css = '%s %s' % (css, 'iwla_curday')
                else: css = 'iwla_curday'
                days.setCellCSSClass(i-1, 0, css)

        stats = self.current_analysis['month_stats']

        row = [0, nb_visits, stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']]
        if nb_days:
            average_row = list(map(lambda v: int(v/nb_days), row))
        else:
            average_row = list(map(lambda v: 0, row))

        average_row[0] = self._('Average')
        days.appendRow(average_row)

        row[0] = self._('Total')
        days.appendRow(row)
        page.appendBlock(days)
        self.display.addPage(page)

    def _generateDisplayMonthStats(self, page, year, month_stats):
        cur_time = time.localtime()
        months_name = ['', self._('Jan'), self._('Feb'), self._('Mar'), self._('Apr'), self._('May'), self._('June'), self._('Jul'), self._('Aug'), self._('Sep'), self._('Oct'), self._('Nov'), self._('Dec')]
        title = '%s %d' % (self._('Summary'), year)
        cols = [self._('Month'), self._('Visitors'), self._('Visits'), self._('Pages'), self._('Hits'), self._('Bandwidth'), self._('Not viewed Bandwidth')]
        graph_cols=range(1,6)
        months = self.display.createBlock(DisplayHTMLBlockTableWithGraph, title, cols, None, 12, graph_cols, [5, 6])
        months.setColsCSSClass(['', 'iwla_visitor', 'iwla_visit', 'iwla_page', 'iwla_hit', 'iwla_bandwidth', 'iwla_bandwidth'])
        total = [0] * len(cols)
        for i in range(1, 13):
            month = '%s<br/>%d' % (months_name[i], year)
            full_month = '%s %d' % (months_name[i], year)
            link_month = '<a target="_top" href="/%d/%02d/index.html">%s</a>' % (year, i, full_month)
            if i in month_stats.keys():
                stats = month_stats[i]
                row = [link_month, stats['nb_visitors'], stats['nb_visits'], stats['viewed_pages'], stats['viewed_hits'],
                       stats['viewed_bandwidth'], stats['not_viewed_bandwidth']]
                for j in graph_cols:
                    total[j] += row[j]
            else:
                row = [full_month, 0, 0, 0, 0, 0, 0]
            months.appendRow(row)
            months.appendShortTitle(month)
            if year == cur_time.tm_year and i == cur_time.tm_mon:
                css = months.getCellCSSClass(i-1, 0)
                if css: css = '%s %s' % (css, 'iwla_curday')
                else: css = 'iwla_curday'
                months.setCellCSSClass(i-1, 0, css)

        total[0] = self._('Total')
        months.appendRow(total)
        page.appendBlock(months)

        filename = '%d/_stats.html' % (year)
        page_ = self.display.createPage(u'', filename, conf.css_path)
        page_.appendBlock(months)
        page_.build(conf.DISPLAY_ROOT, False)
        months.resetHTML()

    def _generateDisplayWholeMonthStats(self):
        title = '%s %s' % (self._('Statistics for'), conf.domain_name)
        filename = 'index.html'

        self.logger.info('==> Generate main page (%s)' % (filename))

        page = self.display.createPage(title, filename, conf.css_path)

        last_update = u'<b>%s</b> %s<br />' % (self._(u'Last update'), time.strftime('%02d %b %Y %H:%M', time.localtime()))
        page.appendBlock(self.display.createBlock(DisplayHTMLRaw, last_update))
        duration = datetime.now() - self.start_time
        duration = time.gmtime(duration.seconds)
        time_analysis = u'<b>%s</b> ' % (self._('Time analysis'))
        if duration.tm_hour:
            time_analysis += u'%d %s, ' % (duration.tm_hour, self._(u'hours'))
        time_analysis += u'%d %s and %d %s<br />' % (duration.tm_min, self._(u'minutes'), duration.tm_sec, self._(u'seconds'))
        page.appendBlock(self.display.createBlock(DisplayHTMLRaw, time_analysis))

        for year in sorted(self.meta_infos['stats'].keys(), reverse=True):
            self._generateDisplayMonthStats(page, year, self.meta_infos['stats'][year])

        self.display.addPage(page)

    def _compressFile(self, root, filename):
        path = os.path.join(root, filename)
        gz_path = path + '.gz'

        self.logger.debug('Compress %s => %s' % (path, gz_path))

        if not os.path.exists(gz_path) or\
           os.stat(path).st_mtime > os.stat(gz_path).st_mtime:
            if self.dry_run: return
            with open(path, 'rb') as f_in, gzip.open(gz_path, 'wb') as f_out:
                f_out.write(f_in.read())

    def _compressFiles(self, root):
        if not conf.compress_output_files: return
        for rootdir, subdirs, files in os.walk(root, followlinks=True):
            for f in files:
                for ext in conf.compress_output_files:
                    if f.endswith(ext):
                        self._compressFile(rootdir, f)
                        break
                    
    def _generateDisplay(self):
        self._generateDisplayDaysStats()
        self._callPlugins(conf.DISPLAY_HOOK_DIRECTORY)
        self._generateDisplayWholeMonthStats()
        self.display.build(conf.DISPLAY_ROOT)
        self._compressFiles(conf.DISPLAY_ROOT)

    def _createEmptyStats(self):
        stats = {}
        stats['viewed_bandwidth'] = 0
        stats['not_viewed_bandwidth'] = 0
        stats['viewed_pages'] = 0
        stats['viewed_hits'] = 0
        stats['nb_visits'] = 0

        return stats

    def _generateMonthStats(self):
        self._clearDisplay()

        visits = self.current_analysis['visits']

        stats = self._createEmptyStats()
        for (day, stat) in self.current_analysis['days_stats'].items():
            for k in stats.keys():
                stats[k] += stat[k]

        duplicated_stats = {k:v for (k,v) in stats.items()}

        cur_time = self.meta_infos['last_time']
        self.logger.info("== Stats for %d/%02d ==" % (cur_time.tm_year, cur_time.tm_mon))
        self.logger.info(stats)

        if not 'month_stats' in self.current_analysis.keys():
            self.current_analysis['month_stats'] = stats
        else:
            for (k,v) in stats.items():
                self.current_analysis['month_stats'][k] = v

        self.valid_visitors = {}
        for (k,v) in visits.items():
            if self.isValidVisitor(v):
                self.valid_visitors[k] = v

        duplicated_stats['nb_visitors'] = stats['nb_visitors'] = len(self.valid_visitors.keys())

        self._callPlugins(conf.POST_HOOK_DIRECTORY)

        if args.display_only:
            if not 'stats' in self.meta_infos.keys():
                self.meta_infos['stats'] = {}
            self._generateDisplay()
            return

        for (k,v) in visits.items():
            # Keep at least one request (for referers...)
            if not v.get('keep_requests', conf.keep_requests):
                if len(v['requests']) > 1:
                    v['requests'] = [v['requests'][0]]
                    
        path = self.getDBFilename(cur_time)

        self.logger.info("==> Serialize to %s" % (path))
        self._serialize(self.current_analysis, path)

        # Save month stats
        year = cur_time.tm_year
        month = cur_time.tm_mon
        if not 'stats' in self.meta_infos.keys():
            self.meta_infos['stats'] = {}
        if not year in self.meta_infos['stats'].keys():
            self.meta_infos['stats'][year] = {}
        self.meta_infos['stats'][year][month] = duplicated_stats

        meta_path = os.path.join(conf.DB_ROOT, conf.META_FILENAME)
        self.logger.info("==> Serialize to %s" % (meta_path))
        self._serialize(self.meta_infos, meta_path)

        self._generateDisplay()

    def _generateDayStats(self):
        self._callPlugins(conf.PRE_HOOK_DIRECTORY)

        visits = self.current_analysis['visits']
        cur_time = self.meta_infos['last_time']
        stats = self._createEmptyStats()

        day = cur_time.tm_mday
        for (k, super_hit) in visits.items():
            if super_hit['last_access'].tm_mday != day:
                continue
            if super_hit['robot']:
                stats['not_viewed_bandwidth'] += super_hit['bandwidth'].get(day, 0)
                continue
            stats['viewed_bandwidth'] += super_hit['bandwidth'].get(day, 0)
            stats['viewed_hits'] += super_hit['viewed_hits'].get(day, 0)
            stats['viewed_pages'] += super_hit['viewed_pages'].get(day, 0)
            if ((conf.count_hit_only_visitors and super_hit['viewed_hits'].get(day, 0)) or\
                super_hit['viewed_pages'].get(day, 0)):
                stats['nb_visits'] += 1

        self.logger.info("== Stats for %d/%02d/%02d ==" % (cur_time.tm_year, cur_time.tm_mon, cur_time.tm_mday))
        self.logger.info(stats)

        self.current_analysis['days_stats'][cur_time.tm_mday] = stats

    def _newHit(self, hit):
        if not self.domain_name_re.match(hit['server_name']):
            self.logger.debug("Not in domain %s" % (hit))
            return False

        t = self._decodeTime(hit)

        cur_time = self.meta_infos['last_time']

        if cur_time == None:
            self.current_analysis = self._deserialize(self.getDBFilename(t)) or self._clearVisits()
            self.analyse_started = True
        else:
            if not self.analyse_started and\
               time.mktime(t) <= time.mktime(cur_time):
                self.logger.debug("Not in time")
                return False
            self.analyse_started = True
            if t < cur_time: # Don't accept past hits
                return False
            if cur_time.tm_mon != t.tm_mon:
                self._generateDayStats()
                self._generateMonthStats()
                self.current_analysis = self._deserialize(self.getDBFilename(t)) or self._clearVisits()
            elif cur_time.tm_mday != t.tm_mday:
                self._generateDayStats()

        self.meta_infos['last_time'] = t

        if not self.meta_infos['start_analysis_time']:
            self.meta_infos['start_analysis_time'] = t

        if not self._decodeHTTPRequest(hit): return False

        if hit['extract_request']['http_method'] not in ['GET', 'POST']:
            return False

        for k in hit.keys():
            if hit[k] == '-' or hit[k] == '*':
                hit[k] = ''
                
        self._appendHit(hit)

        return True

    def _reset(self):
        reset_time = time.strptime(self.args.reset, '%m/%Y')

        self.logger.info('Reset time')
        self.logger.info(reset_time)

        self.meta_infos['last_time'] = reset_time

        cur_time = time.localtime()
        year = reset_time.tm_year
        while year < cur_time.tm_year:
            db_path = os.path.join(conf.DB_ROOT, str(year))
            if os.path.exists(db_path): shutil.rmtree(db_path)
            output_path = os.path.join(conf.DISPLAY_ROOT, str(year))
            if os.path.exists(output_path): shutil.rmtree(output_path)
            year += 1
        month = reset_time.tm_mon
        while month <= cur_time.tm_mon:
            db_path = os.path.join(conf.DB_ROOT, str(year), '%02d' % (month))
            if os.path.exists(db_path): shutil.rmtree(db_path)
            output_path = os.path.join(conf.DISPLAY_ROOT, str(year), '%02d' % (month))
            if os.path.exists(output_path): shutil.rmtree(output_path)
            month += 1

    def start(self, _file, args):
        self.args = args
        self.start_time = datetime.now()
    
        meta_path = os.path.join(conf.DB_ROOT, conf.META_FILENAME)
        if os.path.exists(meta_path):
            self.logger.info('==> Load previous database')

        self.meta_infos = self._deserialize(meta_path) or self._clearMeta()
        if self.meta_infos['last_time']:
            if args.reset:
                self._reset()
            self.logger.info('Last time')
            self.logger.info(self.meta_infos['last_time'])
            self.current_analysis = self._deserialize(self.getDBFilename(self.meta_infos['last_time'])) or self._clearVisits()
        else:
            self._clearVisits()

        self.meta_infos['start_analysis_time'] = None

        self.cache_plugins = preloadPlugins(self.plugins, self)

        self.logger.info('==> Analysing log')

        for l in _file:
            # print "line " + l

            groups = self.log_re.match(l)

            if groups:
                self._newHit(groups.groupdict(""))
            else:
                self.logger.warning("No match for %s" % (l))
                #break

        if self.analyse_started:
            self._generateDayStats()
            self._generateMonthStats()
            del self.meta_infos['start_analysis_time']
        else:
            self.logger.info('==> Analyse not started : nothing new')


class FileIter(object):
    def __init__(self, filenames):
        self.filenames = [f for f in filenames.split(',') if f]
        for f in self.filenames:
            if not os.path.exists(f):
                print('No such file \'%s\'' % (f))
                sys.exit(-1)
        self.cur_file = None
        self._openNextFile()

    def __iter__(self):
        return self

    def __next__(self):
        return self.next()

    def _openNextFile(self):
        if self.cur_file:
            self.cur_file.close()
            self.cur_file = None
        if not self.filenames:
            raise StopIteration()
        filename = self.filenames.pop(0)
        if filename.endswith('gz'):
            self.cur_file = gzip.open(filename, 'rt')
        else:
            self.cur_file = open(filename, 'rt')

    def next(self):
        l = self.cur_file.readline()
        if not l:
            self._openNextFile()
            l = self.cur_file.readline()
        return l[:-1]
    
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Intelligent Web Log Analyzer')

    parser.add_argument('-c', '--config-file', dest='config_file',
                        default='conf.py', type=str,
                        help='Config file to use (default conf.py)')

    parser.add_argument('-C', '--clean-output', dest='clean_output', action='store_true',
                        default=False,
                        help='Clean output before starting')

    parser.add_argument('-i', '--stdin', dest='stdin', action='store_true',
                        default=False,
                        help='Read data from stdin instead of conf.analyzed_filename')

    parser.add_argument('-f', '--file', dest='file',
                        help='Analyse this log file, multiple files can be specified (comma separated). gz files are accepted')

    parser.add_argument('-d', '--log-level', dest='loglevel',
                        default='INFO', type=str,
                        help='Loglevel in %s, default : %s' % (['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], 'INFO'))

    parser.add_argument('-r', '--reset', dest='reset',
                        default=False,
                        help='Reset analysis to a specific date (month/year)')

    parser.add_argument('-z', '--dont-compress', dest='dont_compress', action='store_true',
                        default=False,
                        help='Don\'t compress databases (bigger but faster, not compatible with compressed databases)')

    parser.add_argument('-p', '--display-only', dest='display_only', action='store_true',
                        default=False,
                        help='Only generate display')

    parser.add_argument('-D', '--dry-run', dest='dry_run', action='store_true',
                        default=False,
                        help='Process log but don\'t write files (database and HTML) to disk')

    args = parser.parse_args()

    if args.config_file.endswith('.py'):
        args.config_file = args.config_file[:-3]
        
    user_conf = importlib.import_module(args.config_file)
    
    # Load user conf
    for (k,v) in user_conf.__dict__.items():
        if k.endswith('_append'):
            new_k = k[:-7]
            if new_k in dir(conf):
                if type(conf.__dict__[new_k]) == list:
                    if type(v) == list:
                        conf.__dict__[new_k] += v
                    else:
                        conf.__dict__[new_k].append(v)
                else:
                    print("Error %s is not a list" % (new_k))
            else:
                print("Error %s doesn't exists in default conf" % (new_k))
        else:
            conf.__dict__.update({k:v})

    if args.clean_output and not args.dry_run:
        if os.path.exists(conf.DB_ROOT): shutil.rmtree(conf.DB_ROOT)
        if os.path.exists(conf.DISPLAY_ROOT): shutil.rmtree(conf.DISPLAY_ROOT)

    loglevel = getattr(logging, args.loglevel.upper(), None)
    if not isinstance(loglevel, int):
        raise ValueError('Invalid log level: %s' % (args.loglevel))
    
    iwla = IWLA(loglevel, args.dry_run)
    
    required_conf = ['analyzed_filename', 'domain_name']
    if not validConfRequirements(required_conf, iwla, 'Main Conf'):
        sys.exit(0)

    if args.stdin:
        iwla.start(sys.stdin, args)
    else:
        filename = args.file or conf.analyzed_filename
        iwla.start(FileIter(filename), args)