From fffab335fa4ac053365c491162748e0daaa12e70 Mon Sep 17 00:00:00 2001
From: Gregory Soutade
Date: Wed, 23 Aug 2017 20:11:17 +0200
Subject: [PATCH] Handle URLs with empty referer

---
 default_conf.py |  2 +-
 iwla.py         | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/default_conf.py b/default_conf.py
index c92d42a..96ce509 100644
--- a/default_conf.py
+++ b/default_conf.py
@@ -22,7 +22,7 @@ DB_FILENAME = 'iwla.db'
 # Web server log format (nginx style). Default is apache log format
 log_format = '$server_name:$server_port $remote_addr - $remote_user [$time_local] ' +\
     '"$request" $status $body_bytes_sent ' +\
-    '"$http_referer" "$http_user_agent"'
+    '"$http_referer?" "$http_user_agent?"'
 
 # Time format used in log format
 time_format = '%d/%b/%Y:%H:%M:%S %z'
diff --git a/iwla.py b/iwla.py
index 8211032..a2ed3de 100755
--- a/iwla.py
+++ b/iwla.py
@@ -143,7 +143,7 @@ class IWLA(object):
         self.valid_visitors = None
         self.dry_run = dry_run
 
-        self.log_format_extracted = re.sub(r'([^\$\w])', r'\\\g<1>', conf.log_format)
+        self.log_format_extracted = re.sub(r'([^\$?\w])', r'\\\g<1>', conf.log_format)
         self.log_format_extracted = re.sub(r'\$(\w+)', '(?P<\g<1>>.+)', self.log_format_extracted)
         self.http_request_extracted = re.compile(r'(?P<http_method>\S+) (?P<http_uri>\S+) (?P<http_version>\S+)')
         self.log_re = re.compile(self.log_format_extracted)
@@ -374,10 +374,10 @@ class IWLA(object):
         groups = self.http_request_extracted.match(hit['request'])
 
         if groups:
-            hit['extract_request'] = groups.groupdict()
+            hit['extract_request'] = groups.groupdict("")
             uri_groups = self.uri_re.match(hit['extract_request']['http_uri'])
             if uri_groups:
-                d = uri_groups.groupdict()
+                d = uri_groups.groupdict("")
                 hit['extract_request']['extract_uri'] = self._normalizeURI(d['extract_uri'])
                 if 'extract_parameters' in d.keys():
                     parameters = self._normalizeParameters(d['extract_parameters'])
@@ -390,7 +390,7 @@ class IWLA(object):
         if hit['http_referer']:
             referer_groups = self.uri_re.match(hit['http_referer'])
             if referer_groups:
-                hit['extract_referer'] = referer_groups.groupdict()
+                hit['extract_referer'] = referer_groups.groupdict("")
                 hit['extract_referer']['extract_uri'] = self._removeFinalSlashes(hit['extract_referer']['extract_uri'])
                 hit['extract_referer']['extract_parameters'] = self._normalizeParameters(hit['extract_referer']['extract_parameters'])
         return True
@@ -781,7 +781,7 @@ class IWLA(object):
             groups = self.log_re.match(l)
 
             if groups:
-                self._newHit(groups.groupdict())
+                self._newHit(groups.groupdict(""))
             else:
                 self.logger.warning("No match for %s" % (l))
                 #break