Handle URLs with empty referer
This commit is contained in:
parent
3b3ae1ea3e
commit
fffab335fa
|
@ -22,7 +22,7 @@ DB_FILENAME = 'iwla.db'
|
|||
# Web server log format (nginx style). Default is the Apache log format.
|
||||
log_format = '$server_name:$server_port $remote_addr - $remote_user [$time_local] ' +\
|
||||
'"$request" $status $body_bytes_sent ' +\
|
||||
'"$http_referer" "$http_user_agent"'
|
||||
'"$http_referer?" "$http_user_agent?"'
|
||||
|
||||
# Time format used in the log format
|
||||
time_format = '%d/%b/%Y:%H:%M:%S %z'
|
||||
|
|
10
iwla.py
10
iwla.py
|
@ -143,7 +143,7 @@ class IWLA(object):
|
|||
self.valid_visitors = None
|
||||
self.dry_run = dry_run
|
||||
|
||||
self.log_format_extracted = re.sub(r'([^\$\w])', r'\\\g<1>', conf.log_format)
|
||||
self.log_format_extracted = re.sub(r'([^\$?\w])', r'\\\g<1>', conf.log_format)
|
||||
self.log_format_extracted = re.sub(r'\$(\w+)', '(?P<\g<1>>.+)', self.log_format_extracted)
|
||||
self.http_request_extracted = re.compile(r'(?P<http_method>\S+) (?P<http_uri>\S+) (?P<http_version>\S+)')
|
||||
self.log_re = re.compile(self.log_format_extracted)
|
||||
|
@ -374,10 +374,10 @@ class IWLA(object):
|
|||
groups = self.http_request_extracted.match(hit['request'])
|
||||
|
||||
if groups:
|
||||
hit['extract_request'] = groups.groupdict()
|
||||
hit['extract_request'] = groups.groupdict("")
|
||||
uri_groups = self.uri_re.match(hit['extract_request']['http_uri'])
|
||||
if uri_groups:
|
||||
d = uri_groups.groupdict()
|
||||
d = uri_groups.groupdict("")
|
||||
hit['extract_request']['extract_uri'] = self._normalizeURI(d['extract_uri'])
|
||||
if 'extract_parameters' in d.keys():
|
||||
parameters = self._normalizeParameters(d['extract_parameters'])
|
||||
|
@ -390,7 +390,7 @@ class IWLA(object):
|
|||
if hit['http_referer']:
|
||||
referer_groups = self.uri_re.match(hit['http_referer'])
|
||||
if referer_groups:
|
||||
hit['extract_referer'] = referer_groups.groupdict()
|
||||
hit['extract_referer'] = referer_groups.groupdict("")
|
||||
hit['extract_referer']['extract_uri'] = self._removeFinalSlashes(hit['extract_referer']['extract_uri'])
|
||||
hit['extract_referer']['extract_parameters'] = self._normalizeParameters(hit['extract_referer']['extract_parameters'])
|
||||
return True
|
||||
|
@ -781,7 +781,7 @@ class IWLA(object):
|
|||
groups = self.log_re.match(l)
|
||||
|
||||
if groups:
|
||||
self._newHit(groups.groupdict())
|
||||
self._newHit(groups.groupdict(""))
|
||||
else:
|
||||
self.logger.warning("No match for %s" % (l))
|
||||
#break
|
||||
|
|
Loading…
Reference in New Issue
Block a user