Normalize URL before counting in stats
This commit is contained in:
parent
37a33f1291
commit
d36676ca38
18
iwla.py
18
iwla.py
|
@ -149,7 +149,8 @@ class IWLA(object):
|
|||
self.log_re = re.compile(self.log_format_extracted)
|
||||
self.uri_re = re.compile(r'(?P<extract_uri>[^\?#]+)(\?(?P<extract_parameters>[^#]+))?(#.*)?')
|
||||
self.domain_name_re = re.compile(r'.*%s' % conf.domain_name)
|
||||
self.final_slashes_re = re.compile(r'/+$')
|
||||
self.slash_re = re.compile(r'//')
|
||||
self.protocol_re = re.compile(r'^.*://')
|
||||
self.excluded_ip = []
|
||||
for ip in conf.excluded_ip:
|
||||
self.excluded_ip += [re.compile(ip)]
|
||||
|
@ -372,15 +373,16 @@ class IWLA(object):
|
|||
super_hit['robot'] = False
|
||||
super_hit['hit_only'] = 0
|
||||
|
||||
def _normalizeURI(self, uri):
|
||||
def _normalizeURI(self, uri, removeFileSlash=False):
|
||||
if uri == '/': return uri
|
||||
uri = self.final_slashes_re.sub('/', uri)
|
||||
# Remove protocol
|
||||
uri = self.protocol_re.sub('', uri)
|
||||
# Remove double /
|
||||
uri = self.slash_re.sub('/', uri)
|
||||
if removeFileSlash and uri[-1] == '/':
|
||||
uri = uri[:-1]
|
||||
return uri
|
||||
|
||||
def _removeFinalSlashes(self, uri):
|
||||
if uri == '/': return uri
|
||||
return self.final_slashes_re.sub('', uri)
|
||||
|
||||
def _normalizeParameters(self, parameters):
|
||||
# No parameters
|
||||
if parameters == '?': return None
|
||||
|
@ -409,7 +411,7 @@ class IWLA(object):
|
|||
referer_groups = self.uri_re.match(hit['http_referer'])
|
||||
if referer_groups:
|
||||
hit['extract_referer'] = referer_groups.groupdict("")
|
||||
hit['extract_referer']['extract_uri'] = self._removeFinalSlashes(hit['extract_referer']['extract_uri'])
|
||||
hit['extract_referer']['extract_uri'] = self._normalizeURI(hit['extract_referer']['extract_uri'], True)
|
||||
hit['extract_referer']['extract_parameters'] = self._normalizeParameters(hit['extract_referer']['extract_parameters'])
|
||||
return True
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user