Don't count 'uri' and 'uri/' as different URIs
This commit is contained in:
		| @@ -1,4 +1,4 @@ | |||||||
| v0.3 (13/07/2015) | v0.3 (20/12/2015) | ||||||
| ** User ** | ** User ** | ||||||
| 	Add referers_diff display plugin | 	Add referers_diff display plugin | ||||||
| 	Add year statistics in month details | 	Add year statistics in month details | ||||||
| @@ -21,6 +21,7 @@ v0.3 (13/07/2015) | |||||||
| 	Sort documentation output | 	Sort documentation output | ||||||
| 	Add debug traces in robots plugin | 	Add debug traces in robots plugin | ||||||
| 	Update awstats data | 	Update awstats data | ||||||
|  | 	Don't count 'uri' and 'uri/' as different URIs | ||||||
|  |  | ||||||
| ** Bugs ** | ** Bugs ** | ||||||
| 	Forgot <body> tag | 	Forgot <body> tag | ||||||
|   | |||||||
							
								
								
									
										19
									
								
								iwla.py
									
									
									
									
									
								
							
							
						
						
									
										19
									
								
								iwla.py
									
									
									
									
									
								
							| @@ -148,6 +148,7 @@ class IWLA(object): | |||||||
|         self.log_re = re.compile(self.log_format_extracted) |         self.log_re = re.compile(self.log_format_extracted) | ||||||
|         self.uri_re = re.compile(r'(?P<extract_uri>[^\?#]+)(\?(?P<extract_parameters>[^#]+))?(#.*)?') |         self.uri_re = re.compile(r'(?P<extract_uri>[^\?#]+)(\?(?P<extract_parameters>[^#]+))?(#.*)?') | ||||||
|         self.domain_name_re = re.compile(r'.*%s' % conf.domain_name) |         self.domain_name_re = re.compile(r'.*%s' % conf.domain_name) | ||||||
|  |         self.normalize_uri_final_slashes = re.compile(r'/+$') | ||||||
|         self.plugins = [(conf.PRE_HOOK_DIRECTORY     , conf.pre_analysis_hooks), |         self.plugins = [(conf.PRE_HOOK_DIRECTORY     , conf.pre_analysis_hooks), | ||||||
|                         (conf.POST_HOOK_DIRECTORY    , conf.post_analysis_hooks), |                         (conf.POST_HOOK_DIRECTORY    , conf.post_analysis_hooks), | ||||||
|                         (conf.DISPLAY_HOOK_DIRECTORY , conf.display_hooks)] |                         (conf.DISPLAY_HOOK_DIRECTORY , conf.display_hooks)] | ||||||
| @@ -334,6 +335,16 @@ class IWLA(object): | |||||||
|         super_hit['robot'] = False |         super_hit['robot'] = False | ||||||
|         super_hit['hit_only'] = 0 |         super_hit['hit_only'] = 0 | ||||||
|  |  | ||||||
|  |     def _normalizeURI(self, uri): | ||||||
|  |         if uri == '/': return uri | ||||||
|  |         uri = self.normalize_uri_final_slashes.sub('', uri) | ||||||
|  |         return uri | ||||||
|  |  | ||||||
|  |     def _normalizeParameters(self, parameters): | ||||||
|  |         # No parameters | ||||||
|  |         if parameters == '?': return None | ||||||
|  |         return parameters | ||||||
|  |  | ||||||
|     def _decodeHTTPRequest(self, hit): |     def _decodeHTTPRequest(self, hit): | ||||||
|         if not 'request' in hit.keys(): return False |         if not 'request' in hit.keys(): return False | ||||||
|  |  | ||||||
| @@ -344,9 +355,11 @@ class IWLA(object): | |||||||
|             uri_groups = self.uri_re.match(hit['extract_request']['http_uri']) |             uri_groups = self.uri_re.match(hit['extract_request']['http_uri']) | ||||||
|             if uri_groups: |             if uri_groups: | ||||||
|                 d = uri_groups.groupdict() |                 d = uri_groups.groupdict() | ||||||
|                 hit['extract_request']['extract_uri'] = d['extract_uri'] |                 hit['extract_request']['extract_uri'] = self._normalizeURI(d['extract_uri']) | ||||||
|                 if 'extract_parameters' in d.keys(): |                 if 'extract_parameters' in d.keys(): | ||||||
|                     hit['extract_request']['extract_parameters'] = d['extract_parameters'] |                     parameters = self._normalizeParameters(d['extract_parameters']) | ||||||
|  |                     if parameters: | ||||||
|  |                         hit['extract_request']['extract_parameters'] = parameters | ||||||
|         else: |         else: | ||||||
|             self.logger.warning("Bad request extraction %s" % (hit['request'])) |             self.logger.warning("Bad request extraction %s" % (hit['request'])) | ||||||
|             return False |             return False | ||||||
| @@ -354,7 +367,7 @@ class IWLA(object): | |||||||
|         if hit['http_referer']: |         if hit['http_referer']: | ||||||
|             referer_groups = self.uri_re.match(hit['http_referer']) |             referer_groups = self.uri_re.match(hit['http_referer']) | ||||||
|             if referer_groups: |             if referer_groups: | ||||||
|                 hit['extract_referer'] = referer_groups.groupdict() |                 hit['extract_referer'] = self._normalizeURI(referer_groups.groupdict()) | ||||||
|         return True |         return True | ||||||
|  |  | ||||||
|     def _decodeTime(self, hit): |     def _decodeTime(self, hit): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user