Move reverse DNS core management into iwla.py + Add robot_domains configuration

This commit is contained in:
Gregory Soutade 2024-10-27 09:16:01 +01:00
parent 70de0d3aca
commit bde91ca936
2 changed files with 43 additions and 17 deletions

28
iwla.py
View File

@ -32,6 +32,7 @@ import logging
import gettext
from calendar import monthrange
from datetime import date, datetime
import socket
import default_conf as conf
@ -53,6 +54,7 @@ Conf values needed :
compress_output_files
excluded_ip
excluded_domain_name
reverse_dns_timeout*
Output files :
DB_ROOT/meta.db
@ -133,7 +135,8 @@ class IWLA(object):
ANALYSIS_CLASS = 'HTTP'
API_VERSION = 1
IWLA_VERSION = '0.7'
IWLA_VERSION = '0.8'
DEFAULT_DNS_TIMEOUT = 0.5
def __init__(self, logLevel, args):
self.meta_infos = {}
@ -145,6 +148,9 @@ class IWLA(object):
self.valid_visitors = None
self.args = args
self.reverse_dns_timeout = self.getConfValue('reverse_dns_timeout',
IWLA.DEFAULT_DNS_TIMEOUT)
self.log_format_extracted = re.sub(r'([^\$?\w])', r'\\\g<1>', conf.log_format)
self.log_format_extracted = re.sub(r'\$(\w+)', '(?P<\g<1>>.+)', self.log_format_extracted)
self.http_request_extracted = re.compile(r'(?P<http_method>\S+) (?P<http_uri>\S+) (?P<http_version>\S+)')
@ -242,6 +248,26 @@ class IWLA(object):
def getCSSPath(self):
    """Return the CSS path taken from the ``css_path`` configuration value."""
    css_path = conf.css_path
    return css_path
def reverseDNS(self, hit):
    """Replace the hit's address by its reverse DNS name when it resolves.

    ``hit`` is a dict that carries at least ``remote_ip`` (the address to
    look up) and ``remote_addr`` (the value that is returned). On a
    successful lookup ``remote_addr`` is overwritten with the lower-cased
    host name and ``dns_name_replaced`` is set to True. ``dns_analysed`` is
    set to True as soon as a lookup has been attempted, whatever its outcome.

    A hit already flagged ``dns_name_replaced`` is returned untouched,
    without a new lookup.

    Returns ``hit['remote_addr']``: the resolved name, or the previous
    value when the lookup failed.
    """
    if hit.get('dns_name_replaced', False):
        return hit['remote_addr']

    # The default timeout is process-wide state: remember it so that it
    # can be put back whatever happens during the lookup.
    # NOTE(review): the socket documentation describes this timeout as
    # applying to new socket objects; confirm that it really bounds
    # gethostbyaddr() on the target platforms.
    previous_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(self.reverse_dns_timeout)
    try:
        name, _, _ = socket.gethostbyaddr(hit['remote_ip'])
    except (socket.herror, socket.gaierror, socket.timeout):
        # Best effort: an address that cannot be resolved keeps its
        # current remote_addr.
        pass
    else:
        hit['remote_addr'] = name.lower()
        hit['dns_name_replaced'] = True
    finally:
        # Restore the caller's timeout on the failure paths too, not only
        # after a successful lookup.
        socket.setdefaulttimeout(previous_timeout)
        hit['dns_analysed'] = True

    return hit['remote_addr']
def _clearMeta(self):
self.meta_infos = {
'last_time' : None,

View File

@ -19,12 +19,13 @@
#
import socket
import re
from iwla import IWLA
from iplugin import IPlugin
"""
Post analysis hook
Pre analysis hook
Replace IP by reverse DNS names
@ -32,7 +33,7 @@ Plugin requirements :
None
Conf values needed :
reverse_dns_timeout*
robot_domains*
Output files :
None
@ -51,12 +52,13 @@ Statistics deletion :
"""
class IWLAPostAnalysisReverseDNS(IPlugin):
DEFAULT_DNS_TIMEOUT = 0.5
def load(self):
# Plugin load step: prepares the plugin state and always returns True.
# NOTE(review): this span is a rendered diff hunk without +/- markers. The
# three timeout lines below look like the pre-commit side of the change
# (IWLA now reads 'reverse_dns_timeout' itself) -- confirm before treating
# them as live code.
timeout = self.iwla.getConfValue('reverse_dns_timeout',
IWLAPostAnalysisReverseDNS.DEFAULT_DNS_TIMEOUT)
socket.setdefaulttimeout(timeout)
# Compile each pattern of the optional 'robot_domains' conf value (an empty
# list when it is not set) once, so that hook() only has to match them.
self.robot_domains_re = []
robot_domains = self.iwla.getConfValue('robot_domains', [])
for domain in robot_domains:
self.robot_domains_re.append(re.compile(domain))
return True
def hook(self):
@ -65,15 +67,13 @@ class IWLAPostAnalysisReverseDNS(IPlugin):
if hit.get('dns_analysed', False): continue
# Do reverse for feed parser even if they're not
# valid visitors
if not hit.get('feed_parser', False) and\
not self.iwla.isValidVisitor(hit):
if hit.get('robot', False) and not hit.get('feed_parser', False):
continue
try:
name, _, _ = socket.gethostbyaddr(k)
hit['remote_addr'] = name.lower()
hit['dns_name_replaced'] = True
except:
pass
finally:
hit['dns_analysed'] = True
res = self.iwla.reverseDNS(hit)
for r in self.robot_domains_re:
if r.match(hit['remote_addr']):
hit['robot'] = True
break