Move reverse DNS core management into iwla.py + Add robot_domains configuration

This commit is contained in:
Gregory Soutade 2024-10-27 09:16:01 +01:00
parent 70de0d3aca
commit bde91ca936
2 changed files with 43 additions and 17 deletions

28
iwla.py
View File

@ -32,6 +32,7 @@ import logging
import gettext import gettext
from calendar import monthrange from calendar import monthrange
from datetime import date, datetime from datetime import date, datetime
import socket
import default_conf as conf import default_conf as conf
@ -53,6 +54,7 @@ Conf values needed :
compress_output_files compress_output_files
excluded_ip excluded_ip
excluded_domain_name excluded_domain_name
reverse_dns_timeout*
Output files : Output files :
DB_ROOT/meta.db DB_ROOT/meta.db
@ -133,7 +135,8 @@ class IWLA(object):
ANALYSIS_CLASS = 'HTTP' ANALYSIS_CLASS = 'HTTP'
API_VERSION = 1 API_VERSION = 1
IWLA_VERSION = '0.7' IWLA_VERSION = '0.8'
DEFAULT_DNS_TIMEOUT = 0.5
def __init__(self, logLevel, args): def __init__(self, logLevel, args):
self.meta_infos = {} self.meta_infos = {}
@ -145,6 +148,9 @@ class IWLA(object):
self.valid_visitors = None self.valid_visitors = None
self.args = args self.args = args
self.reverse_dns_timeout = self.getConfValue('reverse_dns_timeout',
IWLA.DEFAULT_DNS_TIMEOUT)
self.log_format_extracted = re.sub(r'([^\$?\w])', r'\\\g<1>', conf.log_format) self.log_format_extracted = re.sub(r'([^\$?\w])', r'\\\g<1>', conf.log_format)
self.log_format_extracted = re.sub(r'\$(\w+)', '(?P<\g<1>>.+)', self.log_format_extracted) self.log_format_extracted = re.sub(r'\$(\w+)', '(?P<\g<1>>.+)', self.log_format_extracted)
self.http_request_extracted = re.compile(r'(?P<http_method>\S+) (?P<http_uri>\S+) (?P<http_version>\S+)') self.http_request_extracted = re.compile(r'(?P<http_method>\S+) (?P<http_uri>\S+) (?P<http_version>\S+)')
@ -242,6 +248,26 @@ class IWLA(object):
def getCSSPath(self): def getCSSPath(self):
return conf.css_path return conf.css_path
def reverseDNS(self, hit):
    """Replace the address of *hit* by its reverse DNS name when possible.

    hit is a dict that provides 'remote_ip' and 'remote_addr'. On a
    successful lookup, hit['remote_addr'] receives the lower-cased host
    name and hit['dns_name_replaced'] is set to True. Whenever a lookup
    has been attempted, hit['dns_analysed'] is set to True.

    Returns hit['remote_addr'] (the resolved name, or the previous value
    when the lookup failed or had already been done).
    """
    # Name already resolved by a previous call: nothing to do.
    if hit.get('dns_name_replaced', False):
        return hit['remote_addr']

    # The default timeout is a process-wide setting: remember the current
    # value so that it can be restored whatever the lookup outcome is.
    previous_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(self.reverse_dns_timeout)
    try:
        name, _, _ = socket.gethostbyaddr(hit['remote_ip'])
    except (socket.herror, socket.gaierror, socket.timeout):
        # Best effort: an unresolvable or malformed address, or a lookup
        # timeout, keeps the original address.
        pass
    else:
        hit['remote_addr'] = name.lower()
        hit['dns_name_replaced'] = True
    finally:
        # Restore the previous timeout even when the lookup raised.
        socket.setdefaulttimeout(previous_timeout)
        hit['dns_analysed'] = True

    return hit['remote_addr']
def _clearMeta(self): def _clearMeta(self):
self.meta_infos = { self.meta_infos = {
'last_time' : None, 'last_time' : None,

View File

@ -19,12 +19,13 @@
# #
import socket import socket
import re
from iwla import IWLA from iwla import IWLA
from iplugin import IPlugin from iplugin import IPlugin
""" """
Post analysis hook Pre analysis hook
Replace IP by reverse DNS names Replace IP by reverse DNS names
@ -32,7 +33,7 @@ Plugin requirements :
None None
Conf values needed : Conf values needed :
reverse_dns_timeout* robot_domains*
Output files : Output files :
None None
@ -51,12 +52,13 @@ Statistics deletion :
""" """
class IWLAPostAnalysisReverseDNS(IPlugin): class IWLAPostAnalysisReverseDNS(IPlugin):
DEFAULT_DNS_TIMEOUT = 0.5
def load(self): def load(self):
timeout = self.iwla.getConfValue('reverse_dns_timeout', self.robot_domains_re = []
IWLAPostAnalysisReverseDNS.DEFAULT_DNS_TIMEOUT) robot_domains = self.iwla.getConfValue('robot_domains', [])
socket.setdefaulttimeout(timeout) for domain in robot_domains:
self.robot_domains_re.append(re.compile(domain))
return True return True
def hook(self): def hook(self):
@ -65,15 +67,13 @@ class IWLAPostAnalysisReverseDNS(IPlugin):
if hit.get('dns_analysed', False): continue if hit.get('dns_analysed', False): continue
# Do reverse for feed parser even if they're not # Do reverse for feed parser even if they're not
# valid visitors # valid visitors
if not hit.get('feed_parser', False) and\ if hit.get('robot', False) and not hit.get('feed_parser', False):
not self.iwla.isValidVisitor(hit):
continue continue
try:
name, _, _ = socket.gethostbyaddr(k) res = self.iwla.reverseDNS(hit)
hit['remote_addr'] = name.lower()
hit['dns_name_replaced'] = True for r in self.robot_domains_re:
except: if r.match(hit['remote_addr']):
pass hit['robot'] = True
finally: break
hit['dns_analysed'] = True