iwla/plugins/post_analysis/subdomains.py
2023-01-28 09:44:43 +01:00

74 lines
1.7 KiB
Python

# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé 2023
# This file is part of iwla
# iwla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# iwla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
#
import re
from iwla import IWLA
from iplugin import IPlugin
"""
Post analysis hook
Group top pages by subdomains
Plugin requirements :
post_analysis/top_pages
Conf values needed :
None
Output files :
None
Statistics creation :
month_stats:
subdomains =>
domain => count
Statistics update :
None
Statistics deletion :
None
"""
class IWLAPostAnalysisSubDomains(IPlugin):
    """Post analysis plugin that totals top-page counts per domain.

    The domain of an entry is the text found before the first '/' of its
    key in month_stats['top_pages']. The totals are stored in
    month_stats['subdomains'] as a {domain: count} dictionary.
    """

    def __init__(self, iwla):
        """Initialise the base plugin and declare the required plugin."""
        super(IWLAPostAnalysisSubDomains, self).__init__(iwla)
        self.requires = ['IWLAPostAnalysisTopPages']

    def load(self):
        """Compile the domain extraction pattern and return True."""
        # Group 1 captures everything that precedes the first '/'
        pattern = r'([^/]*)/.*'
        self.domain_re = re.compile(pattern)
        return True

    def hook(self):
        """Fill month_stats['subdomains'] from month_stats['top_pages'].

        Keys of 'top_pages' that contain no '/' are ignored.
        """
        stats = self.iwla.getMonthStats()
        totals = {}

        for page, hits in stats['top_pages'].items():
            found = self.domain_re.match(page)
            if found is None:
                # No '/' in this key: there is no domain part to extract
                continue
            host = found.group(1)
            totals[host] = totals.get(host, 0) + hits

        # Published only once every entry has been processed
        stats['subdomains'] = totals