From e0f9260802db8835239efaa73b72f75118a79937 Mon Sep 17 00:00:00 2001 From: Gregory Soutade Date: Tue, 17 Feb 2015 19:11:04 +0100 Subject: [PATCH] Add Feeds plugin --- iplugin.py | 2 +- plugins/display/feeds.py | 99 ++++++++++++++++++++++++++ plugins/post_analysis/feeds.py | 81 +++++++++++++++++++++ plugins/post_analysis/top_downloads.py | 2 - 4 files changed, 181 insertions(+), 3 deletions(-) create mode 100644 plugins/display/feeds.py create mode 100644 plugins/post_analysis/feeds.py diff --git a/iplugin.py b/iplugin.py index 2664190..7f57135 100644 --- a/iplugin.py +++ b/iplugin.py @@ -103,7 +103,7 @@ def preloadPlugins(plugins, iwla): requirement_validated = False for r in requirements: - for (_,p) in cache_plugins.items(): + for p in cache_plugins.values(): if p.__class__.__name__ == r: requirement_validated = True break diff --git a/plugins/display/feeds.py b/plugins/display/feeds.py new file mode 100644 index 0000000..3466fd9 --- /dev/null +++ b/plugins/display/feeds.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +# +# Copyright Grégory Soutadé 2015 + +# This file is part of iwla + +# iwla is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# iwla is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with iwla. If not, see . 
+#
+
+from iwla import IWLA
+from iplugin import IPlugin
+from display import *
+
+import awstats_data
+
+"""
+Display hook
+
+Display feeds parsers
+
+Plugin requirements :
+    post_analysis/feeds
+
+Conf values needed :
+    create_all_feeds_page*
+
+Output files :
+    OUTPUT_ROOT/year/month/index.html
+    OUTPUT_ROOT/year/month/all_feeds.html
+
+Statistics creation :
+    None
+
+Statistics update :
+    None
+
+Statistics deletion :
+    None
+"""
+
+class IWLADisplayFeeds(IPlugin):
+    """
+    Display plugin reporting the visitors flagged as feeds parsers.
+
+    Relies on the 'feed_parser' flag read from each visit, hence the
+    requirement on IWLAPostAnalysisFeeds.
+    """
+
+    def __init__(self, iwla):
+        super(IWLADisplayFeeds, self).__init__(iwla)
+        self.API_VERSION = 1
+        self.requires = ['IWLAPostAnalysisFeeds']
+
+    def load(self):
+        """Read the create_all_feeds_page conf value (defaults to True)."""
+        self.create_all_feeds_page = self.iwla.getConfValue('create_all_feeds_page', True)
+
+        return True
+
+    def hook(self):
+        """
+        Append a 'Feeds parsers' count block to the index and, when
+        create_all_feeds_page is set, build all_feeds.html with one
+        row (host, pages, hits) per feeds parser.
+        """
+        display = self.iwla.getDisplay()
+        hits = self.iwla.getCurrentVisists()
+
+        # Select feeds parsers before any page is built so that the count
+        # shown in index does not depend on create_all_feeds_page
+        feeds_parsers = [super_hit for super_hit in hits.values()
+                         if super_hit.get('feed_parser', False)]
+        nb_feeds_parsers = len(feeds_parsers)
+
+        # All in a page
+        if self.create_all_feeds_page:
+            title = createCurTitle(self.iwla, u'All Feeds parsers')
+            filename = 'all_feeds.html'
+            path = self.iwla.getCurDisplayPath(filename)
+            display_visitor_ip = self.iwla.getConfValue('display_visitor_ip', False)
+
+            page = display.createPage(title, path, self.iwla.getConfValue('css_path', []))
+            table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'All feeds parsers'), [self.iwla._(u'Host'), self.iwla._(u'Pages'), self.iwla._(u'Hits')])
+            for super_hit in feeds_parsers:
+                address = super_hit['remote_addr']
+                # Show the IP next to the name when the address was replaced by a DNS name
+                if display_visitor_ip and super_hit.get('dns_name_replaced', False):
+                    address = '%s [%s]' % (address, super_hit['remote_ip'])
+                table.appendRow([address, super_hit['viewed_pages'], super_hit['viewed_hits']])
+            page.appendBlock(table)
+
+            display.addPage(page)
+
+        # Found in index
+        title = self.iwla._(u'Feeds parsers')
+        if self.create_all_feeds_page:
+            # Two values are formatted (target file and label), so two placeholders are needed
+            link = '<a href=\'%s\'>%s</a>' % (filename, self.iwla._(u'Details'))
+            title = '%s - %s' % (title, link)
+
+        index = self.iwla.getDisplayIndex()
+
+        table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Found')])
+        table.appendRow([nb_feeds_parsers])
+        index.appendBlock(table)
diff --git a/plugins/post_analysis/feeds.py b/plugins/post_analysis/feeds.py
new file mode 100644
index 0000000..96884d8
--- /dev/null
+++ b/plugins/post_analysis/feeds.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright Grégory Soutadé 2015
+
+# This file is part of iwla
+
+# iwla is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# iwla is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with iwla. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import re
+
+from iwla import IWLA
+from iplugin import IPlugin
+
+"""
+Post analysis hook
+
+Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot)
+
+Plugin requirements :
+    None
+
+Conf values needed :
+    feeds
+
+Output files :
+    None
+
+Statistics creation :
+    remote_addr =>
+        feed_parser
+
+Statistics update :
+    None
+
+Statistics deletion :
+    None
+"""
+
+class IWLAPostAnalysisFeeds(IPlugin):
+    """
+    Post analysis plugin setting a boolean 'feed_parser' entry on every
+    current visit, based on the URI of the visit's first request.
+    """
+
+    def __init__(self, iwla):
+        super(IWLAPostAnalysisFeeds, self).__init__(iwla)
+        self.API_VERSION = 1
+
+    def load(self):
+        """
+        Compile one regular expression per entry of the 'feeds' conf value.
+
+        Returns False when 'feeds' is not configured, True otherwise.
+        """
+        feeds = self.iwla.getConfValue('feeds', None)
+
+        if feeds is None: return False
+
+        self.feeds_re = [re.compile(r'.*%s.*' % (f)) for f in feeds]
+
+        return True
+
+    def hook(self):
+        """
+        Flag each visit not yet classified: 'feed_parser' is True when the
+        first requested URI (lower-cased) matches one of the feeds patterns,
+        unless the visit is a robot that also viewed pages.
+        """
+        hits = self.iwla.getCurrentVisists()
+        for hit in hits.values():
+            # Already classified: keep the stored value
+            if hit.get('feed_parser', None) is not None: continue
+            isFeedParser = False
+            requests = hit['requests']
+            # A visit without any request cannot be matched against feeds
+            if requests:
+                uri = requests[0]['extract_request']['extract_uri'].lower()
+                if any(regexp.match(uri) for regexp in self.feeds_re):
+                    # Robot that views pages -> bot
+                    isFeedParser = not (hit['robot'] and hit['viewed_pages'])
+            hit['feed_parser'] = isFeedParser
+
diff --git a/plugins/post_analysis/top_downloads.py b/plugins/post_analysis/top_downloads.py
index 2f5c136..3d82e55 100644
--- a/plugins/post_analysis/top_downloads.py
+++ b/plugins/post_analysis/top_downloads.py
@@ -18,8 +18,6 @@
 # along with iwla. If not, see <http://www.gnu.org/licenses/>.
 #
 
-import re
-
 from iwla import IWLA
 from iplugin import IPlugin