Initial commit
This commit is contained in:
1
plugins/post_analysis/__init__.py
Normal file
1
plugins/post_analysis/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
#
|
173
plugins/post_analysis/referers.py
Normal file
173
plugins/post_analysis/referers.py
Normal file
@@ -0,0 +1,173 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright Grégory Soutadé 2015
|
||||
|
||||
# This file is part of iwla
|
||||
|
||||
# iwla is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# iwla is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
import re
|
||||
import urllib
|
||||
|
||||
from iwla import IWLA
|
||||
from iplugin import IPlugin
|
||||
|
||||
import awstats_data
|
||||
|
||||
"""
|
||||
Post analysis hook
|
||||
|
||||
Extract referers and key phrases from requests
|
||||
|
||||
Plugin requirements :
|
||||
None
|
||||
|
||||
Conf values needed :
|
||||
domain_name
|
||||
|
||||
Output files :
|
||||
None
|
||||
|
||||
Statistics creation :
|
||||
None
|
||||
|
||||
Statistics update :
|
||||
month_stats :
|
||||
referers =>
|
||||
pages
|
||||
hits
|
||||
robots_referers =>
|
||||
pages
|
||||
hits
|
||||
search_engine_referers =>
|
||||
pages
|
||||
hits
|
||||
key_phrases =>
|
||||
phrase
|
||||
|
||||
Statistics deletion :
|
||||
None
|
||||
"""
|
||||
|
||||
class IWLAPostAnalysisReferers(IPlugin):
    """Post-analysis plugin that classifies referers and counts key phrases.

    For every request of the current analysis that carries a referer not
    matching the configured domain, the referer is counted (as 'pages' or
    'hits') in one of three month_stats dictionaries: 'search_engine_referers',
    'robots_referers' or 'referers'.  Key phrases found in search engine
    referer parameters are counted in month_stats['key_phrases'].
    """

    def __init__(self, iwla):
        super(IWLAPostAnalysisReferers, self).__init__(iwla)
        self.API_VERSION = 1
        self.conf_requires = ['domain_name']

    def _getSearchEngine(self, hashid):
        """Return the name of the engine registered with *hashid*, or None."""
        for (name, engine) in self.search_engines.items():
            for (known_hashid, _) in engine['hashid']:
                if hashid == known_hashid:
                    return name
        return None

    def _findSearchEngine(self, uri):
        """Return (name, engine) of the first search engine accepting *uri*.

        An engine accepts *uri* when one of its hashid patterns matches and
        its optional 'not_search_engine' pattern does not.  Returns
        (None, None) when no engine accepts it.
        """
        for (name, engine) in self.search_engines.items():
            not_engine = engine.get('not_search_engine', None)
            for (_, hashid_re) in engine['hashid']:
                if not hashid_re.match(uri):
                    continue
                # Excluded URL for this engine: give up on this engine only
                if not_engine and not_engine.match(uri):
                    break
                return (name, engine)
        return (None, None)

    def load(self):
        """Compile the domain and search engine patterns.

        Returns False (after printing a message) when the 'domain_name'
        configuration value is empty, True otherwise.
        """
        domain_name = self.iwla.getConfValue('domain_name', '')

        if not domain_name:
            # Single-argument parenthesized form: same output as the
            # print statement
            print('domain_name must not be empty !')
            return False

        # NOTE(review): domain_name is inserted into the pattern verbatim, so
        # regex metacharacters such as '.' are interpreted by re -- confirm
        # whether re.escape() is wanted here.
        self.own_domain_re = re.compile(r'.*%s.*' % (domain_name))
        self.search_engines = {}

        # Several hashids may map to the same engine name
        for (hashid, name) in awstats_data.search_engines_hashid.items():
            hashid_re = re.compile(r'.*%s.*' % (hashid))
            engine = self.search_engines.setdefault(name, {'hashid': []})
            engine['hashid'].append((hashid, hashid_re))

        for (name, known_url) in awstats_data.search_engines_knwown_url.items():
            self.search_engines[name]['known_url'] = re.compile(known_url + '(?P<key_phrase>.+)')

        # not_search_engines_keys is indexed by hashid, not by engine name
        for (engine, not_engine) in awstats_data.not_search_engines_keys.items():
            not_engine_re = re.compile(r'.*%s.*' % (not_engine))
            key = self._getSearchEngine(engine)
            if key:
                self.search_engines[key]['not_search_engine'] = not_engine_re

        return True

    def _extractKeyPhrase(self, key_phrase_re, parameters, key_phrases):
        """Count the key phrase found in *parameters* into *key_phrases*.

        *parameters* is split on '&'; the first parameter matched by
        *key_phrase_re* provides the phrase (named group 'key_phrase'), which
        is URL-unquoted and decoded as UTF-8.  Nothing happens when either
        *parameters* or *key_phrase_re* is empty/None.
        """
        if not parameters or not key_phrase_re:
            return

        for p in parameters.split('&'):
            groups = key_phrase_re.match(p)
            if not groups:
                continue
            raw_phrase = urllib.unquote_plus(groups.group('key_phrase'))
            # Referers are untrusted input: replace invalid UTF-8 sequences
            # instead of raising UnicodeDecodeError in the middle of the
            # analysis.  Valid input decodes exactly as before.
            key_phrase = raw_phrase.decode('utf8', 'replace')
            key_phrases[key_phrase] = key_phrases.get(key_phrase, 0) + 1
            break

    def hook(self):
        """Update the referer and key phrase statistics of the month."""
        stats = self.iwla.getCurrentVisists()
        month_stats = self.iwla.getMonthStats()

        referers = month_stats.get('referers', {})
        robots_referers = month_stats.get('robots_referers', {})
        search_engine_referers = month_stats.get('search_engine_referers', {})
        key_phrases = month_stats.get('key_phrases', {})

        for super_hit in stats.values():
            # Requests are walked from the last one backwards; the walk stops
            # at the first request that is not valid for the current analysis
            for r in super_hit['requests'][::-1]:
                if not self.iwla.isValidForCurrentAnalysis(r):
                    break
                if not r['http_referer']:
                    continue

                uri = r['extract_referer']['extract_uri']
                if self.own_domain_re.match(uri):
                    continue

                # Stop at the first accepted engine: scanning further engines
                # with the URI replaced by the engine name could re-assign
                # the referer and count its key phrase more than once.
                (name, engine) = self._findSearchEngine(uri)
                if engine is not None:
                    parameters = r['extract_referer'].get('extract_parameters', None)
                    key_phrase_re = engine.get('known_url', None)
                    self._extractKeyPhrase(key_phrase_re, parameters, key_phrases)
                    # Search engine referers are grouped by engine name
                    uri = name
                    dictionary = search_engine_referers
                elif super_hit['robot']:
                    dictionary = robots_referers
                else:
                    dictionary = referers

                key = 'pages' if r['is_page'] else 'hits'
                counters = dictionary.setdefault(uri, {'pages':0, 'hits':0})
                counters[key] += 1

        month_stats['referers'] = referers
        month_stats['robots_referers'] = robots_referers
        month_stats['search_engine_referers'] = search_engine_referers
        month_stats['key_phrases'] = key_phrases
|
78
plugins/post_analysis/reverse_dns.py
Normal file
78
plugins/post_analysis/reverse_dns.py
Normal file
@@ -0,0 +1,78 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright Grégory Soutadé 2015
|
||||
|
||||
# This file is part of iwla
|
||||
|
||||
# iwla is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# iwla is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
import socket
|
||||
|
||||
from iwla import IWLA
|
||||
from iplugin import IPlugin
|
||||
|
||||
"""
|
||||
Post analysis hook
|
||||
|
||||
Replace IP by reverse DNS names
|
||||
|
||||
Plugin requirements :
|
||||
None
|
||||
|
||||
Conf values needed :
|
||||
reverse_dns_timeout*
|
||||
|
||||
Output files :
|
||||
None
|
||||
|
||||
Statistics creation :
|
||||
None
|
||||
|
||||
Statistics update :
|
||||
valid_visitors:
|
||||
remote_addr
|
||||
dns_name_replaced
|
||||
dns_analysed
|
||||
|
||||
Statistics deletion :
|
||||
None
|
||||
"""
|
||||
|
||||
class IWLAPostAnalysisReverseDNS(IPlugin):
    """Post-analysis plugin that replaces visitor IPs by reverse DNS names.

    Each valid visitor is resolved at most once: the 'dns_analysed' flag is
    set after the first attempt, whether it succeeded or not.
    """

    # Timeout (in seconds) used when 'reverse_dns_timeout' is not configured
    DEFAULT_DNS_TIMEOUT = 0.5

    def __init__(self, iwla):
        super(IWLAPostAnalysisReverseDNS, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Install the configured timeout as socket default. Returns True."""
        timeout = self.iwla.getConfValue('reverse_dns_timeout',
                                         IWLAPostAnalysisReverseDNS.DEFAULT_DNS_TIMEOUT)
        # Process-wide setting.
        # NOTE(review): the socket documentation describes this as the default
        # timeout for new socket objects -- confirm it actually bounds
        # gethostbyaddr() on the target platform.
        socket.setdefaulttimeout(timeout)
        return True

    def hook(self):
        """Resolve the address of every valid visitor not yet analysed.

        On success 'remote_addr' receives the lower-cased host name and
        'dns_name_replaced' is set to True.  Lookup failures are ignored
        (best effort) and the visitor keeps its current 'remote_addr'.
        """
        hits = self.iwla.getValidVisitors()
        for (k, hit) in hits.items():
            if hit.get('dns_analysed', False):
                continue
            try:
                name, _, _ = socket.gethostbyaddr(k)
            except Exception:
                # Best effort: keep the address on any lookup failure.
                # Unlike a bare except, this lets KeyboardInterrupt and
                # SystemExit stop a long resolution loop.
                pass
            else:
                hit['remote_addr'] = name.lower()
                hit['dns_name_replaced'] = True
            finally:
                hit['dns_analysed'] = True
|
||||
|
94
plugins/post_analysis/top_downloads.py
Normal file
94
plugins/post_analysis/top_downloads.py
Normal file
@@ -0,0 +1,94 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright Grégory Soutadé 2015
|
||||
|
||||
# This file is part of iwla
|
||||
|
||||
# iwla is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# iwla is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
import re
|
||||
|
||||
from iwla import IWLA
|
||||
from iplugin import IPlugin
|
||||
|
||||
"""
|
||||
Post analysis hook
|
||||
|
||||
Count TOP downloads
|
||||
|
||||
Plugin requirements :
|
||||
None
|
||||
|
||||
Conf values needed :
|
||||
multimedia_files
viewed_http_codes
|
||||
|
||||
Output files :
|
||||
None
|
||||
|
||||
Statistics creation :
|
||||
None
|
||||
|
||||
Statistics update :
|
||||
month_stats:
|
||||
top_downloads =>
|
||||
uri
|
||||
|
||||
Statistics deletion :
|
||||
None
|
||||
"""
|
||||
|
||||
class IWLAPostAnalysisTopDownloads(IPlugin):
    """Post-analysis plugin that counts downloads per URI.

    A download is a viewed, non-page request from a non-robot visitor whose
    URI does not end with one of the 'multimedia_files' extensions.  Counters
    are stored in month_stats['top_downloads'], keyed by server name + URI.
    """

    def __init__(self, iwla):
        super(IWLAPostAnalysisTopDownloads, self).__init__(iwla)
        self.API_VERSION = 1
        self.conf_requires = ['multimedia_files', 'viewed_http_codes']

    def hook(self):
        """Update month_stats['top_downloads'] with the current visits."""
        stats = self.iwla.getCurrentVisists()
        month_stats = self.iwla.getMonthStats()

        # str.endswith() accepts a tuple of suffixes: one call per request
        multimedia_exts = tuple(self.iwla.getConfValue('multimedia_files'))

        top_downloads = month_stats.get('top_downloads', {})

        for super_hit in stats.values():
            if super_hit['robot']:
                continue
            # Requests are walked from the last one backwards; the walk stops
            # at the first request that is not valid for the current analysis
            for r in super_hit['requests'][::-1]:
                if not self.iwla.isValidForCurrentAnalysis(r):
                    break
                if not self.iwla.hasBeenViewed(r) or r['is_page']:
                    continue

                request_uri = r['extract_request']['extract_uri']

                # Extension test is case-insensitive on the URI side only;
                # the counter key below keeps the original case
                if request_uri.lower().endswith(multimedia_exts):
                    continue

                uri = "%s%s" % (r.get('server_name', ''), request_uri)
                top_downloads[uri] = top_downloads.get(uri, 0) + 1

        month_stats['top_downloads'] = top_downloads
|
78
plugins/post_analysis/top_hits.py
Normal file
78
plugins/post_analysis/top_hits.py
Normal file
@@ -0,0 +1,78 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright Grégory Soutadé 2015
|
||||
|
||||
# This file is part of iwla
|
||||
|
||||
# iwla is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# iwla is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from iwla import IWLA
|
||||
from iplugin import IPlugin
|
||||
|
||||
"""
|
||||
Post analysis hook
|
||||
|
||||
Count TOP hits
|
||||
|
||||
Plugin requirements :
|
||||
None
|
||||
|
||||
Conf values needed :
|
||||
None
|
||||
|
||||
Output files :
|
||||
None
|
||||
|
||||
Statistics creation :
|
||||
None
|
||||
|
||||
Statistics update :
|
||||
month_stats:
|
||||
top_hits =>
|
||||
uri
|
||||
|
||||
Statistics deletion :
|
||||
None
|
||||
"""
|
||||
|
||||
class IWLAPostAnalysisTopHits(IPlugin):
    """Post-analysis plugin that counts hits (non-page requests) per URI.

    Only viewed requests from non-robot visitors are counted.  Counters are
    stored in month_stats['top_hits'], keyed by server name + lower-cased URI.
    """

    def __init__(self, iwla):
        super(IWLAPostAnalysisTopHits, self).__init__(iwla)
        self.API_VERSION = 1

    def hook(self):
        """Update month_stats['top_hits'] with the current visits."""
        stats = self.iwla.getCurrentVisists()
        month_stats = self.iwla.getMonthStats()

        top_hits = month_stats.get('top_hits', {})

        for super_hit in stats.values():
            if super_hit['robot']:
                continue
            # Requests are walked from the last one backwards; the walk stops
            # at the first request that is not valid for the current analysis
            for r in super_hit['requests'][::-1]:
                if not self.iwla.isValidForCurrentAnalysis(r):
                    break
                if not self.iwla.hasBeenViewed(r) or r['is_page']:
                    continue

                request_uri = r['extract_request']['extract_uri'].lower()
                uri = "%s%s" % (r.get('server_name', ''), request_uri)

                # Direct dictionary lookup instead of scanning keys()
                top_hits[uri] = top_hits.get(uri, 0) + 1

        month_stats['top_hits'] = top_hits
|
87
plugins/post_analysis/top_pages.py
Normal file
87
plugins/post_analysis/top_pages.py
Normal file
@@ -0,0 +1,87 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright Grégory Soutadé 2015
|
||||
|
||||
# This file is part of iwla
|
||||
|
||||
# iwla is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# iwla is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
import re
|
||||
|
||||
from iwla import IWLA
|
||||
from iplugin import IPlugin
|
||||
|
||||
"""
|
||||
Post analysis hook
|
||||
|
||||
Count TOP pages
|
||||
|
||||
Plugin requirements :
|
||||
None
|
||||
|
||||
Conf values needed :
|
||||
None
|
||||
|
||||
Output files :
|
||||
None
|
||||
|
||||
Statistics creation :
|
||||
None
|
||||
|
||||
Statistics update :
|
||||
month_stats:
|
||||
top_pages =>
|
||||
uri
|
||||
|
||||
Statistics deletion :
|
||||
None
|
||||
"""
|
||||
|
||||
class IWLAPostAnalysisTopPages(IPlugin):
    """Post-analysis plugin that counts viewed pages per URI.

    Only viewed page requests from non-robot visitors are counted.  Counters
    are stored in month_stats['top_pages'], keyed by server name + URI.
    """

    def __init__(self, iwla):
        super(IWLAPostAnalysisTopPages, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Compile the pattern used to fold index pages. Returns True."""
        # NOTE(review): used with match(), so this accepts every URI that
        # starts with '/index' (and none located in a sub-directory) --
        # confirm this is the intended set of index pages.
        self.index_re = re.compile(r'/index.*')
        return True

    def hook(self):
        """Update month_stats['top_pages'] with the current visits."""
        stats = self.iwla.getCurrentVisists()
        month_stats = self.iwla.getMonthStats()

        top_pages = month_stats.get('top_pages', {})

        for super_hit in stats.values():
            if super_hit['robot']:
                continue
            # Requests are walked from the last one backwards; the walk stops
            # at the first request that is not valid for the current analysis
            for r in super_hit['requests'][::-1]:
                if not self.iwla.isValidForCurrentAnalysis(r):
                    break
                if not self.iwla.hasBeenViewed(r) or not r['is_page']:
                    continue

                request_uri = r['extract_request']['extract_uri']
                # URIs matched by index_re are counted as the root page
                if self.index_re.match(request_uri):
                    request_uri = '/'

                uri = "%s%s" % (r.get('server_name', ''), request_uri)

                # Direct dictionary lookup instead of scanning keys()
                top_pages[uri] = top_pages.get(uri, 0) + 1

        month_stats['top_pages'] = top_pages
|
Reference in New Issue
Block a user