Merge branch 'master' of soutade.fr:iwla

This commit is contained in:
Grégory Soutadé 2016-04-13 17:28:33 +02:00
commit 56b3457e92
232 changed files with 2970 additions and 558 deletions

4
.gitignore vendored
View File

@ -1,3 +1,5 @@
*~
*.pyc
*.gz
*.gz
output
output_db

29
ChangeLog Normal file
View File

@ -0,0 +1,29 @@
v0.3 (13/07/2015)
** User **
Add referers_diff display plugin
Add year statistics in month details
Add analysis duration
Add browsers detection
Add operating systems detection
Add track users plugin
Add feeds plugin
Add _append feature to conf.py
Add hours_stats plugin
Add display/top_downloads_diff plugin
Can specify multiple files to analyze
Add reset feature
Add gz files support
Add -z option (don't compress databases)
Add own search engines files
** Dev **
Add istats_diff interface
Sort documentation output
Add debug traces in robots plugin
Update awstats data
** Bugs **
Forgot <body> tag
Bad UTC time computation
Hits/pages in the same second were not analyzed
Last day of month was skipped

File diff suppressed because one or more lines are too long

23
conf.py
View File

@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-
# Web server log
analyzed_filename = 'access.log'
analyzed_filename = '/var/log/apache2/access.log.1,/var/log/apache2/access.log'
# Domain name to analyze
domain_name = 'soutade.fr'
@ -11,24 +10,28 @@ display_visitor_ip = True
# Hooks used
pre_analysis_hooks = ['page_to_hit', 'robots']
post_analysis_hooks = ['referers', 'top_pages', 'top_downloads', 'top_hits']#, 'reverse_dns']
display_hooks = ['top_visitors', 'all_visits', 'referers', 'top_pages', 'top_downloads', 'top_hits']
post_analysis_hooks = ['referers', 'top_pages', 'top_downloads', 'operating_systems', 'browsers', 'feeds', 'hours_stats', 'reverse_dns']
display_hooks = ['track_users', 'top_visitors', 'all_visits', 'referers', 'top_pages', 'top_downloads', 'referers_diff', 'operating_systems', 'browsers', 'feeds', 'hours_stats', 'top_downloads_diff']
# Reverse DNS timeout
reverse_dns_timeout = 0.2
# Count these addresses as hits
page_to_hit_conf = [r'^.+/logo[/]?$']
## Count this addresses as page
hit_to_page_conf = [r'^.+/category/.+$', r'^.+/tag/.+$', r'^.+/archive/.+$', r'^.+/ljdc[/]?$']
# Count these addresses as pages
hit_to_page_conf = [r'^.+/category/.+$', r'^.+/tag/.+$', r'^.+/archive/.+$', r'^.+/ljdc[/]?$', r'^.+/source/tree/.*$', r'^.+/source/file/.*$', r'^.+/search/.+$']
# Because it takes too long to build HTML when there are too many entries
max_hits_displayed = 100
max_downloads_displayed = 100
# Compress these files after generation
compress_output_files = ['html', 'css', 'js']
compress_output_files = ['html', 'css', 'js', 'xml']
# Display result in French
locale = 'fr'
#locale = 'fr'
# Tracked IP
tracked_ip = ['192.168.1.1']
feeds = [r'^.*/atom.xml$', r'^.*/rss.xml$']
# Extra extensions appended to the default multimedia_files list through the
# "_append" suffix; the name before the suffix must match the default list
# name (multimedia_files), otherwise nothing is appended.
multimedia_files_append = ['xml']

View File

@ -52,6 +52,9 @@ class DisplayHTMLRaw(object):
self._buildHTML()
self._build(f, self.html)
# Title accessor for this block type: always returns the empty string.
def getTitle(self):
return ''
class DisplayHTMLBlock(DisplayHTMLRaw):
def __init__(self, iwla, title=''):
@ -99,6 +102,21 @@ class DisplayHTMLBlockTable(DisplayHTMLBlock):
self.rows.append(listToStr(row))
self.rows_cssclasses.append([u''] * len(row))
def insertCol(self, col_number, col_title='', col_css_class=''):
    """Insert a new, empty column at index ``col_number``.

    The column title and column CSS class are inserted into ``self.cols``
    and ``self.cols_cssclasses``.  Every existing row receives an empty
    cell at the same index.  The new cell's CSS class is the class shared
    by all existing cells of that row, or an empty string when the cells
    differ (or when the row has no cell at all).

    col_number    -- index at which the column is inserted (list.insert
                     semantics)
    col_title     -- title of the new column
    col_css_class -- CSS class of the new column header
    """
    self.cols.insert(col_number, col_title)

    for row in self.rows:
        row.insert(col_number, u'')

    for css_row in self.rows_cssclasses:
        # Reuse the row's CSS class only if all its cells agree on it.
        # An empty row has nothing to inherit (the previous css_row[0]
        # access raised IndexError here).
        if css_row and all(css == css_row[0] for css in css_row):
            shared_class = css_row[0] or u''
        else:
            shared_class = u''
        css_row.insert(col_number, shared_class)

    self.cols_cssclasses.insert(col_number, col_css_class)
def getNbRows(self):
return len(self.rows)
@ -157,6 +175,20 @@ class DisplayHTMLBlockTable(DisplayHTMLBlock):
self.cols_cssclasses = listToStr(values)
def computeRatio(self, column, column_insertion=None):
    """Add a 'Ratio' column giving each row's share of ``column``'s total.

    column           -- index of the column holding the integer values;
                        empty cells count as 0
    column_insertion -- index at which the ratio column is inserted
                        (defaults to just after ``column``)

    Each ratio is written as a percentage with one decimal ('%.1f%%').
    When the total is 0 (no rows, or only empty/zero cells) every ratio
    is '0.0%' instead of raising ZeroDivisionError.
    """
    if column_insertion is None:
        column_insertion = column + 1

    # Read the values BEFORE inserting the new column: once the column is
    # inserted at an index <= column, r[column] no longer designates the
    # source cells.
    values = [int(r[column]) if r[column] else 0 for r in self.rows]
    total = sum(values)

    self.insertCol(column_insertion, self.iwla._('Ratio'), u'iwla_hit')

    for index, val in enumerate(values):
        if total:
            ratio = float(val * 100) / float(total)
        else:
            ratio = 0.0
        self.setCellValue(index, column_insertion, '%.1f%%' % (ratio))
def _buildHTML(self):
style = u''
if self.table_css: style = u' class="%s"' % (self.table_css)
@ -226,7 +258,7 @@ class DisplayHTMLBlockTableWithGraph(DisplayHTMLBlockTable):
elif style.startswith(u'iwla_visit'): icon = u'vv.png'
else: return ''
return u'/%s/%s' % (self.icon_path, icon)
return u'/%s/other/%s' % (self.icon_path, icon)
def _buildHTML(self):
self._computeMax()
@ -287,7 +319,7 @@ class DisplayHTMLPage(object):
def appendBlock(self, block):
self.blocks.append(block)
def build(self, root):
def build(self, root, displayVersion=True):
filename = os.path.join(root, self.filename)
base = os.path.dirname(filename)
@ -305,11 +337,12 @@ class DisplayHTMLPage(object):
f.write(u'<link rel="stylesheet" href="/%s"/>' % (css))
if self.title:
f.write(u'<title>%s</title>' % (self.title))
f.write(u'</head>')
f.write(u'</head><body>')
for block in self.blocks:
block.build(f)
f.write(u'<center>Generated by <a href="%s">IWLA %s</a></center>' %
("http://indefero.soutade.fr/p/iwla", self.iwla.getVersion()))
if displayVersion:
f.write(u'<center>Generated by <a href="%s">IWLA %s</a></center>' %
("http://indefero.soutade.fr/p/iwla", self.iwla.getVersion()))
f.write(u'</body></html>')
f.close()

View File

@ -4,19 +4,21 @@ iwla
Introduction
------------
iwla (Intelligent Web Log Analyzer) is basically a clone of [awstats](http://www.awstats.org). The main problem with awstats is that it's a very monolothic project with everything in one big PERL file. In opposite, iwla has been though to be very modular : a small core analysis and a lot of filters. It can be viewed as UNIX pipes. Philosophy of iwla is : add, update, delete ! That's the job of each filter : modify statistics until final result. It's written in Python.
iwla (Intelligent Web Log Analyzer) is basically a clone of [awstats](http://www.awstats.org). The main problem with awstats is that it's a very monolithic project with everything in one big Perl file. In contrast, iwla has been designed to be very modular: a small core analysis and a lot of filters. It can be viewed as UNIX pipes. The philosophy of iwla is: add, update, delete! That's the job of each filter: modify statistics until the final result. It's written in Python.
Nevertheless, iwla is only focused on HTTP logs. It uses data (robots definitions, search engines definitions) and design from awstats. Moreover, it's not dynamic, but only generates static HTML pages (with gzip compression option).
Usage
-----
./iwla [-c|--clean-output] [-i|--stdin] [-f FILE|--file FILE] [-d LOGLEVEL|--log-level LOGLEVEL]
./iwla [-c|--clean-output] [-i|--stdin] [-f FILE|--file FILE] [-d LOGLEVEL|--log-level LOGLEVEL] [-r|--reset year/month] [-z|--dont-compress]
-c : Clean output (database and HTML) before starting
-i : Read data from stdin instead of conf.analyzed_filename
-f : Read data from FILE instead of conf.analyzed_filename
-f : Analyse this log file instead of conf.analyzed_filename. Multiple files can be specified (comma separated). gz files are accepted
-d : Loglevel in ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
-r : Reset analysis to a specific date (month/year)
-z : Don't compress databases (bigger but faster, not compatible with compressed databases)
Basic usage
-----------
@ -32,6 +34,12 @@ Main values to edit are :
* **display_hooks** : List of display hooks
* **locale** : Displayed locale (_en_ or _fr_)
You can also append an element to an existing default configuration list by using "_append" suffix. Example :
multimedia_files_append = ['xml']
or
multimedia_files_append = 'xml'
Will append 'xml' to current multimedia_files list
Then, you can launch iwla. Output HTML files are created in _output_ directory by default. To quickly see it, go into _output_ and type
python -m SimpleHTTPServer 8000
@ -90,6 +98,34 @@ Plugins
Optional configuration values end with *.
* iwla.py
* plugins/display/all_visits.py
* plugins/display/browsers.py
* plugins/display/feeds.py
* plugins/display/hours_stats.py
* plugins/display/istats_diff.py
* plugins/display/operating_systems.py
* plugins/display/referers.py
* plugins/display/referers_diff.py
* plugins/display/top_downloads.py
* plugins/display/top_downloads_diff.py
* plugins/display/top_hits.py
* plugins/display/top_pages.py
* plugins/display/top_visitors.py
* plugins/display/track_users.py
* plugins/post_analysis/browsers.py
* plugins/post_analysis/feeds.py
* plugins/post_analysis/hours_stats.py
* plugins/post_analysis/operating_systems.py
* plugins/post_analysis/referers.py
* plugins/post_analysis/reverse_dns.py
* plugins/post_analysis/top_downloads.py
* plugins/post_analysis/top_hits.py
* plugins/post_analysis/top_pages.py
* plugins/pre_analysis/page_to_hit.py
* plugins/pre_analysis/robots.py
iwla
----
@ -110,6 +146,7 @@ iwla
DB_ROOT/meta.db
DB_ROOT/year/month/iwla.db
OUTPUT_ROOT/index.html
OUTPUT_ROOT/year/_stats.html
OUTPUT_ROOT/year/month/index.html
Statistics creation :
@ -156,6 +193,9 @@ iwla
requests =>
[fields_from_format_log]
extract_request =>
http_method
http_uri
http_version
extract_uri
extract_parameters*
extract_referer* =>
@ -202,6 +242,139 @@ plugins.display.all_visits
None
plugins.display.browsers
------------------------
Display hook
Create browsers page
Plugin requirements :
post_analysis/browsers
Conf values needed :
max_browsers_displayed*
create_browsers_page*
Output files :
OUTPUT_ROOT/year/month/browsers.html
OUTPUT_ROOT/year/month/index.html
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
plugins.display.feeds
---------------------
Display hook
Display feeds parsers
Plugin requirements :
post_analysis/feeds
Conf values needed :
create_all_feeds_page*
Output files :
OUTPUT_ROOT/year/month/index.html
OUTPUT_ROOT/year/month/all_feeds.html
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
plugins.display.hours_stats
---------------------------
Display hook
Display statistics by hour/week day
Plugin requirements :
post_analysis/hours_stats
Conf values needed :
None
Output files :
OUTPUT_ROOT/year/month/index.html
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
plugins.display.istats_diff
---------------------------
Display hook interface
Highlight new and updated statistics
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
plugins.display.operating_systems
---------------------------------
Display hook
Add operating systems statistics
Plugin requirements :
post_analysis/operating_systems
Conf values needed :
create_families_page*
Output files :
OUTPUT_ROOT/year/month/index.html
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
plugins.display.referers
------------------------
@ -233,20 +406,102 @@ plugins.display.referers
None
plugins.display.top_visitors
----------------------------
plugins.display.referers_diff
-----------------------------
Display hook
Create TOP visitors block
Highlight new and updated key phrases in all_key_phrases.html
Plugin requirements :
None
display/referers
Conf values needed :
display_visitor_ip*
None
Output files :
None
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
plugins.display.top_downloads
-----------------------------
Display hook
Create TOP downloads page
Plugin requirements :
post_analysis/top_downloads
Conf values needed :
max_downloads_displayed*
create_all_downloads_page*
Output files :
OUTPUT_ROOT/year/month/top_downloads.html
OUTPUT_ROOT/year/month/index.html
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
plugins.display.top_downloads_diff
----------------------------------
Display hook
Highlight new and updated downloads in top_downloads.html
Plugin requirements :
display/top_downloads
Conf values needed :
None
Output files :
None
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
plugins.display.top_hits
------------------------
Display hook
Create TOP hits page
Plugin requirements :
post_analysis/top_hits
Conf values needed :
max_hits_displayed*
create_all_hits_page*
Output files :
OUTPUT_ROOT/year/month/top_hits.html
OUTPUT_ROOT/year/month/index.html
Statistics creation :
@ -287,22 +542,20 @@ plugins.display.top_pages
None
plugins.display.top_hits
------------------------
plugins.display.top_visitors
----------------------------
Display hook
Create TOP hits page
Create TOP visitors block
Plugin requirements :
post_analysis/top_hits
None
Conf values needed :
max_hits_displayed*
create_all_hits_page*
display_visitor_ip*
Output files :
OUTPUT_ROOT/year/month/top_hits.html
OUTPUT_ROOT/year/month/index.html
Statistics creation :
@ -315,23 +568,23 @@ plugins.display.top_hits
None
plugins.display.top_downloads
-----------------------------
plugins.display.track_users
---------------------------
Display hook
Create TOP downloads page
Track users
Plugin requirements :
post_analysis/top_downloads
None
Conf values needed :
max_downloads_displayed*
create_all_downloads_page*
tracked_ip
create_tracked_page*
Output files :
OUTPUT_ROOT/year/month/top_downloads.html
OUTPUT_ROOT/year/month/index.html
OUTPUT_ROOT/year/month/tracked_users.html
Statistics creation :
None
@ -343,6 +596,290 @@ plugins.display.top_downloads
None
plugins.post_analysis.browsers
------------------------------
Post analysis hook
Detect browser information from requests
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
visits :
remote_addr =>
browser
month_stats :
browsers =>
browser => count
Statistics update :
None
Statistics deletion :
None
plugins.post_analysis.feeds
---------------------------
Post analysis hook
Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot)
If there is only one hit per day to a feed, merge feeds parsers with the same user agent
as it must be the same person with a different IP address.
Plugin requirements :
None
Conf values needed :
feeds
merge_one_hit_only_feeds_parsers*
Output files :
None
Statistics creation :
remote_addr =>
feed_parser
Statistics update :
None
Statistics deletion :
None
plugins.post_analysis.hours_stats
---------------------------------
Post analysis hook
Count pages, hits and bandwidth by hour/week day
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
month_stats:
hours_stats =>
00 .. 23 =>
pages
hits
bandwidth
days_stats =>
0 .. 6 =>
pages
hits
bandwidth
Statistics update :
None
Statistics deletion :
None
plugins.post_analysis.operating_systems
---------------------------------------
Post analysis hook
Detect operating systems from requests
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
visits :
remote_addr =>
operating_system
month_stats :
operating_systems =>
operating_system => count
os_families =>
family => count
Statistics update :
None
Statistics deletion :
None
plugins.post_analysis.referers
------------------------------
Post analysis hook
Extract referers and key phrases from requests
Plugin requirements :
None
Conf values needed :
domain_name
Output files :
None
Statistics creation :
None
Statistics update :
month_stats :
referers =>
pages => count
hits => count
robots_referers =>
pages => count
hits => count
search_engine_referers =>
pages => count
hits => count
key_phrases =>
phrase => count
Statistics deletion :
None
plugins.post_analysis.reverse_dns
---------------------------------
Post analysis hook
Replace IP by reverse DNS names
Plugin requirements :
None
Conf values needed :
reverse_dns_timeout*
Output files :
None
Statistics creation :
None
Statistics update :
valid_visitors:
remote_addr
dns_name_replaced
dns_analyzed
Statistics deletion :
None
plugins.post_analysis.top_downloads
-----------------------------------
Post analysis hook
Count TOP downloads
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
None
Statistics update :
month_stats:
top_downloads =>
uri => count
Statistics deletion :
None
plugins.post_analysis.top_hits
------------------------------
Post analysis hook
Count TOP hits
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
None
Statistics update :
month_stats:
top_hits =>
uri => count
Statistics deletion :
None
plugins.post_analysis.top_pages
-------------------------------
Post analysis hook
Count TOP pages
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
None
Statistics update :
month_stats:
top_pages =>
uri => count
Statistics deletion :
None
plugins.pre_analysis.page_to_hit
--------------------------------
@ -400,153 +937,3 @@ plugins.pre_analysis.robots
None
plugins.post_analysis.referers
------------------------------
Post analysis hook
Extract referers and key phrases from requests
Plugin requirements :
None
Conf values needed :
domain_name
Output files :
None
Statistics creation :
None
Statistics update :
month_stats :
referers =>
pages
hits
robots_referers =>
pages
hits
search_engine_referers =>
pages
hits
key_phrases =>
phrase
Statistics deletion :
None
plugins.post_analysis.top_pages
-------------------------------
Post analysis hook
Count TOP pages
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
None
Statistics update :
month_stats:
top_pages =>
uri
Statistics deletion :
None
plugins.post_analysis.reverse_dns
---------------------------------
Post analysis hook
Replace IP by reverse DNS names
Plugin requirements :
None
Conf values needed :
reverse_dns_timeout*
Output files :
None
Statistics creation :
None
Statistics update :
valid_visitors:
remote_addr
dns_name_replaced
dns_analyzed
Statistics deletion :
None
plugins.post_analysis.top_hits
------------------------------
Post analysis hook
Count TOP hits
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
None
Statistics update :
month_stats:
top_hits =>
uri
Statistics deletion :
None
plugins.post_analysis.top_downloads
-----------------------------------
Post analysis hook
Count TOP downloads
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
None
Statistics update :
month_stats:
top_downloads =>
uri
Statistics deletion :
None

View File

@ -4,19 +4,21 @@ iwla
Introduction
------------
iwla (Intelligent Web Log Analyzer) is basically a clone of [awstats](http://www.awstats.org). The main problem with awstats is that it's a very monolothic project with everything in one big PERL file. In opposite, iwla has been though to be very modular : a small core analysis and a lot of filters. It can be viewed as UNIX pipes. Philosophy of iwla is : add, update, delete ! That's the job of each filter : modify statistics until final result. It's written in Python.
iwla (Intelligent Web Log Analyzer) is basically a clone of [awstats](http://www.awstats.org). The main problem with awstats is that it's a very monolithic project with everything in one big Perl file. In contrast, iwla has been designed to be very modular: a small core analysis and a lot of filters. It can be viewed as UNIX pipes. The philosophy of iwla is: add, update, delete! That's the job of each filter: modify statistics until the final result. It's written in Python.
Nevertheless, iwla is only focused on HTTP logs. It uses data (robots definitions, search engines definitions) and design from awstats. Moreover, it's not dynamic, but only generates static HTML pages (with gzip compression option).
Usage
-----
./iwla [-c|--clean-output] [-i|--stdin] [-f FILE|--file FILE] [-d LOGLEVEL|--log-level LOGLEVEL]
./iwla [-c|--clean-output] [-i|--stdin] [-f FILE|--file FILE] [-d LOGLEVEL|--log-level LOGLEVEL] [-r|--reset year/month] [-z|--dont-compress]
-c : Clean output (database and HTML) before starting
-i : Read data from stdin instead of conf.analyzed_filename
-f : Read data from FILE instead of conf.analyzed_filename
-f : Analyse this log file instead of conf.analyzed_filename. Multiple files can be specified (comma separated). gz files are accepted
-d : Loglevel in ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
-r : Reset analysis to a specific date (month/year)
-z : Don't compress databases (bigger but faster, not compatible with compressed databases)
Basic usage
-----------
@ -32,6 +34,12 @@ Main values to edit are :
* **display_hooks** : List of display hooks
* **locale** : Displayed locale (_en_ or _fr_)
You can also append an element to an existing default configuration list by using "_append" suffix. Example :
multimedia_files_append = ['xml']
or
multimedia_files_append = 'xml'
Will append 'xml' to current multimedia_files list
Then, you can launch iwla. Output HTML files are created in _output_ directory by default. To quickly see it, go into _output_ and type
python -m SimpleHTTPServer 8000

View File

@ -1,3 +1,31 @@
* iwla.py
* plugins/display/all_visits.py
* plugins/display/browsers.py
* plugins/display/feeds.py
* plugins/display/hours_stats.py
* plugins/display/istats_diff.py
* plugins/display/operating_systems.py
* plugins/display/referers.py
* plugins/display/referers_diff.py
* plugins/display/top_downloads.py
* plugins/display/top_downloads_diff.py
* plugins/display/top_hits.py
* plugins/display/top_pages.py
* plugins/display/top_visitors.py
* plugins/display/track_users.py
* plugins/post_analysis/browsers.py
* plugins/post_analysis/feeds.py
* plugins/post_analysis/hours_stats.py
* plugins/post_analysis/operating_systems.py
* plugins/post_analysis/referers.py
* plugins/post_analysis/reverse_dns.py
* plugins/post_analysis/top_downloads.py
* plugins/post_analysis/top_hits.py
* plugins/post_analysis/top_pages.py
* plugins/pre_analysis/page_to_hit.py
* plugins/pre_analysis/robots.py
iwla
----
@ -18,6 +46,7 @@ iwla
DB_ROOT/meta.db
DB_ROOT/year/month/iwla.db
OUTPUT_ROOT/index.html
OUTPUT_ROOT/year/_stats.html
OUTPUT_ROOT/year/month/index.html
Statistics creation :
@ -64,6 +93,9 @@ iwla
requests =>
[fields_from_format_log]
extract_request =>
http_method
http_uri
http_version
extract_uri
extract_parameters*
extract_referer* =>
@ -110,6 +142,139 @@ plugins.display.all_visits
None
plugins.display.browsers
------------------------
Display hook
Create browsers page
Plugin requirements :
post_analysis/browsers
Conf values needed :
max_browsers_displayed*
create_browsers_page*
Output files :
OUTPUT_ROOT/year/month/browsers.html
OUTPUT_ROOT/year/month/index.html
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
plugins.display.feeds
---------------------
Display hook
Display feeds parsers
Plugin requirements :
post_analysis/feeds
Conf values needed :
create_all_feeds_page*
Output files :
OUTPUT_ROOT/year/month/index.html
OUTPUT_ROOT/year/month/all_feeds.html
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
plugins.display.hours_stats
---------------------------
Display hook
Display statistics by hour/week day
Plugin requirements :
post_analysis/hours_stats
Conf values needed :
None
Output files :
OUTPUT_ROOT/year/month/index.html
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
plugins.display.istats_diff
---------------------------
Display hook interface
Highlight new and updated statistics
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
plugins.display.operating_systems
---------------------------------
Display hook
Add operating systems statistics
Plugin requirements :
post_analysis/operating_systems
Conf values needed :
create_families_page*
Output files :
OUTPUT_ROOT/year/month/index.html
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
plugins.display.referers
------------------------
@ -141,20 +306,102 @@ plugins.display.referers
None
plugins.display.top_visitors
----------------------------
plugins.display.referers_diff
-----------------------------
Display hook
Create TOP visitors block
Highlight new and updated key phrases in all_key_phrases.html
Plugin requirements :
None
display/referers
Conf values needed :
display_visitor_ip*
None
Output files :
None
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
plugins.display.top_downloads
-----------------------------
Display hook
Create TOP downloads page
Plugin requirements :
post_analysis/top_downloads
Conf values needed :
max_downloads_displayed*
create_all_downloads_page*
Output files :
OUTPUT_ROOT/year/month/top_downloads.html
OUTPUT_ROOT/year/month/index.html
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
plugins.display.top_downloads_diff
----------------------------------
Display hook
Highlight new and updated downloads in top_downloads.html
Plugin requirements :
display/top_downloads
Conf values needed :
None
Output files :
None
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
plugins.display.top_hits
------------------------
Display hook
Create TOP hits page
Plugin requirements :
post_analysis/top_hits
Conf values needed :
max_hits_displayed*
create_all_hits_page*
Output files :
OUTPUT_ROOT/year/month/top_hits.html
OUTPUT_ROOT/year/month/index.html
Statistics creation :
@ -195,22 +442,20 @@ plugins.display.top_pages
None
plugins.display.top_hits
------------------------
plugins.display.top_visitors
----------------------------
Display hook
Create TOP hits page
Create TOP visitors block
Plugin requirements :
post_analysis/top_hits
None
Conf values needed :
max_hits_displayed*
create_all_hits_page*
display_visitor_ip*
Output files :
OUTPUT_ROOT/year/month/top_hits.html
OUTPUT_ROOT/year/month/index.html
Statistics creation :
@ -223,23 +468,23 @@ plugins.display.top_hits
None
plugins.display.top_downloads
-----------------------------
plugins.display.track_users
---------------------------
Display hook
Create TOP downloads page
Track users
Plugin requirements :
post_analysis/top_downloads
None
Conf values needed :
max_downloads_displayed*
create_all_downloads_page*
tracked_ip
create_tracked_page*
Output files :
OUTPUT_ROOT/year/month/top_downloads.html
OUTPUT_ROOT/year/month/index.html
OUTPUT_ROOT/year/month/tracked_users.html
Statistics creation :
None
@ -251,6 +496,290 @@ plugins.display.top_downloads
None
plugins.post_analysis.browsers
------------------------------
Post analysis hook
Detect browser information from requests
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
visits :
remote_addr =>
browser
month_stats :
browsers =>
browser => count
Statistics update :
None
Statistics deletion :
None
plugins.post_analysis.feeds
---------------------------
Post analysis hook
Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot)
If there is only one hit per day to a feed, merge feeds parsers with the same user agent
as it must be the same person with a different IP address.
Plugin requirements :
None
Conf values needed :
feeds
merge_one_hit_only_feeds_parsers*
Output files :
None
Statistics creation :
remote_addr =>
feed_parser
Statistics update :
None
Statistics deletion :
None
plugins.post_analysis.hours_stats
---------------------------------
Post analysis hook
Count pages, hits and bandwidth by hour/week day
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
month_stats:
hours_stats =>
00 .. 23 =>
pages
hits
bandwidth
days_stats =>
0 .. 6 =>
pages
hits
bandwidth
Statistics update :
None
Statistics deletion :
None
plugins.post_analysis.operating_systems
---------------------------------------
Post analysis hook
Detect operating systems from requests
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
visits :
remote_addr =>
operating_system
month_stats :
operating_systems =>
operating_system => count
os_families =>
family => count
Statistics update :
None
Statistics deletion :
None
plugins.post_analysis.referers
------------------------------
Post analysis hook
Extract referers and key phrases from requests
Plugin requirements :
None
Conf values needed :
domain_name
Output files :
None
Statistics creation :
None
Statistics update :
month_stats :
referers =>
pages => count
hits => count
robots_referers =>
pages => count
hits => count
search_engine_referers =>
pages => count
hits => count
key_phrases =>
phrase => count
Statistics deletion :
None
plugins.post_analysis.reverse_dns
---------------------------------
Post analysis hook
Replace IP by reverse DNS names
Plugin requirements :
None
Conf values needed :
reverse_dns_timeout*
Output files :
None
Statistics creation :
None
Statistics update :
valid_visitors:
remote_addr
dns_name_replaced
dns_analyzed
Statistics deletion :
None
plugins.post_analysis.top_downloads
-----------------------------------
Post analysis hook
Count TOP downloads
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
None
Statistics update :
month_stats:
top_downloads =>
uri => count
Statistics deletion :
None
plugins.post_analysis.top_hits
------------------------------
Post analysis hook
Count TOP hits
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
None
Statistics update :
month_stats:
top_hits =>
uri => count
Statistics deletion :
None
plugins.post_analysis.top_pages
-------------------------------
Post analysis hook
Count TOP pages
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
None
Statistics update :
month_stats:
top_pages =>
uri => count
Statistics deletion :
None
plugins.pre_analysis.page_to_hit
--------------------------------
@ -308,153 +837,3 @@ plugins.pre_analysis.robots
None
plugins.post_analysis.referers
------------------------------
Post analysis hook
Extract referers and key phrases from requests
Plugin requirements :
None
Conf values needed :
domain_name
Output files :
None
Statistics creation :
None
Statistics update :
month_stats :
referers =>
pages
hits
robots_referers =>
pages
hits
search_engine_referers =>
pages
hits
key_phrases =>
phrase
Statistics deletion :
None
plugins.post_analysis.top_pages
-------------------------------
Post analysis hook
Count TOP pages
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
None
Statistics update :
month_stats:
top_pages =>
uri
Statistics deletion :
None
plugins.post_analysis.reverse_dns
---------------------------------
Post analysis hook
Replace IP by reverse DNS names
Plugin requirements :
None
Conf values needed :
reverse_dns_timeout*
Output files :
None
Statistics creation :
None
Statistics update :
valid_visitors:
remote_addr
dns_name_replaced
dns_analyzed
Statistics deletion :
None
plugins.post_analysis.top_hits
------------------------------
Post analysis hook
Count TOP hits
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
None
Statistics update :
month_stats:
top_hits =>
uri
Statistics deletion :
None
plugins.post_analysis.top_downloads
-----------------------------------
Post analysis hook
Count TOP downloads
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
None
Statistics update :
month_stats:
top_downloads =>
uri
Statistics deletion :
None

View File

@ -79,13 +79,19 @@ def preloadPlugins(plugins, iwla):
classes = [c for _,c in inspect.getmembers(mod)\
if inspect.isclass(c) and \
issubclass(c, IPlugin) and \
c.__name__ != 'IPlugin'
c.__name__ != 'IPlugin' and \
not c.__subclasses__()
]
if not classes:
logger.warning('No plugin defined in %s' % (plugin_path))
continue
if len(classes) > 1:
logger.warning('More than one class found in %s, loading may fail. Selecting %s' % (plugin_path, classes[0]))
print classes
continue
plugin = classes[0](iwla)
plugin_name = plugin.__class__.__name__
@ -103,7 +109,7 @@ def preloadPlugins(plugins, iwla):
requirement_validated = False
for r in requirements:
for (_,p) in cache_plugins.items():
for p in cache_plugins.values():
if p.__class__.__name__ == r:
requirement_validated = True
break

243
iwla.pot
View File

@ -5,7 +5,7 @@
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"POT-Creation-Date: 2014-12-19 17:46+CET\n"
"POT-Creation-Date: 2015-03-02 19:44+CET\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
@ -35,11 +35,11 @@ msgstr ""
msgid "March"
msgstr ""
#: display.py:32 iwla.py:428
#: display.py:32 iwla.py:440
msgid "June"
msgstr ""
#: display.py:32 iwla.py:428
#: display.py:32 iwla.py:440
msgid "May"
msgstr ""
@ -63,116 +63,143 @@ msgstr ""
msgid "September"
msgstr ""
#: iwla.py:371
#: display.py:187
msgid "Ratio"
msgstr ""
#: iwla.py:381
msgid "Statistics"
msgstr ""
#: iwla.py:377
msgid "By day"
msgstr ""
#: iwla.py:377
msgid "Day"
msgstr ""
#: iwla.py:377 iwla.py:430
#: iwla.py:389 iwla.py:442
msgid "Not viewed Bandwidth"
msgstr ""
#: iwla.py:377 iwla.py:430
#: iwla.py:389 iwla.py:442
msgid "Visits"
msgstr ""
#: iwla.py:377 iwla.py:430 plugins/display/all_visits.py:70
#: plugins/display/referers.py:95 plugins/display/referers.py:153
#: plugins/display/top_downloads.py:97 plugins/display/top_visitors.py:72
#: iwla.py:389 iwla.py:442 plugins/display/all_visits.py:70
#: plugins/display/feeds.py:75 plugins/display/hours_stats.py:73
#: plugins/display/hours_stats.py:83 plugins/display/referers.py:95
#: plugins/display/referers.py:153 plugins/display/top_downloads.py:97
#: plugins/display/top_visitors.py:72 plugins/display/track_users.py:113
msgid "Hits"
msgstr ""
#: iwla.py:377 iwla.py:430 plugins/display/all_visits.py:70
#: plugins/display/referers.py:95 plugins/display/referers.py:153
#: plugins/display/top_visitors.py:72
#: iwla.py:389 iwla.py:442 plugins/display/all_visits.py:70
#: plugins/display/feeds.py:75 plugins/display/hours_stats.py:73
#: plugins/display/hours_stats.py:83 plugins/display/referers.py:95
#: plugins/display/referers.py:153 plugins/display/top_visitors.py:72
#: plugins/display/track_users.py:77 plugins/display/track_users.py:113
msgid "Pages"
msgstr ""
#: iwla.py:377 iwla.py:430 plugins/display/all_visits.py:70
#: iwla.py:389 iwla.py:442 plugins/display/all_visits.py:70
#: plugins/display/hours_stats.py:73 plugins/display/hours_stats.py:83
#: plugins/display/top_visitors.py:72
msgid "Bandwidth"
msgstr ""
#: iwla.py:414
#: iwla.py:389 plugins/display/hours_stats.py:71
msgid "By day"
msgstr ""
#: iwla.py:389 plugins/display/hours_stats.py:73
msgid "Day"
msgstr ""
#: iwla.py:426
msgid "Average"
msgstr ""
#: iwla.py:419 iwla.py:457
#: iwla.py:431 iwla.py:476
msgid "Total"
msgstr ""
#: iwla.py:428
#: iwla.py:440
msgid "Apr"
msgstr ""
#: iwla.py:428
#: iwla.py:440
msgid "Aug"
msgstr ""
#: iwla.py:428
#: iwla.py:440
msgid "Dec"
msgstr ""
#: iwla.py:428
#: iwla.py:440
msgid "Feb"
msgstr ""
#: iwla.py:428
#: iwla.py:440
msgid "Jan"
msgstr ""
#: iwla.py:428
#: iwla.py:440
msgid "Jul"
msgstr ""
#: iwla.py:428
#: iwla.py:440
msgid "Mar"
msgstr ""
#: iwla.py:428
#: iwla.py:440
msgid "Nov"
msgstr ""
#: iwla.py:428
#: iwla.py:440
msgid "Oct"
msgstr ""
#: iwla.py:428
#: iwla.py:440
msgid "Sep"
msgstr ""
#: iwla.py:429
#: iwla.py:441
msgid "Summary"
msgstr ""
#: iwla.py:430
#: iwla.py:442
msgid "Month"
msgstr ""
#: iwla.py:430
#: iwla.py:442
msgid "Visitors"
msgstr ""
#: iwla.py:430 iwla.py:440
#: iwla.py:442 iwla.py:454 plugins/display/feeds.py:98
#: plugins/display/operating_systems.py:90 plugins/display/track_users.py:108
msgid "Details"
msgstr ""
#: iwla.py:465
#: iwla.py:490
msgid "Statistics for"
msgstr ""
#: iwla.py:472
#: iwla.py:497
msgid "Last update"
msgstr ""
#: plugins/display/all_visits.py:70 plugins/display/top_visitors.py:72
#: iwla.py:501
msgid "Time analysis"
msgstr ""
#: iwla.py:503
msgid "hours"
msgstr ""
#: iwla.py:504
msgid "minutes"
msgstr ""
#: iwla.py:504
msgid "seconds"
msgstr ""
#: plugins/display/all_visits.py:70 plugins/display/feeds.py:75
#: plugins/display/top_visitors.py:72
msgid "Host"
msgstr ""
@ -188,6 +215,103 @@ msgstr ""
msgid "Top visitors"
msgstr ""
#: plugins/display/browsers.py:79
msgid "Browsers"
msgstr ""
#: plugins/display/browsers.py:79 plugins/display/browsers.py:113
msgid "Browser"
msgstr ""
#: plugins/display/browsers.py:79 plugins/display/browsers.py:113
#: plugins/display/operating_systems.py:78
#: plugins/display/operating_systems.py:95 plugins/display/top_hits.py:71
#: plugins/display/top_hits.py:97 plugins/display/top_pages.py:71
#: plugins/display/top_pages.py:96
msgid "Entrance"
msgstr ""
#: plugins/display/browsers.py:98 plugins/display/browsers.py:128
#: plugins/display/referers.py:110 plugins/display/referers.py:125
#: plugins/display/referers.py:140 plugins/display/referers.py:163
#: plugins/display/referers.py:174 plugins/display/referers.py:185
#: plugins/display/referers.py:222 plugins/display/top_downloads.py:83
#: plugins/display/top_downloads.py:103 plugins/display/top_hits.py:82
#: plugins/display/top_hits.py:103 plugins/display/top_pages.py:82
#: plugins/display/top_pages.py:102 plugins/display/top_visitors.py:92
msgid "Others"
msgstr ""
#: plugins/display/browsers.py:107
msgid "All Browsers"
msgstr ""
#: plugins/display/feeds.py:69
msgid "All Feeds parsers"
msgstr ""
#: plugins/display/feeds.py:75
msgid "All feeds parsers"
msgstr ""
#: plugins/display/feeds.py:91
msgid "Merged feeds parsers"
msgstr ""
#: plugins/display/feeds.py:96
msgid "Feeds parsers"
msgstr ""
#: plugins/display/feeds.py:103
msgid "Found"
msgstr ""
#: plugins/display/hours_stats.py:72
msgid "Fri"
msgstr ""
#: plugins/display/hours_stats.py:72
msgid "Mon"
msgstr ""
#: plugins/display/hours_stats.py:72
msgid "Sat"
msgstr ""
#: plugins/display/hours_stats.py:72
msgid "Sun"
msgstr ""
#: plugins/display/hours_stats.py:72
msgid "Thu"
msgstr ""
#: plugins/display/hours_stats.py:72
msgid "Tue"
msgstr ""
#: plugins/display/hours_stats.py:72
msgid "Wed"
msgstr ""
#: plugins/display/hours_stats.py:81
msgid "By Hours"
msgstr ""
#: plugins/display/hours_stats.py:83
msgid "Hours"
msgstr ""
#: plugins/display/operating_systems.py:78
#: plugins/display/operating_systems.py:88
msgid "Operating Systems"
msgstr ""
#: plugins/display/operating_systems.py:78
#: plugins/display/operating_systems.py:95
msgid "Operating System"
msgstr ""
#: plugins/display/referers.py:95
msgid "Connexion from"
msgstr ""
@ -200,16 +324,6 @@ msgstr ""
msgid "Search Engine"
msgstr ""
#: plugins/display/referers.py:110 plugins/display/referers.py:125
#: plugins/display/referers.py:140 plugins/display/referers.py:163
#: plugins/display/referers.py:174 plugins/display/referers.py:185
#: plugins/display/referers.py:222 plugins/display/top_downloads.py:83
#: plugins/display/top_downloads.py:103 plugins/display/top_hits.py:82
#: plugins/display/top_hits.py:103 plugins/display/top_pages.py:82
#: plugins/display/top_pages.py:102 plugins/display/top_visitors.py:92
msgid "Others"
msgstr ""
#: plugins/display/referers.py:114 plugins/display/referers.py:167
msgid "External URL"
msgstr ""
@ -226,8 +340,12 @@ msgstr ""
msgid "All Referers"
msgstr ""
#: plugins/display/referers.py:200 plugins/display/referers.py:210
msgid "Top key phrases"
#: plugins/display/referers.py:193
msgid "All Key Phrases"
msgstr ""
#: plugins/display/referers.py:200
msgid "Key phrases"
msgstr ""
#: plugins/display/referers.py:200 plugins/display/referers.py:216
@ -238,6 +356,10 @@ msgstr ""
msgid "Search"
msgstr ""
#: plugins/display/referers.py:210
msgid "Top key phrases"
msgstr ""
#: plugins/display/referers.py:212
msgid "All key phrases"
msgstr ""
@ -264,11 +386,6 @@ msgstr ""
msgid "All Hits"
msgstr ""
#: plugins/display/top_hits.py:71 plugins/display/top_hits.py:97
#: plugins/display/top_pages.py:71 plugins/display/top_pages.py:96
msgid "Entrance"
msgstr ""
#: plugins/display/top_pages.py:71 plugins/display/top_pages.py:90
msgid "All Pages"
msgstr ""
@ -277,3 +394,15 @@ msgstr ""
msgid "Top Pages"
msgstr ""
#: plugins/display/track_users.py:77 plugins/display/track_users.py:106
msgid "Tracked users"
msgstr ""
#: plugins/display/track_users.py:77 plugins/display/track_users.py:113
msgid "Last Access"
msgstr ""
#: plugins/display/track_users.py:113
msgid "IP"
msgstr ""

184
iwla.py
View File

@ -31,12 +31,10 @@ import argparse
import logging
import gettext
from calendar import monthrange
from datetime import date
from datetime import date, datetime
import default_conf as conf
import conf as _
conf.__dict__.update(_.__dict__)
del _
import conf as user_conf
from iplugin import *
from display import *
@ -59,6 +57,7 @@ Output files :
DB_ROOT/meta.db
DB_ROOT/year/month/iwla.db
OUTPUT_ROOT/index.html
OUTPUT_ROOT/year/_stats.html
OUTPUT_ROOT/year/month/index.html
Statistics creation :
@ -105,6 +104,9 @@ visits :
requests =>
[fields_from_format_log]
extract_request =>
http_method
http_uri
http_version
extract_uri
extract_parameters*
extract_referer* =>
@ -129,12 +131,13 @@ class IWLA(object):
ANALYSIS_CLASS = 'HTTP'
API_VERSION = 1
IWLA_VERSION = '0.1'
IWLA_VERSION = '0.2'
def __init__(self, logLevel):
self.meta_infos = {}
self.analyse_started = False
self.current_analysis = {}
self.start_time = 0
self.cache_plugins = {}
self.display = DisplayHTMLBuild(self)
self.valid_visitors = None
@ -232,15 +235,18 @@ class IWLA(object):
def getDBFilename(self, time):
return os.path.join(conf.DB_ROOT, str(time.tm_year), '%02d' % (time.tm_mon), conf.DB_FILENAME)
def _openDB(self, filename, prot='r'):
    """Open a database file, compressed or not depending on the -z option.

    filename -- path of the database file
    prot -- opening mode forwarded as is to the opener ('r' by default)

    Returns the file object given by the builtin open() when
    self.args.dont_compress is set, by gzip.open() otherwise.
    """
    opener = open if self.args.dont_compress else gzip.open
    return opener(filename, prot)
def _serialize(self, obj, filename):
base = os.path.dirname(filename)
if not os.path.exists(base):
os.makedirs(base)
# TODO : remove return
#return
with open(filename + '.tmp', 'wb+') as f, gzip.open(filename, 'w') as fzip:
with open(filename + '.tmp', 'wb+') as f, self._openDB(filename, 'w') as fzip:
pickle.dump(obj, f)
f.seek(0)
fzip.write(f.read())
@ -250,7 +256,7 @@ class IWLA(object):
if not os.path.exists(filename):
return None
with gzip.open(filename, 'r') as f:
with self._openDB(filename) as f:
return pickle.load(f)
return None
@ -265,10 +271,21 @@ class IWLA(object):
mod.hook(*args)
def isPage(self, request):
    """Tell if request ends with one of conf.pages_extensions.

    The question and its answer are traced at debug level.
    Returns True or False.
    """
    self.logger.debug("Is page %s" % (request))
    # any() stops on the first matching extension, like an early return
    found = any(request.endswith(ext) for ext in conf.pages_extensions)
    self.logger.debug("True" if found else "False")
    return found
def isMultimediaFile(self, request):
    """Tell if request ends with one of conf.multimedia_files.

    The question and its answer are traced at debug level.
    Returns True or False.
    """
    self.logger.debug("Is multimedia %s" % (request))
    # any() stops on the first matching extension, like an early return
    found = any(request.endswith(ext) for ext in conf.multimedia_files)
    self.logger.debug("True" if found else "False")
    return found
def _appendHit(self, hit):
@ -351,10 +368,10 @@ class IWLA(object):
gmt_offset_minutes = int(gmt_offset_str[3:5])*60
gmt_offset = gmt_offset_hours + gmt_offset_minutes
hit['time_decoded'] = time.strptime(hit['time_local'][:-6], conf.time_format[:-3])
if gmt_offset_str[0] == '+':
hit['time_decoded'] = time.localtime(time.mktime(hit['time_decoded'])+gmt_offset)
else:
hit['time_decoded'] = time.localtime(time.mktime(hit['time_decoded'])-gmt_offset)
# if gmt_offset_str[0] == '-':
# hit['time_decoded'] = time.localtime(time.mktime(hit['time_decoded'])+gmt_offset)
# else:
# hit['time_decoded'] = time.localtime(time.mktime(hit['time_decoded'])-gmt_offset)
else:
raise e
return hit['time_decoded']
@ -371,6 +388,8 @@ class IWLA(object):
filename = self.getCurDisplayPath('index.html')
self.logger.info('==> Generate display (%s)' % (filename))
page = self.display.createPage(title, filename, conf.css_path)
link = DisplayHTMLRaw(self, '<iframe src="../_stats.html"></iframe>')
page.appendBlock(link)
_, nb_month_days = monthrange(cur_time.tm_year, cur_time.tm_mon)
days = self.display.createBlock(DisplayHTMLBlockTableWithGraph, self._('By day'), [self._('Day'), self._('Visits'), self._('Pages'), self._('Hits'), self._('Bandwidth'), self._('Not viewed Bandwidth')], None, nb_month_days, range(1,6))
@ -430,6 +449,8 @@ class IWLA(object):
graph_cols=range(1,7)
months = self.display.createBlock(DisplayHTMLBlockTableWithGraph, title, cols, None, 12, graph_cols)
months.setColsCSSClass(['', 'iwla_visitor', 'iwla_visit', 'iwla_page', 'iwla_hit', 'iwla_bandwidth', 'iwla_bandwidth', ''])
months_ = self.display.createBlock(DisplayHTMLBlockTableWithGraph, title, cols[:-1], None, 12, graph_cols[:-1])
months_.setColsCSSClass(['', 'iwla_visitor', 'iwla_visit', 'iwla_page', 'iwla_hit', 'iwla_bandwidth', 'iwla_bandwidth'])
total = [0] * len(cols)
for i in range(1, 13):
month = '%s<br/>%d' % (months_name[i], year)
@ -447,11 +468,16 @@ class IWLA(object):
months.setCellValue(i-1, 5, bytesToStr(row[5]))
months.setCellValue(i-1, 6, bytesToStr(row[6]))
months.appendShortTitle(month)
months_.appendRow(row[:-1])
months_.setCellValue(i-1, 5, bytesToStr(row[5]))
months_.setCellValue(i-1, 6, bytesToStr(row[6]))
months_.appendShortTitle(month)
if year == cur_time.tm_year and i == cur_time.tm_mon:
css = months.getCellCSSClass(i-1, 0)
if css: css = '%s %s' % (css, 'iwla_curday')
else: css = 'iwla_curday'
months.setCellCSSClass(i-1, 0, css)
months_.setCellCSSClass(i-1, 0, css)
total[0] = self._('Total')
total[5] = bytesToStr(total[5])
@ -460,6 +486,12 @@ class IWLA(object):
months.appendRow(total)
page.appendBlock(months)
months_.appendRow(total[:-1])
filename = '%d/_stats.html' % (year)
page_ = self.display.createPage(u'', filename, conf.css_path)
page_.appendBlock(months_)
page_.build(conf.DISPLAY_ROOT, False)
def _generateDisplayWholeMonthStats(self):
title = '%s %s' % (self._('Statistics for'), conf.domain_name)
filename = 'index.html'
@ -468,8 +500,15 @@ class IWLA(object):
page = self.display.createPage(title, filename, conf.css_path)
last_update = '<b>%s</b> %s<br />' % (self._('Last update'), time.strftime('%02d %b %Y %H:%M', time.localtime()))
last_update = u'<b>%s</b> %s<br />' % (self._(u'Last update'), time.strftime('%02d %b %Y %H:%M', time.localtime()))
page.appendBlock(self.display.createBlock(DisplayHTMLRaw, last_update))
duration = datetime.now() - self.start_time
duration = time.gmtime(duration.seconds)
time_analysis = u'<b>%s</b> ' % (self._('Time analysis'))
if duration.tm_hour:
time_analysis += u'%d %s, ' % (duration.tm_hour, self._(u'hours'))
time_analysis += u'%d %s and %d %s<br />' % (duration.tm_min, self._(u'minutes'), duration.tm_sec, self._(u'seconds'))
page.appendBlock(self.display.createBlock(DisplayHTMLRaw, time_analysis))
for year in sorted(self.meta_infos['stats'].keys(), reverse=True):
self._generateDisplayMonthStats(page, year, self.meta_infos['stats'][year])
@ -564,6 +603,9 @@ class IWLA(object):
self.meta_infos['stats'][year] = {}
self.meta_infos['stats'][year][month] = duplicated_stats
self.logger.info("==> Serialize to %s" % (conf.META_PATH))
self._serialize(self.meta_infos, conf.META_PATH)
self._generateDisplay()
def _generateDayStats(self):
@ -603,6 +645,7 @@ class IWLA(object):
def _newHit(self, hit):
if not self.domain_name_re.match(hit['server_name']):
self.logger.debug("Not in domain %s" % (hit))
return False
t = self._decodeTime(hit)
@ -613,10 +656,13 @@ class IWLA(object):
self.current_analysis = self._deserialize(self.getDBFilename(t)) or self._clearVisits()
self.analyse_started = True
else:
if time.mktime(t) <= time.mktime(cur_time):
if not self.analyse_started and\
time.mktime(t) <= time.mktime(cur_time):
self.logger.debug("Not in time")
return False
self.analyse_started = True
if cur_time.tm_mon != t.tm_mon:
self._generateDayStats()
self._generateMonthStats()
self.current_analysis = self._deserialize(self.getDBFilename(t)) or self._clearVisits()
elif cur_time.tm_mday != t.tm_mday:
@ -629,6 +675,9 @@ class IWLA(object):
if not self._decodeHTTPRequest(hit): return False
if hit['extract_request']['http_method'] not in ['GET', 'POST']:
return False
for k in hit.keys():
if hit[k] == '-' or hit[k] == '*':
hit[k] = ''
@ -637,11 +686,40 @@ class IWLA(object):
return True
def start(self, _file):
def _reset(self):
    """Restart the analysis from the month given with the reset option.

    self.args.reset is parsed with the '%m/%Y' format (eg '03/2015').
    meta_infos['last_time'] is moved back to that date, then the database
    and output directories of every month between the reset date and the
    current month (both included) are removed.

    Months that are before the reset date are kept untouched, even when
    they belong to the same year : they will not be analysed again, so
    removing them would lose their statistics.

    Raises ValueError (from time.strptime) if the date is malformed.
    """
    reset_time = time.strptime(self.args.reset, '%m/%Y')

    self.logger.info('Reset time')
    self.logger.info(reset_time)

    self.meta_infos['last_time'] = reset_time

    cur_time = time.localtime()
    last_month = (cur_time.tm_year, cur_time.tm_mon)
    year, month = reset_time.tm_year, reset_time.tm_mon
    # Tuples compare year first, then month : the loop walks every
    # month from the reset date up to now, across year boundaries
    while (year, month) <= last_month:
        for root in (conf.DB_ROOT, conf.DISPLAY_ROOT):
            path = os.path.join(root, str(year), '%02d' % (month))
            if os.path.exists(path): shutil.rmtree(path)
        if month == 12:
            year, month = year + 1, 1
        else:
            month += 1
def start(self, _file, args):
self.args = args
self.start_time = datetime.now()
self.logger.info('==> Load previous database')
self.meta_infos = self._deserialize(conf.META_PATH) or self._clearMeta()
if self.meta_infos['last_time']:
if args.reset:
self._reset()
self.logger.info('Last time')
self.logger.info(self.meta_infos['last_time'])
self.current_analysis = self._deserialize(self.getDBFilename(self.meta_infos['last_time'])) or self._clearVisits()
@ -669,10 +747,45 @@ class IWLA(object):
self._generateDayStats()
self._generateMonthStats()
del self.meta_infos['start_analysis_time']
self._serialize(self.meta_infos, conf.META_PATH)
else:
self.logger.info('==> Analyse not started : nothing new')
class FileIter(object):
    """Iterate over the lines of several files as if they were a single one.

    filenames is a comma separated list of paths. Files whose name ends
    with 'gz' are opened with gzip, the others with the builtin open().
    Lines are returned without their trailing newline.
    """

    def __init__(self, filenames):
        """Check that every file exists, then open the first one.

        Exits the program (status -1) if one of the files is missing.
        An empty list of files gives an empty iterator.
        """
        self.filenames = [f for f in filenames.split(',') if f]
        for f in self.filenames:
            if not os.path.exists(f):
                print('No such file \'%s\'' % (f))
                sys.exit(-1)
        self.cur_file = None
        # Don't let StopIteration escape from the constructor
        if self.filenames:
            self._openNextFile()

    def __iter__(self):
        return self

    def __next__(self):
        # Python 3 iterator protocol, same behaviour as next()
        return self.next()

    def _openNextFile(self):
        """Close the current file (if any) and open the next one.

        Raises StopIteration when there is no file left.
        """
        if self.cur_file:
            self.cur_file.close()
            self.cur_file = None
        if not self.filenames:
            raise StopIteration()
        filename = self.filenames.pop(0)
        if filename.endswith('gz'):
            self.cur_file = gzip.open(filename, 'r')
        else:
            self.cur_file = open(filename)

    def next(self):
        """Return the next line, without its trailing newline.

        Raises StopIteration once all files have been read, and keeps
        raising it on further calls.
        """
        while True:
            if self.cur_file is None:
                self._openNextFile()
            l = self.cur_file.readline()
            if l:
                break
            # End of this file (it may even be empty) : try the next one
            # instead of returning a line that doesn't exist
            self.cur_file.close()
            self.cur_file = None
        # Only strip a real newline : the last line of a file may not
        # have one (gzip files can return bytes, so both types are tested)
        if l[-1:] in ('\n', b'\n'):
            return l[:-1]
        return l
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Intelligent Web Log Analyzer')
@ -685,14 +798,39 @@ if __name__ == '__main__':
help='Read data from stdin instead of conf.analyzed_filename')
parser.add_argument('-f', '--file', dest='file',
help='Analyse this log file')
help='Analyse this log file, multiple files can be specified (comma separated). gz files are accepted')
parser.add_argument('-d', '--log-level', dest='loglevel',
default='INFO', type=str,
help='Loglevel in %s, default : %s' % (['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], 'INFO'))
# --reset takes the date as value : it is parsed later with the '%m/%Y'
# format, so it must not be a boolean flag. It stays False when not given.
parser.add_argument('-r', '--reset', dest='reset',
                    default=False, metavar='MM/YYYY',
                    help='Reset analysis to a specific date (month/year)')
parser.add_argument('-z', '--dont-compress', dest='dont_compress', action='store_true',
default=False,
help='Don\'t compress databases (bigger but faster, not compatible with compressed databases)')
args = parser.parse_args()
# Merge user conf into default conf :
#  - NAME_append values are added to the list NAME of default conf
#  - other values replace (or add) the one of default conf
for (key, value) in user_conf.__dict__.items():
    if not key.endswith('_append'):
        conf.__dict__.update({key:value})
        continue

    # Strip '_append' to get the name of the targeted list
    target = key[:-7]
    if target not in dir(conf):
        print("Error %s doesn't exists in default conf" % (target))
        continue
    if type(conf.__dict__[target]) != list:
        print("Error %s is not a list" % (target))
        continue

    if type(value) == list:
        conf.__dict__[target] += value
    else:
        conf.__dict__[target].append(value)
if args.clean_output:
if os.path.exists(conf.DB_ROOT): shutil.rmtree(conf.DB_ROOT)
if os.path.exists(conf.DISPLAY_ROOT): shutil.rmtree(conf.DISPLAY_ROOT)
@ -708,11 +846,7 @@ if __name__ == '__main__':
sys.exit(0)
if args.stdin:
iwla.start(sys.stdin)
iwla.start(sys.stdin, args)
else:
filename = args.file or conf.analyzed_filename
if not os.path.exists(filename):
print 'No such file \'%s\'' % (filename)
sys.exit(-1)
with open(filename) as f:
iwla.start(f)
iwla.start(FileIter(filename), args)

Binary file not shown.

View File

@ -5,8 +5,8 @@
msgid ""
msgstr ""
"Project-Id-Version: iwla\n"
"POT-Creation-Date: 2014-12-19 17:43+CET\n"
"PO-Revision-Date: 2014-12-19 17:43+0100\n"
"POT-Creation-Date: 2015-03-02 19:44+CET\n"
"PO-Revision-Date: 2015-03-02 19:45+0100\n"
"Last-Translator: Soutadé <soutade@gmail.com>\n"
"Language-Team: iwla\n"
"Language: fr_FR\n"
@ -37,11 +37,11 @@ msgstr "Juillet"
msgid "March"
msgstr "Mars"
#: display.py:32 iwla.py:428
#: display.py:32 iwla.py:440
msgid "June"
msgstr "Juin"
#: display.py:32 iwla.py:428
#: display.py:32 iwla.py:440
msgid "May"
msgstr "Mai"
@ -65,116 +65,143 @@ msgstr "Octobre"
msgid "September"
msgstr "Septembre"
#: iwla.py:371
#: display.py:187
msgid "Ratio"
msgstr "Pourcentage"
#: iwla.py:381
msgid "Statistics"
msgstr "Statistiques"
#: iwla.py:377
msgid "By day"
msgstr "Par jour"
#: iwla.py:377
msgid "Day"
msgstr "Jour"
#: iwla.py:377 iwla.py:430
#: iwla.py:389 iwla.py:442
msgid "Not viewed Bandwidth"
msgstr "Trafic non vu"
#: iwla.py:377 iwla.py:430
#: iwla.py:389 iwla.py:442
msgid "Visits"
msgstr "Visites"
#: iwla.py:377 iwla.py:430 plugins/display/all_visits.py:70
#: plugins/display/referers.py:95 plugins/display/referers.py:153
#: plugins/display/top_downloads.py:97 plugins/display/top_visitors.py:72
#: iwla.py:389 iwla.py:442 plugins/display/all_visits.py:70
#: plugins/display/feeds.py:75 plugins/display/hours_stats.py:73
#: plugins/display/hours_stats.py:83 plugins/display/referers.py:95
#: plugins/display/referers.py:153 plugins/display/top_downloads.py:97
#: plugins/display/top_visitors.py:72 plugins/display/track_users.py:113
msgid "Hits"
msgstr "Hits"
#: iwla.py:377 iwla.py:430 plugins/display/all_visits.py:70
#: plugins/display/referers.py:95 plugins/display/referers.py:153
#: plugins/display/top_visitors.py:72
#: iwla.py:389 iwla.py:442 plugins/display/all_visits.py:70
#: plugins/display/feeds.py:75 plugins/display/hours_stats.py:73
#: plugins/display/hours_stats.py:83 plugins/display/referers.py:95
#: plugins/display/referers.py:153 plugins/display/top_visitors.py:72
#: plugins/display/track_users.py:77 plugins/display/track_users.py:113
msgid "Pages"
msgstr "Pages"
#: iwla.py:377 iwla.py:430 plugins/display/all_visits.py:70
#: iwla.py:389 iwla.py:442 plugins/display/all_visits.py:70
#: plugins/display/hours_stats.py:73 plugins/display/hours_stats.py:83
#: plugins/display/top_visitors.py:72
msgid "Bandwidth"
msgstr "Bande passante"
#: iwla.py:414
#: iwla.py:389 plugins/display/hours_stats.py:71
msgid "By day"
msgstr "Par jour"
#: iwla.py:389 plugins/display/hours_stats.py:73
msgid "Day"
msgstr "Jour"
#: iwla.py:426
msgid "Average"
msgstr "Moyenne"
#: iwla.py:419 iwla.py:457
#: iwla.py:431 iwla.py:476
msgid "Total"
msgstr "Total"
#: iwla.py:428
#: iwla.py:440
msgid "Apr"
msgstr "Avr"
#: iwla.py:428
#: iwla.py:440
msgid "Aug"
msgstr "Août"
#: iwla.py:428
#: iwla.py:440
msgid "Dec"
msgstr "Déc"
#: iwla.py:428
#: iwla.py:440
msgid "Feb"
msgstr "Fév"
#: iwla.py:428
#: iwla.py:440
msgid "Jan"
msgstr "Jan"
#: iwla.py:428
#: iwla.py:440
msgid "Jul"
msgstr "Jui"
#: iwla.py:428
#: iwla.py:440
msgid "Mar"
msgstr "Mars"
#: iwla.py:428
#: iwla.py:440
msgid "Nov"
msgstr "Nov"
#: iwla.py:428
#: iwla.py:440
msgid "Oct"
msgstr "Oct"
#: iwla.py:428
#: iwla.py:440
msgid "Sep"
msgstr "Sep"
#: iwla.py:429
#: iwla.py:441
msgid "Summary"
msgstr "Résumé"
#: iwla.py:430
#: iwla.py:442
msgid "Month"
msgstr "Mois"
#: iwla.py:430
#: iwla.py:442
msgid "Visitors"
msgstr "Visiteurs"
#: iwla.py:430 iwla.py:440
#: iwla.py:442 iwla.py:454 plugins/display/feeds.py:98
#: plugins/display/operating_systems.py:90 plugins/display/track_users.py:108
msgid "Details"
msgstr "Détails"
#: iwla.py:465
#: iwla.py:490
msgid "Statistics for"
msgstr "Statistiques pour"
#: iwla.py:472
#: iwla.py:497
msgid "Last update"
msgstr "Dernière mise à jour"
#: plugins/display/all_visits.py:70 plugins/display/top_visitors.py:72
#: iwla.py:501
msgid "Time analysis"
msgstr "Durée de l'analyse"
#: iwla.py:503
msgid "hours"
msgstr "heures"
#: iwla.py:504
msgid "minutes"
msgstr "minutes"
#: iwla.py:504
msgid "seconds"
msgstr "secondes"
#: plugins/display/all_visits.py:70 plugins/display/feeds.py:75
#: plugins/display/top_visitors.py:72
msgid "Host"
msgstr "Hôte"
@ -190,6 +217,103 @@ msgstr "Toutes les visites"
msgid "Top visitors"
msgstr "Top visiteurs"
#: plugins/display/browsers.py:79
msgid "Browsers"
msgstr "Navigateurs"
#: plugins/display/browsers.py:79 plugins/display/browsers.py:113
msgid "Browser"
msgstr "Navigateur"
#: plugins/display/browsers.py:79 plugins/display/browsers.py:113
#: plugins/display/operating_systems.py:78
#: plugins/display/operating_systems.py:95 plugins/display/top_hits.py:71
#: plugins/display/top_hits.py:97 plugins/display/top_pages.py:71
#: plugins/display/top_pages.py:96
msgid "Entrance"
msgstr "Entrées"
#: plugins/display/browsers.py:98 plugins/display/browsers.py:128
#: plugins/display/referers.py:110 plugins/display/referers.py:125
#: plugins/display/referers.py:140 plugins/display/referers.py:163
#: plugins/display/referers.py:174 plugins/display/referers.py:185
#: plugins/display/referers.py:222 plugins/display/top_downloads.py:83
#: plugins/display/top_downloads.py:103 plugins/display/top_hits.py:82
#: plugins/display/top_hits.py:103 plugins/display/top_pages.py:82
#: plugins/display/top_pages.py:102 plugins/display/top_visitors.py:92
msgid "Others"
msgstr "Autres"
#: plugins/display/browsers.py:107
msgid "All Browsers"
msgstr "Tous les navigateurs"
#: plugins/display/feeds.py:69
msgid "All Feeds parsers"
msgstr "Tous les agrégateurs"
#: plugins/display/feeds.py:75
msgid "All feeds parsers"
msgstr "Tous les agrégateurs"
#: plugins/display/feeds.py:91
msgid "Merged feeds parsers"
msgstr "Agrégateurs fusionnés"
#: plugins/display/feeds.py:96
msgid "Feeds parsers"
msgstr "Agrégateurs"
#: plugins/display/feeds.py:103
msgid "Found"
msgstr "Trouvé"
#: plugins/display/hours_stats.py:72
msgid "Fri"
msgstr "Ven"
#: plugins/display/hours_stats.py:72
msgid "Mon"
msgstr "Lun"
#: plugins/display/hours_stats.py:72
msgid "Sat"
msgstr "Sam"
#: plugins/display/hours_stats.py:72
msgid "Sun"
msgstr "Dim"
#: plugins/display/hours_stats.py:72
msgid "Thu"
msgstr "Jeu"
#: plugins/display/hours_stats.py:72
msgid "Tue"
msgstr "Mar"
#: plugins/display/hours_stats.py:72
msgid "Wed"
msgstr "Mer"
#: plugins/display/hours_stats.py:81
msgid "By Hours"
msgstr "Par heures"
#: plugins/display/hours_stats.py:83
msgid "Hours"
msgstr "Heures"
#: plugins/display/operating_systems.py:78
#: plugins/display/operating_systems.py:88
msgid "Operating Systems"
msgstr "Systèmes d'exploitation"
#: plugins/display/operating_systems.py:78
#: plugins/display/operating_systems.py:95
msgid "Operating System"
msgstr "Système d'exploitation"
#: plugins/display/referers.py:95
msgid "Connexion from"
msgstr "Connexion depuis"
@ -202,16 +326,6 @@ msgstr "Origine"
msgid "Search Engine"
msgstr "Moteur de recherche"
#: plugins/display/referers.py:110 plugins/display/referers.py:125
#: plugins/display/referers.py:140 plugins/display/referers.py:163
#: plugins/display/referers.py:174 plugins/display/referers.py:185
#: plugins/display/referers.py:222 plugins/display/top_downloads.py:83
#: plugins/display/top_downloads.py:103 plugins/display/top_hits.py:82
#: plugins/display/top_hits.py:103 plugins/display/top_pages.py:82
#: plugins/display/top_pages.py:102 plugins/display/top_visitors.py:92
msgid "Others"
msgstr "Autres"
#: plugins/display/referers.py:114 plugins/display/referers.py:167
msgid "External URL"
msgstr "URL externe"
@ -228,9 +342,13 @@ msgstr "Top Origines"
msgid "All Referers"
msgstr "Toutes les origines"
#: plugins/display/referers.py:200 plugins/display/referers.py:210
msgid "Top key phrases"
msgstr "Top phrases clé"
#: plugins/display/referers.py:193
msgid "All Key Phrases"
msgstr "Toutes les phrases clé"
#: plugins/display/referers.py:200
msgid "Key phrases"
msgstr "Phrases clé"
#: plugins/display/referers.py:200 plugins/display/referers.py:216
msgid "Key phrase"
@ -240,6 +358,10 @@ msgstr "Phrase clé"
msgid "Search"
msgstr "Recherche"
#: plugins/display/referers.py:210
msgid "Top key phrases"
msgstr "Top phrases clé"
#: plugins/display/referers.py:212
msgid "All key phrases"
msgstr "Toutes les phrases clé"
@ -266,11 +388,6 @@ msgstr "Top Téléchargements"
msgid "All Hits"
msgstr "Tous les hits"
#: plugins/display/top_hits.py:71 plugins/display/top_hits.py:97
#: plugins/display/top_pages.py:71 plugins/display/top_pages.py:96
msgid "Entrance"
msgstr "Entrées"
#: plugins/display/top_pages.py:71 plugins/display/top_pages.py:90
msgid "All Pages"
msgstr "Toutes les pages"
@ -279,5 +396,20 @@ msgstr "Toutes les pages"
msgid "Top Pages"
msgstr "Top Pages"
#: plugins/display/track_users.py:77 plugins/display/track_users.py:106
msgid "Tracked users"
msgstr "Utilisateurs traqués"
#: plugins/display/track_users.py:77 plugins/display/track_users.py:113
msgid "Last Access"
msgstr "Dernière visite"
#: plugins/display/track_users.py:113
msgid "IP"
msgstr "IP"
#~ msgid "Page"
#~ msgstr "Page"
#~ msgid "Key Phrases"
#~ msgstr "Phrases clé"

132
plugins/display/browsers.py Normal file
View File

@ -0,0 +1,132 @@
# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé 2015
# This file is part of iwla
# iwla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# iwla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
#
from iwla import IWLA
from iplugin import IPlugin
from display import *
import awstats_data
"""
Display hook
Create browsers page
Plugin requirements :
post_analysis/browsers
Conf values needed :
max_browsers_displayed*
create_browsers_page*
Output files :
OUTPUT_ROOT/year/month/browsers.html
OUTPUT_ROOT/year/month/index.html
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
"""
class IWLADisplayBrowsers(IPlugin):
def __init__(self, iwla):
    """Initialize the plugin and declare what it needs.

    iwla -- object forwarded to the IPlugin constructor
    """
    super(IWLADisplayBrowsers, self).__init__(iwla)
    # Name of the plugin class that must be loaded for this one to work
    self.requires = ['IWLAPostAnalysisBrowsers']
    self.API_VERSION = 1
def load(self):
    """Read the plugin configuration and build the icons lookup table.

    Conf values read (with their default) : icon_path ('/'),
    max_browsers_displayed (0) and create_browsers_page (True).
    Always returns True.
    """
    get_conf = self.iwla.getConfValue
    self.icon_path = get_conf('icon_path', '/')
    self.max_browsers = get_conf('max_browsers_displayed', 0)
    self.create_browsers = get_conf('create_browsers_page', True)
    # Reverse awstats_data.browsers_hashid : values become keys
    self.icon_names = dict((v, k) for (k, v) in awstats_data.browsers_hashid.items())
    return True
def hook(self):
display = self.iwla.getDisplay()
browsers = self.iwla.getMonthStats()['browsers']
browsers = sorted(browsers.items(), key=lambda t: t[1], reverse=True)
# All in a file
if self.create_browsers:
title = createCurTitle(self.iwla, u'Browsers')
filename = 'browsers.html'
path = self.iwla.getCurDisplayPath(filename)
page = display.createPage(title, path, self.iwla.getConfValue('css_path', []))
table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'Browsers'), ['', self.iwla._(u'Browser'), self.iwla._(u'Entrance')])
table.setColsCSSClass(['', '', 'iwla_hit'])
total_browsers = [0]*3
new_list = self.max_browsers and browsers[:self.max_browsers] or browsers
for (browser, entrance) in new_list:
if browser != 'unknown':
try:
icon = '<img src="/%s/browser/%s.png"/>' % (self.icon_path, awstats_data.browsers_icons[self.icon_names[browser]])
except:
icon = '<img src="/%s/browser/unknown.png"/>' % (self.icon_path)
else:
icon = '<img src="/%s/browser/unknown.png"/>' % (self.icon_path)
browser = 'Unknown'
table.appendRow([icon, browser, entrance])
total_browsers[2] += entrance
if self.max_browsers:
others = 0
for (browser, entrance) in browsers[self.max_browsers:]:
others += entrance
table.appendRow(['', self.iwla._(u'Others'), others])
table.setCellCSSClass(table.getNbRows()-1, 0, 'iwla_others')
page.appendBlock(table)
display.addPage(page)
title = self.iwla._(u'Top Browsers')
if self.create_browsers:
link = '<a href=\'%s\'>%s</a>' % (filename, self.iwla._(u'All Browsers'))
title = '%s - %s' % (title, link)
# Top in index
index = self.iwla.getDisplayIndex()
table = display.createBlock(DisplayHTMLBlockTable, title, ['', self.iwla._(u'Browser'), self.iwla._(u'Entrance')])
table.setColsCSSClass(['', '', 'iwla_hit'])
for (browser, entrance) in browsers[:10]:
if browser != 'unknown':
try:
icon = '<img src="/%s/browser/%s.png"/>' % (self.icon_path, awstats_data.browsers_icons[self.icon_names[browser]])
except:
icon = '<img src="/%s/browser/unknown.png"/>' % (self.icon_path)
else:
icon = '<img src="/%s/browser/unknown.png"/>' % (self.icon_path)
browser = self.iwla._(u'Unknown')
table.appendRow([icon, browser, entrance])
total_browsers[2] -= entrance
if total_browsers[2]:
total_browsers[0] = u''
total_browsers[1] = self.iwla._(u'Others')
table.appendRow(total_browsers)
table.setCellCSSClass(table.getNbRows()-1, 0, 'iwla_others')
table.computeRatio(2)
index.appendBlock(table)

106
plugins/display/feeds.py Normal file
View File

@ -0,0 +1,106 @@
# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé 2015
# This file is part of iwla
# iwla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# iwla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
#
from iwla import IWLA
from iplugin import IPlugin
from display import *
"""
Display hook
Display feeds parsers
Plugin requirements :
post_analysis/feeds
Conf values needed :
create_all_feeds_page*
Output files :
OUTPUT_ROOT/year/month/index.html
OUTPUT_ROOT/year/month/all_feeds.html
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
"""
class IWLADisplayFeeds(IPlugin):
    """Display hook listing the visitors flagged as feeds parsers.

    Optionally creates an all_feeds.html page and always adds the number
    of feeds parsers found to the month index.
    """

    def __init__(self, iwla):
        super(IWLADisplayFeeds, self).__init__(iwla)
        self.API_VERSION = 1
        self.requires = ['IWLAPostAnalysisFeeds']

    def load(self):
        """Read the create_all_feeds_page configuration value."""
        self.create_all_feeds_page = self.iwla.getConfValue('create_all_feeds_page', True)
        return True

    def hook(self):
        """Build the feeds parsers page (if enabled) and the index block."""
        from plugins.post_analysis.feeds import IWLAPostAnalysisFeeds

        display = self.iwla.getDisplay()
        hits = self.iwla.getCurrentVisists()

        # Selected before any display work so that the number shown in
        # index is right even when the detailed page is disabled
        feeds_parsers = [super_hit for super_hit in hits.values() if super_hit['feed_parser']]
        nb_feeds_parsers = len(feeds_parsers)

        # All in a page
        if self.create_all_feeds_page:
            title = createCurTitle(self.iwla, self.iwla._(u'All Feeds parsers'))
            filename = 'all_feeds.html'
            path = self.iwla.getCurDisplayPath(filename)
            display_visitor_ip = self.iwla.getConfValue('display_visitor_ip', False)

            page = display.createPage(title, path, self.iwla.getConfValue('css_path', []))
            table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'All feeds parsers'), [self.iwla._(u'Host'), self.iwla._(u'Pages'), self.iwla._(u'Hits')])
            table.setColsCSSClass(['', 'iwla_page', 'iwla_hit'])
            for super_hit in feeds_parsers:
                address = super_hit['remote_addr']
                if display_visitor_ip and\
                   super_hit.get('dns_name_replaced', False):
                    address = '%s [%s]' % (address, super_hit['remote_ip'])
                # Merged feeds parsers are marked with a star (see note below table)
                if super_hit['feed_parser'] == IWLAPostAnalysisFeeds.MERGED_FEED_PARSER:
                    address += '*'
                if super_hit['robot']:
                    table.appendRow([address, super_hit['not_viewed_pages'], super_hit['not_viewed_hits']])
                else:
                    table.appendRow([address, super_hit['viewed_pages'], super_hit['viewed_hits']])
            page.appendBlock(table)
            note = DisplayHTMLRaw(self.iwla, ('<small>*%s</small>' % (self.iwla._(u'Merged feeds parsers'))))
            page.appendBlock(note)

            display.addPage(page)

        # Found in index
        title = self.iwla._(u'Feeds parsers')
        if self.create_all_feeds_page:
            link = '<a href=\'%s\'>%s</a>' % (filename, self.iwla._(u'Details'))
            title = '%s - %s' % (title, link)

        index = self.iwla.getDisplayIndex()

        table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Found')])
        table.appendRow([nb_feeds_parsers])
        index.appendBlock(table)

View File

@ -0,0 +1,88 @@
# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé 2015
# This file is part of iwla
# iwla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# iwla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
#
from iwla import IWLA
from iplugin import IPlugin
from display import *
"""
Display hook
Display statistics by hour/week day
Plugin requirements :
post_analysis/hours_stats
Conf values needed :
None
Output files :
OUTPUT_ROOT/year/month/index.html
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
"""
class IWLADisplayHoursStats(IPlugin):
    """Display hook adding "by day" and "by hours" graphs to the month index."""

    def __init__(self, iwla):
        super(IWLADisplayHoursStats, self).__init__(iwla)
        self.requires = ['IWLAPostAnalysisHoursStats']
        self.API_VERSION = 1

    def _getStats(self, month_stats, key, nb_slots):
        """Return month_stats[key], zero filled for nb_slots slots when empty."""
        slots = month_stats.get(key, {})
        if not slots:
            for slot in range(0, nb_slots):
                slots[slot] = {'pages':0, 'hits':0, 'bandwidth':0}
        return slots

    def _appendGraph(self, display, index, title, first_column, labels, slots):
        """Create one table with graph (one row per label) and add it to index."""
        headers = [first_column, self.iwla._('Pages'), self.iwla._('Hits'), self.iwla._('Bandwidth')]
        table = display.createBlock(DisplayHTMLBlockTableWithGraph, title, headers, labels, len(labels), range(1,4))
        table.setColsCSSClass(['', 'iwla_page', 'iwla_hit', 'iwla_bandwidth'])
        for (row, label) in enumerate(labels):
            values = slots[row]
            table.appendRow([label, values['pages'], values['hits'], values['bandwidth']])
            # The raw value is given to appendRow, the cell text is human readable
            table.setCellValue(row, 3, bytesToStr(values['bandwidth']))
        index.appendBlock(table)

    def hook(self):
        """Append the week day table then the hours table to the index."""
        display = self.iwla.getDisplay()
        month_stats = self.iwla.getMonthStats()

        hours_stats = self._getStats(month_stats, 'hours_stats', 24)
        days_stats = self._getStats(month_stats, 'days_stats', 7)

        index = self.iwla.getDisplayIndex()

        # By Day
        day_names = [self.iwla._(name) for name in ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')]
        self._appendGraph(display, index, self.iwla._(u'By day'), self.iwla._('Day'), day_names, days_stats)

        # By Hours
        hour_names = ['%02d' % hour for hour in range(0, 24)]
        self._appendGraph(display, index, self.iwla._(u'By Hours'), self.iwla._('Hours'), hour_names, hours_stats)

View File

@ -24,7 +24,7 @@ from display import *
import logging
"""
Display hook itnerface
Display hook interface
Enlight new and updated statistics
@ -73,7 +73,9 @@ class IWLADisplayStatsDiff(IPlugin):
path = self.iwla.getCurDisplayPath(self.filename)
page = display.getPage(path)
if not page: return
if not page:
self.logger.error('No page for %s' % (path))
return
title = self.iwla._(self.block_name)
block = page.getBlock(title)
if not block:
@ -94,5 +96,6 @@ class IWLADisplayStatsDiff(IPlugin):
stats_diff[k] = 'iwla_new'
for (idx, row) in enumerate(block.rows):
if row[0] in stats_diff.keys():
block.setCellCSSClass(idx, 0, stats_diff[row[0]])
for k in stats_diff.keys():
if k in row[0]:
block.setCellCSSClass(idx, 0, stats_diff[k])

View File

@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé 2015
# This file is part of iwla
# iwla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# iwla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
#
from iwla import IWLA
from iplugin import IPlugin
from display import *
import awstats_data
"""
Display hook
Add operating systems statistics
Plugin requirements :
post_analysis/operating_systems
Conf values needed :
create_families_page*
Output files :
OUTPUT_ROOT/year/month/index.html
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
"""
class IWLADisplayTopOperatingSystems(IPlugin):
    """Display hook showing operating systems statistics.

    Optionally creates an operating_systems.html page (one row per
    operating system) and always adds a families table to the month index.
    """

    def __init__(self, iwla):
        super(IWLADisplayTopOperatingSystems, self).__init__(iwla)
        self.API_VERSION = 1
        self.requires = ['IWLAPostAnalysisOperatingSystems']

    def load(self):
        """Read configuration values and build the family -> icon name map."""
        self.icon_path = self.iwla.getConfValue('icon_path', '/')
        # The documented key is 'create_families_page'. The misspelled
        # 'create_families_page_page' that was read before is still used
        # as fallback for existing configurations
        legacy_value = self.iwla.getConfValue('create_families_page_page', True)
        self.create_families_page = self.iwla.getConfValue('create_families_page', legacy_value)
        self.icon_names = {v:k for (k, v) in awstats_data.operating_systems_family.items()}
        return True

    def hook(self):
        """Build the operating systems page (if enabled) and the index block."""
        display = self.iwla.getDisplay()
        month_stats = self.iwla.getMonthStats()
        os_families = sorted(month_stats['os_families'].items(), key=lambda t: t[1], reverse=True)
        operating_systems = sorted(month_stats['operating_systems'].items(), key=lambda t: t[1], reverse=True)

        # All in a page
        if self.create_families_page:
            title = createCurTitle(self.iwla, u'All Operating Systems')
            filename = 'operating_systems.html'
            path = self.iwla.getCurDisplayPath(filename)

            page = display.createPage(title, path, self.iwla.getConfValue('css_path', []))
            table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'Operating Systems'), ['', self.iwla._(u'Operating System'), self.iwla._(u'Entrance')])
            table.setColsCSSClass(['', '', 'iwla_hit'])
            for (os_name, entrance) in operating_systems:
                icon = '<img src="/%s/os/%s.png"/>' % (self.icon_path, os_name)
                table.appendRow([icon, os_name, entrance])
            page.appendBlock(table)

            display.addPage(page)

        # Families in index
        title = self.iwla._(u'Operating Systems')
        if self.create_families_page:
            link = '<a href=\'%s\'>%s</a>' % (filename, self.iwla._(u'Details'))
            title = '%s - %s' % (title, link)

        index = self.iwla.getDisplayIndex()

        table = display.createBlock(DisplayHTMLBlockTable, title, ['', self.iwla._(u'Operating System'), self.iwla._(u'Entrance')])
        table.setColsCSSClass(['', '', 'iwla_hit'])
        for (family, entrance) in os_families:
            icon = '<img src="/%s/os/%s.png"/>' % (self.icon_path, self.icon_names[family])
            table.appendRow([icon, family, entrance])
        table.computeRatio(2)
        index.appendBlock(table)

View File

@ -190,14 +190,14 @@ class IWLADisplayReferers(IPlugin):
# All key phrases in a file
if self.create_all_key_phrases:
title = createCurTitle(self.iwla, u'Key Phrases')
title = createCurTitle(self.iwla, self.iwla._(u'All Key Phrases'))
filename = 'key_phrases.html'
path = self.iwla.getCurDisplayPath(filename)
total_search = [0]*2
page = display.createPage(title, path, self.iwla.getConfValue('css_path', []))
table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'Top key phrases'), [self.iwla._(u'Key phrase'), self.iwla._(u'Search')])
table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'Key phrases'), [self.iwla._(u'Key phrase'), self.iwla._(u'Search')])
table.setColsCSSClass(['', 'iwla_search'])
new_list = self.max_key_phrases and top_key_phrases[:self.max_key_phrases] or top_key_phrases
for phrase in new_list:

View File

@ -53,7 +53,7 @@ class IWLADisplayReferersDiff(IWLADisplayStatsDiff):
self.requires = ['IWLADisplayReferers']
self.month_stats_key = 'key_phrases'
self.filename = 'key_phrases.html'
self.block_name = u'Key phrases'
self.block_name = self.iwla._(u'Key phrases')
def load(self):
if not self.iwla.getConfValue('create_all_key_phrases_page', True):

View File

@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé 2015
# This file is part of iwla
# iwla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# iwla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
#
from iwla import IWLA
from istats_diff import IWLADisplayStatsDiff
from display import *
"""
Display hook
Highlight new and updated downloads in top_downloads.html
Plugin requirements :
display/top_downloads
Conf values needed :
None
Output files :
None
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
"""
class IWLADisplayTopDownloadsDiff(IWLADisplayStatsDiff):
    """Stats diff plugin bound to the 'All Downloads' block of top_downloads.html."""

    def __init__(self, iwla):
        super(IWLADisplayTopDownloadsDiff, self).__init__(iwla)
        self.API_VERSION = 1
        self.requires = ['IWLADisplayTopDownloads']
        # Values used to locate the block to update
        self.block_name = self.iwla._(u'All Downloads')
        self.filename = u'top_downloads.html'
        self.month_stats_key = u'top_downloads'

    def load(self):
        """Refuse to load when the all downloads page is disabled in conf."""
        page_enabled = self.iwla.getConfValue('create_all_downloads_page', True)
        if page_enabled:
            return super(IWLADisplayTopDownloadsDiff, self).load()
        return False

View File

@ -0,0 +1,122 @@
# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé 2015
# This file is part of iwla
# iwla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# iwla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
#
from iwla import IWLA
from iplugin import IPlugin
from display import *
import awstats_data
"""
Display hook
Track users
Plugin requirements :
None
Conf values needed :
tracked_ip
create_tracked_page*
Output files :
OUTPUT_ROOT/year/month/index.html
OUTPUT_ROOT/year/month/tracked_users.html
Statistics creation :
None
Statistics update :
None
Statistics deletion :
None
"""
class IWLADisplayTrackUsers(IPlugin):
    """Display hook following the IP addresses listed in tracked_ip conf value.

    Optionally creates a tracked_users.html page with the viewed pages of
    each tracked IP and always adds a summary table to the month index.
    """

    def __init__(self, iwla):
        super(IWLADisplayTrackUsers, self).__init__(iwla)
        self.API_VERSION = 1
        self.conf_requires = ['tracked_ip']

    def load(self):
        """Read create_tracked_page and tracked_ip configuration values."""
        self.create_tracked_page = self.iwla.getConfValue('create_tracked_page', True)
        self.tracked_ip = self.iwla.getConfValue('tracked_ip', [])
        return True

    def _analyzeRequests(self, requests):
        """Walk requests in reverse order, keeping only viewed ones.

        Returns (pages, nb_hits) where pages is a list of (uri, time_decoded)
        and nb_hits counts requests that are not pages or are multimedia files.
        """
        pages = []
        nb_hits = 0
        for r in requests[::-1]:
            uri = r['extract_request']['extract_uri'].lower()
            if not self.iwla.hasBeenViewed(r): continue
            if not self.iwla.isPage(uri) or\
               self.iwla.isMultimediaFile(uri):
                nb_hits += 1
                continue
            uri = "%s%s" % (r.get('server_name', ''),
                            r['extract_request']['extract_uri'])
            pages.append((uri, r['time_decoded']))
        return (pages, nb_hits)

    def _getIPTitle(self, hits, ip):
        """Return 'name [ip]' when the visit has dns_name_replaced, else ip."""
        if 'dns_name_replaced' in hits[ip]:
            return '%s [%s]' % (hits[ip]['remote_addr'], ip)
        return ip

    def hook(self):
        """Build the tracked users page (if enabled) and the index block."""
        display = self.iwla.getDisplay()
        hits = self.iwla.getCurrentVisists()

        # Done before any display work : index block needs these counters
        # even when the detailed page is disabled
        tracked = []
        for ip in self.tracked_ip:
            if ip not in hits: continue
            (pages, nb_hits) = self._analyzeRequests(hits[ip]['requests'])
            tracked.append((ip, pages, nb_hits))

        # All in a page
        if self.create_tracked_page:
            title = createCurTitle(self.iwla, u'Tracked users')
            filename = 'tracked_users.html'
            path = self.iwla.getCurDisplayPath(filename)

            page = display.createPage(title, path, self.iwla.getConfValue('css_path', []))
            table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'Tracked users'), [self.iwla._(u'Pages'), self.iwla._(u'Last Access')])
            table.setColsCSSClass(['iwla_page', ''])
            for (ip, pages, nb_hits) in tracked:
                # One title row per IP followed by its viewed pages
                table.appendRow(['<b>%s</b>' % (self._getIPTitle(hits, ip)), ''])
                for (uri, time_decoded) in pages:
                    table.appendRow([generateHTMLLink(uri), time.asctime(time_decoded)])
            page.appendBlock(table)

            display.addPage(page)

        # Last access in index
        title = self.iwla._(u'Tracked users')
        if self.create_tracked_page:
            link = '<a href=\'%s\'>%s</a>' % (filename, self.iwla._(u'Details'))
            title = '%s - %s' % (title, link)

        index = self.iwla.getDisplayIndex()

        table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'IP'), self.iwla._(u'Last Access'), self.iwla._(u'Pages'), self.iwla._(u'Hits')])
        table.setColsCSSClass(['', '', 'iwla_page', 'iwla_hit'])
        for (ip, pages, nb_hits) in tracked:
            table.appendRow([self._getIPTitle(hits, ip), time.asctime(hits[ip]['last_access']), len(pages), nb_hits])
        index.appendBlock(table)

View File

@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé 2015
# This file is part of iwla
# iwla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# iwla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
#
import re
from iwla import IWLA
from iplugin import IPlugin
import awstats_data
"""
Post analysis hook
Detect browser information from requests
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
visits :
remote_addr =>
browser
month_stats :
browsers =>
browser => count
Statistics update :
None
Statistics deletion :
None
"""
class IWLAPostAnalysisBrowsers(IPlugin):
    """Post analysis hook detecting the browser of each valid visitor.

    The detected name is stored in the visit ('browser' key) and the
    number of visitors per browser in month_stats['browsers'].
    """

    def __init__(self, iwla):
        super(IWLAPostAnalysisBrowsers, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Compile one regexp per awstats browser that has a display name."""
        self.browsers = []
        for hashid in awstats_data.browsers:
            # Entries without name cannot be reported : don't compile them
            if hashid in awstats_data.browsers_hashid:
                hashid_re = re.compile(r'.*%s.*' % (hashid), re.IGNORECASE)
                self.browsers.append((hashid_re, awstats_data.browsers_hashid[hashid]))
        return True

    def _detectBrowser(self, user_agent):
        """Return the name of the first browser matching user_agent or 'unknown'."""
        for (hashid_re, browser) in self.browsers:
            if hashid_re.match(user_agent):
                return browser
        return 'unknown'

    def hook(self):
        """Count valid visitors per browser into month_stats['browsers']."""
        stats = self.iwla.getValidVisitors()
        month_stats = self.iwla.getMonthStats()

        browsers_stats = {}

        for super_hit in stats.values():
            if 'browser' in super_hit:
                browser_name = super_hit['browser']
            else:
                # Default used when no request has a user agent. It is not
                # stored in the visit so that detection can be retried later.
                # Without it the name of the previous visitor was reused
                browser_name = 'unknown'
                for r in super_hit['requests'][::-1]:
                    user_agent = r['http_user_agent']
                    if not user_agent: continue
                    browser_name = self._detectBrowser(user_agent)
                    super_hit['browser'] = browser_name
                    break

            browsers_stats[browser_name] = browsers_stats.get(browser_name, 0) + 1

        month_stats['browsers'] = browsers_stats

View File

@ -0,0 +1,112 @@
# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé 2015
# This file is part of iwla
# iwla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# iwla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
#
import re
from iwla import IWLA
from iplugin import IPlugin
"""
Post analysis hook
Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot)
If there is only one hit per day to a feed, merge feeds parsers with the same user agent
as it must be the same person with a different IP address.
Plugin requirements :
None
Conf values needed :
feeds
merge_one_hit_only_feeds_parsers*
Output files :
None
Statistics creation :
remote_addr =>
feed_parser
Statistics update :
None
Statistics deletion :
None
"""
class IWLAPostAnalysisFeeds(IPlugin):
    """Post analysis hook setting the 'feed_parser' value of every visit."""

    # Possible values of 'feed_parser'
    NOT_A_FEED_PARSER = 0
    FEED_PARSER = 1
    MERGED_FEED_PARSER = 2

    def __init__(self, iwla):
        super(IWLAPostAnalysisFeeds, self).__init__(iwla)
        self.API_VERSION = 1
        self.conf_requires = ['feeds']

    def load(self):
        """Compile one regexp per entry of the feeds configuration value."""
        feeds = self.iwla.getConfValue('feeds', None)
        self.merge_one_hit_only_feeds_parsers = self.iwla.getConfValue('merge_one_hit_only_feeds_parsers', True)

        if feeds is None: return False

        self.feeds_re = [re.compile(r'.*%s.*' % (feed)) for feed in feeds]

        return True

    def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit):
        """Store 'feed_parser' in hit, merging one hit only feeds parsers.

        A feed parser with exactly one hit is marked as merged when its
        (lowercased) user agent is seen for the first time in one_hit_only,
        otherwise it is marked as not a feed parser. Any other visit gets
        isFeedParser unchanged.
        """
        result = isFeedParser
        if isFeedParser:
            if (hit['viewed_hits'] + hit['not_viewed_hits']) == 1:
                agent = hit['requests'][0]['http_user_agent'].lower()
                if one_hit_only.get(agent, None) is None:
                    # Merged
                    one_hit_only[agent] = (hit)
                    result = self.MERGED_FEED_PARSER
                else:
                    result = self.NOT_A_FEED_PARSER
        hit['feed_parser'] = result

    def hook(self):
        """Classify every current visit that has no 'feed_parser' value yet."""
        one_hit_only = {}
        merge = self.merge_one_hit_only_feeds_parsers

        for hit in self.iwla.getCurrentVisists().values():
            known = hit.get('feed_parser', None)

            if known is not None:
                # Already classified : only known feeds parsers go through merge again
                if known == self.FEED_PARSER and merge:
                    self.mergeOneHitOnlyFeedsParsers(known, one_hit_only, hit)
                continue

            # Classification only looks at the first request of the visit
            uri = hit['requests'][0]['extract_request']['extract_uri'].lower()
            status = self.NOT_A_FEED_PARSER
            for regexp in self.feeds_re:
                if not regexp.match(uri): continue
                # Robot that views pages -> bot
                if not (hit['robot'] and hit['viewed_pages']):
                    status = self.FEED_PARSER
                break

            if merge:
                self.mergeOneHitOnlyFeedsParsers(status, one_hit_only, hit)
            else:
                hit['feed_parser'] = status

View File

@ -0,0 +1,96 @@
# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé 2015
# This file is part of iwla
# iwla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# iwla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
#
from iwla import IWLA
from iplugin import IPlugin
"""
Post analysis hook
Count pages, hits and bandwidth by hour/week day
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
month_stats:
hours_stats =>
00 .. 23 =>
pages
hits
bandwidth
days_stats =>
0 .. 6 =>
pages
hits
bandwidth
Statistics update :
None
Statistics deletion :
None
"""
class IWLAPostAnalysisHoursStats(IPlugin):
    """Post analysis hook counting pages, hits and bandwidth by hour and week day."""

    def __init__(self, iwla):
        super(IWLAPostAnalysisHoursStats, self).__init__(iwla)
        self.API_VERSION = 1

    def _getStats(self, month_stats, key, nb_slots):
        """Return month_stats[key], zero filled for nb_slots slots when empty."""
        slots = month_stats.get(key, {})
        if not slots:
            for slot in range(0, nb_slots):
                slots[slot] = {'pages':0, 'hits':0, 'bandwidth':0}
        return slots

    def hook(self):
        """Add viewed requests of non robot visits to hours and days counters."""
        visits = self.iwla.getCurrentVisists()
        month_stats = self.iwla.getMonthStats()

        hours_stats = self._getStats(month_stats, 'hours_stats', 24)
        days_stats = self._getStats(month_stats, 'days_stats', 7)

        for visit in visits.values():
            if visit['robot']: continue
            # Requests are walked in reverse order until the first one that
            # is not valid for current analysis
            for request in reversed(visit['requests']):
                if not self.iwla.isValidForCurrentAnalysis(request):
                    break
                if not self.iwla.hasBeenViewed(request): continue
                if request['is_page']:
                    counter = 'pages'
                else:
                    counter = 'hits'
                when = request['time_decoded']
                for slot in (hours_stats[when.tm_hour], days_stats[when.tm_wday]):
                    slot[counter] += 1
                    slot['bandwidth'] += int(request['body_bytes_sent'])

        month_stats['hours_stats'] = hours_stats
        month_stats['days_stats'] = days_stats

View File

@ -0,0 +1,126 @@
# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé 2015
# This file is part of iwla
# iwla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# iwla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
#
import re
from iwla import IWLA
from iplugin import IPlugin
import awstats_data
"""
Post analysis hook
Detect operating systems from requests
Plugin requirements :
None
Conf values needed :
None
Output files :
None
Statistics creation :
visits :
remote_addr =>
operating_system
month_stats :
operating_systems =>
operating_system => count
os_families =>
family => count
Statistics update :
None
Statistics deletion :
None
"""
class IWLAPostAnalysisOperatingSystems(IPlugin):
    """Post analysis hook detecting the operating system of each valid visitor.

    The detected name is stored in the visit ('operating_system' key), the
    number of visitors per operating system in
    month_stats['operating_systems'] and per family in
    month_stats['os_families'].
    """

    def __init__(self, iwla):
        super(IWLAPostAnalysisOperatingSystems, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Compile operating systems and families regexps from awstats data."""
        self.operating_systems = []
        self.os_family = {}

        for hashid in awstats_data.operating_systems:
            # Entries without name cannot be reported : don't compile them
            if hashid in awstats_data.operating_systems_hashid:
                hashid_re = re.compile(r'.*%s.*' % (hashid), re.IGNORECASE)
                self.operating_systems.append((hashid_re, awstats_data.operating_systems_hashid[hashid]))

        for (name, family) in awstats_data.operating_systems_family.items():
            name_re = re.compile(r'.*%s.*' % (name))
            self.os_family[name_re] = family

        return True

    def _detectOperatingSystem(self, user_agent):
        """Return the first operating system matching user_agent or 'unknown'."""
        for (hashid_re, operating_system) in self.operating_systems:
            if hashid_re.match(user_agent):
                return operating_system
        return 'unknown'

    def _getFamily(self, os_name):
        """Return the family whose regexp matches os_name ('' if none)."""
        if os_name != 'unknown':
            for (name_re, family) in self.os_family.items():
                if name_re.match(os_name):
                    return family
        return ''

    def hook(self):
        """Count valid visitors per operating system and per family."""
        stats = self.iwla.getValidVisitors()
        month_stats = self.iwla.getMonthStats()

        os_stats = {}
        family_stats = {}

        for super_hit in stats.values():
            if 'operating_system' in super_hit:
                os_name = super_hit['operating_system']
            else:
                # Default used when no request has a user agent. It is not
                # stored in the visit so that detection can be retried later.
                # Without it the name of the previous visitor was reused
                os_name = 'unknown'
                for r in super_hit['requests'][::-1]:
                    user_agent = r['http_user_agent']
                    if not user_agent: continue
                    os_name = self._detectOperatingSystem(user_agent)
                    super_hit['operating_system'] = os_name
                    break

            os_stats[os_name] = os_stats.get(os_name, 0) + 1

            os_family = self._getFamily(os_name)
            if os_family:
                family_stats[os_family] = family_stats.get(os_family, 0) + 1

        month_stats['operating_systems'] = os_stats
        month_stats['os_families'] = family_stats

View File

@ -46,16 +46,16 @@ Statistics creation :
Statistics update :
month_stats :
referers =>
pages
hits
pages => count
hits => count
robots_referers =>
pages
hits
pages => count
hits => count
search_engine_referers =>
pages
hits
pages => count
hits => count
key_phrases =>
phrase
phrase => count
Statistics deletion :
None

View File

@ -18,8 +18,6 @@
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
#
import re
from iwla import IWLA
from iplugin import IPlugin
@ -43,7 +41,7 @@ Statistics creation :
Statistics update :
month_stats:
top_downloads =>
uri
uri => count
Statistics deletion :
None
@ -53,19 +51,14 @@ class IWLAPostAnalysisTopDownloads(IPlugin):
def __init__(self, iwla):
super(IWLAPostAnalysisTopDownloads, self).__init__(iwla)
self.API_VERSION = 1
self.conf_requires = ['multimedia_files', 'viewed_http_codes']
def hook(self):
stats = self.iwla.getCurrentVisists()
stats = self.iwla.getValidVisitors()
month_stats = self.iwla.getMonthStats()
multimedia_files = self.iwla.getConfValue('multimedia_files')
viewed_http_codes = self.iwla.getConfValue('viewed_http_codes')
top_downloads = month_stats.get('top_downloads', {})
for (k, super_hit) in stats.items():
if super_hit['robot']: continue
for r in super_hit['requests'][::-1]:
if not self.iwla.isValidForCurrentAnalysis(r):
break
@ -75,13 +68,8 @@ class IWLAPostAnalysisTopDownloads(IPlugin):
uri = r['extract_request']['extract_uri'].lower()
isMultimedia = False
for ext in multimedia_files:
if uri.endswith(ext):
isMultimedia = True
break
if isMultimedia: continue
if self.iwla.isMultimediaFile(uri):
continue
uri = "%s%s" % (r.get('server_name', ''),
r['extract_request']['extract_uri'])

View File

@ -41,7 +41,7 @@ Statistics creation :
Statistics update :
month_stats:
top_hits =>
uri
uri => count
Statistics deletion :
None

View File

@ -43,7 +43,7 @@ Statistics creation :
Statistics update :
month_stats:
top_pages =>
uri
uri => count
Statistics deletion :
None

View File

@ -19,6 +19,7 @@
#
import re
import logging
from iwla import IWLA
from iplugin import IPlugin
@ -58,14 +59,13 @@ class IWLAPreAnalysisPageToHit(IPlugin):
def load(self):
# Page to hit
self.ph_regexps = self.iwla.getConfValue('page_to_hit_conf', [])
if not self.ph_regexps: return False
self.ph_regexps = map(lambda(r): re.compile(r), self.ph_regexps)
# Hit to page
self.hp_regexps = self.iwla.getConfValue('hit_to_page_conf', [])
if not self.hp_regexps: return False
self.hp_regexps = map(lambda(r): re.compile(r), self.hp_regexps)
self.logger = logging.getLogger(self.__class__.__name__)
return True
def hook(self):
@ -87,7 +87,7 @@ class IWLAPreAnalysisPageToHit(IPlugin):
# Page to hit
for regexp in self.ph_regexps:
if regexp.match(uri):
#print '%s is a hit' % (uri )
self.logger.debug('%s changed from page to hit' % (uri))
request['is_page'] = False
super_hit['viewed_pages'] -= 1
super_hit['viewed_hits'] += 1
@ -96,7 +96,7 @@ class IWLAPreAnalysisPageToHit(IPlugin):
# Hit to page
for regexp in self.hp_regexps:
if regexp.match(uri):
#print '%s is a page' % (uri )
self.logger.debug('%s changed from hit to page' % (uri))
request['is_page'] = True
super_hit['viewed_pages'] += 1
super_hit['viewed_hits'] -= 1

View File

@ -19,6 +19,8 @@
#
import re
import logging
import inspect
from iwla import IWLA
from iplugin import IPlugin
@ -59,20 +61,37 @@ class IWLAPreAnalysisRobots(IPlugin):
def load(self):
    """Compile the robot-detection patterns and create the plugin logger.

    Sets:
      self.awstats_robots -- one case-insensitive regexp per entry of
                             awstats_data.robots
      self.robot_re       -- matches any user agent containing "bot"
      self.crawl_re       -- matches any user agent containing "crawl"
      self.logger         -- logger named after the plugin class

    Always returns True.
    """
    # Built as a real list (not map(lambda (x): ...)): hook() walks these
    # patterns once per visit, so they must be re-iterable, and the
    # tuple-parameter lambda form is not valid on newer interpreters.
    self.awstats_robots = [re.compile(('.*%s.*') % (x), re.IGNORECASE)
                           for x in awstats_data.robots]
    self.robot_re = re.compile(r'.*bot.*', re.IGNORECASE)
    self.crawl_re = re.compile(r'.*crawl.*', re.IGNORECASE)
    self.logger = logging.getLogger(self.__class__.__name__)
    return True
def _setRobot(self, k, super_hit):
    """Mark a visit as a robot and trace which rule decided it.

    k         -- key of the visit in the current-visits dictionary; only
                 used in the debug message.
    super_hit -- visit dictionary; its 'robot' entry is set to 1.

    When DEBUG logging is enabled, the name and line number of the
    calling function are logged so the triggering rule can be identified.
    """
    # The caller lookup only serves the debug trace, so skip it entirely
    # when DEBUG is off. Only the direct caller is needed, so read that
    # single frame instead of building the whole stack with inspect.stack().
    if self.logger.isEnabledFor(logging.DEBUG):
        frame = inspect.currentframe()
        # currentframe() may return None on interpreters without frame support
        caller = frame.f_back if frame is not None else None
        if caller is not None:
            self.logger.debug('%s is a robot (caller %s:%d)' % (k, caller.f_code.co_name, caller.f_lineno))
        else:
            self.logger.debug('%s is a robot (caller unknown)' % (k))
    super_hit['robot'] = 1
# Basic rule to detect robots
def hook(self):
hits = self.iwla.getCurrentVisists()
for (k, super_hit) in hits.items():
if super_hit['robot']: continue
if super_hit['robot']:
self.logger.debug('%s is a robot' % (k))
continue
isRobot = False
referers = 0
first_page = super_hit['requests'][0]
if not self.iwla.isValidForCurrentAnalysis(first_page): continue
if self.robot_re.match(first_page['http_user_agent']) or\
self.crawl_re.match(first_page['http_user_agent']):
self.logger.debug(first_page['http_user_agent'])
self._setRobot(k, super_hit)
continue
for r in self.awstats_robots:
if r.match(first_page['http_user_agent']):
@ -80,7 +99,8 @@ class IWLAPreAnalysisRobots(IPlugin):
break
if isRobot:
super_hit['robot'] = 1
self.logger.debug(first_page['http_user_agent'])
self._setRobot(k, super_hit)
continue
# 1) no pages view --> robot
@ -90,13 +110,14 @@ class IWLAPreAnalysisRobots(IPlugin):
# 2) pages without hit --> robot
if not super_hit['viewed_hits']:
super_hit['robot'] = 1
self.logger.debug(super_hit)
self._setRobot(k, super_hit)
continue
for hit in super_hit['requests']:
# 3) /robots.txt read
if hit['extract_request']['http_uri'] == '/robots.txt':
isRobot = True
if hit['extract_request']['http_uri'].endswith('/robots.txt'):
self._setRobot(k, super_hit)
break
# 4) Any referer for hits
@ -104,10 +125,10 @@ class IWLAPreAnalysisRobots(IPlugin):
referers += 1
if isRobot:
super_hit['robot'] = 1
self._setRobot(k, super_hit)
continue
if not super_hit['viewed_pages'] and \
(super_hit['viewed_hits'] and not referers):
super_hit['robot'] = 1
self._setRobot(k, super_hit)
continue

View File

@ -69,6 +69,9 @@ td:first-child
.iwla_weekend { background : #ECECEC; }
.iwla_curday { font-weight: bold; }
.iwla_others { color: #668; }
.iwla_update { background : orange; }
.iwla_new { background : green }
.iwla_graph_table
{
margin-left:auto;
@ -85,3 +88,5 @@ table.iwla_graph_table td
{
text-align:center;
}
iframe {outline:none; border:0px; width:100%; height:500px; display:block;}

Binary file not shown.

After

Width:  |  Height:  |  Size: 557 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 340 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 554 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 351 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 316 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 288 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 655 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 230 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 315 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 770 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 607 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 714 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 505 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 288 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 760 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 689 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 265 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 768 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 724 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 303 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 707 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 757 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 631 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 677 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 552 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 824 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 488 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 420 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 420 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 708 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 623 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 662 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 288 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 278 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 710 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 930 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 269 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 791 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 894 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 269 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 314 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 421 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 419 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 288 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 676 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 288 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 285 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 682 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 541 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 344 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 288 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 662 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 370 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 655 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 289 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 689 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 314 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 475 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 289 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 790 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 328 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 667 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 372 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 208 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 440 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 636 B

Some files were not shown because too many files have changed in this diff Show More