From 954b4972a9980f957156d984dbc45fba6b0b9997 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9gory=20Soutad=C3=A9?= Date: Wed, 15 Apr 2020 14:44:11 +0200 Subject: [PATCH] Update ChangeLog and Documentation --- ChangeLog | 17 ++++++++++++++--- docs/index.md | 45 +++++++++++++++++++++++++++++++++++++-------- docs/modules.md | 40 ++++++++++++++++++++++++++++++++++------ iwla.py | 3 ++- 4 files changed, 87 insertions(+), 18 deletions(-) diff --git a/ChangeLog b/ChangeLog index f162a12..40510f5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,12 +1,23 @@ -v0.5 (25/05/2017) +v0.5 (15/04/2020) ** User ** Add --dry-run (-D) argument + Add more rules for robot detection : + More than ten 404 pages viewed + No page and no Hit + Pages without hit + New format for (not_)viewed pages/hits and bandwidth that are now recorded by day (in a dictionary where only element 0 is initialized). Element 0 is the total. WARNING : not backward compatible with previous databases. + Sync data with awstats (develop branch : 7.7+) + Make backup before compressing (low memory servers) + Add top_pages_diff plugin + Add IP exclusion feature ** Dev ** Use cPickle instead of pickle Don't save all robots requests (only first pass is kept) which allow to save a large amount of memory/disk space - Add one more rule to robot detection : more than ten 404 pages viewed + Handle URLs with empty referer + Don't try to find search engine on robots ** Bugs ** - + Fix KeyError : geo attribute may not exist + Call post hook plugins even in display only mode v0.4 (29/01/2017) ** User ** diff --git a/docs/index.md b/docs/index.md index 7772ee7..7b72d1a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -11,7 +11,7 @@ Nevertheless, iwla is only focused on HTTP logs. 
It uses data (robots definition Usage ----- - ./iwla [-c|--clean-output] [-i|--stdin] [-f FILE|--file FILE] [-d LOGLEVEL|--log-level LOGLEVEL] [-r|--reset year/month] [-z|--dont-compress] [-p] + ./iwla [-c|--clean-output] [-i|--stdin] [-f FILE|--file FILE] [-d LOGLEVEL|--log-level LOGLEVEL] [-r|--reset year/month] [-z|--dont-compress] [-p] [-D|--dry-run] -c : Clean output (database and HTML) before starting -i : Read data from stdin instead of conf.analyzed_filename @@ -20,7 +20,8 @@ Usage -r : Reset analysis to a specific date (month/year) -z : Don't compress databases (bigger but faster, not compatible with compressed databases) -p : Only generate display - + -D : Dry run (don't write/update files to disk) + Basic usage ----------- @@ -113,6 +114,7 @@ Optional configuration values ends with *. * plugins/display/top_downloads_diff.py * plugins/display/top_downloads.py * plugins/display/top_hits.py + * plugins/display/top_pages_diff.py * plugins/display/top_pages.py * plugins/display/top_visitors.py * plugins/display/track_users.py @@ -145,7 +147,8 @@ iwla analyzed_filename domain_name locales_path - compress_output_files* + compress_output_files + excluded_ip Output files : DB_ROOT/meta.db @@ -189,11 +192,11 @@ iwla remote_addr => remote_addr remote_ip - viewed_pages - viewed_hits - not_viewed_pages - not_viewed_hits - bandwidth + viewed_pages{0..31} # 0 contains total + viewed_hits{0..31} # 0 contains total + not_viewed_pages{0..31} + not_viewed_hits{0..31} + bandwidth{0..31} last_access requests => [fields_from_format_log] @@ -573,6 +576,32 @@ plugins.display.top_hits None +plugins.display.top_pages_diff +------------------------------ + + Display hook + + Highlight new and updated pages in top_pages.html + + Plugin requirements : + display/top_pages + + Conf values needed : + None + + Output files : + None + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + plugins.display.top_pages ------------------------- 
diff --git a/docs/modules.md b/docs/modules.md index 97f32da..ee1dd11 100644 --- a/docs/modules.md +++ b/docs/modules.md @@ -12,6 +12,7 @@ * plugins/display/top_downloads_diff.py * plugins/display/top_downloads.py * plugins/display/top_hits.py + * plugins/display/top_pages_diff.py * plugins/display/top_pages.py * plugins/display/top_visitors.py * plugins/display/track_users.py @@ -44,7 +45,8 @@ iwla analyzed_filename domain_name locales_path - compress_output_files* + compress_output_files + excluded_ip Output files : DB_ROOT/meta.db @@ -88,11 +90,11 @@ iwla remote_addr => remote_addr remote_ip - viewed_pages - viewed_hits - not_viewed_pages - not_viewed_hits - bandwidth + viewed_pages{0..31} # 0 contains total + viewed_hits{0..31} # 0 contains total + not_viewed_pages{0..31} + not_viewed_hits{0..31} + bandwidth{0..31} last_access requests => [fields_from_format_log] @@ -472,6 +474,32 @@ plugins.display.top_hits None +plugins.display.top_pages_diff +------------------------------ + + Display hook + + Highlight new and updated pages in top_pages.html + + Plugin requirements : + display/top_pages + + Conf values needed : + None + + Output files : + None + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + plugins.display.top_pages ------------------------- diff --git a/iwla.py b/iwla.py index 7fe2f46..e865a7f 100755 --- a/iwla.py +++ b/iwla.py @@ -51,7 +51,8 @@ Conf values needed : analyzed_filename domain_name locales_path - compress_output_files* + compress_output_files + excluded_ip Output files : DB_ROOT/meta.db