* iwla.py * plugins/display/all_visits.py * plugins/display/browsers.py * plugins/display/feeds.py * plugins/display/filter_users.py * plugins/display/hours_stats.py * plugins/display/ip_to_geo.py * plugins/display/istats_diff.py * plugins/display/operating_systems.py * plugins/display/referers_diff.py * plugins/display/referers.py * plugins/display/robot_bandwidth.py * plugins/display/top_downloads_diff.py * plugins/display/top_downloads.py * plugins/display/top_hits.py * plugins/display/top_pages_diff.py * plugins/display/top_pages.py * plugins/display/top_visitors.py * plugins/post_analysis/browsers.py * plugins/post_analysis/feeds.py * plugins/post_analysis/filter_users.py * plugins/post_analysis/google_console_api.py * plugins/post_analysis/hours_stats.py * plugins/post_analysis/ip_to_geo.py * plugins/post_analysis/iptogeo.py * plugins/post_analysis/operating_systems.py * plugins/post_analysis/referers.py * plugins/post_analysis/reverse_dns.py * plugins/post_analysis/search_analytics_api_sample.py * plugins/post_analysis/top_downloads.py * plugins/post_analysis/top_hits.py * plugins/post_analysis/top_pages.py * plugins/pre_analysis/page_to_hit.py * plugins/pre_analysis/robots.py iwla ---- Main class IWLA Parse logs, process them, call plugins and produce output. For now, only HTTP logs are valid. Plugin requirements : None Conf values needed : analyzed_filename domain_name locales_path compress_output_files excluded_ip Output files : DB_ROOT/meta.db DB_ROOT/year/month/iwla.db OUTPUT_ROOT/index.html OUTPUT_ROOT/year/_stats.html OUTPUT_ROOT/year/month/index.html Statistics creation : meta : last_time start_analysis_time stats => year => month => viewed_bandwidth not_viewed_bandwidth viewed_pages viewed_hits nb_visits nb_visitors month_stats : viewed_bandwidth not_viewed_bandwidth viewed_pages viewed_hits nb_visits days_stats : day => viewed_bandwidth not_viewed_bandwidth viewed_pages viewed_hits nb_visits nb_visitors visits : remote_addr => remote_addr remote_ip 
viewed_pages{0..31} # 0 contains total viewed_hits{0..31} # 0 contains total not_viewed_pages{0..31} not_viewed_hits{0..31} bandwidth{0..31} last_access requests => [fields_from_format_log] extract_request => http_method http_uri http_version extract_uri extract_parameters* extract_referer* => extract_uri extract_parameters* robot hit_only is_page keep_requests valid_visitors: month_stats without robot and hit only visitors (if not conf.count_hit_only_visitors) Statistics update : None Statistics deletion : None plugins.display.all_visits -------------------------- Display hook Create All visits page Plugin requirements : None Conf values needed : display_visitor_ip* Output files : OUTPUT_ROOT/year/month/all_visits.html OUTPUT_ROOT/year/month/index.html Statistics creation : None Statistics update : None Statistics deletion : None plugins.display.browsers ------------------------ Display hook Create browsers page Plugin requirements : post_analysis/browsers Conf values needed : max_browsers_displayed* create_browsers_page* Output files : OUTPUT_ROOT/year/month/browsers.html OUTPUT_ROOT/year/month/index.html Statistics creation : None Statistics update : None Statistics deletion : None plugins.display.feeds --------------------- Display hook Display feeds parsers Plugin requirements : post_analysis/feeds Conf values needed : create_all_feeds_page* Output files : OUTPUT_ROOT/year/month/index.html OUTPUT_ROOT/year/month/all_feeds.html Statistics creation : None Statistics update : None Statistics deletion : None plugins.display.filter_users ---------------------------- Display hook Filter users Plugin requirements : None Conf values needed : create_filtered_page* Output files : OUTPUT_ROOT/year/month/index.html OUTPUT_ROOT/year/month/filtered_users.html Statistics creation : None Statistics update : None Statistics deletion : None plugins.display.hours_stats --------------------------- Display hook Display statistics by hour/week day Plugin requirements : 
post_analysis/hours_stats Conf values needed : None Output files : OUTPUT_ROOT/year/month/index.html Statistics creation : None Statistics update : None Statistics deletion : None plugins.display.ip_to_geo ------------------------- Display hook Add geo statistics Plugin requirements : post_analysis/ip_to_geo Conf values needed : create_geo_page* Output files : OUTPUT_ROOT/year/month/index.html Statistics creation : None Statistics update : None Statistics deletion : None plugins.display.istats_diff --------------------------- Display hook interface Highlight new and updated statistics Plugin requirements : None Conf values needed : None Output files : None Statistics creation : None Statistics update : None Statistics deletion : None plugins.display.operating_systems --------------------------------- Display hook Add operating systems statistics Plugin requirements : post_analysis/operating_systems Conf values needed : create_families_page* Output files : OUTPUT_ROOT/year/month/index.html Statistics creation : None Statistics update : None Statistics deletion : None plugins.display.referers_diff ----------------------------- Display hook Highlight new and updated key phrases in all_key_phrases.html Plugin requirements : display/referers Conf values needed : None Output files : None Statistics creation : None Statistics update : None Statistics deletion : None plugins.display.referers ------------------------ Display hook Create Referers page Plugin requirements : post_analysis/referers Conf values needed : max_referers_displayed* create_all_referers_page* max_key_phrases_displayed* create_all_key_phrases_page* Output files : OUTPUT_ROOT/year/month/referers.html OUTPUT_ROOT/year/month/key_phrases.html OUTPUT_ROOT/year/month/index.html Statistics creation : None Statistics update : None Statistics deletion : None plugins.display.robot_bandwidth ------------------------------- Display hook Display top 10 robot bandwidth use Plugin requirements : None Conf values needed 
: display_visitor_ip* create_all_robot_bandwidth_page* Output files : OUTPUT_ROOT/year/month/top_robots_bandwidth.html OUTPUT_ROOT/year/month/index.html Statistics creation : None Statistics update : None Statistics deletion : None plugins.display.top_downloads_diff ---------------------------------- Display hook Highlight new and updated downloads in top_downloads.html Plugin requirements : display/top_downloads Conf values needed : None Output files : None Statistics creation : None Statistics update : None Statistics deletion : None plugins.display.top_downloads ----------------------------- Display hook Create TOP downloads page Plugin requirements : post_analysis/top_downloads Conf values needed : max_downloads_displayed* create_all_downloads_page* Output files : OUTPUT_ROOT/year/month/top_downloads.html OUTPUT_ROOT/year/month/index.html Statistics creation : None Statistics update : None Statistics deletion : None plugins.display.top_hits ------------------------ Display hook Create TOP hits page Plugin requirements : post_analysis/top_hits Conf values needed : max_hits_displayed* create_all_hits_page* Output files : OUTPUT_ROOT/year/month/top_hits.html OUTPUT_ROOT/year/month/index.html Statistics creation : None Statistics update : None Statistics deletion : None plugins.display.top_pages_diff ------------------------------ Display hook Highlight new and updated pages in top_pages.html Plugin requirements : display/top_pages Conf values needed : None Output files : None Statistics creation : None Statistics update : None Statistics deletion : None plugins.display.top_pages ------------------------- Display hook Create TOP pages page Plugin requirements : post_analysis/top_pages Conf values needed : max_pages_displayed* create_all_pages_page* Output files : OUTPUT_ROOT/year/month/top_pages.html OUTPUT_ROOT/year/month/index.html Statistics creation : None Statistics update : None Statistics deletion : None plugins.display.top_visitors 
---------------------------- Display hook Create TOP visitors block Plugin requirements : None Conf values needed : display_visitor_ip* Output files : OUTPUT_ROOT/year/month/index.html Statistics creation : None Statistics update : None Statistics deletion : None plugins.post_analysis.browsers ------------------------------ Post analysis hook Detect browser information from requests Plugin requirements : None Conf values needed : None Output files : None Statistics creation : visits : remote_addr => browser month_stats : browsers => browser => count Statistics update : None Statistics deletion : None plugins.post_analysis.feeds --------------------------- Post analysis hook Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot) If there is only one hit per day to a feed, merge feeds parsers with the same user agent as it must be the same person with a different IP address. Plugin requirements : None Conf values needed : feeds feeds_referers* merge_one_hit_only_feeds_parsers* Output files : None Statistics creation : remote_addr => feed_parser Statistics update : None Statistics deletion : None plugins.post_analysis.filter_users ---------------------------------- Post analysis hook Filter users with given user conditions Plugin requirements : None Conf values needed : filtered_users : list of filters filtered_ip : list of ip (string) create_filtered_page* Filter can be a function or a list of filter description combined by AND operator Filter description can be a function or a list of 3 elements : * Field to match in visits * Operator '=', '==', '!=', '>', '>=', '<', '<=' for int value * Operator '=', '==', '!=', 'in', 'match' for str value * Target value For easier configuration, you can use either 'remote_addr' or 'ip' as the field element function prototype is func(iwla, hit) and must return True or False Example : def my_filter(iwla, hit): return True filtered_users = [ [['viewed_pages', '>=', '5'], ['viewed_hits', '>=', '5']], 
[['viewed_hits', '>=', '5'], my_filter], my_filter, ] Output files : None Statistics creation : visits : remote_addr => filtered geo_location Statistics update : visits : remote_addr => keep_requests Statistics deletion : None plugins.post_analysis.google_console_api ---------------------------------------- Post analysis hook Extract key phrases from Google console API Plugin requirements : None Conf values needed : domain_name Output files : None Statistics creation : None Statistics update : month_stats : key_phrases => phrase => count Statistics deletion : None plugins.post_analysis.hours_stats --------------------------------- Post analysis hook Count pages, hits and bandwidth by hour/week day Plugin requirements : None Conf values needed : None Output files : None Statistics creation : month_stats: hours_stats => 00 .. 23 => pages hits bandwidth days_stats => 0 .. 6 => pages hits bandwidth Statistics update : None Statistics deletion : None plugins.post_analysis.ip_to_geo ------------------------------- Post analysis hook Get country code from IP address Plugin requirements : None Conf values needed : iptogeo_remote_addr* iptogeo_remote_port* Output files : None Statistics creation : geo => country_code => count None Statistics update : valid_visitors: country_code Statistics deletion : None plugins.post_analysis.iptogeo ----------------------------- plugins.post_analysis.operating_systems --------------------------------------- Post analysis hook Detect operating systems from requests Plugin requirements : None Conf values needed : None Output files : None Statistics creation : visits : remote_addr => operating_system month_stats : operating_systems => operating_system => count os_families => family => count Statistics update : None Statistics deletion : None plugins.post_analysis.referers ------------------------------ Post analysis hook Extract referers and key phrases from requests Plugin requirements : None Conf values needed : domain_name Output files : 
None Statistics creation : None Statistics update : month_stats : referers => pages => count hits => count robots_referers => pages => count hits => count search_engine_referers => pages => count hits => count key_phrases => phrase => count Statistics deletion : None plugins.post_analysis.reverse_dns --------------------------------- Post analysis hook Replace IP by reverse DNS names Plugin requirements : None Conf values needed : reverse_dns_timeout* Output files : None Statistics creation : None Statistics update : valid_visitors: remote_addr dns_name_replaced dns_analyzed Statistics deletion : None plugins.post_analysis.search_analytics_api_sample ------------------------------------------------- from __future__ import print_function import argparse import sys from googleapiclient import sample_tools # Declare command-line flags. argparser = argparse.ArgumentParser(add_help=False) argparser.add_argument('property_uri', type=str, help=('Site or app URI to query data for (including ' 'trailing slash).')) argparser.add_argument('start_date', type=str, help=('Start date of the requested date range in ' 'YYYY-MM-DD format.')) argparser.add_argument('end_date', type=str, help=('End date of the requested date range in ' 'YYYY-MM-DD format.')) def main(argv): service, flags = sample_tools.init( argv, 'webmasters', 'v3', __doc__, __file__, parents=[argparser], scope='https://www.googleapis.com/auth/webmasters.readonly') # First run a query to learn which dates we have data for. You should always # check which days in a date range have data before running your main query. # This query shows data for the entire range, grouped and sorted by day, # descending; any days without data will be missing from the results. request = { 'startDate': flags.start_date, 'endDate': flags.end_date, 'dimensions': ['query'] } response = execute_request(service, flags.property_uri, request) print_table(response, 'Available dates') return # Get totals for the date range. 
request = { 'startDate': flags.start_date, 'endDate': flags.end_date } response = execute_request(service, flags.property_uri, request) print_table(response, 'Totals') # Get top 10 queries for the date range, sorted by click count, descending. request = { 'startDate': flags.start_date, 'endDate': flags.end_date, 'dimensions': ['query'], 'rowLimit': 10 } response = execute_request(service, flags.property_uri, request) print_table(response, 'Top Queries') # Get top 11-20 mobile queries for the date range, sorted by click count, descending. request = { 'startDate': flags.start_date, 'endDate': flags.end_date, 'dimensions': ['query'], 'dimensionFilterGroups': [{ 'filters': [{ 'dimension': 'device', 'expression': 'mobile' }] }], 'rowLimit': 10, 'startRow': 10 } response = execute_request(service, flags.property_uri, request) print_table(response, 'Top 11-20 Mobile Queries') # Get top 10 pages for the date range, sorted by click count, descending. request = { 'startDate': flags.start_date, 'endDate': flags.end_date, 'dimensions': ['page'], 'rowLimit': 10 } response = execute_request(service, flags.property_uri, request) print_table(response, 'Top Pages') # Get the top 10 queries in India, sorted by click count, descending. request = { 'startDate': flags.start_date, 'endDate': flags.end_date, 'dimensions': ['query'], 'dimensionFilterGroups': [{ 'filters': [{ 'dimension': 'country', 'expression': 'ind' }] }], 'rowLimit': 10 } response = execute_request(service, flags.property_uri, request) print_table(response, 'Top queries in India') # Group by both country and device. request = { 'startDate': flags.start_date, 'endDate': flags.end_date, 'dimensions': ['country', 'device'], 'rowLimit': 10 } response = execute_request(service, flags.property_uri, request) print_table(response, 'Group by country and device') # Group by total number of Search Appearance count. # Note: It is not possible to use searchAppearance with other # dimensions. 
request = { 'startDate': flags.start_date, 'endDate': flags.end_date, 'dimensions': ['searchAppearance'], 'rowLimit': 10 } response = execute_request(service, flags.property_uri, request) print_table(response, 'Search Appearance Features') def execute_request(service, property_uri, request): """Executes a searchAnalytics.query request. Args: service: The webmasters service to use when executing the query. property_uri: The site or app URI to request data for. request: The request to be executed. Returns: An array of response rows. plugins.post_analysis.top_downloads ----------------------------------- Post analysis hook Count TOP downloads Plugin requirements : None Conf values needed : None Output files : None Statistics creation : None Statistics update : month_stats: top_downloads => uri => count Statistics deletion : None plugins.post_analysis.top_hits ------------------------------ Post analysis hook Count TOP hits Plugin requirements : None Conf values needed : None Output files : None Statistics creation : None Statistics update : month_stats: top_hits => uri => count Statistics deletion : None plugins.post_analysis.top_pages ------------------------------- Post analysis hook Count TOP pages Plugin requirements : None Conf values needed : None Output files : None Statistics creation : None Statistics update : month_stats: top_pages => uri => count Statistics deletion : None plugins.pre_analysis.page_to_hit -------------------------------- Pre analysis hook Change page into hit and hit into page into statistics Plugin requirements : None Conf values needed : page_to_hit_conf* hit_to_page_conf* Output files : None Statistics creation : None Statistics update : visits : remote_addr => is_page Statistics deletion : None plugins.pre_analysis.robots --------------------------- Pre analysis hook Filter robots Plugin requirements : None Conf values needed : None Output files : None Statistics creation : None Statistics update : visits : remote_addr => robot 
keep_requests Statistics deletion : None