diff --git a/.gitignore b/.gitignore index e03b74e..d4a3073 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *~ *.pyc -*.gz \ No newline at end of file +*.gz +output +output_db diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..3fb03fa --- /dev/null +++ b/ChangeLog @@ -0,0 +1,29 @@ +v0.3 (13/07/2015) +** User ** + Add referers_diff display plugin + Add year statistics in month details + Add analysis duration + Add browsers detection + Add operating systems detection + Add track users plugin + Add feeds plugin + Add _append feature to conf.py + Add hours_stats plugin + Add display/top_downloads_diff plugin + Can specify multiple files to analyze + Add reset feature + Add gz files support + Add -z option (don't compress databases) + Add own search engines files + +** Dev ** + Add istats_diff interface + Sort documentation output + Add debug traces in robots plugin + Update awstats data + +** Bugs ** + Forgot tag + Bad UTC time computation + Hits/pages in the same second were not analyzed + Last day of month was skipped diff --git a/awstats_data.py b/awstats_data.py index 8789384..33157e1 100644 --- a/awstats_data.py +++ b/awstats_data.py @@ -1,12 +1,26 @@ -robots = ['appie', 'architext', 'jeeves', 'bjaaland', 'contentmatch', 'ferret', 'googlebot', 'google\-sitemaps', 'gulliver', 'virus[_+ ]detector', 'harvest', 'htdig', 'linkwalker', 'lilina', 'lycos[_+ ]', 'moget', 'muscatferret', 'myweb', 'nomad', 'scooter', 'slurp', '^voyager\/', 'weblayers', 'antibot', 'bruinbot', 'digout4u', 'echo!', 'fast\-webcrawler', 'ia_archiver\-web\.archive\.org', 'ia_archiver', 'jennybot', 'mercator', 'netcraft', 'msnbot\-media', 'msnbot', 'petersnews', 'relevantnoise\.com', 'unlost_web_crawler', 'voila', 'webbase', 'webcollage', 'cfetch', 'zyborg', 'wisenutbot', '[^a]fish', 'abcdatos', 'acme\.spider', 'ahoythehomepagefinder', 'alkaline', 'anthill', 'arachnophilia', 'arale', 'araneo', 'aretha', 'ariadne', 'powermarks', 'arks', 'aspider', 'atn\.txt', 'atomz', 
'auresys', 'backrub', 'bbot', 'bigbrother', 'blackwidow', 'blindekuh', 'bloodhound', 'borg\-bot', 'brightnet', 'bspider', 'cactvschemistryspider', 'calif[^r]', 'cassandra', 'cgireader', 'checkbot', 'christcrawler', 'churl', 'cienciaficcion', 'collective', 'combine', 'conceptbot', 'coolbot', 'core', 'cosmos', 'cruiser', 'cusco', 'cyberspyder', 'desertrealm', 'deweb', 'dienstspider', 'digger', 'diibot', 'direct_hit', 'dnabot', 'download_express', 'dragonbot', 'dwcp', 'e\-collector', 'ebiness', 'elfinbot', 'emacs', 'emcspider', 'esther', 'evliyacelebi', 'fastcrawler', 'feedcrawl', 'fdse', 'felix', 'fetchrover', 'fido', 'finnish', 'fireball', 'fouineur', 'francoroute', 'freecrawl', 'funnelweb', 'gama', 'gazz', 'gcreep', 'getbot', 'geturl', 'golem', 'gougou', 'grapnel', 'griffon', 'gromit', 'gulperbot', 'hambot', 'havindex', 'hometown', 'htmlgobble', 'hyperdecontextualizer', 'iajabot', 'iaskspider', 'hl_ftien_spider', 'sogou', 'iconoclast', 'ilse', 'imagelock', 'incywincy', 'informant', 'infoseek', 'infoseeksidewinder', 'infospider', 'inspectorwww', 'intelliagent', 'irobot', 'iron33', 'israelisearch', 'javabee', 'jbot', 'jcrawler', 'jobo', 'jobot', 'joebot', 'jubii', 'jumpstation', 'kapsi', 'katipo', 'kilroy', 'ko[_+ ]yappo[_+ ]robot', 'kummhttp', 'labelgrabber\.txt', 'larbin', 'legs', 'linkidator', 'linkscan', 'lockon', 'logo_gif', 'macworm', 'magpie', 'marvin', 'mattie', 'mediafox', 'merzscope', 'meshexplorer', 'mindcrawler', 'mnogosearch', 'momspider', 'monster', 'motor', 'muncher', 'mwdsearch', 'ndspider', 'nederland\.zoek', 'netcarta', 'netmechanic', 'netscoop', 'newscan\-online', 'nhse', 'northstar', 'nzexplorer', 'objectssearch', 'occam', 'octopus', 'openfind', 'orb_search', 'packrat', 'pageboy', 'parasite', 'patric', 'pegasus', 'perignator', 'perlcrawler', 'phantom', 'phpdig', 'piltdownman', 'pimptrain', 'pioneer', 'pitkow', 'pjspider', 'plumtreewebaccessor', 'poppi', 'portalb', 'psbot', 'python', 'raven', 'rbse', 'resumerobot', 'rhcs', 'road_runner', 'robbie', 
'robi', 'robocrawl', 'robofox', 'robozilla', 'roverbot', 'rules', 'safetynetrobot', 'search\-info', 'search_au', 'searchprocess', 'senrigan', 'sgscout', 'shaggy', 'shaihulud', 'sift', 'simbot', 'site\-valet', 'sitetech', 'skymob', 'slcrawler', 'smartspider', 'snooper', 'solbot', 'speedy', 'spider[_+ ]monkey', 'spiderbot', 'spiderline', 'spiderman', 'spiderview', 'spry', 'sqworm', 'ssearcher', 'suke', 'sunrise', 'suntek', 'sven', 'tach_bw', 'tagyu_agent', 'tailrank', 'tarantula', 'tarspider', 'techbot', 'templeton', 'titan', 'titin', 'tkwww', 'tlspider', 'ucsd', 'udmsearch', 'universalfeedparser', 'urlck', 'valkyrie', 'verticrawl', 'victoria', 'visionsearch', 'voidbot', 'vwbot', 'w3index', 'w3m2', 'wallpaper', 'wanderer', 'wapspIRLider', 'webbandit', 'webcatcher', 'webcopy', 'webfetcher', 'webfoot', 'webinator', 'weblinker', 'webmirror', 'webmoose', 'webquest', 'webreader', 'webreaper', 'websnarf', 'webspider', 'webvac', 'webwalk', 'webwalker', 'webwatch', 'whatuseek', 'whowhere', 'wired\-digital', 'wmir', 'wolp', 'wombat', 'wordpress', 'worm', 'woozweb', 'wwwc', 'wz101', 'xget', '1\-more_scanner', 'accoona\-ai\-agent', 'activebookmark', 'adamm_bot', 'almaden', 'aipbot', 'aleadsoftbot', 'alpha_search_agent', 'allrati', 'aport', 'archive\.org_bot', 'argus', 'arianna\.libero\.it', 'aspseek', 'asterias', 'awbot', 'baiduspider', 'becomebot', 'bender', 'betabot', 'biglotron', 'bittorrent_bot', 'biz360[_+ ]spider', 'blogbridge[_+ ]service', 'bloglines', 'blogpulse', 'blogsearch', 'blogshares', 'blogslive', 'blogssay', 'bncf\.firenze\.sbn\.it\/raccolta\.txt', 'bobby', 'boitho\.com\-dc', 'bookmark\-manager', 'boris', 'bumblebee', 'candlelight[_+ ]favorites[_+ ]inspector', 'cbn00glebot', 'cerberian_drtrs', 'cfnetwork', 'cipinetbot', 'checkweb_link_validator', 'commons\-httpclient', 'computer_and_automation_research_institute_crawler', 'converamultimediacrawler', 'converacrawler', 'cscrawler', 'cse_html_validator_lite_online', 'cuasarbot', 'cursor', 'custo', 
'datafountains\/dmoz_downloader', 'daviesbot', 'daypopbot', 'deepindex', 'dipsie\.bot', 'dnsgroup', 'domainchecker', 'domainsdb\.net', 'dulance', 'dumbot', 'dumm\.de\-bot', 'earthcom\.info', 'easydl', 'edgeio\-retriever', 'ets_v', 'exactseek', 'extreme[_+ ]picture[_+ ]finder', 'eventax', 'everbeecrawler', 'everest\-vulcan', 'ezresult', 'enteprise', 'facebook', 'fast_enterprise_crawler.*crawleradmin\.t\-info@telekom\.de', 'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de', 'matrix_s\.p\.a\._\-_fast_enterprise_crawler', 'fast_enterprise_crawler', 'fast\-search\-engine', 'favicon', 'favorg', 'favorites_sweeper', 'feedburner', 'feedfetcher\-google', 'feedflow', 'feedster', 'feedsky', 'feedvalidator', 'filmkamerabot', 'findlinks', 'findexa_crawler', 'fooky\.com\/ScorpionBot', 'g2crawler', 'gaisbot', 'geniebot', 'gigabot', 'girafabot', 'global_fetch', 'gnodspider', 'goforit\.com', 'goforitbot', 'gonzo', 'grub', 'gpu_p2p_crawler', 'henrythemiragorobot', 'heritrix', 'holmes', 'hoowwwer', 'hpprint', 'htmlparser', 'html[_+ ]link[_+ ]validator', 'httrack', 'hundesuche\.com\-bot', 'ichiro', 'iltrovatore\-setaccio', 'infobot', 'infociousbot', 'infomine', 'insurancobot', 'internet[_+ ]ninja', 'internetarchive', 'internetseer', 'internetsupervision', 'irlbot', 'isearch2006', 'iupui_research_bot', 'jrtwine[_+ ]software[_+ ]check[_+ ]favorites[_+ ]utility', 'justview', 'kalambot', 'kamano\.de_newsfeedverzeichnis', 'kazoombot', 'kevin', 'keyoshid', 'kinjabot', 'kinja\-imagebot', 'knowitall', 'knowledge\.com', 'kouaa_krawler', 'krugle', 'ksibot', 'kurzor', 'lanshanbot', 'letscrawl\.com', 'libcrawl', 'linkbot', 'link_valet_online', 'metager\-linkchecker', 'linkchecker', 'livejournal\.com', 'lmspider', 'lwp\-request', 'lwp\-trivial', 'magpierss', 'mail\.ru', 'mapoftheinternet\.com', 'mediapartners\-google', 'megite', 'metaspinner', 'microsoft[_+ ]url[_+ ]control', 'mini\-reptile', 'minirank', 'missigua_locator', 'misterbot', 'miva', 'mizzu_labs', 'mj12bot', 
'mojeekbot', 'msiecrawler', 'ms_search_4\.0_robot', 'msrabot', 'msrbot', 'mt::telegraph::agent', 'nagios', 'nasa_search', 'mydoyouhike', 'netluchs', 'netsprint', 'newsgatoronline', 'nicebot', 'nimblecrawler', 'noxtrumbot', 'npbot', 'nutchcvs', 'nutchosu\-vlib', 'nutch', 'ocelli', 'octora_beta_bot', 'omniexplorer[_+ ]bot', 'onet\.pl[_+ ]sa', 'onfolio', 'opentaggerbot', 'openwebspider', 'oracle_ultra_search', 'orbiter', 'yodaobot', 'qihoobot', 'passwordmaker\.org', 'pear_http_request_class', 'peerbot', 'perman', 'php[_+ ]version[_+ ]tracker', 'pictureofinternet', 'ping\.blo\.gs', 'plinki', 'pluckfeedcrawler', 'pogodak', 'pompos', 'popdexter', 'port_huron_labs', 'postfavorites', 'projectwf\-java\-test\-crawler', 'proodlebot', 'pyquery', 'rambler', 'redalert', 'rojo', 'rssimagesbot', 'ruffle', 'rufusbot', 'sandcrawler', 'sbider', 'schizozilla', 'scumbot', 'searchguild[_+ ]dmoz[_+ ]experiment', 'seekbot', 'sensis_web_crawler', 'seznambot', 'shim\-crawler', 'shoutcast', 'slysearch', 'snap\.com_beta_crawler', 'sohu\-search', 'sohu', 'snappy', 'sphere_scout', 'spip', 'sproose_crawler', 'steeler', 'steroid__download', 'suchfin\-bot', 'superbot', 'surveybot', 'susie', 'syndic8', 'syndicapi', 'synoobot', 'tcl_http_client_package', 'technoratibot', 'teragramcrawlersurf', 'test_crawler', 'testbot', 't\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e', 'topicblogs', 'turnitinbot', 'turtlescanner', 'turtle', 'tutorgigbot', 'twiceler', 'ubicrawler', 'ultraseek', 'unchaos_bot_hybrid_web_search_engine', 'unido\-bot', 'updated', 'ustc\-semantic\-group', 'vagabondo\-wap', 'vagabondo', 'vermut', 'versus_crawler_from_eda\.baykan@epfl\.ch', 'vespa_crawler', 'vortex', 'vse\/', 'w3c\-checklink', 'w3c[_+ ]css[_+ ]validator[_+ ]jfouffa', 'w3c_validator', 'watchmouse', 'wavefire', 'webclipping\.com', 'webcompass', 'webcrawl\.net', 'web_downloader', 'webdup', 'webfilter', 'webindexer', 'webminer', 'website[_+ ]monitoring[_+ ]bot', 'webvulncrawl', 'wells_search', 'wonderer', 'wume_crawler', 'wwweasel', 
'xenu\'s_link_sleuth', 'xenu_link_sleuth', 'xirq', 'y!j', 'yacy', 'yahoo\-blogs', 'yahoo\-verticalcrawler', 'yahoofeedseeker', 'yahooseeker\-testing', 'yahooseeker', 'yahoo\-mmcrawler', 'yahoo!_mindset', 'yandex', 'flexum', 'yanga', 'yooglifetchagent', 'z\-add_link_checker', 'zealbot', 'zhuaxia', 'zspider', 'zeus', 'ng\/1\.', 'ng\/2\.', 'exabot', 'wget', 'libwww', 'java\/[0-9]'] +#This file was automatically generated by iwla_convert.pl. Do not edit manually. + +robots = ['appie', 'architext', 'bingpreview', 'bjaaland', 'contentmatch', 'ferret', 'googlebot', 'google\-sitemaps', 'google[_+ ]web[_+ ]preview', 'gulliver', 'virus[_+ ]detector', 'harvest', 'htdig', 'jeeves', 'linkwalker', 'lilina', 'lycos[_+ ]', 'moget', 'muscatferret', 'myweb', 'nomad', 'scooter', 'slurp', '^voyager\/', 'weblayers', 'antibot', 'bruinbot', 'digout4u', 'echo!', 'fast\-webcrawler', 'ia_archiver\-web\.archive\.org', 'ia_archiver', 'jennybot', 'mercator', 'netcraft', 'msnbot\-media', 'msnbot', 'petersnews', 'relevantnoise\.com', 'unlost_web_crawler', 'voila', 'webbase', 'webcollage', 'cfetch', 'zyborg', 'wisenutbot', '[^a]fish', 'abcdatos', 'acme\.spider', 'ahoythehomepagefinder', 'alkaline', 'anthill', 'arachnophilia', 'arale', 'araneo', 'aretha', 'ariadne', 'powermarks', 'arks', 'aspider', 'atn\.txt', 'atomz', 'auresys', 'backrub', 'bbot', 'bigbrother', 'blackwidow', 'blindekuh', 'bloodhound', 'borg\-bot', 'brightnet', 'bspider', 'cactvschemistryspider', 'calif[^r]', 'cassandra', 'cgireader', 'checkbot', 'christcrawler', 'churl', 'cienciaficcion', 'collective', 'combine', 'conceptbot', 'coolbot', 'core', 'cosmos', 'cruiser', 'cusco', 'cyberspyder', 'desertrealm', 'deweb', 'dienstspider', 'digger', 'diibot', 'direct_hit', 'dnabot', 'download_express', 'dragonbot', 'dwcp', 'e\-collector', 'ebiness', 'elfinbot', 'emacs', 'emcspider', 'esther', 'evliyacelebi', 'fastcrawler', 'feedcrawl', 'fdse', 'felix', 'fetchrover', 'fido', 'finnish', 'fireball', 'fouineur', 'francoroute', 'freecrawl', 
'funnelweb', 'gama', 'gazz', 'gcreep', 'getbot', 'geturl', 'golem', 'gougou', 'grapnel', 'griffon', 'gromit', 'gulperbot', 'hambot', 'havindex', 'hometown', 'htmlgobble', 'hyperdecontextualizer', 'iajabot', 'iaskspider', 'hl_ftien_spider', 'sogou', 'iconoclast', 'ilse', 'imagelock', 'incywincy', 'informant', 'infoseek', 'infoseeksidewinder', 'infospider', 'inspectorwww', 'intelliagent', 'irobot', 'iron33', 'israelisearch', 'javabee', 'jbot', 'jcrawler', 'jobo', 'jobot', 'joebot', 'jubii', 'jumpstation', 'kapsi', 'katipo', 'kilroy', 'ko[_+ ]yappo[_+ ]robot', 'kummhttp', 'labelgrabber\.txt', 'larbin', 'legs', 'linkidator', 'linkscan', 'lockon', 'logo_gif', 'macworm', 'magpie', 'marvin', 'mattie', 'mediafox', 'merzscope', 'meshexplorer', 'mindcrawler', 'mnogosearch', 'momspider', 'monster', 'motor', 'muncher', 'mwdsearch', 'ndspider', 'nederland\.zoek', 'netcarta', 'netmechanic', 'netscoop', 'newscan\-online', 'nhse', 'northstar', 'nzexplorer', 'objectssearch', 'occam', 'octopus', 'openfind', 'orb_search', 'packrat', 'pageboy', 'parasite', 'patric', 'pegasus', 'perignator', 'perlcrawler', 'phantom', 'phpdig', 'piltdownman', 'pimptrain', 'pioneer', 'pitkow', 'pjspider', 'plumtreewebaccessor', 'poppi', 'portalb', 'psbot', 'python', 'raven', 'rbse', 'resumerobot', 'rhcs', 'road_runner', 'robbie', 'robi', 'robocrawl', 'robofox', 'robozilla', 'roverbot', 'rules', 'safetynetrobot', 'search\-info', 'search_au', 'searchprocess', 'senrigan', 'sgscout', 'shaggy', 'shaihulud', 'sift', 'simbot', 'site\-valet', 'sitetech', 'skymob', 'slcrawler', 'smartspider', 'snooper', 'solbot', 'speedy', 'spider[_+ ]monkey', 'spiderbot', 'spiderline', 'spiderman', 'spiderview', 'spry', 'sqworm', 'ssearcher', 'suke', 'sunrise', 'suntek', 'sven', 'tach_bw', 'tagyu_agent', 'tailrank', 'tarantula', 'tarspider', 'techbot', 'templeton', 'titan', 'titin', 'tkwww', 'tlspider', 'ucsd', 'udmsearch', 'universalfeedparser', 'urlck', 'valkyrie', 'verticrawl', 'victoria', 'visionsearch', 'voidbot', 'vwbot', 
'w3index', 'w3m2', 'wallpaper', 'wanderer', 'wapspIRLider', 'webbandit', 'webcatcher', 'webcopy', 'webfetcher', 'webfoot', 'webinator', 'weblinker', 'webmirror', 'webmoose', 'webquest', 'webreader', 'webreaper', 'websnarf', 'webspider', 'webvac', 'webwalk', 'webwalker', 'webwatch', 'whatuseek', 'whowhere', 'wired\-digital', 'wmir', 'wolp', 'wombat', 'wordpress', 'worm', 'woozweb', 'wwwc', 'wz101', 'xget', '1\-more_scanner', 'accoona\-ai\-agent', 'activebookmark', 'adamm_bot', 'almaden', 'aipbot', 'aleadsoftbot', 'alpha_search_agent', 'allrati', 'aport', 'archive\.org_bot', 'argus', 'arianna\.libero\.it', 'aspseek', 'asterias', 'awbot', 'baiduspider', 'becomebot', 'bender', 'betabot', 'biglotron', 'bittorrent_bot', 'biz360[_+ ]spider', 'blogbridge[_+ ]service', 'bloglines', 'blogpulse', 'blogsearch', 'blogshares', 'blogslive', 'blogssay', 'bncf\.firenze\.sbn\.it\/raccolta\.txt', 'bobby', 'boitho\.com\-dc', 'bookmark\-manager', 'boris', 'bumblebee', 'candlelight[_+ ]favorites[_+ ]inspector', 'cbn00glebot', 'cerberian_drtrs', 'cfnetwork', 'cipinetbot', 'checkweb_link_validator', 'commons\-httpclient', 'computer_and_automation_research_institute_crawler', 'converamultimediacrawler', 'converacrawler', 'cscrawler', 'cse_html_validator_lite_online', 'cuasarbot', 'cursor', 'custo', 'datafountains\/dmoz_downloader', 'daviesbot', 'daypopbot', 'deepindex', 'dipsie\.bot', 'dnsgroup', 'domainchecker', 'domainsdb\.net', 'dulance', 'dumbot', 'dumm\.de\-bot', 'earthcom\.info', 'easydl', 'edgeio\-retriever', 'ets_v', 'exactseek', 'extreme[_+ ]picture[_+ ]finder', 'eventax', 'everbeecrawler', 'everest\-vulcan', 'ezresult', 'enteprise', 'facebook', 'fast_enterprise_crawler.*crawleradmin\.t\-info@telekom\.de', 'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de', 'matrix_s\.p\.a\._\-_fast_enterprise_crawler', 'fast_enterprise_crawler', 'fast\-search\-engine', 'favicon', 'favorg', 'favorites_sweeper', 'feedburner', 'feedfetcher\-google', 'feedflow', 
'feedster', 'feedsky', 'feedvalidator', 'filmkamerabot', 'findlinks', 'findexa_crawler', 'fooky\.com\/ScorpionBot', 'g2crawler', 'gaisbot', 'geniebot', 'gigabot', 'girafabot', 'global_fetch', 'gnodspider', 'goforit\.com', 'goforitbot', 'gonzo', 'grub', 'gpu_p2p_crawler', 'henrythemiragorobot', 'heritrix', 'holmes', 'hoowwwer', 'hpprint', 'htmlparser', 'html[_+ ]link[_+ ]validator', 'httrack', 'hundesuche\.com\-bot', 'ichiro', 'iltrovatore\-setaccio', 'infobot', 'infociousbot', 'infomine', 'insurancobot', 'internet[_+ ]ninja', 'internetarchive', 'internetseer', 'internetsupervision', 'irlbot', 'isearch2006', 'iupui_research_bot', 'jrtwine[_+ ]software[_+ ]check[_+ ]favorites[_+ ]utility', 'justview', 'kalambot', 'kamano\.de_newsfeedverzeichnis', 'kazoombot', 'kevin', 'keyoshid', 'kinjabot', 'kinja\-imagebot', 'knowitall', 'knowledge\.com', 'kouaa_krawler', 'krugle', 'ksibot', 'kurzor', 'lanshanbot', 'letscrawl\.com', 'libcrawl', 'linkbot', 'link_valet_online', 'metager\-linkchecker', 'linkchecker', 'livejournal\.com', 'lmspider', 'lwp\-request', 'lwp\-trivial', 'magpierss', 'mail\.ru', 'mapoftheinternet\.com', 'mediapartners\-google', 'megite', 'metaspinner', 'microsoft[_+ ]url[_+ ]control', 'mini\-reptile', 'minirank', 'missigua_locator', 'misterbot', 'miva', 'mizzu_labs', 'mj12bot', 'mojeekbot', 'msiecrawler', 'ms_search_4\.0_robot', 'msrabot', 'msrbot', 'mt::telegraph::agent', 'nagios', 'nasa_search', 'mydoyouhike', 'netluchs', 'netsprint', 'newsgatoronline', 'nicebot', 'nimblecrawler', 'noxtrumbot', 'npbot', 'nutchcvs', 'nutchosu\-vlib', 'nutch', 'ocelli', 'octora_beta_bot', 'omniexplorer[_+ ]bot', 'onet\.pl[_+ ]sa', 'onfolio', 'opentaggerbot', 'openwebspider', 'oracle_ultra_search', 'orbiter', 'yodaobot', 'qihoobot', 'passwordmaker\.org', 'pear_http_request_class', 'peerbot', 'perman', 'php[_+ ]version[_+ ]tracker', 'pictureofinternet', 'ping\.blo\.gs', 'plinki', 'pluckfeedcrawler', 'pogodak', 'pompos', 'popdexter', 'port_huron_labs', 'postfavorites', 
'projectwf\-java\-test\-crawler', 'proodlebot', 'pyquery', 'rambler', 'redalert', 'rojo', 'rssimagesbot', 'ruffle', 'rufusbot', 'sandcrawler', 'sbider', 'schizozilla', 'scumbot', 'searchguild[_+ ]dmoz[_+ ]experiment', 'seekbot', 'sensis_web_crawler', 'seznambot', 'shim\-crawler', 'shoutcast', 'slysearch', 'snap\.com_beta_crawler', 'sohu\-search', 'sohu', 'snappy', 'sphere_scout', 'spip', 'sproose_crawler', 'steeler', 'steroid__download', 'suchfin\-bot', 'superbot', 'surveybot', 'susie', 'syndic8', 'syndicapi', 'synoobot', 'tcl_http_client_package', 'technoratibot', 'teragramcrawlersurf', 'test_crawler', 'testbot', 't\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e', 'topicblogs', 'turnitinbot', 'turtlescanner', 'turtle', 'tutorgigbot', 'twiceler', 'ubicrawler', 'ultraseek', 'unchaos_bot_hybrid_web_search_engine', 'unido\-bot', 'updated', 'ustc\-semantic\-group', 'vagabondo\-wap', 'vagabondo', 'vermut', 'versus_crawler_from_eda\.baykan@epfl\.ch', 'vespa_crawler', 'vortex', 'vse\/', 'w3c\-checklink', 'w3c[_+ ]css[_+ ]validator[_+ ]jfouffa', 'w3c_validator', 'watchmouse', 'wavefire', 'webclipping\.com', 'webcompass', 'webcrawl\.net', 'web_downloader', 'webdup', 'webfilter', 'webindexer', 'webminer', 'website[_+ ]monitoring[_+ ]bot', 'webvulncrawl', 'wells_search', 'wonderer', 'wume_crawler', 'wwweasel', 'xenu\'s_link_sleuth', 'xenu_link_sleuth', 'xirq', 'y!j', 'yacy', 'yahoo\-blogs', 'yahoo\-verticalcrawler', 'yahoofeedseeker', 'yahooseeker\-testing', 'yahooseeker', 'yahoo\-mmcrawler', 'yahoo!_mindset', 'yandex', 'flexum', 'yanga', 'yooglifetchagent', 'z\-add_link_checker', 'zealbot', 'zhuaxia', 'zspider', 'zeus', 'ng\/1\.', 'ng\/2\.', 'exabot', '^[1-3]$', 'alltop', 'applesyndication', 'asynchttpclient', 'bingbot', 'blogged_crawl', 'bloglovin', 'butterfly', 'buzztracker', 'carpathia', 'catbot', 'chattertrap', 'check_http', 'coldfusion', 'covario', 'daylifefeedfetcher', 'discobot', 'dlvr\.it', 'dreamwidth', 'drupal', 'ezoom', 'feedmyinbox', 'feedroll\.com', 'feedzira', 'fever\/', 
'freenews', 'geohasher', 'hanrss', 'inagist', 'jacobin club', 'jakarta', 'js\-kit', 'largesmall crawler', 'linkedinbot', 'longurl', 'metauri', 'microsoft\-webdav\-miniredir', '^motorola$', 'movabletype', '^mozilla\/3\.0 \(compatible$', '^mozilla\/4\.0$', '^mozilla\/4\.0 \(compatible;\)$', '^mozilla\/5\.0$', '^mozilla\/5\.0 \(compatible;$', '^mozilla\/5\.0 \(en\-us\)$', '^mozilla\/5\.0 firefox\/3\.0\.5$', '^msie', 'netnewswire', ' netseer ', 'netvibes', 'newrelicpinger', 'newsfox', 'nextgensearchbot', 'ning', 'pingdom', 'pita', 'postpost', 'postrank', 'printfulbot', 'protopage', 'proximic', 'quipply', 'r6\_', 'ratingburner', 'regator', 'rome client', 'rpt\-httpclient', 'rssgraffiti', 'sage\+\+', 'scoutjet', 'simplepie', 'sitebot', 'summify\.com', 'superfeedr', 'synthesio', 'teoma', 'topblogsinfo', 'topix\.net', 'trapit', 'trileet', 'tweetedtimes', 'twisted pagegetter', 'twitterbot', 'twitterfeed', 'unwindfetchor', 'wazzup', 'windows\-rss\-platform', 'wiumi', 'xydo', 'yahoo! slurp', 'yahoo pipes', 'yahoo\-newscrawler', 'yahoocachesystem', 'yahooexternalcache', 'yahoo! 
searchmonkey', 'yahooysmcm', 'yammer', 'yandexbot', 'yeti', 'yie8', 'youdao', 'yourls', 'zemanta', 'zend_http_client', 'wget', 'libwww', '^java\/[0-9]'] search_engines = ['google\.[\w.]+/products', 'base\.google\.', 'froogle\.google\.', 'groups\.google\.', 'images\.google\.', 'google\.', 'googlee\.', 'googlecom\.com', 'goggle\.co\.hu', '216\.239\.(35|37|39|51)\.100', '216\.239\.(35|37|39|51)\.101', '216\.239\.5[0-9]\.104', '64\.233\.1[0-9]{2}\.104', '66\.102\.[1-9]\.104', '66\.249\.93\.104', '72\.14\.2[0-9]{2}\.104', 'msn\.', 'live\.com', 'bing\.', 'voila\.', 'mindset\.research\.yahoo', 'yahoo\.', '(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)', 'search\.aol\.co', 'tiscali\.', 'lycos\.', 'alexa\.com', 'alltheweb\.com', 'altavista\.', 'a9\.com', 'dmoz\.org', 'netscape\.', 'search\.terra\.', 'www\.search\.com', 'search\.sli\.sympatico\.ca', 'excite\.'] -search_engines_2 = ['4\-counter\.com', 'att\.net', 'bungeebonesdotcom', 'northernlight\.', 'hotbot\.', 'kvasir\.', 'webcrawler\.', 'metacrawler\.', 'go2net\.com', '(^|\.)go\.com', 'euroseek\.', 'looksmart\.', 'spray\.', 'nbci\.com\/search', 'de\.ask.\com', 'es\.ask.\com', 'fr\.ask.\com', 'it\.ask.\com', 'nl\.ask.\com', 'uk\.ask.\com', '(^|\.)ask\.com', 'atomz\.', 'overture\.com', 'teoma\.', 'findarticles\.com', 'infospace\.com', 'mamma\.', 'dejanews\.', 'dogpile\.com', 'wisenut\.com', 'ixquick\.com', 'search\.earthlink\.net', 'i-une\.com', 'blingo\.com', 'centraldatabase\.org', 'clusty\.com', 'mysearch\.', 'vivisimo\.com', 'kartoo\.com', 'icerocket\.com', 'sphere\.com', 'ledix\.net', 'start\.shaw\.ca', 'searchalot\.com', 'copernic\.com', 'avantfind\.com', 'steadysearch\.com', 'steady-search\.com', 'chello\.at', 'chello\.be', 'chello\.cz', 'chello\.fr', 'chello\.hu', 'chello\.nl', 'chello\.no', 'chello\.pl', 'chello\.se', 'chello\.sk', 'chello', 'mirago\.be', 'mirago\.ch', 'mirago\.de', 'mirago\.dk', 'es\.mirago\.com', 'mirago\.fr', 'mirago\.it', 'mirago\.nl', 'no\.mirago\.com', 'mirago\.se', 
'mirago\.co\.uk', 'mirago', 'answerbus\.com', 'icq\.com\/search', 'nusearch\.com', 'goodsearch\.com', 'scroogle\.org', 'questionanswering\.com', 'mywebsearch\.com', 'as\.starware\.com', 'del\.icio\.us', 'digg\.com', 'stumbleupon\.com', 'swik\.net', 'segnalo\.alice\.it', 'ineffabile\.it', 'anzwers\.com\.au', 'engine\.exe', 'miner\.bol\.com\.br', '\.baidu\.com', '\.vnet\.cn', '\.soso\.com', '\.sogou\.com', '\.3721\.com', 'iask\.com', '\.accoona\.com', '\.163\.com', '\.zhongsou\.com', 'atlas\.cz', 'seznam\.cz', 'quick\.cz', 'centrum\.cz', 'jyxo\.(cz|com)', 'najdi\.to', 'redbox\.cz', 'opasia\.dk', 'danielsen\.com', 'sol\.dk', 'jubii\.dk', 'find\.dk', 'edderkoppen\.dk', 'netstjernen\.dk', 'orbis\.dk', 'tyfon\.dk', '1klik\.dk', 'ofir\.dk', 'ilse\.', 'vindex\.', '(^|\.)ask\.co\.uk', 'bbc\.co\.uk/cgi-bin/search', 'ifind\.freeserve', 'looksmart\.co\.uk', 'splut\.', 'spotjockey\.', 'ukdirectory\.', 'ukindex\.co\.uk', 'ukplus\.', 'searchy\.co\.uk', 'haku\.www\.fi', 'recherche\.aol\.fr', 'ctrouve\.', 'francite\.', '\.lbb\.org', 'rechercher\.libertysurf\.fr', 'search[\w\-]+\.free\.fr', 'recherche\.club-internet\.fr', 'toile\.com', 'biglotron\.com', 'mozbot\.fr', 'sucheaol\.aol\.de', 'fireball\.de', 'infoseek\.de', 'suche\d?\.web\.de', '[a-z]serv\.rrzn\.uni-hannover\.de', 'suchen\.abacho\.de', '(brisbane|suche)\.t-online\.de', 'allesklar\.de', 'meinestadt\.de', '212\.227\.33\.241', '(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)', 'wwweasel\.de', 'netluchs\.de', 'schoenerbrausen\.de', 'heureka\.hu', 'vizsla\.origo\.hu', 'lapkereso\.hu', 'goliat\.hu', 'index\.hu', 'wahoo\.hu', 'webmania\.hu', 'search\.internetto\.hu', 'tango\.hu', 'keresolap\.hu', 'polymeta\.hu', 'sify\.com', 'virgilio\.it', 'arianna\.libero\.it', 'supereva\.com', 'kataweb\.it', 'search\.alice\.it\.master', 'search\.alice\.it', 'gotuneed\.com', 'godado', 'jumpy\.it', 'shinyseek\.it', 
'teecno\.it', 'ask\.jp', 'sagool\.jp', 'sok\.start\.no', 'eniro\.no', 'szukaj\.wp\.pl', 'szukaj\.onet\.pl', 'dodaj\.pl', 'gazeta\.pl', 'gery\.pl', 'hoga\.pl', 'netsprint\.pl', 'interia\.pl', 'katalog\.onet\.pl', 'o2\.pl', 'polska\.pl', 'szukacz\.pl', 'wow\.pl', 'ya(ndex)?\.ru', 'aport\.ru', 'rambler\.ru', 'turtle\.ru', 'metabot\.ru', 'evreka\.passagen\.se', 'eniro\.se', 'zoznam\.sk', 'sapo\.pt', 'search\.ch', 'search\.bluewin\.ch', 'pogodak\.'] +search_engines_2 = ['4\-counter\.com', 'att\.net', 'bungeebonesdotcom', 'northernlight\.', 'hotbot\.', 'kvasir\.', 'webcrawler\.', 'metacrawler\.', 'go2net\.com', '(^|\.)go\.com', 'euroseek\.', 'looksmart\.', 'spray\.', 'nbci\.com\/search', 'de\.ask.\com', 'es\.ask.\com', 'fr\.ask.\com', 'it\.ask.\com', 'nl\.ask.\com', 'uk\.ask.\com', '(^|\.)ask\.com', 'atomz\.', 'overture\.com', 'teoma\.', 'findarticles\.com', 'infospace\.com', 'mamma\.', 'dejanews\.', 'dogpile\.com', 'wisenut\.com', 'ixquick\.com', 'search\.earthlink\.net', 'i-une\.com', 'blingo\.com', 'centraldatabase\.org', 'clusty\.com', 'mysearch\.', 'vivisimo\.com', 'kartoo\.com', 'icerocket\.com', 'sphere\.com', 'ledix\.net', 'start\.shaw\.ca', 'searchalot\.com', 'copernic\.com', 'avantfind\.com', 'steadysearch\.com', 'steady-search\.com', 'claro-search\.com', 'www1\.search-results\.com', 'www\.holasearch\.com', 'search\.conduit\.com', 'static\.flipora\.com', '(?:www[12]?|mixidj)\.delta-search\.com', 'start\.iminent\.com', 'www\.searchmobileonline\.com', 'int\.search-results\.com', 'chello\.at', 'chello\.be', 'chello\.cz', 'chello\.fr', 'chello\.hu', 'chello\.nl', 'chello\.no', 'chello\.pl', 'chello\.se', 'chello\.sk', 'chello', 'mirago\.be', 'mirago\.ch', 'mirago\.de', 'mirago\.dk', 'es\.mirago\.com', 'mirago\.fr', 'mirago\.it', 'mirago\.nl', 'no\.mirago\.com', 'mirago\.se', 'mirago\.co\.uk', 'mirago', 'answerbus\.com', 'icq\.com\/search', 'nusearch\.com', 'goodsearch\.com', 'scroogle\.org', 'questionanswering\.com', 'mywebsearch\.com', 'as\.starware\.com', 
'del\.icio\.us', 'digg\.com', 'stumbleupon\.com', 'swik\.net', 'segnalo\.alice\.it', 'ineffabile\.it', 'anzwers\.com\.au', 'engine\.exe', 'miner\.bol\.com\.br', '\.baidu\.com', '\.vnet\.cn', '\.soso\.com', '\.sogou\.com', '\.3721\.com', 'iask\.com', '\.accoona\.com', '\.163\.com', '\.zhongsou\.com', 'atlas\.cz', 'seznam\.cz', 'quick\.cz', 'centrum\.cz', 'jyxo\.(cz|com)', 'najdi\.to', 'redbox\.cz', 'isearch\.avg\.com', 'opasia\.dk', 'danielsen\.com', 'sol\.dk', 'jubii\.dk', 'find\.dk', 'edderkoppen\.dk', 'netstjernen\.dk', 'orbis\.dk', 'tyfon\.dk', '1klik\.dk', 'ofir\.dk', 'ilse\.', 'vindex\.', '(^|\.)ask\.co\.uk', 'bbc\.co\.uk/cgi-bin/search', 'ifind\.freeserve', 'looksmart\.co\.uk', 'splut\.', 'spotjockey\.', 'ukdirectory\.', 'ukindex\.co\.uk', 'ukplus\.', 'searchy\.co\.uk', 'search\.fbdownloader\.com', 'search\.babylon\.com', 'haku\.www\.fi', 'recherche\.aol\.fr', 'ctrouve\.', 'francite\.', '\.lbb\.org', 'rechercher\.libertysurf\.fr', 'search[\w\-]+\.free\.fr', 'recherche\.club-internet\.fr', 'toile\.com', 'biglotron\.com', 'mozbot\.fr', 'sucheaol\.aol\.de', 'o2suche\.aol\.de', 'fireball\.de', 'infoseek\.de', 'suche\d?\.web\.de', '[a-z]serv\.rrzn\.uni-hannover\.de', 'suchen\.abacho\.de', '(brisbane|suche)\.t-online\.de', 'allesklar\.de', 'meinestadt\.de', '212\.227\.33\.241', '(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)', 'wwweasel\.de', 'netluchs\.de', 'schoenerbrausen\.de', 'suche\.gmx\.net', 'ecosia\.org', 'de\.aolsearch\.com', 'suche\.aol\.de', 'www\.startxxl\.com', 'www\.benefind\.de', 'heureka\.hu', 'vizsla\.origo\.hu', 'lapkereso\.hu', 'goliat\.hu', 'index\.hu', 'wahoo\.hu', 'webmania\.hu', 'search\.internetto\.hu', 'tango\.hu', 'keresolap\.hu', 'polymeta\.hu', 'sify\.com', 'virgilio\.it', 'arianna\.libero\.it', 'supereva\.com', 'kataweb\.it', 'search\.alice\.it\.master', 'search\.alice\.it', 'gotuneed\.com', 'godado', 'jumpy\.it', 
'shinyseek\.it', 'teecno\.it', 'search\.genieo\.com', 'ask\.jp', 'sagool\.jp', 'sok\.start\.no', 'eniro\.no', 'szukaj\.wp\.pl', 'szukaj\.onet\.pl', 'dodaj\.pl', 'gazeta\.pl', 'gery\.pl', 'hoga\.pl', 'netsprint\.pl', 'interia\.pl', 'katalog\.onet\.pl', 'o2\.pl', 'polska\.pl', 'szukacz\.pl', 'wow\.pl', 'ya(ndex)?\.ru', 'aport\.ru', 'rambler\.ru', 'turtle\.ru', 'metabot\.ru', 'evreka\.passagen\.se', 'eniro\.se', 'zoznam\.sk', 'sapo\.pt', 'search\.ch', 'search\.bluewin\.ch', 'pogodak\.', 'jwss\.cc', 'lemoteur\.orange\.fr', 'windowssearch\.com', 'qwant\.com', 'wow\.com', 'searches\.omiga-plus\.com', 'buenosearch\.com', 'searches\.vi-view\.com'] -not_search_engines_keys = {'yahoo\.' : '(?:picks|mail)\.yahoo\.|yahoo\.[^/]+/picks', 'altavista\.' : 'babelfish\.altavista\.', 'tiscali\.' : 'mail\.tiscali\.', 'yandex\.' : 'direct\.yandex\.', 'google\.' : 'translate\.google\.', 'msn\.' : 'hotmail\.msn\.'} +not_search_engines_keys = {'tiscali\.' : 'mail\.tiscali\.', 'altavista\.' : 'babelfish\.altavista\.', 'yahoo\.' : '(?:picks|mail)\.yahoo\.|yahoo\.[^/]+/picks', 'google\.' : 'translate\.google\.', 'msn\.' : 'hotmail\.msn\.', 'yandex\.' : 'direct\.yandex\.'} -search_engines_hashid = {'search\.sli\.sympatico\.ca' : 'sympatico', 'mywebsearch\.com' : 'mywebsearch', 'netsprint\.pl\/hoga\-search' : 'hogapl', 'findarticles\.com' : 'findarticles', 'wow\.pl' : 'wowpl', 'allesklar\.de' : 'allesklar', 'atomz\.' : 'atomz', 'bing\.' : 'bing', 'find\.dk' : 'finddk', 'google\.' : 'google', '(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)' : 'yahoo', 'pogodak\.' : 'pogodak', 'ask\.jp' : 'askjp', '\.baidu\.com' : 'baidu', 'tango\.hu' : 'tango_hu', 'gotuneed\.com' : 'gotuneed', 'quick\.cz' : 'quick', 'mirago' : 'mirago', 'szukaj\.wp\.pl' : 'wp', 'mirago\.de' : 'miragode', 'mirago\.dk' : 'miragodk', 'katalog\.onet\.pl' : 'katalogonetpl', 'googlee\.' 
: 'google', 'orbis\.dk' : 'orbis', 'turtle\.ru' : 'turtle', 'zoznam\.sk' : 'zoznam', 'start\.shaw\.ca' : 'shawca', 'chello\.at' : 'chelloat', 'centraldatabase\.org' : 'centraldatabase', 'centrum\.cz' : 'centrum', 'kataweb\.it' : 'kataweb', '\.lbb\.org' : 'lbb', 'blingo\.com' : 'blingo', 'vivisimo\.com' : 'vivisimo', 'stumbleupon\.com' : 'stumbleupon', 'es\.ask.\com' : 'askes', 'interia\.pl' : 'interiapl', '[a-z]serv\.rrzn\.uni-hannover\.de' : 'meta', 'search\.alice\.it' : 'aliceit', 'shinyseek\.it' : 'shinyseek\.it', 'i-une\.com' : 'iune', 'dejanews\.' : 'dejanews', 'opasia\.dk' : 'opasia', 'chello\.cz' : 'chellocz', 'ya(ndex)?\.ru' : 'yandex', 'kartoo\.com' : 'kartoo', 'arianna\.libero\.it' : 'arianna', 'ofir\.dk' : 'ofir', 'search\.earthlink\.net' : 'earthlink', 'biglotron\.com' : 'biglotron', 'lapkereso\.hu' : 'lapkereso', '216\.239\.(35|37|39|51)\.101' : 'google_cache', 'miner\.bol\.com\.br' : 'miner', 'dodaj\.pl' : 'dodajpl', 'mirago\.be' : 'miragobe', 'googlecom\.com' : 'google', 'steadysearch\.com' : 'steadysearch', 'redbox\.cz' : 'redbox', 'haku\.www\.fi' : 'haku', 'sapo\.pt' : 'sapo', 'sphere\.com' : 'sphere', 'danielsen\.com' : 'danielsen', 'alexa\.com' : 'alexa', 'mamma\.' : 'mamma', 'swik\.net' : 'swik', 'polska\.pl' : 'polskapl', 'groups\.google\.' : 'google_groups', 'metabot\.ru' : 'metabot', 'rechercher\.libertysurf\.fr' : 'libertysurf', 'szukaj\.onet\.pl' : 'onetpl', 'aport\.ru' : 'aport', 'de\.ask.\com' : 'askde', 'splut\.' : 'splut', 'live\.com' : 'live', '216\.239\.5[0-9]\.104' : 'google_cache', 'mysearch\.' : 'mysearch', 'ukplus\.' : 'ukplus', 'najdi\.to' : 'najdi', 'overture\.com' : 'overture', 'iask\.com' : 'iask', 'nl\.ask.\com' : 'asknl', 'nbci\.com\/search' : 'nbci', 'search\.aol\.co' : 'aol', 'eniro\.se' : 'enirose', '64\.233\.1[0-9]{2}\.104' : 'google_cache', 'mirago\.ch' : 'miragoch', 'altavista\.' : 'altavista', 'chello\.hu' : 'chellohu', 'mozbot\.fr' : 'mozbot', 'northernlight\.' 
: 'northernlight', 'mirago\.co\.uk' : 'miragocouk', 'search[\w\-]+\.free\.fr' : 'free', 'mindset\.research\.yahoo' : 'yahoo_mindset', 'copernic\.com' : 'copernic', 'heureka\.hu' : 'heureka', 'steady-search\.com' : 'steadysearch', 'teecno\.it' : 'teecnoit', 'voila\.' : 'voila', 'netstjernen\.dk' : 'netstjernen', 'keresolap\.hu' : 'keresolap_hu', 'yahoo\.' : 'yahoo', 'icerocket\.com' : 'icerocket', 'alltheweb\.com' : 'alltheweb', 'www\.search\.com' : 'search.com', 'digg\.com' : 'digg', 'tiscali\.' : 'tiscali', 'spotjockey\.' : 'spotjockey', 'a9\.com' : 'a9', '(brisbane|suche)\.t-online\.de' : 't-online', 'ifind\.freeserve' : 'freeserve', 'att\.net' : 'att', 'mirago\.it' : 'miragoit', 'index\.hu' : 'indexhu', '\.sogou\.com' : 'sogou', 'no\.mirago\.com' : 'miragono', 'ineffabile\.it' : 'ineffabile', 'netluchs\.de' : 'netluchs', 'toile\.com' : 'toile', 'search\..*\.\w+' : 'search', 'del\.icio\.us' : 'delicious', 'vizsla\.origo\.hu' : 'origo', 'netscape\.' : 'netscape', 'dogpile\.com' : 'dogpile', 'anzwers\.com\.au' : 'anzwers', '\.zhongsou\.com' : 'zhongsou', 'ctrouve\.' : 'ctrouve', 'gazeta\.pl' : 'gazetapl', 'recherche\.club-internet\.fr' : 'clubinternet', 'sok\.start\.no' : 'start', 'scroogle\.org' : 'scroogle', 'schoenerbrausen\.de' : 'schoenerbrausen', 'looksmart\.co\.uk' : 'looksmartuk', 'wwweasel\.de' : 'wwweasel', 'godado' : 'godado', '216\.239\.(35|37|39|51)\.100' : 'google_cache', 'jubii\.dk' : 'jubii', '212\.227\.33\.241' : 'metaspinner', 'mirago\.fr' : 'miragofr', 'sol\.dk' : 'sol', 'bbc\.co\.uk/cgi-bin/search' : 'bbc', 'jumpy\.it' : 'jumpy\.it', 'francite\.' : 'francite', 'infoseek\.de' : 'infoseek', 'es\.mirago\.com' : 'miragoes', 'jyxo\.(cz|com)' : 'jyxo', 'hotbot\.' : 'hotbot', 'engine\.exe' : 'engine', '(^|\.)ask\.com' : 'ask', 'goliat\.hu' : 'goliat', 'wisenut\.com' : 'wisenut', 'mirago\.nl' : 'miragonl', 'base\.google\.' : 'google_base', 'search\.bluewin\.ch' : 'bluewin', 'lycos\.' 
: 'lycos', 'meinestadt\.de' : 'meinestadt', '4\-counter\.com' : 'google4counter', 'search\.alice\.it\.master' : 'aliceitmaster', 'teoma\.' : 'teoma', '(^|\.)ask\.co\.uk' : 'askuk', 'tyfon\.dk' : 'tyfon', 'froogle\.google\.' : 'google_froogle', 'ukdirectory\.' : 'ukdirectory', 'ledix\.net' : 'ledix', 'edderkoppen\.dk' : 'edderkoppen', 'recherche\.aol\.fr' : 'aolfr', 'google\.[\w.]+/products' : 'google_products', 'webmania\.hu' : 'webmania', 'searchy\.co\.uk' : 'searchy', 'fr\.ask.\com' : 'askfr', 'spray\.' : 'spray', '72\.14\.2[0-9]{2}\.104' : 'google_cache', 'eniro\.no' : 'eniro', 'goodsearch\.com' : 'goodsearch', 'kvasir\.' : 'kvasir', '\.accoona\.com' : 'accoona', '\.soso\.com' : 'soso', 'as\.starware\.com' : 'comettoolbar', 'virgilio\.it' : 'virgilio', 'o2\.pl' : 'o2pl', 'chello\.nl' : 'chellonl', 'chello\.be' : 'chellobe', 'icq\.com\/search' : 'icq', 'msn\.' : 'msn', 'fireball\.de' : 'fireball', 'sucheaol\.aol\.de' : 'aolde', 'uk\.ask.\com' : 'askuk', 'euroseek\.' : 'euroseek', 'gery\.pl' : 'gerypl', 'chello\.fr' : 'chellofr', 'netsprint\.pl' : 'netsprintpl', 'avantfind\.com' : 'avantfind', 'supereva\.com' : 'supereva', 'polymeta\.hu' : 'polymeta_hu', 'infospace\.com' : 'infospace', 'sify\.com' : 'sify', 'go2net\.com' : 'go2net', 'wahoo\.hu' : 'wahoo', 'suche\d?\.web\.de' : 'webde', '(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)' : 'metacrawler_de', '\.3721\.com' : '3721', 'ilse\.' : 'ilse', 'metacrawler\.' : 'metacrawler', 'sagool\.jp' : 'sagool', 'atlas\.cz' : 'atlas', 'vindex\.' 
: 'vindex', 'ixquick\.com' : 'ixquick', '66\.102\.[1-9]\.104' : 'google_cache', 'rambler\.ru' : 'rambler', 'answerbus\.com' : 'answerbus', 'evreka\.passagen\.se' : 'passagen', 'chello\.se' : 'chellose', 'clusty\.com' : 'clusty', 'search\.ch' : 'searchch', 'chello\.no' : 'chellono', 'searchalot\.com' : 'searchalot', 'questionanswering\.com' : 'questionanswering', 'seznam\.cz' : 'seznam', 'ukindex\.co\.uk' : 'ukindex', 'dmoz\.org' : 'dmoz', 'excite\.' : 'excite', 'chello\.pl' : 'chellopl', 'looksmart\.' : 'looksmart', '1klik\.dk' : '1klik', '\.vnet\.cn' : 'vnet', 'chello\.sk' : 'chellosk', '(^|\.)go\.com' : 'go', 'nusearch\.com' : 'nusearch', 'it\.ask.\com' : 'askit', 'bungeebonesdotcom' : 'bungeebonesdotcom', 'search\.terra\.' : 'terra', 'webcrawler\.' : 'webcrawler', 'suchen\.abacho\.de' : 'abacho', 'szukacz\.pl' : 'szukaczpl', '66\.249\.93\.104' : 'google_cache', 'search\.internetto\.hu' : 'internetto', 'goggle\.co\.hu' : 'google', 'mirago\.se' : 'miragose', 'images\.google\.' : 'google_image', 'segnalo\.alice\.it' : 'segnalo', '\.163\.com' : 'netease', 'chello' : 'chellocom'} +search_engines_hashid = {'recherche\.aol\.fr' : 'aolfr', 'google\.' : 'google', 'engine\.exe' : 'engine', 'netsprint\.pl\/hoga\-search' : 'hogapl', 'search\.fbdownloader\.com' : 'fbdownloader', 'chello\.pl' : 'chellopl', 'suche\.gmx\.net' : 'gmxsuche', '\.baidu\.com' : 'baidu', 'ya(ndex)?\.ru' : 'yandex', 'i-une\.com' : 'iune', 'edderkoppen\.dk' : 'edderkoppen', 'mirago\.dk' : 'miragodk', 'biglotron\.com' : 'biglotron', 'infoseek\.de' : 'infoseek', 'findarticles\.com' : 'findarticles', 'chello\.se' : 'chellose', 'suche\d?\.web\.de' : 'webde', 'goliat\.hu' : 'goliat', 'meinestadt\.de' : 'meinestadt', '\.accoona\.com' : 'accoona', 'nl\.ask.\com' : 'asknl', 'infospace\.com' : 'infospace', 'mysearch\.' : 'mysearch', 'francite\.' : 'francite', 'searchy\.co\.uk' : 'searchy', '(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)' : 'yahoo', 'iask\.com' : 'iask', 'googlee\.' 
: 'google', 'index\.hu' : 'indexhu', 'clusty\.com' : 'clusty', 'www\.startxxl\.com' : 'startxxl', 'search\.earthlink\.net' : 'earthlink', 'danielsen\.com' : 'danielsen', 'digg\.com' : 'digg', 'uk\.ask.\com' : 'askuk', 'mirago' : 'mirago', 'dodaj\.pl' : 'dodajpl', 'altavista\.' : 'altavista', 'chello\.no' : 'chellono', 'es\.mirago\.com' : 'miragoes', 'teoma\.' : 'teoma', 'isearch\.avg\.com' : 'avgsearch', 'search\..*\.\w+' : 'search', 'schoenerbrausen\.de' : 'schoenerbrausen', 'ineffabile\.it' : 'ineffabile', 'mozbot\.fr' : 'mozbot', 'atlas\.cz' : 'atlas', '1klik\.dk' : '1klik', 'de\.ask.\com' : 'askde', 'o2\.pl' : 'o2pl', '\.sogou\.com' : 'sogou', 'netluchs\.de' : 'netluchs', 'mindset\.research\.yahoo' : 'yahoo_mindset', 'search\.internetto\.hu' : 'internetto', 'search\.bluewin\.ch' : 'bluewin', 'images\.google\.' : 'google_image', 'mirago\.fr' : 'miragofr', 'nusearch\.com' : 'nusearch', 'stumbleupon\.com' : 'stumbleupon', 'o2suche\.aol\.de' : 'o2aolde', 'www\.benefind\.de' : 'benefind', 'search\.babylon\.com' : 'babylon', '(^|\.)ask\.com' : 'ask', 'teecno\.it' : 'teecnoit', 'yahoo\.' : 'yahoo', '\.3721\.com' : '3721', 'steady-search\.com' : 'steadysearch', 'sucheaol\.aol\.de' : 'aolde', '(brisbane|suche)\.t-online\.de' : 't-online', '216\.239\.5[0-9]\.104' : 'google_cache', 'chello\.hu' : 'chellohu', 'jubii\.dk' : 'jubii', 'www\.searchmobileonline\.com' : 'searchmobileonline', 'gotuneed\.com' : 'gotuneed', 'virgilio\.it' : 'virgilio', 'wwweasel\.de' : 'wwweasel', 'ledix\.net' : 'ledix', 'rambler\.ru' : 'rambler', 'arianna\.libero\.it' : 'arianna', 'ecosia\.org' : 'ecosiasearch', 'no\.mirago\.com' : 'miragono', 'as\.starware\.com' : 'comettoolbar', 'centrum\.cz' : 'centrum', 'mirago\.ch' : 'miragoch', 'supereva\.com' : 'supereva', 'groups\.google\.' : 'google_groups', 'spotjockey\.' : 'spotjockey', 'goggle\.co\.hu' : 'google', 'recherche\.club-internet\.fr' : 'clubinternet', 'toile\.com' : 'toile', 'centraldatabase\.org' : 'centraldatabase', 'ctrouve\.' 
: 'ctrouve', '(?:www[12]?|mixidj)\.delta-search\.com' : 'delta-search', 'search\.terra\.' : 'terra', 'blingo\.com' : 'blingo', 'rechercher\.libertysurf\.fr' : 'libertysurf', 'gery\.pl' : 'gerypl', 'avantfind\.com' : 'avantfind', 'godado' : 'godado', 'anzwers\.com\.au' : 'anzwers', 'scroogle\.org' : 'scroogle', 'eniro\.se' : 'enirose', 'chello\.cz' : 'chellocz', 'mamma\.' : 'mamma', 'sify\.com' : 'sify', '(^|\.)ask\.co\.uk' : 'askuk', 'netstjernen\.dk' : 'netstjernen', 'search\.ch' : 'searchch', 'answerbus\.com' : 'answerbus', 'alltheweb\.com' : 'alltheweb', 'netscape\.' : 'netscape', 'ask\.jp' : 'askjp', 'search\.alice\.it\.master' : 'aliceitmaster', 'chello\.fr' : 'chellofr', 'voila\.' : 'voila', 'del\.icio\.us' : 'delicious', 'mirago\.be' : 'miragobe', '\.zhongsou\.com' : 'zhongsou', 'chello' : 'chellocom', 'haku\.www\.fi' : 'haku', 'seznam\.cz' : 'seznam', 'webcrawler\.' : 'webcrawler', 'hotbot\.' : 'hotbot', 'looksmart\.co\.uk' : 'looksmartuk', 'bing\.' : 'bing', 'orbis\.dk' : 'orbis', 'froogle\.google\.' : 'google_froogle', 'int\.search-results\.com' : 'nortonsavesearch', 'keresolap\.hu' : 'keresolap_hu', '216\.239\.(35|37|39|51)\.100' : 'google_cache', 'jyxo\.(cz|com)' : 'jyxo', 'suche\.aol\.de' : 'aolsuche', 'zoznam\.sk' : 'zoznam', 'mirago\.de' : 'miragode', '\.lbb\.org' : 'lbb', 'search\.genieo\.com' : 'genieo', 'shinyseek\.it' : 'shinyseek\.it', 'www\.holasearch\.com' : 'holasearch', 'excite\.' : 'excite', '216\.239\.(35|37|39|51)\.101' : 'google_cache', 'swik\.net' : 'swik', '66\.249\.93\.104' : 'google_cache', 'miner\.bol\.com\.br' : 'miner', '\.163\.com' : 'netease', '\.vnet\.cn' : 'vnet', 'pogodak\.' : 'pogodak', 'go2net\.com' : 'go2net', 'dogpile\.com' : 'dogpile', 'live\.com' : 'live', 'claro-search\.com' : 'clarosearch', 'nbci\.com\/search' : 'nbci', 'search\.alice\.it' : 'aliceit', 'evreka\.passagen\.se' : 'passagen', 'tiscali\.' 
: 'tiscali', 'copernic\.com' : 'copernic', 'overture\.com' : 'overture', 'search\.sli\.sympatico\.ca' : 'sympatico', 'fr\.ask.\com' : 'askfr', 'alexa\.com' : 'alexa', 'find\.dk' : 'finddk', '(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)' : 'metacrawler_de', 'msn\.' : 'msn', 'search\.conduit\.com' : 'conduit', 'sapo\.pt' : 'sapo', 'ukplus\.' : 'ukplus', 'looksmart\.' : 'looksmart', 'sol\.dk' : 'sol', 'kataweb\.it' : 'kataweb', 'interia\.pl' : 'interiapl', 'polymeta\.hu' : 'polymeta_hu', 'chello\.sk' : 'chellosk', 'search[\w\-]+\.free\.fr' : 'free', 'metabot\.ru' : 'metabot', 'netsprint\.pl' : 'netsprintpl', 'lapkereso\.hu' : 'lapkereso', 'wisenut\.com' : 'wisenut', 'tango\.hu' : 'tango_hu', 'mywebsearch\.com' : 'mywebsearch', 'eniro\.no' : 'eniro', 'szukaj\.onet\.pl' : 'onetpl', '\.soso\.com' : 'soso', 'segnalo\.alice\.it' : 'segnalo', 'splut\.' : 'splut', 'fireball\.de' : 'fireball', 'vindex\.' : 'vindex', 'dmoz\.org' : 'dmoz', 'search\.aol\.co' : 'aol', 'goodsearch\.com' : 'goodsearch', '[a-z]serv\.rrzn\.uni-hannover\.de' : 'meta', 'bungeebonesdotcom' : 'bungeebonesdotcom', 'ofir\.dk' : 'ofir', 'ifind\.freeserve' : 'freeserve', 'de\.aolsearch\.com' : 'aolsearch', 'mirago\.se' : 'miragose', 'att\.net' : 'att', 'www\.search\.com' : 'search.com', 'chello\.be' : 'chellobe', 'ixquick\.com' : 'ixquick', 'szukaj\.wp\.pl' : 'wp', 'katalog\.onet\.pl' : 'katalogonetpl', 'vivisimo\.com' : 'vivisimo', 'northernlight\.' : 'northernlight', 'turtle\.ru' : 'turtle', 'wow\.pl' : 'wowpl', '(^|\.)go\.com' : 'go', 'szukacz\.pl' : 'szukaczpl', 'metacrawler\.' : 'metacrawler', 'googlecom\.com' : 'google', 'dejanews\.' 
: 'dejanews', 'mirago\.it' : 'miragoit', '64\.233\.1[0-9]{2}\.104' : 'google_cache', 'wahoo\.hu' : 'wahoo', 'mirago\.nl' : 'miragonl', 'a9\.com' : 'a9', 'suchen\.abacho\.de' : 'abacho', 'bbc\.co\.uk/cgi-bin/search' : 'bbc', 'questionanswering\.com' : 'questionanswering', 'najdi\.to' : 'najdi', 'jumpy\.it' : 'jumpy\.it', 'aport\.ru' : 'aport', 'vizsla\.origo\.hu' : 'origo', 'spray\.' : 'spray', 'sphere\.com' : 'sphere', 'steadysearch\.com' : 'steadysearch', '66\.102\.[1-9]\.104' : 'google_cache', 'chello\.nl' : 'chellonl', 'atomz\.' : 'atomz', 'google\.[\w.]+/products' : 'google_products', 'redbox\.cz' : 'redbox', 'lycos\.' : 'lycos', 'chello\.at' : 'chelloat', 'quick\.cz' : 'quick', 'kartoo\.com' : 'kartoo', 'icerocket\.com' : 'icerocket', 'mirago\.co\.uk' : 'miragocouk', 'gazeta\.pl' : 'gazetapl', 'start\.shaw\.ca' : 'shawca', 'allesklar\.de' : 'allesklar', 'polska\.pl' : 'polskapl', 'start\.iminent\.com' : 'iminent', 'tyfon\.dk' : 'tyfon', 'heureka\.hu' : 'heureka', 'webmania\.hu' : 'webmania', 'es\.ask.\com' : 'askes', 'opasia\.dk' : 'opasia', 'euroseek\.' : 'euroseek', 'ilse\.' : 'ilse', 'it\.ask.\com' : 'askit', 'base\.google\.' : 'google_base', '4\-counter\.com' : 'google4counter', '212\.227\.33\.241' : 'metaspinner', 'sok\.start\.no' : 'start', 'ukindex\.co\.uk' : 'ukindex', 'static\.flipora\.com' : 'flipora', 'ukdirectory\.' : 'ukdirectory', 'searchalot\.com' : 'searchalot', 'sagool\.jp' : 'sagool', 'www1\.search-results\.com' : 'searchresults', 'kvasir\.' 
: 'kvasir', 'icq\.com\/search' : 'icq', '72\.14\.2[0-9]{2}\.104' : 'google_cache', 'www.sfr\.fr\/recherche\/google' : 'google', 'searches\.omiga-plus\.com' : 'Omiga-plus', 'lemoteur\.orange\.fr' : 'Orange', 'searches\.vi-view\.com' : 'vi-view', 'qwant\.com' : 'Qwant', 'buenosearch\.com' : 'Bueno Search', 'wow\.com' : 'WOW', 'windowssearch\.com' : 'Windows Search', 'jwss\.cc' : 'jws'} -search_engines_knwown_url = {'dmoz' : 'search=', 'google' : '(p|q|as_p|as_q)=', 'searchalot' : 'q=', 'teoma' : 'q=', 'looksmartuk' : 'key=', 'polymeta_hu' : '', 'google_groups' : 'group\/', 'iune' : '(keywords|q)=', 'chellosk' : 'q1=', 'eniro' : 'q=', 'msn' : 'q=', 'webcrawler' : 'searchText=', 'mirago' : '(txtsearch|qry)=', 'enirose' : 'q=', 'miragobe' : '(txtsearch|qry)=', 'netease' : 'q=', 'netluchs' : 'query=', 'google_products' : '(p|q|as_p|as_q)=', 'jyxo' : '(s|q)=', 'origo' : '(q|search)=', 'ilse' : 'search_for=', 'chellocom' : 'q1=', 'goodsearch' : 'Keywords=', 'ledix' : 'q=', 'mozbot' : 'q=', 'chellocz' : 'q1=', 'webde' : 'su=', 'biglotron' : 'question=', 'metacrawler_de' : 'qry=', 'finddk' : 'words=', 'start' : 'q=', 'sagool' : 'q=', 'miragoch' : '(txtsearch|qry)=', 'google_base' : '(p|q|as_p|as_q)=', 'aliceit' : 'qs=', 'shinyseek\.it' : 'KEY=', 'onetpl' : 'qt=', 'clusty' : 'query=', 'chellonl' : 'q1=', 'miragode' : '(txtsearch|qry)=', 'miragose' : '(txtsearch|qry)=', 'o2pl' : 'qt=', 'goliat' : 'KERESES=', 'kvasir' : 'q=', 'askfr' : '(ask|q)=', 'infoseek' : 'qt=', 'yahoo_mindset' : 'p=', 'comettoolbar' : 'qry=', 'alltheweb' : 'q(|uery)=', 'miner' : 'q=', 'aol' : 'query=', 'rambler' : 'words=', 'scroogle' : 'Gw=', 'chellose' : 'q1=', 'ineffabile' : '', 'miragoit' : '(txtsearch|qry)=', 'yandex' : 'text=', 'segnalo' : '', 'dodajpl' : 'keyword=', 'avantfind' : 'keywords=', 'nusearch' : 'nusearch_terms=', 'bbc' : 'q=', 'supereva' : 'q=', 'atomz' : 'sp-q=', 'searchy' : 'search_term=', 'dogpile' : 'q(|kw)=', 'chellohu' : 'q1=', 'vnet' : 'kw=', '1klik' : 'query=', 't-online' : 'q=', 
'hogapl' : 'qt=', 'stumbleupon' : '', 'soso' : 'q=', 'zhongsou' : '(word|w)=', 'a9' : 'a9\.com\/', 'centraldatabase' : 'query=', 'mamma' : 'query=', 'icerocket' : 'q=', 'ask' : '(ask|q)=', 'chellobe' : 'q1=', 'altavista' : 'q=', 'vindex' : 'in=', 'miragodk' : '(txtsearch|qry)=', 'chelloat' : 'q1=', 'digg' : 's=', 'metacrawler' : 'general=', 'nbci' : 'keyword=', 'chellono' : 'q1=', 'icq' : 'q=', 'arianna' : 'query=', 'miragocouk' : '(txtsearch|qry)=', '3721' : '(p|name)=', 'pogodak' : 'q=', 'ukdirectory' : 'k=', 'overture' : 'keywords=', 'heureka' : 'heureka=', 'teecnoit' : 'q=', 'miragoes' : '(txtsearch|qry)=', 'haku' : 'w=', 'go' : 'qt=', 'fireball' : 'q=', 'wisenut' : 'query=', 'sify' : 'keyword=', 'ixquick' : 'query=', 'anzwers' : 'search=', 'quick' : 'query=', 'jubii' : 'soegeord=', 'questionanswering' : '', 'asknl' : '(ask|q)=', 'askde' : '(ask|q)=', 'att' : 'qry=', 'terra' : 'query=', 'bing' : 'q=', 'wowpl' : 'q=', 'freeserve' : 'q=', 'atlas' : '(searchtext|q)=', 'askuk' : '(ask|q)=', 'godado' : 'Keywords=', 'northernlight' : 'qr=', 'answerbus' : '', 'search.com' : 'q=', 'google_image' : '(p|q|as_p|as_q)=', 'jumpy\.it' : 'searchWord=', 'gazetapl' : 'slowo=', 'yahoo' : 'p=', 'hotbot' : 'mt=', 'metabot' : 'st=', 'copernic' : 'web\/', 'kartoo' : '', 'metaspinner' : 'qry=', 'toile' : 'q=', 'aolde' : 'q=', 'blingo' : 'q=', 'askit' : '(ask|q)=', 'netscape' : 'search=', 'splut' : 'pattern=', 'looksmart' : 'key=', 'sphere' : 'q=', 'sol' : 'q=', 'miragono' : '(txtsearch|qry)=', 'kataweb' : 'q=', 'ofir' : 'querytext=', 'aliceitmaster' : 'qs=', 'miragofr' : '(txtsearch|qry)=', 'spray' : 'string=', 'seznam' : '(w|q)=', 'interiapl' : 'q=', 'euroseek' : 'query=', 'schoenerbrausen' : 'q=', 'centrum' : 'q=', 'netsprintpl' : 'q=', 'go2net' : 'general=', 'katalogonetpl' : 'qt=', 'ukindex' : 'stext=', 'shawca' : 'q=', 'szukaczpl' : 'q=', 'accoona' : 'qt=', 'live' : 'q=', 'google4counter' : '(p|q|as_p|as_q)=', 'iask' : '(w|k)=', 'earthlink' : 'q=', 'tiscali' : 'key=', 'askes' : 
'(ask|q)=', 'gotuneed' : '', 'clubinternet' : 'q=', 'redbox' : 'srch=', 'delicious' : 'all=', 'chellofr' : 'q1=', 'lycos' : 'query=', 'sympatico' : 'query=', 'vivisimo' : 'query=', 'bluewin' : 'qry=', 'mysearch' : 'searchfor=', 'google_cache' : '(p|q|as_p|as_q)=cache:[0-9A-Za-z]{12}:', 'ukplus' : 'search=', 'gerypl' : 'q=', 'keresolap_hu' : 'q=', 'abacho' : 'q=', 'engine' : 'p1=', 'opasia' : 'q=', 'wp' : 'szukaj=', 'steadysearch' : 'w=', 'chellopl' : 'q1=', 'voila' : '(kw|rdata)=', 'aport' : 'r=', 'internetto' : 'searchstr=', 'passagen' : 'q=', 'wwweasel' : 'q=', 'najdi' : 'dotaz=', 'alexa' : 'q=', 'baidu' : '(wd|word)=', 'spotjockey' : 'Search_Keyword=', 'virgilio' : 'qs=', 'orbis' : 'search_field=', 'tango_hu' : 'q=', 'askjp' : '(ask|q)=', 'bungeebonesdotcom' : 'query=', 'francite' : 'name=', 'searchch' : 'q=', 'google_froogle' : '(p|q|as_p|as_q)=', 'excite' : 'search=', 'infospace' : 'qkw=', 'polskapl' : 'qt=', 'swik' : 'swik\.net/', 'edderkoppen' : 'query=', 'mywebsearch' : 'searchfor=', 'danielsen' : 'q=', 'wahoo' : 'q=', 'sogou' : 'query=', 'miragonl' : '(txtsearch|qry)=', 'findarticles' : 'key='} +search_engines_knwown_url = {'clusty' : 'query=', 'mywebsearch' : 'searchfor=', 'o2pl' : 'qt=', 'jubii' : 'soegeord=', 'finddk' : 'words=', 'chellono' : 'q1=', 'search.com' : 'q=', 'askuk' : '(ask|q)=', 'iminent' : 'q=', 'earthlink' : 'q=', 'passagen' : 'q=', 'miragobe' : '(txtsearch|qry)=', 'miragoit' : '(txtsearch|qry)=', 'danielsen' : 'q=', 'askde' : '(ask|q)=', 'looksmartuk' : 'key=', 'orbis' : 'search_field=', 'chellocz' : 'q1=', 'nusearch' : 'nusearch_terms=', 'searchmobileonline' : 'q=', 'avantfind' : 'keywords=', 'kartoo' : '', 'asknl' : '(ask|q)=', 'chellose' : 'q1=', 'teoma' : 'q=', 'bungeebonesdotcom' : 'query=', 'metacrawler_de' : 'qry=', '1klik' : 'query=', 'bing' : 'q=', 'mysearch' : 'searchfor=', 'aolsearch' : 'q=', 'yahoo_mindset' : 'p=', 'go' : 'qt=', 'wisenut' : 'query=', 'chellohu' : 'q1=', 'iune' : '(keywords|q)=', 'francite' : 'name=', 
'gmxsuche' : 'q=', 'benefind' : 'q=', 'o2aolde' : 'q=', 'jyxo' : '(s|q)=', 'chellopl' : 'q1=', 'schoenerbrausen' : 'q=', 'findarticles' : 'key=', 'looksmart' : 'key=', 'conduit' : 'q=', 'google4counter' : '(p|q|as_p|as_q)=', 'google_image' : '(p|q|as_p|as_q)=', 'spray' : 'string=', 'baidu' : '(wd|word)=', 'mamma' : 'query=', 'chelloat' : 'q1=', 'ixquick' : 'query=', 'heureka' : 'heureka=', '3721' : '(p|name)=', 'questionanswering' : '', 'live' : 'q=', 'kataweb' : 'q=', 'aliceit' : 'qs=', 'google_products' : '(p|q|as_p|as_q)=', 'euroseek' : 'query=', 'sympatico' : 'query=', 'go2net' : 'general=', 'accoona' : 'qt=', 'netease' : 'q=', 'redbox' : 'srch=', 'sol' : 'q=', 'goodsearch' : 'Keywords=', 'miragoch' : '(txtsearch|qry)=', 'seznam' : '(w|q)=', 'chellonl' : 'q1=', 'start' : 'q=', 'zhongsou' : '(word|w)=', 'ecosiasearch' : 'q=', 'katalogonetpl' : 'qt=', 'nortonsavesearch' : 'q=', 'aolsuche' : 'q=', 'att' : 'qry=', 'delicious' : 'all=', 'origo' : '(q|search)=', 'jumpy\.it' : 'searchWord=', 'wwweasel' : 'q=', 'aliceitmaster' : 'qs=', 'sagool' : 'q=', 'flipora' : 'q=', 'gerypl' : 'q=', 'miragode' : '(txtsearch|qry)=', 'nbci' : 'keyword=', 'searchresults' : 'q=', 'centrum' : 'q=', 'engine' : 'p1=', 'tango_hu' : 'q=', 'netluchs' : 'query=', 'delta-search' : 'q=', 'icerocket' : 'q=', 'spotjockey' : 'Search_Keyword=', 'northernlight' : 'qr=', 'dodajpl' : 'keyword=', 'google_base' : '(p|q|as_p|as_q)=', 'google_cache' : '(p|q|as_p|as_q)=cache:[0-9A-Za-z]{12}:', 'blingo' : 'q=', 'google_froogle' : '(p|q|as_p|as_q)=', 'abacho' : 'q=', 'holasearch' : 'q=', 'fireball' : 'q=', 'google' : '(p|q|as_p|as_q)=', 'terra' : 'query=', 't-online' : 'q=', 'aol' : 'query=', 'lycos' : 'query=', 'edderkoppen' : 'query=', 'hogapl' : 'qt=', 'yandex' : 'text=', 'askjp' : '(ask|q)=', 'dmoz' : 'search=', 'chellocom' : 'q1=', 'goliat' : 'KERESES=', 'hotbot' : 'mt=', 'ukplus' : 'search=', 'anzwers' : 'search=', 'aport' : 'r=', 'avgsearch' : 'q=', 'centraldatabase' : 'query=', 'ilse' : 
'search_for=', 'miragofr' : '(txtsearch|qry)=', 'metacrawler' : 'general=', 'infoseek' : 'qt=', 'aolde' : 'q=', 'interiapl' : 'q=', 'dogpile' : 'q(|kw)=', 'google_groups' : 'group\/', 'biglotron' : 'question=', 'startxxl' : 'q=', 'alexa' : 'q=', 'iask' : '(w|k)=', 'clarosearch' : 'q=', 'atlas' : '(searchtext|q)=', 'wahoo' : 'q=', 'toile' : 'q=', 'sphere' : 'q=', 'metabot' : 'st=', 'scroogle' : 'Gw=', 'mirago' : '(txtsearch|qry)=', 'alltheweb' : 'q(|uery)=', 'yahoo' : 'p=', 'pogodak' : 'q=', 'ukindex' : 'stext=', 'gazetapl' : 'slowo=', 'genieo' : 'q=', 'szukaczpl' : 'q=', 'steadysearch' : 'w=', 'gotuneed' : '', 'miner' : 'q=', 'virgilio' : 'qs=', 'miragodk' : '(txtsearch|qry)=', 'mozbot' : 'q=', 'wp' : 'szukaj=', 'swik' : 'swik\.net/', 'wowpl' : 'q=', 'najdi' : 'dotaz=', 'keresolap_hu' : 'q=', 'vivisimo' : 'query=', 'polymeta_hu' : '', 'kvasir' : 'q=', 'babylon' : 'q=', 'icq' : 'q=', 'comettoolbar' : 'qry=', 'vindex' : 'in=', 'atomz' : 'sp-q=', 'fbdownloader' : 'q=', 'ledix' : 'q=', 'ofir' : 'querytext=', 'chellobe' : 'q1=', 'answerbus' : '', 'miragoes' : '(txtsearch|qry)=', 'sogou' : 'query=', 'segnalo' : '', 'voila' : '(kw|rdata)=', 'msn' : 'q=', 'quick' : 'query=', 'webcrawler' : 'searchText=', 'searchch' : 'q=', 'internetto' : 'searchstr=', 'sify' : 'keyword=', 'arianna' : 'query=', 'splut' : 'pattern=', 'enirose' : 'q=', 'netscape' : 'search=', 'godado' : 'Keywords=', 'stumbleupon' : '', 'overture' : 'keywords=', 'netsprintpl' : 'q=', 'rambler' : 'words=', 'freeserve' : 'q=', 'chellofr' : 'q1=', 'askes' : '(ask|q)=', 'copernic' : 'web\/', 'polskapl' : 'qt=', 'bluewin' : 'qry=', 'ineffabile' : '', 'miragose' : '(txtsearch|qry)=', 'supereva' : 'q=', 'miragonl' : '(txtsearch|qry)=', 'metaspinner' : 'qry=', 'excite' : 'search=', 'miragocouk' : '(txtsearch|qry)=', 'ukdirectory' : 'k=', 'ask' : '(ask|q)=', 'opasia' : 'q=', 'bbc' : 'q=', 'a9' : 'a9\.com\/', 'eniro' : 'q=', 'askfr' : '(ask|q)=', 'askit' : '(ask|q)=', 'teecnoit' : 'q=', 'shawca' : 'q=', 'digg' : 's=', 
'webde' : 'su=', 'searchalot' : 'q=', 'soso' : 'q=', 'shinyseek\.it' : 'KEY=', 'vnet' : 'kw=', 'clubinternet' : 'q=', 'miragono' : '(txtsearch|qry)=', 'chellosk' : 'q1=', 'tiscali' : 'key=', 'onetpl' : 'qt=', 'haku' : 'w=', 'altavista' : 'q=', 'infospace' : 'qkw=', 'searchy' : 'search_term=', 'vi-view' : 'q=', 'jws' : 'q=', 'WOW' : 'q=', 'Omiga-plus' : 'q=', 'Qwant' : 'q=', 'Windows Search' : 'q=', 'Bueno Search' : 'q=', 'Orange' : 'kw='} + +operating_systems = ['windows[_+ ]?2005', 'windows[_+ ]nt[_+ ]6\.0', 'windows[_+ ]?2008', 'windows[_+ ]nt[_+ ]6\.1', 'windows[_+ ]?2012', 'windows[_+ ]nt[_+ ]6\.2', 'windows[_+ ]?vista', 'windows[_+ ]nt[_+ ]6', 'windows[_+ ]?2003', 'windows[_+ ]nt[_+ ]5\.2', 'windows[_+ ]xp', 'windows[_+ ]nt[_+ ]5\.1', 'windows[_+ ]me', 'win[_+ ]9x', 'windows[_+ ]?2000', 'windows[_+ ]nt[_+ ]5', 'winnt', 'windows[_+ \-]?nt', 'win32', 'win(.*)98', 'win(.*)95', 'win(.*)16', 'windows[_+ ]3', 'win(.*)ce', 'mac[_+ ]os[_+ ]x[_+ ]10[_\.]9', 'mac[_+ ]os[_+ ]x[_+ ]10[_\.]8', 'mac[_+ ]os[_+ ]x[_+ ]10[_\.]7', 'mac[_+ ]os[_+ ]x[_+ ]10[_\.]6', 'mac[_+ ]os[_+ ]x[_+ ]10[_\.]5', 'mac[_+ ]os[_+ ]x[_+ ]10[_\.]4', 'mac[_+ ]os[_+ ]x', 'mac[_+ ]?p', 'mac[_+ ]68', 'macweb', 'macintosh', 'linux(.*)android', 'linux(.*)asplinux', 'linux(.*)centos', 'linux(.*)debian', 'linux(.*)fedora', 'linux(.*)gentoo', 'linux(.*)mandr', 'linux(.*)momonga', 'linux(.*)pclinuxos', 'linux(.*)red[_+ ]hat', 'linux(.*)suse', 'linux(.*)ubuntu', 'linux(.*)vector', 'linux(.*)vine', 'linux(.*)white\sbox', 'linux(.*)zenwalk', 'linux', 'gnu.hurd', 'bsdi', 'gnu.kfreebsd', 'freebsd', 'openbsd', 'netbsd', 'dragonfly', 'aix', 'sunos', 'irix', 'osf', 'hp\-ux', 'unix', 'x11', 'gnome\-vfs', 'beos', 'os/2', 'amiga', 'atari', 'vms', 'commodore', 'qnx', 'inferno', 'palmos', 'syllable', 'blackberry', 'cp/m', 'crayos', 'dreamcast', 'iphone[_+ ]os', 'risc[_+ ]?os', 'symbian', 'webtv', 'playstation', 'xbox', 'wii', 'vienna', 'newsfire', 'applesyndication', 'akregator', 'plagger', 'syndirella', 'j2me', 'java', 
'microsoft', 'msie[_+ ]', 'ms[_+ ]frontpage', 'windows'] + +operating_systems_hashid = {'qnx' : 'qnx', 'blackberry' : 'blackberry', 'linux(.*)suse' : 'linuxsuse', 'linux(.*)white\sbox' : 'linuxwhitebox', 'amiga' : 'amigaos', 'java' : 'java', 'linux(.*)momonga' : 'linuxmomonga', 'msie[_+ ]' : 'winunknown', 'symbian' : 'symbian', 'microsoft' : 'winunknown', 'beos' : 'beos', 'win(.*)ce' : 'wince', 'applesyndication' : 'macosx', 'playstation' : 'psp', 'windows[_+ ]me' : 'winme', 'gnu.hurd' : 'gnu', 'gnu.kfreebsd' : 'bsdkfreebsd', 'windows[_+ ]nt[_+ ]6' : 'winvista', 'syllable' : 'syllable', 'openbsd' : 'bsdopenbsd', 'unix' : 'unix', 'windows[_+ ]nt[_+ ]5\.2' : 'win2003', 'linux(.*)android' : 'linuxandroid', 'windows[_+ ]nt[_+ ]5\.1' : 'winxp', 'mac[_+ ]os[_+ ]x' : 'macosx', 'gnome\-vfs' : 'unix', 'windows[_+ ]nt[_+ ]6\.0' : 'winlong', 'palmos' : 'palmos', 'windows[_+ ]nt[_+ ]6\.1' : 'win7', 'sunos' : 'sunos', 'windows[_+ ]?2005' : 'winlong', 'newsfire' : 'macosx', 'vms' : 'vms', 'risc[_+ ]?os' : 'riscos', 'linux' : 'linux', 'ms[_+ ]frontpage' : 'winunknown', 'vienna' : 'macosx', 'mac[_+ ]68' : 'macintosh', 'mac[_+ ]os[_+ ]x[_+ ]10[_\.]7' : 'macosx7', 'windows[_+ ]nt[_+ ]5' : 'win2000', 'syndirella' : 'winxp', 'wii' : 'wii', 'irix' : 'irix', 'dragonflybsd' : 'bsddflybsd', 'windows' : 'winunknown', 'atari' : 'atari', 'netbsd' : 'bsdnetbsd', 'macintosh' : 'macintosh', 'plagger' : 'unix', 'x11' : 'unix', 'linux(.*)zenwalk' : 'linuxzenwalk', 'crayos' : 'crayos', 'dreamcast' : 'dreamcast', 'linux(.*)vine' : 'linuxvine', 'osf' : 'osf', 'akregator' : 'linux', 'mac[_+ ]os[_+ ]x[_+ ]10[_\.]8' : 'macosx8', 'mac[_+ ]os[_+ ]x[_+ ]10[_\.]6' : 'macosx6', 'win(.*)95' : 'win95', 'windows[_+ ]?vista' : 'winvista', 'os/2' : 'os/2', 'linux(.*)debian' : 'linuxdebian', 'webtv' : 'webtv', 'win[_+ ]9x' : 'winme', 'aix' : 'aix', 'cp/m' : 'cp/m', 'linux(.*)red[_+ ]hat' : 'linuxredhat', 'win(.*)16' : 'win16', 'mac[_+ ]os[_+ ]x[_+ ]10[_\.]5' : 'macosx5', 'linux(.*)asplinux' : 'linuxasplinux', 
'inferno' : 'inferno', 'win(.*)98' : 'win98', 'bsdi' : 'bsdi', 'windows[_+ ]?2008' : 'win2008', 'freebsd' : 'bsdfreebsd', 'hp\-ux' : 'hp\-ux', 'windows[_+ ]xp' : 'winxp', 'commodore' : 'commodore', 'mac[_+ ]os[_+ ]x[_+ ]10[_\.]9' : 'macosx9', 'xbox' : 'winxbox', 'windows[_+ \-]?nt' : 'winnt', 'linux(.*)gentoo' : 'linuxgentoo', 'windows[_+ ]?2012' : 'win2012', 'macweb' : 'macintosh', 'winnt' : 'winnt', 'linux(.*)fedora' : 'linuxfedora', 'iphone[_+ ]os' : 'ios', 'win32' : 'winnt', 'windows[_+ ]?2000' : 'win2000', 'linux(.*)pclinuxos' : 'linuxpclinuxos', 'j2me' : 'j2me', 'windows[_+ ]3' : 'win16', 'linux(.*)vector' : 'linuxvector', 'mac[_+ ]?p' : 'macintosh', 'mac[_+ ]os[_+ ]x[_+ ]10[_\.]4' : 'macosx4', 'windows[_+ ]?2003' : 'win2003', 'linux(.*)mandr' : 'linuxmandr', 'linux(.*)ubuntu' : 'linuxubuntu', 'linux(.*)centos' : 'linuxcentos', 'windows[_+ ]nt[_+ ]6\.2' : 'win8'} + +operating_systems_family = {'linux' : 'Linux', 'bsd' : 'BSD', 'win' : 'Windows', 'mac' : 'Macintosh'} + +browsers = ['elinks', 'firebird', 'go!zilla', 'icab', 'links', 'lynx', 'omniweb', '22acidownload', 'abrowse', 'aol\-iweng', 'amaya', 'amigavoyager', 'arora', 'aweb', 'charon', 'donzilla', 'seamonkey', 'flock', 'minefield', 'bonecho', 'granparadiso', 'songbird', 'strata', 'sylera', 'kazehakase', 'prism', 'icecat', 'iceape', 'iceweasel', 'w3clinemode', 'bpftp', 'camino', 'chimera', 'cyberdog', 'dillo', 'xchaos_arachne', 'doris', 'dreamcast', 'xbox', 'downloadagent', 'ecatch', 'emailsiphon', 'encompass', 'epiphany', 'friendlyspider', 'fresco', 'galeon', 'flashget', 'freshdownload', 'getright', 'leechget', 'netants', 'headdump', 'hotjava', 'ibrowse', 'intergo', 'k\-meleon', 'k\-ninja', 'linemodebrowser', 'lotus\-notes', 'macweb', 'multizilla', 'ncsa_mosaic', 'netcaptor', 'netpositive', 'nutscrape', 'msfrontpageexpress', 'contiki', 'emacs\-w3', 'phoenix', 'shiira', 'tzgeturl', 'viking', 'webfetcher', 'webexplorer', 'webmirror', 'webvcr', 'qnx\svoyager', 'cloudflare', 'grabber', 'teleport', 
'webcapture', 'webcopier', 'real', 'winamp', 'windows\-media\-player', 'audion', 'freeamp', 'itunes', 'jetaudio', 'mint_audio', 'mpg123', 'mplayer', 'nsplayer', 'qts', 'quicktime', 'sonique', 'uplayer', 'xaudio', 'xine', 'xmms', 'gstreamer', 'abilon', 'aggrevator', 'aiderss', 'akregator', 'applesyndication', 'betanews_reader', 'blogbridge', 'cyndicate', 'feeddemon', 'feedreader', 'feedtools', 'greatnews', 'gregarius', 'hatena_rss', 'jetbrains_omea', 'liferea', 'netnewswire', 'newsfire', 'newsgator', 'newzcrawler', 'plagger', 'pluck', 'potu', 'pubsub\-rss\-reader', 'pulpfiction', 'rssbandit', 'rssreader', 'rssowl', 'rss\sxpress', 'rssxpress', 'sage', 'sharpreader', 'shrook', 'straw', 'syndirella', 'vienna', 'wizz\srss\snews\sreader', 'alcatel', 'lg\-', 'mot\-', 'nokia', 'panasonic', 'philips', 'sagem', 'samsung', 'sie\-', 'sec\-', 'sonyericsson', 'ericsson', 'mmef', 'mspie', 'vodafone', 'wapalizer', 'wapsilon', 'wap', 'webcollage', 'up\.', 'android', 'blackberry', 'cnf2', 'docomo', 'ipcheck', 'iphone', 'portalmmm', 'webtv', 'democracy', 'cjb\.net', 'ossproxy', 'smallproxy', 'adobeair', 'apt', 'analogx_proxy', 'gnome\-vfs', 'neon', 'curl', 'csscheck', 'httrack', 'fdm', 'javaws', 'wget', 'fget', 'chilkat', 'webdownloader\sfor\sx', 'w3m', 'wdg_validator', 'w3c_validator', 'jigsaw', 'webreaper', 'webzip', 'staroffice', 'gnus', 'nikto', 'download\smaster', 'microsoft\-webdav\-miniredir', 'microsoft\sdata\saccess\sinternet\spublishing\sprovider\scache\smanager', 'microsoft\sdata\saccess\sinternet\spublishing\sprovider\sdav', 'POE\-Component\-Client\-HTTP', 'mozilla', 'libwww', 'lwp', 'WebSec'] + +browsers_hashid = {'sie\-' : 'SIE (PDA/Phone browser)', 'gnus' : 'Gnus Network User Services', 'webcopier' : 'WebCopier', 'nokia' : 'Nokia Browser (PDA/Phone browser)', 'feedtools' : 'FeedTools (RSS Reader)', 'iceape' : 'GNU IceApe', 'xbox' : 'XBoX', 'lotus\-notes' : 'Lotus Notes web client', 'konqueror' : 'Konqueror', 'hatena_rss' : 'Hatena (RSS Reader)', 'feeddemon' : 
'FeedDemon (RSS Reader)', 'bpftp' : 'BPFTP', 'macweb' : 'MacWeb', 'sonyericsson' : 'Sony/Ericsson Browser (PDA/Phone browser)', 'straw' : 'Straw (RSS Reader)', 'democracy' : 'Democracy', 'emacs\-w3' : 'Emacs/w3s', 'xaudio' : 'Some XAudio Engine based MPEG player (media player)', 'android' : 'Android browser (PDA/Phone browser)', 'linemodebrowser' : 'W3C Line Mode Browser', 'sylera' : 'Sylera', 'jetaudio' : 'JetAudio (media player)', 'alcatel' : 'Alcatel Browser (PDA/Phone browser)', 'amaya' : 'Amaya', 'k\-meleon' : 'K-Meleon', 'netnewswire' : 'NetNewsWire (RSS Reader)', 'jetbrains_omea' : 'Omea (RSS Reader)', 'windows\-media\-player' : 'Windows Media Player (media player)', 'sage' : 'Sage (RSS Reader)', 'netpositive' : 'NetPositive', 'webvcr' : 'WebVCR', 'rssbandit' : 'RSS Bandit (RSS Reader)', 'wapalizer' : 'WAPalizer (PDA/Phone browser)', 'arora' : 'Arora', 'w3c_validator' : 'W3C Validator', 'netcaptor' : 'NetCaptor', 'audion' : 'Audion (media player)', 'sec\-' : 'Sony/Ericsson (PDA/Phone browser)', 'lynx' : 'Lynx', 'aggrevator' : 'Aggrevator (RSS Reader)', 'cjb\.net' : 'CJB.NET Proxy', 'feedreader' : 'FeedReader (RSS Reader)', 'itunes' : 'Apple iTunes (media player)', 'bonecho' : 'BonEcho (Firefox 2.0 development)', 'mozilla' : 'Mozilla', 'ericsson' : 'Ericsson Browser (PDA/Phone browser)', 'phoenix' : 'Phoenix', 'grabber' : 'Grabber', 'dillo' : 'Dillo', 'charon' : 'Charon', 'prism' : 'Prism', 'apt' : 'Debian APT', 'wdg_validator' : 'WDG HTML Validator', 'msfrontpageexpress' : 'MS FrontPage Express', 'newzcrawler' : 'NewzCrawler (RSS Reader)', 'mint_audio' : 'Mint Audio (media player)', 'abilon' : 'Abilon (RSS Reader)', 'adobeair' : 'AdobeAir', 'microsoft\-webdav\-miniredir' : 'Microsoft Data Access Component Internet Publishing Provider', 'staroffice' : 'StarOffice', '22acidownload' : '22AciDownload', 'microsoft\sdata\saccess\sinternet\spublishing\sprovider\scache\smanager' : 'Microsoft Data Access Component Internet Publishing Provider Cache Manager', 
'seamonkey' : 'SeaMonkey', 'friendlyspider' : 'FriendlySpider', 'shrook' : 'Shrook (RSS Reader)', 'mspie' : 'MS Pocket Internet Explorer (PDA/Phone browser)', 'blogbridge' : 'BlogBridge (RSS Reader)', 'fresco' : 'ANT Fresco', 'wizz\srss\snews\sreader' : 'Wizz RSS News Reader (RSS Reader)', 'docomo' : 'I-Mode phone (PDA/Phone browser)', 'winamp' : 'WinAmp (media player)', 'webtv' : 'WebTV browser', 'freshdownload' : 'FreshDownload', 'ecatch' : 'eCatch', 'webzip' : 'WebZIP', 'sonique' : 'Sonique (media player)', 'contiki' : 'Contiki', 'pluck' : 'Pluck (RSS Reader)', 'webcollage' : 'WebCollage (PDA/Phone browser)', 'netants' : 'NetAnts', 'wget' : 'Wget', 'webexplorer' : 'IBM-WebExplorer', 'sagem' : 'Sagem (PDA/Phone browser)', 'wap' : 'Unknown WAP browser (PDA/Phone browser)', 'nutscrape' : 'Nutscrape', 'svn' : 'Subversion client', 'plagger' : 'Plagger (RSS Reader)', 'hotjava' : 'Sun HotJava', 'gstreamer' : 'GStreamer (media library)', 'aiderss' : 'AideRSS (RSS Reader)', 'opera' : 'Opera', 'dreamcast' : 'Dreamcast', 'analogx_proxy' : 'AnalogX Proxy', 'webcapture' : 'Acrobat Webcapture', 'doris' : 'Doris (for Symbian)', 'flashget' : 'FlashGet', 'downloadagent' : 'DownloadAgent', 'portalmmm' : 'I-Mode phone (PDA/Phone browser)', 'songbird' : 'Songbird', 'firebird' : 'Firebird (Old Firefox)', 'newsgator' : 'NewsGator (RSS Reader)', 'javaws' : 'Java Web Start', 'microsoft\sdata\saccess\sinternet\spublishing\sprovider\sdav' : 'Microsoft Data Access Component Internet Publishing Provider DAV', 'iceweasel' : 'Iceweasel', 'uplayer' : 'Ultra Player (media player)', 'getright' : 'GetRight', 'chrome' : 'Google Chrome', 'ipcheck' : 'Supervision IP Check (phone)', 'xmms' : 'XMMS (media player)', 'akregator' : 'Akregator (RSS Reader)', 'w3m' : 'w3m', 'cyndicate' : 'Cyndicate (RSS Reader)', 'nsplayer' : 'NetShow Player (media player)', 'mplayer' : 'The Movie Player (media player)', 'elinks' : 'ELinks', 'mmef' : 'Microsoft Mobile Explorer (PDA/Phone browser)', 'greatnews' : 
'GreatNews (RSS Reader)', 'go!zilla' : 'Go!Zilla', 'jigsaw' : 'W3C Validator', 'minefield' : 'Minefield (Firefox 3.0 development)', 'icab' : 'iCab', 'ossproxy' : 'OSSProxy', 'shiira' : 'Shiira', 'rssxpress' : 'RSSXpress (RSS Reader)', 'webmirror' : 'WebMirror', 'gregarius' : 'Gregarius (RSS Reader)', 'syndirella' : 'Syndirella (RSS Reader)', 'libwww' : 'LibWWW', 'icecat' : 'GNU IceCat', 'epiphany' : 'Epiphany', 'xchaos_arachne' : 'Arachne', 'flock' : 'Flock', 'k\-ninja' : 'K-Ninja', 'cnf2' : 'Supervision I-Mode ByTel (phone)', 'leechget' : 'LeechGet', 'webfetcher' : 'WebFetcher', 'sharpreader' : 'SharpReader (RSS Reader)', 'strata' : 'Strata', 'ncsa_mosaic' : 'NCSA Mosaic', 'lwp' : 'LibWWW-perl', 'fget' : 'FGet', 'webreaper' : 'WebReaper', 'philips' : 'Philips Browser (PDA/Phone browser)', 'intergo' : 'InterGO', 'fdm' : 'FDM Free Download Manager', 'newsfire' : 'NewsFire (RSS Reader)', 'donzilla' : 'Donzilla', 'cyberdog' : 'Cyberdog', 'w3clinemode' : 'W3CLineMode', 'aweb' : 'AWeb', 'rss\sxpress' : 'RSS Xpress (RSS Reader)', 'netscape' : 'Netscape', 'firefox' : 'Firefox', 'vienna' : 'Vienna (RSS Reader)', 'curl' : 'Curl', 'lg\-' : 'LG (PDA/Phone browser)', 'liferea' : 'Liferea (RSS Reader)', 'rssreader' : 'RssReader (RSS Reader)', 'quicktime' : 'QuickTime (media player)', 'rssowl' : 'RSSOwl (RSS Reader)', 'potu' : 'Potu (RSS Reader)', 'real' : 'Real player or compatible (media player)', 'kazehakase' : 'Kazehakase', 'amigavoyager' : 'AmigaVoyager', 'nikto' : 'Nikto Web Scanner', 'samsung' : 'Samsung (PDA/Phone browser)', 'camino' : 'Camino', 'headdump' : 'HeadDump', 'mpg123' : 'mpg123 (media player)', 'ibrowse' : 'iBrowse', 'httrack' : 'HTTrack', 'betanews_reader' : 'Betanews Reader (RSS Reader)', 'multizilla' : 'MultiZilla', 'csscheck' : 'WDG CSS Validator', 'chilkat' : 'Chilkat', 'gnome\-vfs' : 'Gnome FileSystem Abstraction library', 'qnx\svoyager' : 'QNX Voyager', 'xine' : 'Xine, a free multimedia player (media player)', 'wapsilon' : 'WAPsilon (PDA/Phone 
browser)', 'omniweb' : 'OmniWeb', 'qts' : 'QuickTime (media player)', 'iphone' : 'IPhone (PDA/Phone browser)', 'download\smaster' : 'Download Master', 'chimera' : 'Chimera (Old Camino)', 'WebSec' : 'Web Secretary', 'viking' : 'Viking', 'links' : 'Links', 'galeon' : 'Galeon', 'aol\-iweng' : 'AOL-Iweng', 'neon' : 'Neon HTTP and WebDAV client library', 'blackberry' : 'BlackBerry (PDA/Phone browser)', 'POE\-Component\-Client\-HTTP' : 'HTTP user-agent for POE (portable networking framework for Perl)', 'emailsiphon' : 'EmailSiphon', 'pulpfiction' : 'PulpFiction (RSS Reader)', 'panasonic' : 'Panasonic Browser (PDA/Phone browser)', 'msie' : 'MS Internet Explorer', 'encompass' : 'Encompass', 'tzgeturl' : 'TzGetURL', 'up\.' : 'UP.Browser (PDA/Phone browser)', 'safari' : 'Safari', 'vodafone' : 'Vodaphone browser (PDA/Phone browser)', 'smallproxy' : 'SmallProxy', 'webdownloader\sfor\sx' : 'Downloader for X', 'cloudflare' : 'CloudFlare', 'freeamp' : 'FreeAmp (media player)', 'applesyndication' : 'AppleSyndication (RSS Reader)', 'teleport' : 'TelePort Pro', 'abrowse' : 'ABrowse', 'mot\-' : 'Motorola Browser (PDA/Phone browser)', 'granparadiso' : 'GranParadiso (Firefox 3.0 development)', 'pubsub\-rss\-reader' : 'PubSub (RSS Reader)'} + +browsers_icons = {'staroffice' : 'staroffice', 'microsoft\sdata\saccess\sinternet\spublishing\sprovider\scache\smanager' : 'frontpage', 'seamonkey' : 'seamonkey', 'abilon' : 'abilon', 'adobeair' : 'adobe', 'microsoft\-webdav\-miniredir' : 'frontpage', 'mspie' : 'pdaphone', 'avantbrowser' : 'avant', 'shrook' : 'rss', 'prism' : 'mozilla', 'apt' : 'apt', 'mint_audio' : 'mediaplayer', 'msfrontpageexpress' : 'fpexpress', 'newzcrawler' : 'rss', 'wap' : 'pdaphone', 'svn' : 'subversion', 'winamp' : 'mediaplayer', 'docomo' : 'pdaphone', 'webtv' : 'webtv', 'ecatch' : 'ecatch', 'freshdownload' : 'freshdownload', 'webzip' : 'webzip', 'sonique' : 'mediaplayer', 'blogbridge' : 'rss', 'fresco' : 'fresco', 'wizz\srss\snews\sreader' : 'wizz', 'sagem' : 'pdaphone', 
'pluck' : 'rss', 'webcollage' : 'pdaphone', 'flashget' : 'flashget', 'analogx_proxy' : 'analogx', 'webcapture' : 'adobe', 'doris' : 'doris', 'javaws' : 'java', 'microsoft\sdata\saccess\sinternet\spublishing\sprovider\sdav' : 'frontpage', 'portalmmm' : 'pdaphone', 'songbird' : 'mozilla', 'newsgator' : 'rss', 'firebird' : 'phoenix', 'hotjava' : 'hotjava', 'aiderss' : 'rss', 'plagger' : 'rss', 'dreamcast' : 'dreamcast', 'microsoft\soffice\sprotocol\sdiscovery' : 'frontpage', 'opera' : 'opera', 'nsplayer' : 'netshow', 'mmef' : 'pdaphone', 'greatnews' : 'rss', 'mplayer' : 'mediaplayer', 'getright' : 'getright', 'chrome' : 'chrome', 'uplayer' : 'mediaplayer', 'iceweasel' : 'iceweasel', 'akregator' : 'rss', 'microsoft\sdata\saccess\sinternet\spublishing\sprovider\sprotocol\sdiscovery' : 'frontpage', 'xmms' : 'mediaplayer', 'hatena_rss' : 'rss', 'feeddemon' : 'rss', 'microsoft\soffice\sexistence\sdiscovery' : 'frontpage', 'sonyericsson' : 'pdaphone', 'straw' : 'rss', 'bpftp' : 'bpftp', 'macweb' : 'macweb', 'webcopier' : 'webcopier', 'gnus' : 'gnus', 'nokia' : 'pdaphone', 'feedtools' : 'rss', 'xbox' : 'winxbox', 'iceape' : 'mozilla', 'lotus\-notes' : 'lotusnotes', 'sie\-' : 'pdaphone', 'konqueror' : 'konqueror', 'netnewswire' : 'rss', 'k\-meleon' : 'kmeleon', 'alcatel' : 'pdaphone', 'amaya' : 'amaya', 'sage' : 'rss', 'windows\-media\-player' : 'mplayer', 'netpositive' : 'netpositive', 'jetbrains_omea' : 'rss', 'jetaudio' : 'mediaplayer', 'xaudio' : 'mediaplayer', 'android' : 'android', 'sylera' : 'mozilla', 'sec\-' : 'pdaphone', 'audion' : 'mediaplayer', 'lynx' : 'lynx', 'aggrevator' : 'rss', 'cjb\.net' : 'cjbnet', 'rssbandit' : 'rss', 'wapalizer' : 'pdaphone', 'dillo' : 'dillo', 'itunes' : 'mediaplayer', 'feedreader' : 'rss', 'bonecho' : 'firefox', 'mozilla' : 'mozilla', 'ericsson' : 'pdaphone', 'phoenix' : 'phoenix', 'grabber' : 'grabber', 'ibrowse' : 'ibrowse', 'httrack' : 'httrack', 'mpg123' : 'mediaplayer', 'multizilla' : 'multizilla', 'betanews_reader' : 'rss', 
'samsung' : 'pdaphone', 'camino' : 'chimera', 'chimera' : 'chimera', 'iphone' : 'pdaphone', 'galeon' : 'galeon', 'gnome\-vfs' : 'gnome', 'omniweb' : 'omniweb', 'qts' : 'mediaplayer', 'xine' : 'mediaplayer', 'wapsilon' : 'pdaphone', 'msie' : 'msie', 'encompass' : 'encompass', 'panasonic' : 'pdaphone', 'up\.' : 'pdaphone', 'pulpfiction' : 'rss', 'neon' : 'neon', 'blackberry' : 'pdaphone', 'teleport' : 'teleport', 'mot\-' : 'pdaphone', 'granparadiso' : 'firefox', 'freeamp' : 'mediaplayer', 'applesyndication' : 'rss', 'pubsub\-rss\-reader' : 'rss', 'safari' : 'safari', 'vodafone' : 'pdaphone', 'rssxpress' : 'rss', 'gregarius' : 'rss', 'minefield' : 'firefox', 'go!zilla' : 'gozilla', 'icab' : 'icab', 'flock' : 'flock', 'leechget' : 'leechget', 'syndirella' : 'rss', 'icecat' : 'icecat', 'epiphany' : 'epiphany', 'donzilla' : 'mozilla', 'philips' : 'pdaphone', 'webreaper' : 'webreaper', 'newsfire' : 'rss', 'sharpreader' : 'rss', 'strata' : 'mozilla', 'ncsa_mosaic' : 'ncsa_mosaic', 'rssreader' : 'rss', 'rssowl' : 'rss', 'potu' : 'rss', 'vienna' : 'rss', 'lg\-' : 'pdaphone', 'liferea' : 'rss', 'kazehakase' : 'mozilla', 'amigavoyager' : 'amigavoyager', 'real' : 'real', 'aweb' : 'aweb', 'cyberdog' : 'cyberdog', 'netscape' : 'netscape', 'firefox' : 'firefox', 'rss\sxpress' : 'rss'} diff --git a/conf.py b/conf.py index e26b953..e06aa5b 100644 --- a/conf.py +++ b/conf.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- # Web server log -analyzed_filename = 'access.log' +analyzed_filename = '/var/log/apache2/access.log.1,/var/log/apache2/access.log' # Domain name to analyze domain_name = 'soutade.fr' @@ -11,24 +10,28 @@ display_visitor_ip = True # Hooks used pre_analysis_hooks = ['page_to_hit', 'robots'] -post_analysis_hooks = ['referers', 'top_pages', 'top_downloads', 'top_hits']#, 'reverse_dns'] -display_hooks = ['top_visitors', 'all_visits', 'referers', 'top_pages', 'top_downloads', 'top_hits'] +post_analysis_hooks = ['referers', 'top_pages', 'top_downloads', 'operating_systems', 
'browsers', 'feeds', 'hours_stats', 'reverse_dns'] +display_hooks = ['track_users', 'top_visitors', 'all_visits', 'referers', 'top_pages', 'top_downloads', 'referers_diff', 'operating_systems', 'browsers', 'feeds', 'hours_stats', 'top_downloads_diff'] # Reverse DNS timeout reverse_dns_timeout = 0.2 # Count this addresses as hit page_to_hit_conf = [r'^.+/logo[/]?$'] -## Count this addresses as page -hit_to_page_conf = [r'^.+/category/.+$', r'^.+/tag/.+$', r'^.+/archive/.+$', r'^.+/ljdc[/]?$'] +# Count this addresses as page +hit_to_page_conf = [r'^.+/category/.+$', r'^.+/tag/.+$', r'^.+/archive/.+$', r'^.+/ljdc[/]?$', r'^.+/source/tree/.*$', r'^.+/source/file/.*$', r'^.+/search/.+$'] # Because it's too long to build HTML when there is too much entries max_hits_displayed = 100 max_downloads_displayed = 100 -# Compress these files after generation -compress_output_files = ['html', 'css', 'js'] +compress_output_files = ['html', 'css', 'js', 'xml'] -# Display result in French -locale = 'fr' +#locale = 'fr' +# Tracked IP +tracked_ip = ['192.168.1.1'] + +feeds = [r'^.*/atom.xml$', r'^.*/rss.xml$'] + +multimedia_file_append = ['xml'] diff --git a/display.py b/display.py index cb98693..6be75df 100644 --- a/display.py +++ b/display.py @@ -52,6 +52,9 @@ class DisplayHTMLRaw(object): self._buildHTML() self._build(f, self.html) + def getTitle(self): + return '' + class DisplayHTMLBlock(DisplayHTMLRaw): def __init__(self, iwla, title=''): @@ -99,6 +102,21 @@ class DisplayHTMLBlockTable(DisplayHTMLBlock): self.rows.append(listToStr(row)) self.rows_cssclasses.append([u''] * len(row)) + def insertCol(self, col_number, col_title='', col_css_class=''): + self.cols.insert(col_number, col_title) + for r in self.rows: + r.insert(col_number, u'') + for r in self.rows_cssclasses: + v = r[0] + # If all cells have the same CSS class, set it + for cur_value in r: + if v != cur_value: + v = None + break + v = v or u'' + r.insert(col_number, v) + self.cols_cssclasses.insert(col_number, 
col_css_class) + def getNbRows(self): return len(self.rows) @@ -157,6 +175,20 @@ class DisplayHTMLBlockTable(DisplayHTMLBlock): self.cols_cssclasses = listToStr(values) + def computeRatio(self, column, column_insertion=None): + if column_insertion is None: + column_insertion = column+1 + + total = 0 + for r in self.rows: + if r[column]: + total += int(r[column]) + + self.insertCol(column_insertion, self.iwla._('Ratio'), u'iwla_hit') + for (index, r) in enumerate(self.rows): + val = r[column] and int(r[column]) or 0 + self.setCellValue(index, column_insertion, '%.1f%%' % (float(val*100)/float(total))) + def _buildHTML(self): style = u'' if self.table_css: style = u' class="%s"' % (self.table_css) @@ -226,7 +258,7 @@ class DisplayHTMLBlockTableWithGraph(DisplayHTMLBlockTable): elif style.startswith(u'iwla_visit'): icon = u'vv.png' else: return '' - return u'/%s/%s' % (self.icon_path, icon) + return u'/%s/other/%s' % (self.icon_path, icon) def _buildHTML(self): self._computeMax() @@ -287,7 +319,7 @@ class DisplayHTMLPage(object): def appendBlock(self, block): self.blocks.append(block) - def build(self, root): + def build(self, root, displayVersion=True): filename = os.path.join(root, self.filename) base = os.path.dirname(filename) @@ -305,11 +337,12 @@ class DisplayHTMLPage(object): f.write(u'' % (css)) if self.title: f.write(u'%s' % (self.title)) - f.write(u'') + f.write(u'') for block in self.blocks: block.build(f) - f.write(u'
Generated by IWLA %s
' % - ("http://indefero.soutade.fr/p/iwla", self.iwla.getVersion())) + if displayVersion: + f.write(u'
Generated by IWLA %s
' % + ("http://indefero.soutade.fr/p/iwla", self.iwla.getVersion())) f.write(u'') f.close() diff --git a/docs/index.md b/docs/index.md index 9fa98a3..0415eea 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,19 +4,21 @@ iwla Introduction ------------ -iwla (Intelligent Web Log Analyzer) is basically a clone of [awstats](http://www.awstats.org). The main problem with awstats is that it's a very monolothic project with everything in one big PERL file. In opposite, iwla has been though to be very modular : a small core analysis and a lot of filters. It can be viewed as UNIX pipes. Philosophy of iwla is : add, update, delete ! That's the job of each filter : modify statistics until final result. It's written in Python. +iwla (Intelligent Web Log Analyzer) is basically a clone of [awstats](http://www.awstats.org). The main problem with awstats is that it's a very monolithic project with everything in one big PERL file. In opposite, iwla has been though to be very modular : a small core analysis and a lot of filters. It can be viewed as UNIX pipes. Philosophy of iwla is : add, update, delete ! That's the job of each filter : modify statistics until final result. It's written in Python. Nevertheless, iwla is only focused on HTTP logs. It uses data (robots definitions, search engines definitions) and design from awstats. Moreover, it's not dynamic, but only generates static HTML page (with gzip compression option). Usage ----- - ./iwla [-c|--clean-output] [-i|--stdin] [-f FILE|--file FILE] [-d LOGLEVEL|--log-level LOGLEVEL] + ./iwla [-c|--clean-output] [-i|--stdin] [-f FILE|--file FILE] [-d LOGLEVEL|--log-level LOGLEVEL] [-r|--reset year/month] [-z|--dont-compress] -c : Clean output (database and HTML) before starting -i : Read data from stdin instead of conf.analyzed_filename - -f : Read data from FILE instead of conf.analyzed_filename + -f : Analyse this log file, multiple files can be specified (comma separated). 
gz files are acceptedRead data from FILE instead of conf.analyzed_filename -d : Loglevel in ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] + -r : Reset analysis to a specific date (month/year) + -z : Don't compress databases (bigger but faster, not compatible with compressed databases) Basic usage ----------- @@ -32,6 +34,12 @@ Main values to edit are : * **display_hooks** : List of display hooks * **locale** : Displayed locale (_en_ or _fr_) +You can also append an element to an existing default configuration list by using "_append" suffix. Example : + multimedia_files_append = ['xml'] +or + multimedia_files_append = 'xml' +Will append 'xml' to current multimedia_files list + Then, you can launch iwla. Output HTML files are created in _output_ directory by default. To quickly see it, go into _output_ and type python -m SimpleHTTPServer 8000 @@ -90,6 +98,34 @@ Plugins Optional configuration values ends with *. + * iwla.py + * plugins/display/all_visits.py + * plugins/display/browsers.py + * plugins/display/feeds.py + * plugins/display/hours_stats.py + * plugins/display/istats_diff.py + * plugins/display/operating_systems.py + * plugins/display/referers.py + * plugins/display/referers_diff.py + * plugins/display/top_downloads.py + * plugins/display/top_downloads_diff.py + * plugins/display/top_hits.py + * plugins/display/top_pages.py + * plugins/display/top_visitors.py + * plugins/display/track_users.py + * plugins/post_analysis/browsers.py + * plugins/post_analysis/feeds.py + * plugins/post_analysis/hours_stats.py + * plugins/post_analysis/operating_systems.py + * plugins/post_analysis/referers.py + * plugins/post_analysis/reverse_dns.py + * plugins/post_analysis/top_downloads.py + * plugins/post_analysis/top_hits.py + * plugins/post_analysis/top_pages.py + * plugins/pre_analysis/page_to_hit.py + * plugins/pre_analysis/robots.py + + iwla ---- @@ -110,6 +146,7 @@ iwla DB_ROOT/meta.db DB_ROOT/year/month/iwla.db OUTPUT_ROOT/index.html + OUTPUT_ROOT/year/_stats.html 
OUTPUT_ROOT/year/month/index.html Statistics creation : @@ -156,6 +193,9 @@ iwla requests => [fields_from_format_log] extract_request => + http_method + http_uri + http_version extract_uri extract_parameters* extract_referer* => @@ -202,6 +242,139 @@ plugins.display.all_visits None +plugins.display.browsers +------------------------ + + Display hook + + Create browsers page + + Plugin requirements : + post_analysis/browsers + + Conf values needed : + max_browsers_displayed* + create_browsers_page* + + Output files : + OUTPUT_ROOT/year/month/browsers.html + OUTPUT_ROOT/year/month/index.html + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + +plugins.display.feeds +--------------------- + + Display hook + + Display feeds parsers + + Plugin requirements : + post_analysis/feeds + + Conf values needed : + create_all_feeds_page* + + Output files : + OUTPUT_ROOT/year/month/index.html + OUTPUT_ROOT/year/month/all_feeds.html + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + +plugins.display.hours_stats +--------------------------- + + Display hook + + Display statistics by hour/week day + + Plugin requirements : + post_analysis/hours_stats + + Conf values needed : + None + + Output files : + OUTPUT_ROOT/year/month/index.html + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + +plugins.display.istats_diff +--------------------------- + + Display hook interface + + Enlight new and updated statistics + + Plugin requirements : + None + + Conf values needed : + None + + Output files : + None + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + +plugins.display.operating_systems +--------------------------------- + + Display hook + + Add operating systems statistics + + Plugin requirements : + post_analysis/operating_systems + + Conf values needed : + create_families_page* + + Output 
files : + OUTPUT_ROOT/year/month/index.html + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + plugins.display.referers ------------------------ @@ -233,20 +406,102 @@ plugins.display.referers None -plugins.display.top_visitors ----------------------------- +plugins.display.referers_diff +----------------------------- Display hook - Create TOP visitors block + Enlight new and updated key phrases in in all_key_phrases.html Plugin requirements : - None + display/referers Conf values needed : - display_visitor_ip* + None Output files : + None + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + +plugins.display.top_downloads +----------------------------- + + Display hook + + Create TOP downloads page + + Plugin requirements : + post_analysis/top_downloads + + Conf values needed : + max_downloads_displayed* + create_all_downloads_page* + + Output files : + OUTPUT_ROOT/year/month/top_downloads.html + OUTPUT_ROOT/year/month/index.html + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + +plugins.display.top_downloads_diff +---------------------------------- + + Display hook + + Enlight new and updated downloads in in top_downloads.html + + Plugin requirements : + display/top_downloads + + Conf values needed : + None + + Output files : + None + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + +plugins.display.top_hits +------------------------ + + Display hook + + Create TOP hits page + + Plugin requirements : + post_analysis/top_hits + + Conf values needed : + max_hits_displayed* + create_all_hits_page* + + Output files : + OUTPUT_ROOT/year/month/top_hits.html OUTPUT_ROOT/year/month/index.html Statistics creation : @@ -287,22 +542,20 @@ plugins.display.top_pages None -plugins.display.top_hits ------------------------- +plugins.display.top_visitors +---------------------------- 
Display hook - Create TOP hits page + Create TOP visitors block Plugin requirements : - post_analysis/top_hits + None Conf values needed : - max_hits_displayed* - create_all_hits_page* + display_visitor_ip* Output files : - OUTPUT_ROOT/year/month/top_hits.html OUTPUT_ROOT/year/month/index.html Statistics creation : @@ -315,23 +568,23 @@ plugins.display.top_hits None -plugins.display.top_downloads ------------------------------ +plugins.display.track_users +--------------------------- Display hook - Create TOP downloads page + Track users Plugin requirements : - post_analysis/top_downloads + None Conf values needed : - max_downloads_displayed* - create_all_downloads_page* + tracked_ip + create_tracked_page* Output files : - OUTPUT_ROOT/year/month/top_downloads.html OUTPUT_ROOT/year/month/index.html + OUTPUT_ROOT/year/month/tracked_users.html Statistics creation : None @@ -343,6 +596,290 @@ plugins.display.top_downloads None +plugins.post_analysis.browsers +------------------------------ + + Post analysis hook + + Detect browser information from requests + + Plugin requirements : + None + + Conf values needed : + None + + Output files : + None + + Statistics creation : + visits : + remote_addr => + browser + + month_stats : + browsers => + browser => count + + Statistics update : + None + + Statistics deletion : + None + + +plugins.post_analysis.feeds +--------------------------- + + Post analysis hook + + Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot) + If there is ony one hit per day to a feed, merge feeds parsers with the same user agent + as it must be the same person with a different IP address. 
+ + Plugin requirements : + None + + Conf values needed : + feeds + merge_one_hit_only_feeds_parsers* + + Output files : + None + + Statistics creation : + remote_addr => + feed_parser + + Statistics update : + None + + Statistics deletion : + None + + +plugins.post_analysis.hours_stats +--------------------------------- + + Post analysis hook + + Count pages, hits and bandwidth by hour/week day + + Plugin requirements : + None + + Conf values needed : + None + + Output files : + None + + Statistics creation : + month_stats: + hours_stats => + 00 .. 23 => + pages + hits + bandwidth + + days_stats => + 0 .. 6 => + pages + hits + bandwidth + + Statistics update : + None + + Statistics deletion : + None + + +plugins.post_analysis.operating_systems +--------------------------------------- + + Post analysis hook + + Detect operating systems from requests + + Plugin requirements : + None + + Conf values needed : + None + + Output files : + None + + Statistics creation : + visits : + remote_addr => + operating_system + + month_stats : + operating_systems => + operating_system => count + + os_families => + family => count + + Statistics update : + None + + Statistics deletion : + None + + +plugins.post_analysis.referers +------------------------------ + + Post analysis hook + + Extract referers and key phrases from requests + + Plugin requirements : + None + + Conf values needed : + domain_name + + Output files : + None + + Statistics creation : + None + + Statistics update : + month_stats : + referers => + pages => count + hits => count + robots_referers => + pages => count + hits => count + search_engine_referers => + pages => count + hits => count + key_phrases => + phrase => count + + Statistics deletion : + None + + +plugins.post_analysis.reverse_dns +--------------------------------- + + Post analysis hook + + Replace IP by reverse DNS names + + Plugin requirements : + None + + Conf values needed : + reverse_dns_timeout* + + Output files : + None + + Statistics 
creation : + None + + Statistics update : + valid_visitors: + remote_addr + dns_name_replaced + dns_analyzed + + Statistics deletion : + None + + +plugins.post_analysis.top_downloads +----------------------------------- + + Post analysis hook + + Count TOP downloads + + Plugin requirements : + None + + Conf values needed : + None + + Output files : + None + + Statistics creation : + None + + Statistics update : + month_stats: + top_downloads => + uri => count + + Statistics deletion : + None + + +plugins.post_analysis.top_hits +------------------------------ + + Post analysis hook + + Count TOP hits + + Plugin requirements : + None + + Conf values needed : + None + + Output files : + None + + Statistics creation : + None + + Statistics update : + month_stats: + top_hits => + uri => count + + Statistics deletion : + None + + +plugins.post_analysis.top_pages +------------------------------- + + Post analysis hook + + Count TOP pages + + Plugin requirements : + None + + Conf values needed : + None + + Output files : + None + + Statistics creation : + None + + Statistics update : + month_stats: + top_pages => + uri => count + + Statistics deletion : + None + + plugins.pre_analysis.page_to_hit -------------------------------- @@ -400,153 +937,3 @@ plugins.pre_analysis.robots None -plugins.post_analysis.referers ------------------------------- - - Post analysis hook - - Extract referers and key phrases from requests - - Plugin requirements : - None - - Conf values needed : - domain_name - - Output files : - None - - Statistics creation : - None - - Statistics update : - month_stats : - referers => - pages - hits - robots_referers => - pages - hits - search_engine_referers => - pages - hits - key_phrases => - phrase - - Statistics deletion : - None - - -plugins.post_analysis.top_pages -------------------------------- - - Post analysis hook - - Count TOP pages - - Plugin requirements : - None - - Conf values needed : - None - - Output files : - None - - Statistics creation 
: - None - - Statistics update : - month_stats: - top_pages => - uri - - Statistics deletion : - None - - -plugins.post_analysis.reverse_dns ---------------------------------- - - Post analysis hook - - Replace IP by reverse DNS names - - Plugin requirements : - None - - Conf values needed : - reverse_dns_timeout* - - Output files : - None - - Statistics creation : - None - - Statistics update : - valid_visitors: - remote_addr - dns_name_replaced - dns_analyzed - - Statistics deletion : - None - - -plugins.post_analysis.top_hits ------------------------------- - - Post analysis hook - - Count TOP hits - - Plugin requirements : - None - - Conf values needed : - None - - Output files : - None - - Statistics creation : - None - - Statistics update : - month_stats: - top_hits => - uri - - Statistics deletion : - None - - -plugins.post_analysis.top_downloads ------------------------------------ - - Post analysis hook - - Count TOP downloads - - Plugin requirements : - None - - Conf values needed : - None - - Output files : - None - - Statistics creation : - None - - Statistics update : - month_stats: - top_downloads => - uri - - Statistics deletion : - None - - diff --git a/docs/main.md b/docs/main.md index 7df8333..09f4ef1 100644 --- a/docs/main.md +++ b/docs/main.md @@ -4,19 +4,21 @@ iwla Introduction ------------ -iwla (Intelligent Web Log Analyzer) is basically a clone of [awstats](http://www.awstats.org). The main problem with awstats is that it's a very monolothic project with everything in one big PERL file. In opposite, iwla has been though to be very modular : a small core analysis and a lot of filters. It can be viewed as UNIX pipes. Philosophy of iwla is : add, update, delete ! That's the job of each filter : modify statistics until final result. It's written in Python. +iwla (Intelligent Web Log Analyzer) is basically a clone of [awstats](http://www.awstats.org). 
The main problem with awstats is that it's a very monolithic project with everything in one big PERL file. In opposite, iwla has been though to be very modular : a small core analysis and a lot of filters. It can be viewed as UNIX pipes. Philosophy of iwla is : add, update, delete ! That's the job of each filter : modify statistics until final result. It's written in Python. Nevertheless, iwla is only focused on HTTP logs. It uses data (robots definitions, search engines definitions) and design from awstats. Moreover, it's not dynamic, but only generates static HTML page (with gzip compression option). Usage ----- - ./iwla [-c|--clean-output] [-i|--stdin] [-f FILE|--file FILE] [-d LOGLEVEL|--log-level LOGLEVEL] + ./iwla [-c|--clean-output] [-i|--stdin] [-f FILE|--file FILE] [-d LOGLEVEL|--log-level LOGLEVEL] [-r|--reset year/month] [-z|--dont-compress] -c : Clean output (database and HTML) before starting -i : Read data from stdin instead of conf.analyzed_filename - -f : Read data from FILE instead of conf.analyzed_filename + -f : Analyse this log file, multiple files can be specified (comma separated). gz files are acceptedRead data from FILE instead of conf.analyzed_filename -d : Loglevel in ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] + -r : Reset analysis to a specific date (month/year) + -z : Don't compress databases (bigger but faster, not compatible with compressed databases) Basic usage ----------- @@ -32,6 +34,12 @@ Main values to edit are : * **display_hooks** : List of display hooks * **locale** : Displayed locale (_en_ or _fr_) +You can also append an element to an existing default configuration list by using "_append" suffix. Example : + multimedia_files_append = ['xml'] +or + multimedia_files_append = 'xml' +Will append 'xml' to current multimedia_files list + Then, you can launch iwla. Output HTML files are created in _output_ directory by default. 
To quickly see it, go into _output_ and type python -m SimpleHTTPServer 8000 diff --git a/docs/modules.md b/docs/modules.md index 5067afb..b68c706 100644 --- a/docs/modules.md +++ b/docs/modules.md @@ -1,3 +1,31 @@ + * iwla.py + * plugins/display/all_visits.py + * plugins/display/browsers.py + * plugins/display/feeds.py + * plugins/display/hours_stats.py + * plugins/display/istats_diff.py + * plugins/display/operating_systems.py + * plugins/display/referers.py + * plugins/display/referers_diff.py + * plugins/display/top_downloads.py + * plugins/display/top_downloads_diff.py + * plugins/display/top_hits.py + * plugins/display/top_pages.py + * plugins/display/top_visitors.py + * plugins/display/track_users.py + * plugins/post_analysis/browsers.py + * plugins/post_analysis/feeds.py + * plugins/post_analysis/hours_stats.py + * plugins/post_analysis/operating_systems.py + * plugins/post_analysis/referers.py + * plugins/post_analysis/reverse_dns.py + * plugins/post_analysis/top_downloads.py + * plugins/post_analysis/top_hits.py + * plugins/post_analysis/top_pages.py + * plugins/pre_analysis/page_to_hit.py + * plugins/pre_analysis/robots.py + + iwla ---- @@ -18,6 +46,7 @@ iwla DB_ROOT/meta.db DB_ROOT/year/month/iwla.db OUTPUT_ROOT/index.html + OUTPUT_ROOT/year/_stats.html OUTPUT_ROOT/year/month/index.html Statistics creation : @@ -64,6 +93,9 @@ iwla requests => [fields_from_format_log] extract_request => + http_method + http_uri + http_version extract_uri extract_parameters* extract_referer* => @@ -110,6 +142,139 @@ plugins.display.all_visits None +plugins.display.browsers +------------------------ + + Display hook + + Create browsers page + + Plugin requirements : + post_analysis/browsers + + Conf values needed : + max_browsers_displayed* + create_browsers_page* + + Output files : + OUTPUT_ROOT/year/month/browsers.html + OUTPUT_ROOT/year/month/index.html + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + 
+plugins.display.feeds +--------------------- + + Display hook + + Display feeds parsers + + Plugin requirements : + post_analysis/feeds + + Conf values needed : + create_all_feeds_page* + + Output files : + OUTPUT_ROOT/year/month/index.html + OUTPUT_ROOT/year/month/all_feeds.html + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + +plugins.display.hours_stats +--------------------------- + + Display hook + + Display statistics by hour/week day + + Plugin requirements : + post_analysis/hours_stats + + Conf values needed : + None + + Output files : + OUTPUT_ROOT/year/month/index.html + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + +plugins.display.istats_diff +--------------------------- + + Display hook interface + + Highlight new and updated statistics + + Plugin requirements : + None + + Conf values needed : + None + + Output files : + None + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + +plugins.display.operating_systems +--------------------------------- + + Display hook + + Add operating systems statistics + + Plugin requirements : + post_analysis/operating_systems + + Conf values needed : + create_families_page* + + Output files : + OUTPUT_ROOT/year/month/index.html + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + plugins.display.referers ------------------------ @@ -141,20 +306,102 @@ plugins.display.referers None -plugins.display.top_visitors ----------------------------- +plugins.display.referers_diff +----------------------------- Display hook - Create TOP visitors block + Highlight new and updated key phrases in all_key_phrases.html Plugin requirements : - None + display/referers Conf values needed : - display_visitor_ip* + None Output files : + None + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + 
+plugins.display.top_downloads +----------------------------- + + Display hook + + Create TOP downloads page + + Plugin requirements : + post_analysis/top_downloads + + Conf values needed : + max_downloads_displayed* + create_all_downloads_page* + + Output files : + OUTPUT_ROOT/year/month/top_downloads.html + OUTPUT_ROOT/year/month/index.html + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + +plugins.display.top_downloads_diff +---------------------------------- + + Display hook + + Highlight new and updated downloads in top_downloads.html + + Plugin requirements : + display/top_downloads + + Conf values needed : + None + + Output files : + None + + Statistics creation : + None + + Statistics update : + None + + Statistics deletion : + None + + +plugins.display.top_hits +------------------------ + + Display hook + + Create TOP hits page + + Plugin requirements : + post_analysis/top_hits + + Conf values needed : + max_hits_displayed* + create_all_hits_page* + + Output files : + OUTPUT_ROOT/year/month/top_hits.html OUTPUT_ROOT/year/month/index.html Statistics creation : @@ -195,22 +442,20 @@ plugins.display.top_pages None -plugins.display.top_hits ------------------------- +plugins.display.top_visitors +---------------------------- Display hook - Create TOP hits page + Create TOP visitors block Plugin requirements : - post_analysis/top_hits + None Conf values needed : - max_hits_displayed* - create_all_hits_page* + display_visitor_ip* Output files : - OUTPUT_ROOT/year/month/top_hits.html OUTPUT_ROOT/year/month/index.html Statistics creation : @@ -223,23 +468,23 @@ plugins.display.top_hits None -plugins.display.top_downloads ------------------------------ +plugins.display.track_users +--------------------------- Display hook - Create TOP downloads page + Track users Plugin requirements : - post_analysis/top_downloads + None Conf values needed : - max_downloads_displayed* - create_all_downloads_page* + tracked_ip + 
create_tracked_page* Output files : - OUTPUT_ROOT/year/month/top_downloads.html OUTPUT_ROOT/year/month/index.html + OUTPUT_ROOT/year/month/tracked_users.html Statistics creation : None @@ -251,6 +496,290 @@ plugins.display.top_downloads None +plugins.post_analysis.browsers +------------------------------ + + Post analysis hook + + Detect browser information from requests + + Plugin requirements : + None + + Conf values needed : + None + + Output files : + None + + Statistics creation : + visits : + remote_addr => + browser + + month_stats : + browsers => + browser => count + + Statistics update : + None + + Statistics deletion : + None + + +plugins.post_analysis.feeds +--------------------------- + + Post analysis hook + + Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot) + If there is only one hit per day to a feed, merge feeds parsers with the same user agent + as it must be the same person with a different IP address. + + Plugin requirements : + None + + Conf values needed : + feeds + merge_one_hit_only_feeds_parsers* + + Output files : + None + + Statistics creation : + remote_addr => + feed_parser + + Statistics update : + None + + Statistics deletion : + None + + +plugins.post_analysis.hours_stats +--------------------------------- + + Post analysis hook + + Count pages, hits and bandwidth by hour/week day + + Plugin requirements : + None + + Conf values needed : + None + + Output files : + None + + Statistics creation : + month_stats: + hours_stats => + 00 .. 23 => + pages + hits + bandwidth + + days_stats => + 0 .. 
6 => + pages + hits + bandwidth + + Statistics update : + None + + Statistics deletion : + None + + +plugins.post_analysis.operating_systems +--------------------------------------- + + Post analysis hook + + Detect operating systems from requests + + Plugin requirements : + None + + Conf values needed : + None + + Output files : + None + + Statistics creation : + visits : + remote_addr => + operating_system + + month_stats : + operating_systems => + operating_system => count + + os_families => + family => count + + Statistics update : + None + + Statistics deletion : + None + + +plugins.post_analysis.referers +------------------------------ + + Post analysis hook + + Extract referers and key phrases from requests + + Plugin requirements : + None + + Conf values needed : + domain_name + + Output files : + None + + Statistics creation : + None + + Statistics update : + month_stats : + referers => + pages => count + hits => count + robots_referers => + pages => count + hits => count + search_engine_referers => + pages => count + hits => count + key_phrases => + phrase => count + + Statistics deletion : + None + + +plugins.post_analysis.reverse_dns +--------------------------------- + + Post analysis hook + + Replace IP by reverse DNS names + + Plugin requirements : + None + + Conf values needed : + reverse_dns_timeout* + + Output files : + None + + Statistics creation : + None + + Statistics update : + valid_visitors: + remote_addr + dns_name_replaced + dns_analyzed + + Statistics deletion : + None + + +plugins.post_analysis.top_downloads +----------------------------------- + + Post analysis hook + + Count TOP downloads + + Plugin requirements : + None + + Conf values needed : + None + + Output files : + None + + Statistics creation : + None + + Statistics update : + month_stats: + top_downloads => + uri => count + + Statistics deletion : + None + + +plugins.post_analysis.top_hits +------------------------------ + + Post analysis hook + + Count TOP hits + + Plugin 
requirements : + None + + Conf values needed : + None + + Output files : + None + + Statistics creation : + None + + Statistics update : + month_stats: + top_hits => + uri => count + + Statistics deletion : + None + + +plugins.post_analysis.top_pages +------------------------------- + + Post analysis hook + + Count TOP pages + + Plugin requirements : + None + + Conf values needed : + None + + Output files : + None + + Statistics creation : + None + + Statistics update : + month_stats: + top_pages => + uri => count + + Statistics deletion : + None + + plugins.pre_analysis.page_to_hit -------------------------------- @@ -308,153 +837,3 @@ plugins.pre_analysis.robots None -plugins.post_analysis.referers ------------------------------- - - Post analysis hook - - Extract referers and key phrases from requests - - Plugin requirements : - None - - Conf values needed : - domain_name - - Output files : - None - - Statistics creation : - None - - Statistics update : - month_stats : - referers => - pages - hits - robots_referers => - pages - hits - search_engine_referers => - pages - hits - key_phrases => - phrase - - Statistics deletion : - None - - -plugins.post_analysis.top_pages -------------------------------- - - Post analysis hook - - Count TOP pages - - Plugin requirements : - None - - Conf values needed : - None - - Output files : - None - - Statistics creation : - None - - Statistics update : - month_stats: - top_pages => - uri - - Statistics deletion : - None - - -plugins.post_analysis.reverse_dns ---------------------------------- - - Post analysis hook - - Replace IP by reverse DNS names - - Plugin requirements : - None - - Conf values needed : - reverse_dns_timeout* - - Output files : - None - - Statistics creation : - None - - Statistics update : - valid_visitors: - remote_addr - dns_name_replaced - dns_analyzed - - Statistics deletion : - None - - -plugins.post_analysis.top_hits ------------------------------- - - Post analysis hook - - Count TOP hits - - 
Plugin requirements : - None - - Conf values needed : - None - - Output files : - None - - Statistics creation : - None - - Statistics update : - month_stats: - top_hits => - uri - - Statistics deletion : - None - - -plugins.post_analysis.top_downloads ------------------------------------ - - Post analysis hook - - Count TOP downloads - - Plugin requirements : - None - - Conf values needed : - None - - Output files : - None - - Statistics creation : - None - - Statistics update : - month_stats: - top_downloads => - uri - - Statistics deletion : - None - - diff --git a/iplugin.py b/iplugin.py index 2664190..a745a53 100644 --- a/iplugin.py +++ b/iplugin.py @@ -79,13 +79,19 @@ def preloadPlugins(plugins, iwla): classes = [c for _,c in inspect.getmembers(mod)\ if inspect.isclass(c) and \ issubclass(c, IPlugin) and \ - c.__name__ != 'IPlugin' + c.__name__ != 'IPlugin' and \ + not c.__subclasses__() ] if not classes: logger.warning('No plugin defined in %s' % (plugin_path)) continue + if len(classes) > 1: + logger.warning('More than one class found in %s, loading may fail. 
Selecting %s' % (plugin_path, classes[0])) + print classes + continue + plugin = classes[0](iwla) plugin_name = plugin.__class__.__name__ @@ -103,7 +109,7 @@ def preloadPlugins(plugins, iwla): requirement_validated = False for r in requirements: - for (_,p) in cache_plugins.items(): + for p in cache_plugins.values(): if p.__class__.__name__ == r: requirement_validated = True break diff --git a/iwla.pot b/iwla.pot index d8b1510..478a75a 100644 --- a/iwla.pot +++ b/iwla.pot @@ -5,7 +5,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" -"POT-Creation-Date: 2014-12-19 17:46+CET\n" +"POT-Creation-Date: 2015-03-02 19:44+CET\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -35,11 +35,11 @@ msgstr "" msgid "March" msgstr "" -#: display.py:32 iwla.py:428 +#: display.py:32 iwla.py:440 msgid "June" msgstr "" -#: display.py:32 iwla.py:428 +#: display.py:32 iwla.py:440 msgid "May" msgstr "" @@ -63,116 +63,143 @@ msgstr "" msgid "September" msgstr "" -#: iwla.py:371 +#: display.py:187 +msgid "Ratio" +msgstr "" + +#: iwla.py:381 msgid "Statistics" msgstr "" -#: iwla.py:377 -msgid "By day" -msgstr "" - -#: iwla.py:377 -msgid "Day" -msgstr "" - -#: iwla.py:377 iwla.py:430 +#: iwla.py:389 iwla.py:442 msgid "Not viewed Bandwidth" msgstr "" -#: iwla.py:377 iwla.py:430 +#: iwla.py:389 iwla.py:442 msgid "Visits" msgstr "" -#: iwla.py:377 iwla.py:430 plugins/display/all_visits.py:70 -#: plugins/display/referers.py:95 plugins/display/referers.py:153 -#: plugins/display/top_downloads.py:97 plugins/display/top_visitors.py:72 +#: iwla.py:389 iwla.py:442 plugins/display/all_visits.py:70 +#: plugins/display/feeds.py:75 plugins/display/hours_stats.py:73 +#: plugins/display/hours_stats.py:83 plugins/display/referers.py:95 +#: plugins/display/referers.py:153 plugins/display/top_downloads.py:97 +#: plugins/display/top_visitors.py:72 plugins/display/track_users.py:113 msgid "Hits" msgstr "" -#: iwla.py:377 iwla.py:430 
plugins/display/all_visits.py:70 -#: plugins/display/referers.py:95 plugins/display/referers.py:153 -#: plugins/display/top_visitors.py:72 +#: iwla.py:389 iwla.py:442 plugins/display/all_visits.py:70 +#: plugins/display/feeds.py:75 plugins/display/hours_stats.py:73 +#: plugins/display/hours_stats.py:83 plugins/display/referers.py:95 +#: plugins/display/referers.py:153 plugins/display/top_visitors.py:72 +#: plugins/display/track_users.py:77 plugins/display/track_users.py:113 msgid "Pages" msgstr "" -#: iwla.py:377 iwla.py:430 plugins/display/all_visits.py:70 +#: iwla.py:389 iwla.py:442 plugins/display/all_visits.py:70 +#: plugins/display/hours_stats.py:73 plugins/display/hours_stats.py:83 #: plugins/display/top_visitors.py:72 msgid "Bandwidth" msgstr "" -#: iwla.py:414 +#: iwla.py:389 plugins/display/hours_stats.py:71 +msgid "By day" +msgstr "" + +#: iwla.py:389 plugins/display/hours_stats.py:73 +msgid "Day" +msgstr "" + +#: iwla.py:426 msgid "Average" msgstr "" -#: iwla.py:419 iwla.py:457 +#: iwla.py:431 iwla.py:476 msgid "Total" msgstr "" -#: iwla.py:428 +#: iwla.py:440 msgid "Apr" msgstr "" -#: iwla.py:428 +#: iwla.py:440 msgid "Aug" msgstr "" -#: iwla.py:428 +#: iwla.py:440 msgid "Dec" msgstr "" -#: iwla.py:428 +#: iwla.py:440 msgid "Feb" msgstr "" -#: iwla.py:428 +#: iwla.py:440 msgid "Jan" msgstr "" -#: iwla.py:428 +#: iwla.py:440 msgid "Jul" msgstr "" -#: iwla.py:428 +#: iwla.py:440 msgid "Mar" msgstr "" -#: iwla.py:428 +#: iwla.py:440 msgid "Nov" msgstr "" -#: iwla.py:428 +#: iwla.py:440 msgid "Oct" msgstr "" -#: iwla.py:428 +#: iwla.py:440 msgid "Sep" msgstr "" -#: iwla.py:429 +#: iwla.py:441 msgid "Summary" msgstr "" -#: iwla.py:430 +#: iwla.py:442 msgid "Month" msgstr "" -#: iwla.py:430 +#: iwla.py:442 msgid "Visitors" msgstr "" -#: iwla.py:430 iwla.py:440 +#: iwla.py:442 iwla.py:454 plugins/display/feeds.py:98 +#: plugins/display/operating_systems.py:90 plugins/display/track_users.py:108 msgid "Details" msgstr "" -#: iwla.py:465 +#: iwla.py:490 msgid 
"Statistics for" msgstr "" -#: iwla.py:472 +#: iwla.py:497 msgid "Last update" msgstr "" -#: plugins/display/all_visits.py:70 plugins/display/top_visitors.py:72 +#: iwla.py:501 +msgid "Time analysis" +msgstr "" + +#: iwla.py:503 +msgid "hours" +msgstr "" + +#: iwla.py:504 +msgid "minutes" +msgstr "" + +#: iwla.py:504 +msgid "seconds" +msgstr "" + +#: plugins/display/all_visits.py:70 plugins/display/feeds.py:75 +#: plugins/display/top_visitors.py:72 msgid "Host" msgstr "" @@ -188,6 +215,103 @@ msgstr "" msgid "Top visitors" msgstr "" +#: plugins/display/browsers.py:79 +msgid "Browsers" +msgstr "" + +#: plugins/display/browsers.py:79 plugins/display/browsers.py:113 +msgid "Browser" +msgstr "" + +#: plugins/display/browsers.py:79 plugins/display/browsers.py:113 +#: plugins/display/operating_systems.py:78 +#: plugins/display/operating_systems.py:95 plugins/display/top_hits.py:71 +#: plugins/display/top_hits.py:97 plugins/display/top_pages.py:71 +#: plugins/display/top_pages.py:96 +msgid "Entrance" +msgstr "" + +#: plugins/display/browsers.py:98 plugins/display/browsers.py:128 +#: plugins/display/referers.py:110 plugins/display/referers.py:125 +#: plugins/display/referers.py:140 plugins/display/referers.py:163 +#: plugins/display/referers.py:174 plugins/display/referers.py:185 +#: plugins/display/referers.py:222 plugins/display/top_downloads.py:83 +#: plugins/display/top_downloads.py:103 plugins/display/top_hits.py:82 +#: plugins/display/top_hits.py:103 plugins/display/top_pages.py:82 +#: plugins/display/top_pages.py:102 plugins/display/top_visitors.py:92 +msgid "Others" +msgstr "" + +#: plugins/display/browsers.py:107 +msgid "All Browsers" +msgstr "" + +#: plugins/display/feeds.py:69 +msgid "All Feeds parsers" +msgstr "" + +#: plugins/display/feeds.py:75 +msgid "All feeds parsers" +msgstr "" + +#: plugins/display/feeds.py:91 +msgid "Merged feeds parsers" +msgstr "" + +#: plugins/display/feeds.py:96 +msgid "Feeds parsers" +msgstr "" + +#: plugins/display/feeds.py:103 
+msgid "Found" +msgstr "" + +#: plugins/display/hours_stats.py:72 +msgid "Fri" +msgstr "" + +#: plugins/display/hours_stats.py:72 +msgid "Mon" +msgstr "" + +#: plugins/display/hours_stats.py:72 +msgid "Sat" +msgstr "" + +#: plugins/display/hours_stats.py:72 +msgid "Sun" +msgstr "" + +#: plugins/display/hours_stats.py:72 +msgid "Thu" +msgstr "" + +#: plugins/display/hours_stats.py:72 +msgid "Tue" +msgstr "" + +#: plugins/display/hours_stats.py:72 +msgid "Wed" +msgstr "" + +#: plugins/display/hours_stats.py:81 +msgid "By Hours" +msgstr "" + +#: plugins/display/hours_stats.py:83 +msgid "Hours" +msgstr "" + +#: plugins/display/operating_systems.py:78 +#: plugins/display/operating_systems.py:88 +msgid "Operating Systems" +msgstr "" + +#: plugins/display/operating_systems.py:78 +#: plugins/display/operating_systems.py:95 +msgid "Operating System" +msgstr "" + #: plugins/display/referers.py:95 msgid "Connexion from" msgstr "" @@ -200,16 +324,6 @@ msgstr "" msgid "Search Engine" msgstr "" -#: plugins/display/referers.py:110 plugins/display/referers.py:125 -#: plugins/display/referers.py:140 plugins/display/referers.py:163 -#: plugins/display/referers.py:174 plugins/display/referers.py:185 -#: plugins/display/referers.py:222 plugins/display/top_downloads.py:83 -#: plugins/display/top_downloads.py:103 plugins/display/top_hits.py:82 -#: plugins/display/top_hits.py:103 plugins/display/top_pages.py:82 -#: plugins/display/top_pages.py:102 plugins/display/top_visitors.py:92 -msgid "Others" -msgstr "" - #: plugins/display/referers.py:114 plugins/display/referers.py:167 msgid "External URL" msgstr "" @@ -226,8 +340,12 @@ msgstr "" msgid "All Referers" msgstr "" -#: plugins/display/referers.py:200 plugins/display/referers.py:210 -msgid "Top key phrases" +#: plugins/display/referers.py:193 +msgid "All Key Phrases" +msgstr "" + +#: plugins/display/referers.py:200 +msgid "Key phrases" msgstr "" #: plugins/display/referers.py:200 plugins/display/referers.py:216 @@ -238,6 +356,10 @@ 
msgstr "" msgid "Search" msgstr "" +#: plugins/display/referers.py:210 +msgid "Top key phrases" +msgstr "" + #: plugins/display/referers.py:212 msgid "All key phrases" msgstr "" @@ -264,11 +386,6 @@ msgstr "" msgid "All Hits" msgstr "" -#: plugins/display/top_hits.py:71 plugins/display/top_hits.py:97 -#: plugins/display/top_pages.py:71 plugins/display/top_pages.py:96 -msgid "Entrance" -msgstr "" - #: plugins/display/top_pages.py:71 plugins/display/top_pages.py:90 msgid "All Pages" msgstr "" @@ -277,3 +394,15 @@ msgstr "" msgid "Top Pages" msgstr "" +#: plugins/display/track_users.py:77 plugins/display/track_users.py:106 +msgid "Tracked users" +msgstr "" + +#: plugins/display/track_users.py:77 plugins/display/track_users.py:113 +msgid "Last Access" +msgstr "" + +#: plugins/display/track_users.py:113 +msgid "IP" +msgstr "" + diff --git a/iwla.py b/iwla.py index 0c0f331..57e38f3 100755 --- a/iwla.py +++ b/iwla.py @@ -31,12 +31,10 @@ import argparse import logging import gettext from calendar import monthrange -from datetime import date +from datetime import date, datetime import default_conf as conf -import conf as _ -conf.__dict__.update(_.__dict__) -del _ +import conf as user_conf from iplugin import * from display import * @@ -59,6 +57,7 @@ Output files : DB_ROOT/meta.db DB_ROOT/year/month/iwla.db OUTPUT_ROOT/index.html + OUTPUT_ROOT/year/_stats.html OUTPUT_ROOT/year/month/index.html Statistics creation : @@ -105,6 +104,9 @@ visits : requests => [fields_from_format_log] extract_request => + http_method + http_uri + http_version extract_uri extract_parameters* extract_referer* => @@ -129,12 +131,13 @@ class IWLA(object): ANALYSIS_CLASS = 'HTTP' API_VERSION = 1 - IWLA_VERSION = '0.1' + IWLA_VERSION = '0.2' def __init__(self, logLevel): self.meta_infos = {} self.analyse_started = False self.current_analysis = {} + self.start_time = 0 self.cache_plugins = {} self.display = DisplayHTMLBuild(self) self.valid_visitors = None @@ -232,15 +235,18 @@ class IWLA(object): def 
getDBFilename(self, time): return os.path.join(conf.DB_ROOT, str(time.tm_year), '%02d' % (time.tm_mon), conf.DB_FILENAME) + def _openDB(self, filename, prot='r'): + if self.args.dont_compress: + return open(filename, prot) + else: + return gzip.open(filename, prot) + def _serialize(self, obj, filename): base = os.path.dirname(filename) if not os.path.exists(base): os.makedirs(base) - # TODO : remove return - #return - - with open(filename + '.tmp', 'wb+') as f, gzip.open(filename, 'w') as fzip: + with open(filename + '.tmp', 'wb+') as f, self._openDB(filename, 'w') as fzip: pickle.dump(obj, f) f.seek(0) fzip.write(f.read()) @@ -250,7 +256,7 @@ class IWLA(object): if not os.path.exists(filename): return None - with gzip.open(filename, 'r') as f: + with self._openDB(filename) as f: return pickle.load(f) return None @@ -265,10 +271,21 @@ class IWLA(object): mod.hook(*args) def isPage(self, request): + self.logger.debug("Is page %s" % (request)) for e in conf.pages_extensions: if request.endswith(e): + self.logger.debug("True") return True + self.logger.debug("False") + return False + def isMultimediaFile(self, request): + self.logger.debug("Is multimedia %s" % (request)) + for e in conf.multimedia_files: + if request.endswith(e): + self.logger.debug("True") + return True + self.logger.debug("False") return False def _appendHit(self, hit): @@ -351,10 +368,10 @@ class IWLA(object): gmt_offset_minutes = int(gmt_offset_str[3:5])*60 gmt_offset = gmt_offset_hours + gmt_offset_minutes hit['time_decoded'] = time.strptime(hit['time_local'][:-6], conf.time_format[:-3]) - if gmt_offset_str[0] == '+': - hit['time_decoded'] = time.localtime(time.mktime(hit['time_decoded'])+gmt_offset) - else: - hit['time_decoded'] = time.localtime(time.mktime(hit['time_decoded'])-gmt_offset) + # if gmt_offset_str[0] == '-': + # hit['time_decoded'] = time.localtime(time.mktime(hit['time_decoded'])+gmt_offset) + # else: + # hit['time_decoded'] = 
time.localtime(time.mktime(hit['time_decoded'])-gmt_offset) else: raise e return hit['time_decoded'] @@ -371,6 +388,8 @@ class IWLA(object): filename = self.getCurDisplayPath('index.html') self.logger.info('==> Generate display (%s)' % (filename)) page = self.display.createPage(title, filename, conf.css_path) + link = DisplayHTMLRaw(self, '') + page.appendBlock(link) _, nb_month_days = monthrange(cur_time.tm_year, cur_time.tm_mon) days = self.display.createBlock(DisplayHTMLBlockTableWithGraph, self._('By day'), [self._('Day'), self._('Visits'), self._('Pages'), self._('Hits'), self._('Bandwidth'), self._('Not viewed Bandwidth')], None, nb_month_days, range(1,6)) @@ -430,6 +449,8 @@ class IWLA(object): graph_cols=range(1,7) months = self.display.createBlock(DisplayHTMLBlockTableWithGraph, title, cols, None, 12, graph_cols) months.setColsCSSClass(['', 'iwla_visitor', 'iwla_visit', 'iwla_page', 'iwla_hit', 'iwla_bandwidth', 'iwla_bandwidth', '']) + months_ = self.display.createBlock(DisplayHTMLBlockTableWithGraph, title, cols[:-1], None, 12, graph_cols[:-1]) + months_.setColsCSSClass(['', 'iwla_visitor', 'iwla_visit', 'iwla_page', 'iwla_hit', 'iwla_bandwidth', 'iwla_bandwidth']) total = [0] * len(cols) for i in range(1, 13): month = '%s
%d' % (months_name[i], year) @@ -447,11 +468,16 @@ class IWLA(object): months.setCellValue(i-1, 5, bytesToStr(row[5])) months.setCellValue(i-1, 6, bytesToStr(row[6])) months.appendShortTitle(month) + months_.appendRow(row[:-1]) + months_.setCellValue(i-1, 5, bytesToStr(row[5])) + months_.setCellValue(i-1, 6, bytesToStr(row[6])) + months_.appendShortTitle(month) if year == cur_time.tm_year and i == cur_time.tm_mon: css = months.getCellCSSClass(i-1, 0) if css: css = '%s %s' % (css, 'iwla_curday') else: css = 'iwla_curday' months.setCellCSSClass(i-1, 0, css) + months_.setCellCSSClass(i-1, 0, css) total[0] = self._('Total') total[5] = bytesToStr(total[5]) @@ -460,6 +486,12 @@ class IWLA(object): months.appendRow(total) page.appendBlock(months) + months_.appendRow(total[:-1]) + filename = '%d/_stats.html' % (year) + page_ = self.display.createPage(u'', filename, conf.css_path) + page_.appendBlock(months_) + page_.build(conf.DISPLAY_ROOT, False) + def _generateDisplayWholeMonthStats(self): title = '%s %s' % (self._('Statistics for'), conf.domain_name) filename = 'index.html' @@ -468,8 +500,15 @@ class IWLA(object): page = self.display.createPage(title, filename, conf.css_path) - last_update = '%s %s
' % (self._('Last update'), time.strftime('%02d %b %Y %H:%M', time.localtime())) + last_update = u'%s %s
' % (self._(u'Last update'), time.strftime('%02d %b %Y %H:%M', time.localtime())) page.appendBlock(self.display.createBlock(DisplayHTMLRaw, last_update)) + duration = datetime.now() - self.start_time + duration = time.gmtime(duration.seconds) + time_analysis = u'%s ' % (self._('Time analysis')) + if duration.tm_hour: + time_analysis += u'%d %s, ' % (duration.tm_hour, self._(u'hours')) + time_analysis += u'%d %s and %d %s
' % (duration.tm_min, self._(u'minutes'), duration.tm_sec, self._(u'seconds')) + page.appendBlock(self.display.createBlock(DisplayHTMLRaw, time_analysis)) for year in sorted(self.meta_infos['stats'].keys(), reverse=True): self._generateDisplayMonthStats(page, year, self.meta_infos['stats'][year]) @@ -564,6 +603,9 @@ class IWLA(object): self.meta_infos['stats'][year] = {} self.meta_infos['stats'][year][month] = duplicated_stats + self.logger.info("==> Serialize to %s" % (conf.META_PATH)) + self._serialize(self.meta_infos, conf.META_PATH) + self._generateDisplay() def _generateDayStats(self): @@ -603,6 +645,7 @@ class IWLA(object): def _newHit(self, hit): if not self.domain_name_re.match(hit['server_name']): + self.logger.debug("Not in domain %s" % (hit)) return False t = self._decodeTime(hit) @@ -613,10 +656,13 @@ class IWLA(object): self.current_analysis = self._deserialize(self.getDBFilename(t)) or self._clearVisits() self.analyse_started = True else: - if time.mktime(t) <= time.mktime(cur_time): + if not self.analyse_started and\ + time.mktime(t) <= time.mktime(cur_time): + self.logger.debug("Not in time") return False self.analyse_started = True if cur_time.tm_mon != t.tm_mon: + self._generateDayStats() self._generateMonthStats() self.current_analysis = self._deserialize(self.getDBFilename(t)) or self._clearVisits() elif cur_time.tm_mday != t.tm_mday: @@ -629,6 +675,9 @@ class IWLA(object): if not self._decodeHTTPRequest(hit): return False + if hit['extract_request']['http_method'] not in ['GET', 'POST']: + return False + for k in hit.keys(): if hit[k] == '-' or hit[k] == '*': hit[k] = '' @@ -637,11 +686,40 @@ class IWLA(object): return True - def start(self, _file): + def _reset(self): + reset_time = time.strptime(self.args.reset, '%m/%Y') + + self.logger.info('Reset time') + self.logger.info(reset_time) + + self.meta_infos['last_time'] = reset_time + + cur_time = time.localtime() + year = reset_time.tm_year + while year < cur_time.tm_year: + db_path = 
os.path.join(conf.DB_ROOT, str(year)) + if os.path.exists(db_path): shutil.rmtree(db_path) + output_path = os.path.join(conf.DISPLAY_ROOT, str(year)) + if os.path.exists(output_path): shutil.rmtree(output_path) + year += 1 + month = reset_time.tm_mon + while month <= cur_time.tm_mon: + db_path = os.path.join(conf.DB_ROOT, str(year), '%02d' % (month)) + if os.path.exists(db_path): shutil.rmtree(db_path) + output_path = os.path.join(conf.DISPLAY_ROOT, str(year), '%02d' % (month)) + if os.path.exists(output_path): shutil.rmtree(output_path) + month += 1 + + def start(self, _file, args): + self.args = args + self.start_time = datetime.now() + self.logger.info('==> Load previous database') self.meta_infos = self._deserialize(conf.META_PATH) or self._clearMeta() if self.meta_infos['last_time']: + if args.reset: + self._reset() self.logger.info('Last time') self.logger.info(self.meta_infos['last_time']) self.current_analysis = self._deserialize(self.getDBFilename(self.meta_infos['last_time'])) or self._clearVisits() @@ -669,10 +747,45 @@ class IWLA(object): self._generateDayStats() self._generateMonthStats() del self.meta_infos['start_analysis_time'] - self._serialize(self.meta_infos, conf.META_PATH) else: self.logger.info('==> Analyse not started : nothing new') + +class FileIter(object): + def __init__(self, filenames): + self.filenames = [f for f in filenames.split(',') if f] + for f in self.filenames: + if not os.path.exists(f): + print 'No such file \'%s\'' % (f) + sys.exit(-1) + self.cur_file = None + self._openNextFile() + + def __iter__(self): + return self + + def __next__(self): + return self.next() + + def _openNextFile(self): + if self.cur_file: + self.cur_file.close() + self.cur_file = None + if not self.filenames: + raise StopIteration() + filename = self.filenames.pop(0) + if filename.endswith('gz'): + self.cur_file = gzip.open(filename, 'r') + else: + self.cur_file = open(filename) + + def next(self): + l = self.cur_file.readline() + if not l: + 
self._openNextFile() + l = self.cur_file.readline() + return l[:-1] + if __name__ == '__main__': parser = argparse.ArgumentParser(description='Intelligent Web Log Analyzer') @@ -685,14 +798,39 @@ if __name__ == '__main__': help='Read data from stdin instead of conf.analyzed_filename') parser.add_argument('-f', '--file', dest='file', - help='Analyse this log file') + help='Analyse this log file, multiple files can be specified (comma separated). gz files are accepted') parser.add_argument('-d', '--log-level', dest='loglevel', default='INFO', type=str, help='Loglevel in %s, default : %s' % (['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], 'INFO')) + parser.add_argument('-r', '--reset', dest='reset', action='store_true', + default=False, + help='Reset analysis to a specific date (month/year)') + + parser.add_argument('-z', '--dont-compress', dest='dont_compress', action='store_true', + default=False, + help='Don\'t compress databases (bigger but faster, not compatible with compressed databases)') + args = parser.parse_args() + # Load user conf + for (k,v) in user_conf.__dict__.items(): + if k.endswith('_append'): + new_k = k[:-7] + if new_k in dir(conf): + if type(conf.__dict__[new_k]) == list: + if type(v) == list: + conf.__dict__[new_k] += v + else: + conf.__dict__[new_k].append(v) + else: + print("Error %s is not a list" % (new_k)) + else: + print("Error %s doesn't exists in default conf" % (new_k)) + else: + conf.__dict__.update({k:v}) + if args.clean_output: if os.path.exists(conf.DB_ROOT): shutil.rmtree(conf.DB_ROOT) if os.path.exists(conf.DISPLAY_ROOT): shutil.rmtree(conf.DISPLAY_ROOT) @@ -708,11 +846,7 @@ if __name__ == '__main__': sys.exit(0) if args.stdin: - iwla.start(sys.stdin) + iwla.start(sys.stdin, args) else: filename = args.file or conf.analyzed_filename - if not os.path.exists(filename): - print 'No such file \'%s\'' % (filename) - sys.exit(-1) - with open(filename) as f: - iwla.start(f) + iwla.start(FileIter(filename), args) diff --git 
a/locales/fr_FR/LC_MESSAGES/iwla.mo b/locales/fr_FR/LC_MESSAGES/iwla.mo index 360249b..a37799c 100644 Binary files a/locales/fr_FR/LC_MESSAGES/iwla.mo and b/locales/fr_FR/LC_MESSAGES/iwla.mo differ diff --git a/locales/fr_FR/LC_MESSAGES/iwla.pot b/locales/fr_FR/LC_MESSAGES/iwla.pot index 2286f1d..09e7e94 100644 --- a/locales/fr_FR/LC_MESSAGES/iwla.pot +++ b/locales/fr_FR/LC_MESSAGES/iwla.pot @@ -5,8 +5,8 @@ msgid "" msgstr "" "Project-Id-Version: iwla\n" -"POT-Creation-Date: 2014-12-19 17:43+CET\n" -"PO-Revision-Date: 2014-12-19 17:43+0100\n" +"POT-Creation-Date: 2015-03-02 19:44+CET\n" +"PO-Revision-Date: 2015-03-02 19:45+0100\n" "Last-Translator: Soutadé \n" "Language-Team: iwla\n" "Language: fr_FR\n" @@ -37,11 +37,11 @@ msgstr "Juillet" msgid "March" msgstr "Mars" -#: display.py:32 iwla.py:428 +#: display.py:32 iwla.py:440 msgid "June" msgstr "Juin" -#: display.py:32 iwla.py:428 +#: display.py:32 iwla.py:440 msgid "May" msgstr "Mai" @@ -65,116 +65,143 @@ msgstr "Octobre" msgid "September" msgstr "Septembre" -#: iwla.py:371 +#: display.py:187 +msgid "Ratio" +msgstr "Pourcentage" + +#: iwla.py:381 msgid "Statistics" msgstr "Statistiques" -#: iwla.py:377 -msgid "By day" -msgstr "Par jour" - -#: iwla.py:377 -msgid "Day" -msgstr "Jour" - -#: iwla.py:377 iwla.py:430 +#: iwla.py:389 iwla.py:442 msgid "Not viewed Bandwidth" msgstr "Traffic non vu" -#: iwla.py:377 iwla.py:430 +#: iwla.py:389 iwla.py:442 msgid "Visits" msgstr "Visites" -#: iwla.py:377 iwla.py:430 plugins/display/all_visits.py:70 -#: plugins/display/referers.py:95 plugins/display/referers.py:153 -#: plugins/display/top_downloads.py:97 plugins/display/top_visitors.py:72 +#: iwla.py:389 iwla.py:442 plugins/display/all_visits.py:70 +#: plugins/display/feeds.py:75 plugins/display/hours_stats.py:73 +#: plugins/display/hours_stats.py:83 plugins/display/referers.py:95 +#: plugins/display/referers.py:153 plugins/display/top_downloads.py:97 +#: plugins/display/top_visitors.py:72 plugins/display/track_users.py:113 
msgid "Hits" msgstr "Hits" -#: iwla.py:377 iwla.py:430 plugins/display/all_visits.py:70 -#: plugins/display/referers.py:95 plugins/display/referers.py:153 -#: plugins/display/top_visitors.py:72 +#: iwla.py:389 iwla.py:442 plugins/display/all_visits.py:70 +#: plugins/display/feeds.py:75 plugins/display/hours_stats.py:73 +#: plugins/display/hours_stats.py:83 plugins/display/referers.py:95 +#: plugins/display/referers.py:153 plugins/display/top_visitors.py:72 +#: plugins/display/track_users.py:77 plugins/display/track_users.py:113 msgid "Pages" msgstr "Pages" -#: iwla.py:377 iwla.py:430 plugins/display/all_visits.py:70 +#: iwla.py:389 iwla.py:442 plugins/display/all_visits.py:70 +#: plugins/display/hours_stats.py:73 plugins/display/hours_stats.py:83 #: plugins/display/top_visitors.py:72 msgid "Bandwidth" msgstr "Bande passante" -#: iwla.py:414 +#: iwla.py:389 plugins/display/hours_stats.py:71 +msgid "By day" +msgstr "Par jour" + +#: iwla.py:389 plugins/display/hours_stats.py:73 +msgid "Day" +msgstr "Jour" + +#: iwla.py:426 msgid "Average" msgstr "Moyenne" -#: iwla.py:419 iwla.py:457 +#: iwla.py:431 iwla.py:476 msgid "Total" msgstr "Total" -#: iwla.py:428 +#: iwla.py:440 msgid "Apr" msgstr "Avr" -#: iwla.py:428 +#: iwla.py:440 msgid "Aug" msgstr "Août" -#: iwla.py:428 +#: iwla.py:440 msgid "Dec" msgstr "Déc" -#: iwla.py:428 +#: iwla.py:440 msgid "Feb" msgstr "Fév" -#: iwla.py:428 +#: iwla.py:440 msgid "Jan" msgstr "Jan" -#: iwla.py:428 +#: iwla.py:440 msgid "Jul" msgstr "Jui" -#: iwla.py:428 +#: iwla.py:440 msgid "Mar" msgstr "Mars" -#: iwla.py:428 +#: iwla.py:440 msgid "Nov" msgstr "Nov" -#: iwla.py:428 +#: iwla.py:440 msgid "Oct" msgstr "Oct" -#: iwla.py:428 +#: iwla.py:440 msgid "Sep" msgstr "Sep" -#: iwla.py:429 +#: iwla.py:441 msgid "Summary" msgstr "Résumé" -#: iwla.py:430 +#: iwla.py:442 msgid "Month" msgstr "Mois" -#: iwla.py:430 +#: iwla.py:442 msgid "Visitors" msgstr "Visiteurs" -#: iwla.py:430 iwla.py:440 +#: iwla.py:442 iwla.py:454 
plugins/display/feeds.py:98 +#: plugins/display/operating_systems.py:90 plugins/display/track_users.py:108 msgid "Details" msgstr "Détails" -#: iwla.py:465 +#: iwla.py:490 msgid "Statistics for" msgstr "Statistiques pour" -#: iwla.py:472 +#: iwla.py:497 msgid "Last update" msgstr "Dernière mise à jour" -#: plugins/display/all_visits.py:70 plugins/display/top_visitors.py:72 +#: iwla.py:501 +msgid "Time analysis" +msgstr "Durée de l'analyse" + +#: iwla.py:503 +msgid "hours" +msgstr "heures " + +#: iwla.py:504 +msgid "minutes" +msgstr "minutes" + +#: iwla.py:504 +msgid "seconds" +msgstr "secondes" + +#: plugins/display/all_visits.py:70 plugins/display/feeds.py:75 +#: plugins/display/top_visitors.py:72 msgid "Host" msgstr "Hôte" @@ -190,6 +217,103 @@ msgstr "Toutes les visites" msgid "Top visitors" msgstr "Top visiteurs" +#: plugins/display/browsers.py:79 +msgid "Browsers" +msgstr "Navigateurs" + +#: plugins/display/browsers.py:79 plugins/display/browsers.py:113 +msgid "Browser" +msgstr "Navigateur" + +#: plugins/display/browsers.py:79 plugins/display/browsers.py:113 +#: plugins/display/operating_systems.py:78 +#: plugins/display/operating_systems.py:95 plugins/display/top_hits.py:71 +#: plugins/display/top_hits.py:97 plugins/display/top_pages.py:71 +#: plugins/display/top_pages.py:96 +msgid "Entrance" +msgstr "Entrées" + +#: plugins/display/browsers.py:98 plugins/display/browsers.py:128 +#: plugins/display/referers.py:110 plugins/display/referers.py:125 +#: plugins/display/referers.py:140 plugins/display/referers.py:163 +#: plugins/display/referers.py:174 plugins/display/referers.py:185 +#: plugins/display/referers.py:222 plugins/display/top_downloads.py:83 +#: plugins/display/top_downloads.py:103 plugins/display/top_hits.py:82 +#: plugins/display/top_hits.py:103 plugins/display/top_pages.py:82 +#: plugins/display/top_pages.py:102 plugins/display/top_visitors.py:92 +msgid "Others" +msgstr "Autres" + +#: plugins/display/browsers.py:107 +msgid "All Browsers" +msgstr 
"Tous les navigateurs" + +#: plugins/display/feeds.py:69 +msgid "All Feeds parsers" +msgstr "Tous les agrégateurs" + +#: plugins/display/feeds.py:75 +msgid "All feeds parsers" +msgstr "Tous les agrégateurs" + +#: plugins/display/feeds.py:91 +msgid "Merged feeds parsers" +msgstr "Agrégateurs fusionnés" + +#: plugins/display/feeds.py:96 +msgid "Feeds parsers" +msgstr "Agrégateurs" + +#: plugins/display/feeds.py:103 +msgid "Found" +msgstr "Trouvé" + +#: plugins/display/hours_stats.py:72 +msgid "Fri" +msgstr "Ven" + +#: plugins/display/hours_stats.py:72 +msgid "Mon" +msgstr "Lun" + +#: plugins/display/hours_stats.py:72 +msgid "Sat" +msgstr "Sam" + +#: plugins/display/hours_stats.py:72 +msgid "Sun" +msgstr "Dim" + +#: plugins/display/hours_stats.py:72 +msgid "Thu" +msgstr "Jeu" + +#: plugins/display/hours_stats.py:72 +msgid "Tue" +msgstr "Mar" + +#: plugins/display/hours_stats.py:72 +msgid "Wed" +msgstr "Mer" + +#: plugins/display/hours_stats.py:81 +msgid "By Hours" +msgstr "Par heures" + +#: plugins/display/hours_stats.py:83 +msgid "Hours" +msgstr "Heures" + +#: plugins/display/operating_systems.py:78 +#: plugins/display/operating_systems.py:88 +msgid "Operating Systems" +msgstr "Systèmes d'exploitation" + +#: plugins/display/operating_systems.py:78 +#: plugins/display/operating_systems.py:95 +msgid "Operating System" +msgstr "Système d'exploitation" + #: plugins/display/referers.py:95 msgid "Connexion from" msgstr "Connexion depuis" @@ -202,16 +326,6 @@ msgstr "Origine" msgid "Search Engine" msgstr "Moteur de recherche" -#: plugins/display/referers.py:110 plugins/display/referers.py:125 -#: plugins/display/referers.py:140 plugins/display/referers.py:163 -#: plugins/display/referers.py:174 plugins/display/referers.py:185 -#: plugins/display/referers.py:222 plugins/display/top_downloads.py:83 -#: plugins/display/top_downloads.py:103 plugins/display/top_hits.py:82 -#: plugins/display/top_hits.py:103 plugins/display/top_pages.py:82 -#: plugins/display/top_pages.py:102
plugins/display/top_visitors.py:92 -msgid "Others" -msgstr "Autres" - #: plugins/display/referers.py:114 plugins/display/referers.py:167 msgid "External URL" msgstr "URL externe" @@ -228,9 +342,13 @@ msgstr "Top Origines" msgid "All Referers" msgstr "Toutes les origines" -#: plugins/display/referers.py:200 plugins/display/referers.py:210 -msgid "Top key phrases" -msgstr "Top phrases clé" +#: plugins/display/referers.py:193 +msgid "All Key Phrases" +msgstr "Toutes les phrases clé" + +#: plugins/display/referers.py:200 +msgid "Key phrases" +msgstr "Phrases clé" #: plugins/display/referers.py:200 plugins/display/referers.py:216 msgid "Key phrase" @@ -240,6 +358,10 @@ msgstr "Phrase clé" msgid "Search" msgstr "Recherche" +#: plugins/display/referers.py:210 +msgid "Top key phrases" +msgstr "Top phrases clé" + #: plugins/display/referers.py:212 msgid "All key phrases" msgstr "Toutes les phrases clé" @@ -266,11 +388,6 @@ msgstr "Top Téléchargements" msgid "All Hits" msgstr "Tous les hits" -#: plugins/display/top_hits.py:71 plugins/display/top_hits.py:97 -#: plugins/display/top_pages.py:71 plugins/display/top_pages.py:96 -msgid "Entrance" -msgstr "Entrées" - #: plugins/display/top_pages.py:71 plugins/display/top_pages.py:90 msgid "All Pages" msgstr "Toutes les pages" @@ -279,5 +396,20 @@ msgstr "Toutes les pages" msgid "Top Pages" msgstr "Top Pages" +#: plugins/display/track_users.py:77 plugins/display/track_users.py:106 +msgid "Tracked users" +msgstr "Utilisateurs traqués" + +#: plugins/display/track_users.py:77 plugins/display/track_users.py:113 +msgid "Last Access" +msgstr "Dernière visite" + +#: plugins/display/track_users.py:113 +msgid "IP" +msgstr "IP" + +#~ msgid "Page" +#~ msgstr "Page" + #~ msgid "Key Phrases" #~ msgstr "Phrases clé" diff --git a/plugins/display/browsers.py b/plugins/display/browsers.py new file mode 100644 index 0000000..3d1202d --- /dev/null +++ b/plugins/display/browsers.py @@ -0,0 +1,132 @@ +# -*- coding: utf-8 -*- +# +# Copyright Grégory 
Soutadé 2015 + +# This file is part of iwla + +# iwla is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# iwla is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with iwla. If not, see . +# + +from iwla import IWLA +from iplugin import IPlugin +from display import * + +import awstats_data + +""" +Display hook + +Create browsers page + +Plugin requirements : + post_analysis/browsers + +Conf values needed : + max_browsers_displayed* + create_browsers_page* + +Output files : + OUTPUT_ROOT/year/month/browsers.html + OUTPUT_ROOT/year/month/index.html + +Statistics creation : + None + +Statistics update : + None + +Statistics deletion : + None +""" + +class IWLADisplayBrowsers(IPlugin): + def __init__(self, iwla): + super(IWLADisplayBrowsers, self).__init__(iwla) + self.API_VERSION = 1 + self.requires = ['IWLAPostAnalysisBrowsers'] + + def load(self): + self.icon_path = self.iwla.getConfValue('icon_path', '/') + self.max_browsers = self.iwla.getConfValue('max_browsers_displayed', 0) + self.create_browsers = self.iwla.getConfValue('create_browsers_page', True) + self.icon_names = {v:k for (k, v) in awstats_data.browsers_hashid.items()} + + return True + + def hook(self): + display = self.iwla.getDisplay() + browsers = self.iwla.getMonthStats()['browsers'] + browsers = sorted(browsers.items(), key=lambda t: t[1], reverse=True) + + # All in a file + if self.create_browsers: + title = createCurTitle(self.iwla, u'Browsers') + filename = 'browsers.html' + path = self.iwla.getCurDisplayPath(filename) + + page = display.createPage(title, 
path, self.iwla.getConfValue('css_path', [])) + table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'Browsers'), ['', self.iwla._(u'Browser'), self.iwla._(u'Entrance')]) + table.setColsCSSClass(['', '', 'iwla_hit']) + total_browsers = [0]*3 + new_list = self.max_browsers and browsers[:self.max_browsers] or browsers + for (browser, entrance) in new_list: + if browser != 'unknown': + try: + icon = '' % (self.icon_path, awstats_data.browsers_icons[self.icon_names[browser]]) + except: + icon = '' % (self.icon_path) + else: + icon = '' % (self.icon_path) + browser = 'Unknown' + table.appendRow([icon, browser, entrance]) + total_browsers[2] += entrance + if self.max_browsers: + others = 0 + for (browser, entrance) in browsers[self.max_browsers:]: + others += entrance + table.appendRow(['', self.iwla._(u'Others'), others]) + table.setCellCSSClass(table.getNbRows()-1, 0, 'iwla_others') + + page.appendBlock(table) + + display.addPage(page) + + title = self.iwla._(u'Top Browsers') + if self.create_browsers: + link = '%s' % (filename, self.iwla._(u'All Browsers')) + title = '%s - %s' % (title, link) + + # Top in index + index = self.iwla.getDisplayIndex() + + table = display.createBlock(DisplayHTMLBlockTable, title, ['', self.iwla._(u'Browser'), self.iwla._(u'Entrance')]) + table.setColsCSSClass(['', '', 'iwla_hit']) + for (browser, entrance) in browsers[:10]: + if browser != 'unknown': + try: + icon = '' % (self.icon_path, awstats_data.browsers_icons[self.icon_names[browser]]) + except: + icon = '' % (self.icon_path) + else: + icon = '' % (self.icon_path) + browser = self.iwla._(u'Unknown') + table.appendRow([icon, browser, entrance]) + total_browsers[2] -= entrance + if total_browsers[2]: + total_browsers[0] = u'' + total_browsers[1] = self.iwla._(u'Others') + table.appendRow(total_browsers) + table.setCellCSSClass(table.getNbRows()-1, 0, 'iwla_others') + table.computeRatio(2) + index.appendBlock(table) diff --git a/plugins/display/feeds.py 
b/plugins/display/feeds.py new file mode 100644 index 0000000..bcd7194 --- /dev/null +++ b/plugins/display/feeds.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- +# +# Copyright Grégory Soutadé 2015 + +# This file is part of iwla + +# iwla is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# iwla is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with iwla. If not, see . +# + +from iwla import IWLA +from iplugin import IPlugin +from display import * + +""" +Display hook + +Display feeds parsers + +Plugin requirements : + post_analysis/feeds + +Conf values needed : + create_all_feeds_page* + +Output files : + OUTPUT_ROOT/year/month/index.html + OUTPUT_ROOT/year/month/all_feeds.html + +Statistics creation : + None + +Statistics update : + None + +Statistics deletion : + None +""" + +class IWLADisplayFeeds(IPlugin): + def __init__(self, iwla): + super(IWLADisplayFeeds, self).__init__(iwla) + self.API_VERSION = 1 + self.requires = ['IWLAPostAnalysisFeeds'] + + def load(self): + self.create_all_feeds_page = self.iwla.getConfValue('create_all_feeds_page', True) + + return True + + def hook(self): + from plugins.post_analysis.feeds import IWLAPostAnalysisFeeds + + display = self.iwla.getDisplay() + hits = self.iwla.getCurrentVisists() + nb_feeds_parsers = 0 + + # All in a page + if self.create_all_feeds_page: + title = createCurTitle(self.iwla, self.iwla._(u'All Feeds parsers')) + filename = 'all_feeds.html' + path = self.iwla.getCurDisplayPath(filename) + display_visitor_ip = self.iwla.getConfValue('display_visitor_ip', False) + + 
page = display.createPage(title, path, self.iwla.getConfValue('css_path', [])) + table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'All feeds parsers'), [self.iwla._(u'Host'), self.iwla._(u'Pages'), self.iwla._(u'Hits')]) + table.setColsCSSClass(['', 'iwla_page', 'iwla_hit']) + for super_hit in hits.values(): + if not super_hit['feed_parser']: continue + nb_feeds_parsers += 1 + address = super_hit['remote_addr'] + if display_visitor_ip and\ + super_hit.get('dns_name_replaced', False): + address = '%s [%s]' % (address, super_hit['remote_ip']) + if super_hit['feed_parser'] == IWLAPostAnalysisFeeds.MERGED_FEED_PARSER: + address += '*' + if super_hit['robot']: + table.appendRow([address, super_hit['not_viewed_pages'], super_hit['not_viewed_hits']]) + else: + table.appendRow([address, super_hit['viewed_pages'], super_hit['viewed_hits']]) + page.appendBlock(table) + note = DisplayHTMLRaw(self.iwla, ('*%s' % (self.iwla._(u'Merged feeds parsers')))) + page.appendBlock(note) + display.addPage(page) + + # Found in index + title = self.iwla._(u'Feeds parsers') + if self.create_all_feeds_page: + link = '%s' % (filename, self.iwla._(u'Details')) + title = '%s - %s' % (title, link) + + index = self.iwla.getDisplayIndex() + + table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Found')]) + table.appendRow([nb_feeds_parsers]) + index.appendBlock(table) diff --git a/plugins/display/hours_stats.py b/plugins/display/hours_stats.py new file mode 100644 index 0000000..68dfaac --- /dev/null +++ b/plugins/display/hours_stats.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +# +# Copyright Grégory Soutadé 2015 + +# This file is part of iwla + +# iwla is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# iwla is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with iwla. If not, see . +# + +from iwla import IWLA +from iplugin import IPlugin +from display import * + +""" +Display hook + +Display statistics by hour/week day + +Plugin requirements : + post_analysis/hours_stats + +Conf values needed : + None + +Output files : + OUTPUT_ROOT/year/month/index.html + +Statistics creation : + None + +Statistics update : + None + +Statistics deletion : + None +""" + +class IWLADisplayHoursStats(IPlugin): + def __init__(self, iwla): + super(IWLADisplayHoursStats, self).__init__(iwla) + self.API_VERSION = 1 + self.requires = ['IWLAPostAnalysisHoursStats'] + + def hook(self): + display = self.iwla.getDisplay() + month_stats = self.iwla.getMonthStats() + + hours_stats = month_stats.get('hours_stats', {}) + if not hours_stats: + for i in range(0, 24): + hours_stats[i] = {'pages':0, 'hits':0, 'bandwidth':0} + days_stats = month_stats.get('days_stats', {}) + if not days_stats: + for i in range(0, 7): + days_stats[i] = {'pages':0, 'hits':0, 'bandwidth':0} + + index = self.iwla.getDisplayIndex() + + # By Day + title = self.iwla._(u'By day') + days = [self.iwla._('Mon'), self.iwla._('Tue'), self.iwla._('Wed'), self.iwla._('Thu'), self.iwla._('Fri'), self.iwla._('Sat'), self.iwla._('Sun')] + table = display.createBlock(DisplayHTMLBlockTableWithGraph, title, [self.iwla._('Day'), self.iwla._('Pages'), self.iwla._('Hits'), self.iwla._('Bandwidth')], days, 7, range(1,4)) + table.setColsCSSClass(['', 'iwla_page', 'iwla_hit', 'iwla_bandwidth']) + for i in range(0,7): + table.appendRow([days[i], days_stats[i]['pages'], days_stats[i]['hits'], days_stats[i]['bandwidth']]) + table.setCellValue(i, 3, 
bytesToStr(days_stats[i]['bandwidth'])) + index.appendBlock(table) + + # By Hours + title = self.iwla._(u'By Hours') + hours = ['%02d' % i for i in range(0, 24)] + table = display.createBlock(DisplayHTMLBlockTableWithGraph, title, [self.iwla._('Hours'), self.iwla._('Pages'), self.iwla._('Hits'), self.iwla._('Bandwidth')], hours, 24, range(1,4)) + table.setColsCSSClass(['', 'iwla_page', 'iwla_hit', 'iwla_bandwidth']) + for i in range(0,24): + table.appendRow([hours[i], hours_stats[i]['pages'], hours_stats[i]['hits'], hours_stats[i]['bandwidth']]) + table.setCellValue(i, 3, bytesToStr(hours_stats[i]['bandwidth'])) + index.appendBlock(table) diff --git a/plugins/display/istats_diff.py b/plugins/display/istats_diff.py index 897d2c4..c5d50ca 100644 --- a/plugins/display/istats_diff.py +++ b/plugins/display/istats_diff.py @@ -24,7 +24,7 @@ from display import * import logging """ -Display hook itnerface +Display hook interface Enlight new and updated statistics @@ -73,7 +73,9 @@ class IWLADisplayStatsDiff(IPlugin): path = self.iwla.getCurDisplayPath(self.filename) page = display.getPage(path) - if not page: return + if not page: + self.logger.error('No page for %s' % (path)) + return title = self.iwla._(self.block_name) block = page.getBlock(title) if not block: @@ -94,5 +96,6 @@ class IWLADisplayStatsDiff(IPlugin): stats_diff[k] = 'iwla_new' for (idx, row) in enumerate(block.rows): - if row[0] in stats_diff.keys(): - block.setCellCSSClass(idx, 0, stats_diff[row[0]]) + for k in stats_diff.keys(): + if k in row[0]: + block.setCellCSSClass(idx, 0, stats_diff[k]) diff --git a/plugins/display/operating_systems.py b/plugins/display/operating_systems.py new file mode 100644 index 0000000..0b4324f --- /dev/null +++ b/plugins/display/operating_systems.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +# +# Copyright Grégory Soutadé 2015 + +# This file is part of iwla + +# iwla is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public 
License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# iwla is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with iwla. If not, see . +# + +from iwla import IWLA +from iplugin import IPlugin +from display import * + +import awstats_data + +""" +Display hook + +Add operating systems statistics + +Plugin requirements : + post_analysis/operating_systems + +Conf values needed : + create_families_page* + +Output files : + OUTPUT_ROOT/year/month/index.html + +Statistics creation : + None + +Statistics update : + None + +Statistics deletion : + None +""" + +class IWLADisplayTopOperatingSystems(IPlugin): + def __init__(self, iwla): + super(IWLADisplayTopOperatingSystems, self).__init__(iwla) + self.API_VERSION = 1 + self.requires = ['IWLAPostAnalysisOperatingSystems'] + + def load(self): + self.icon_path = self.iwla.getConfValue('icon_path', '/') + self.create_families_page = self.iwla.getConfValue('create_families_page_page', True) + self.icon_names = {v:k for (k, v) in awstats_data.operating_systems_family.items()} + + return True + + def hook(self): + display = self.iwla.getDisplay() + os_families = self.iwla.getMonthStats()['os_families'] + os_families = sorted(os_families.items(), key=lambda t: t[1], reverse=True) + operating_systems = self.iwla.getMonthStats()['operating_systems'] + operating_systems = sorted(operating_systems.items(), key=lambda t: t[1], reverse=True) + + # All in a page + if self.create_families_page: + title = createCurTitle(self.iwla, u'All Operating Systems') + filename = 'operating_systems.html' + path = self.iwla.getCurDisplayPath(filename) + + page = display.createPage(title, path, 
self.iwla.getConfValue('css_path', [])) + table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'Operating Systems'), ['', self.iwla._(u'Operating System'), self.iwla._(u'Entrance')]) + table.setColsCSSClass(['', '', 'iwla_hit']) + for (os_name, entrance) in operating_systems: + icon = '' % (self.icon_path, os_name) + table.appendRow([icon, os_name, entrance]) + page.appendBlock(table) + + display.addPage(page) + + # Families in index + title = self.iwla._(u'Operating Systems') + if self.create_families_page: + link = '%s' % (filename, self.iwla._(u'Details')) + title = '%s - %s' % (title, link) + + index = self.iwla.getDisplayIndex() + + table = display.createBlock(DisplayHTMLBlockTable, title, ['', self.iwla._(u'Operating System'), self.iwla._(u'Entrance')]) + table.setColsCSSClass(['', '', 'iwla_hit']) + for (family, entrance) in os_families: + icon = '' % (self.icon_path, self.icon_names[family]) + table.appendRow([icon, family, entrance]) + table.computeRatio(2) + index.appendBlock(table) diff --git a/plugins/display/referers.py b/plugins/display/referers.py index bdd04a5..1c0b52d 100644 --- a/plugins/display/referers.py +++ b/plugins/display/referers.py @@ -190,14 +190,14 @@ class IWLADisplayReferers(IPlugin): # All key phrases in a file if self.create_all_key_phrases: - title = createCurTitle(self.iwla, u'Key Phrases') + title = createCurTitle(self.iwla, self.iwla._(u'All Key Phrases')) filename = 'key_phrases.html' path = self.iwla.getCurDisplayPath(filename) total_search = [0]*2 page = display.createPage(title, path, self.iwla.getConfValue('css_path', [])) - table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'Top key phrases'), [self.iwla._(u'Key phrase'), self.iwla._(u'Search')]) + table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'Key phrases'), [self.iwla._(u'Key phrase'), self.iwla._(u'Search')]) table.setColsCSSClass(['', 'iwla_search']) new_list = self.max_key_phrases and top_key_phrases[:self.max_key_phrases] or 
top_key_phrases for phrase in new_list: diff --git a/plugins/display/referers_diff.py b/plugins/display/referers_diff.py index 8e63ecb..2ff505a 100644 --- a/plugins/display/referers_diff.py +++ b/plugins/display/referers_diff.py @@ -53,7 +53,7 @@ class IWLADisplayReferersDiff(IWLADisplayStatsDiff): self.requires = ['IWLADisplayReferers'] self.month_stats_key = 'key_phrases' self.filename = 'key_phrases.html' - self.block_name = u'Key phrases' + self.block_name = self.iwla._(u'Key phrases') def load(self): if not self.iwla.getConfValue('create_all_key_phrases_page', True): diff --git a/plugins/display/top_downloads_diff.py b/plugins/display/top_downloads_diff.py new file mode 100644 index 0000000..85b927c --- /dev/null +++ b/plugins/display/top_downloads_diff.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +# +# Copyright Grégory Soutadé 2015 + +# This file is part of iwla + +# iwla is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# iwla is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with iwla. If not, see . 
+# + +from iwla import IWLA +from istats_diff import IWLADisplayStatsDiff +from display import * + +""" +Display hook + +Highlight new and updated downloads in top_downloads.html + +Plugin requirements : + display/top_downloads + +Conf values needed : + None + +Output files : + None + +Statistics creation : + None + +Statistics update : + None + +Statistics deletion : + None +""" + +class IWLADisplayTopDownloadsDiff(IWLADisplayStatsDiff): + def __init__(self, iwla): + super(IWLADisplayTopDownloadsDiff, self).__init__(iwla) + self.API_VERSION = 1 + self.requires = ['IWLADisplayTopDownloads'] + self.month_stats_key = u'top_downloads' + self.filename = u'top_downloads.html' + self.block_name = self.iwla._(u'All Downloads') + + def load(self): + if not self.iwla.getConfValue('create_all_downloads_page', True): + return False + return super(IWLADisplayTopDownloadsDiff, self).load() diff --git a/plugins/display/track_users.py b/plugins/display/track_users.py new file mode 100644 index 0000000..53b7b9d --- /dev/null +++ b/plugins/display/track_users.py @@ -0,0 +1,122 @@ +# -*- coding: utf-8 -*- +# +# Copyright Grégory Soutadé 2015 + +# This file is part of iwla + +# iwla is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# iwla is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with iwla. If not, see .
+# + +from iwla import IWLA +from iplugin import IPlugin +from display import * + +import awstats_data + +""" +Display hook + +Track users + +Plugin requirements : + None + +Conf values needed : + tracked_ip + create_tracked_page* + +Output files : + OUTPUT_ROOT/year/month/index.html + OUTPUT_ROOT/year/month/tracked_users.html + +Statistics creation : + None + +Statistics update : + None + +Statistics deletion : + None +""" + +class IWLADisplayTrackUsers(IPlugin): + def __init__(self, iwla): + super(IWLADisplayTrackUsers, self).__init__(iwla) + self.API_VERSION = 1 + self.conf_requires = ['tracked_ip'] + + def load(self): + self.create_tracked_page = self.iwla.getConfValue('create_tracked_page', True) + self.tracked_ip = self.iwla.getConfValue('tracked_ip', []) + + return True + + def hook(self): + display = self.iwla.getDisplay() + hits = self.iwla.getCurrentVisists() + stats = {} + + # All in a page + if self.create_tracked_page: + title = createCurTitle(self.iwla, u'Tracked users') + filename = 'tracked_users.html' + path = self.iwla.getCurDisplayPath(filename) + + page = display.createPage(title, path, self.iwla.getConfValue('css_path', [])) + table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'Tracked users'), [self.iwla._(u'Pages'), self.iwla._(u'Last Access')]) + table.setColsCSSClass(['iwla_page', '']) + for ip in self.tracked_ip: + if not ip in hits.keys(): continue + if 'dns_name_replaced' in hits[ip].keys(): + ip_title = '%s [%s]' % (hits[ip]['remote_addr'], ip) + else: + ip_title = '%s' % (ip) + table.appendRow([ip_title, '']) + nb_hits = 0 + nb_pages = 0 + for r in hits[ip]['requests'][::-1]: + uri = r['extract_request']['extract_uri'].lower() + if not self.iwla.hasBeenViewed(r): continue + if not self.iwla.isPage(uri) or\ + self.iwla.isMultimediaFile(uri): + nb_hits += 1 + continue + + nb_pages += 1 + uri = "%s%s" % (r.get('server_name', ''), + r['extract_request']['extract_uri']) + table.appendRow([generateHTMLLink(uri), 
time.asctime(r['time_decoded'])]) + stats[ip] = (nb_pages, nb_hits) + page.appendBlock(table) + + display.addPage(page) + + # Last access in index + title = self.iwla._(u'Tracked users') + if self.create_tracked_page: + link = '%s' % (filename, self.iwla._(u'Details')) + title = '%s - %s' % (title, link) + + index = self.iwla.getDisplayIndex() + + table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'IP'), self.iwla._(u'Last Access'), self.iwla._(u'Pages'), self.iwla._(u'Hits')]) + table.setColsCSSClass(['', '', 'iwla_page', 'iwla_hit']) + for ip in self.tracked_ip: + if not ip in hits.keys(): continue + if 'dns_name_replaced' in hits[ip].keys(): + ip_title = '%s [%s]' % (hits[ip]['remote_addr'], ip) + else: + ip_title = ip + table.appendRow([ip_title, time.asctime(hits[ip]['last_access']), stats[ip][0], stats[ip][1]]) + index.appendBlock(table) diff --git a/plugins/post_analysis/browsers.py b/plugins/post_analysis/browsers.py new file mode 100644 index 0000000..2bb7b37 --- /dev/null +++ b/plugins/post_analysis/browsers.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- +# +# Copyright Grégory Soutadé 2015 + +# This file is part of iwla + +# iwla is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# iwla is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with iwla. If not, see . 
+# + +import re + +from iwla import IWLA +from iplugin import IPlugin + +import awstats_data + +""" +Post analysis hook + +Detect browser information from requests + +Plugin requirements : + None + +Conf values needed : + None + +Output files : + None + +Statistics creation : +visits : + remote_addr => + browser + +month_stats : + browsers => + browser => count + +Statistics update : + None + +Statistics deletion : + None +""" + +class IWLAPostAnalysisBrowsers(IPlugin): + def __init__(self, iwla): + super(IWLAPostAnalysisBrowsers, self).__init__(iwla) + self.API_VERSION = 1 + + def load(self): + self.browsers = [] + + for hashid in awstats_data.browsers: + hashid_re = re.compile(r'.*%s.*' % (hashid), re.IGNORECASE) + + if hashid in awstats_data.browsers_hashid.keys(): + self.browsers.append((hashid_re, awstats_data.browsers_hashid[hashid])) + + return True + + def hook(self): + stats = self.iwla.getValidVisitors() + month_stats = self.iwla.getMonthStats() + + browsers = month_stats.get('browsers', {}) + + browsers_stats = {} + + for (k, super_hit) in stats.items(): + if not 'browser' in super_hit: + for r in super_hit['requests'][::-1]: + user_agent = r['http_user_agent'] + if not user_agent: continue + + browser_name = 'unknown' + for (hashid_re, browser) in self.browsers: + if hashid_re.match(user_agent): + browser_name = browser + break + super_hit['browser'] = browser_name + break + else: + browser_name = super_hit['browser'] + + if not browser_name in browsers_stats.keys(): + browsers_stats[browser_name] = 1 + else: + browsers_stats[browser_name] += 1 + + month_stats['browsers'] = browsers_stats diff --git a/plugins/post_analysis/feeds.py b/plugins/post_analysis/feeds.py new file mode 100644 index 0000000..8476881 --- /dev/null +++ b/plugins/post_analysis/feeds.py @@ -0,0 +1,112 @@ +# -*- coding: utf-8 -*- +# +# Copyright Grégory Soutadé 2015 + +# This file is part of iwla + +# iwla is free software: you can redistribute it and/or modify +# it under the terms 
of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# iwla is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with iwla. If not, see . +# + +import re + +from iwla import IWLA +from iplugin import IPlugin + +""" +Post analysis hook + +Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot) +If there is ony one hit per day to a feed, merge feeds parsers with the same user agent +as it must be the same person with a different IP address. + +Plugin requirements : + None + +Conf values needed : + feeds + merge_one_hit_only_feeds_parsers* + +Output files : + None + +Statistics creation : + remote_addr => + feed_parser + +Statistics update : + None + +Statistics deletion : + None +""" + +class IWLAPostAnalysisFeeds(IPlugin): + NOT_A_FEED_PARSER = 0 + FEED_PARSER = 1 + MERGED_FEED_PARSER = 2 + + def __init__(self, iwla): + super(IWLAPostAnalysisFeeds, self).__init__(iwla) + self.API_VERSION = 1 + self.conf_requires = ['feeds'] + + def load(self): + feeds = self.iwla.getConfValue('feeds', None) + self.merge_one_hit_only_feeds_parsers = self.iwla.getConfValue('merge_one_hit_only_feeds_parsers', True) + + if feeds is None: return False + + self.feeds_re = [] + for f in feeds: + self.feeds_re.append(re.compile(r'.*%s.*' % (f))) + + return True + + def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit): + if isFeedParser and (hit['viewed_hits'] + hit['not_viewed_hits']) == 1: + user_agent = hit['requests'][0]['http_user_agent'].lower() + if one_hit_only.get(user_agent, None) is None: + # Merged + isFeedParser = self.MERGED_FEED_PARSER + one_hit_only[user_agent] = (hit) 
+ else: + isFeedParser = self.NOT_A_FEED_PARSER + hit['feed_parser'] = isFeedParser + + def hook(self): + hits = self.iwla.getCurrentVisists() + one_hit_only = {} + for hit in hits.values(): + isFeedParser = hit.get('feed_parser', None) + + if isFeedParser == self.FEED_PARSER and\ + self.merge_one_hit_only_feeds_parsers: + self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit) + + if not isFeedParser is None: continue + + isFeedParser = self.NOT_A_FEED_PARSER + uri = hit['requests'][0]['extract_request']['extract_uri'].lower() + for regexp in self.feeds_re: + if regexp.match(uri): + isFeedParser = self.FEED_PARSER + # Robot that views pages -> bot + if hit['robot']: + if hit['viewed_pages']: + isFeedParser = self.NOT_A_FEED_PARSER + break + if self.merge_one_hit_only_feeds_parsers: + self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit) + else: + hit['feed_parser'] = isFeedParser diff --git a/plugins/post_analysis/hours_stats.py b/plugins/post_analysis/hours_stats.py new file mode 100644 index 0000000..a45dbdd --- /dev/null +++ b/plugins/post_analysis/hours_stats.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- +# +# Copyright Grégory Soutadé 2015 + +# This file is part of iwla + +# iwla is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# iwla is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with iwla. If not, see . 
+# + +from iwla import IWLA +from iplugin import IPlugin + +""" +Post analysis hook + +Count pages, hits and bandwidth by hour/week day + +Plugin requirements : + None + +Conf values needed : + None + +Output files : + None + +Statistics creation : +month_stats: + hours_stats => + 00 .. 23 => + pages + hits + bandwidth + + days_stats => + 0 .. 6 => + pages + hits + bandwidth + +Statistics update : + None + +Statistics deletion : + None +""" + +class IWLAPostAnalysisHoursStats(IPlugin): + def __init__(self, iwla): + super(IWLAPostAnalysisHoursStats, self).__init__(iwla) + self.API_VERSION = 1 + + def hook(self): + stats = self.iwla.getCurrentVisists() + month_stats = self.iwla.getMonthStats() + + hours_stats = month_stats.get('hours_stats', {}) + if not hours_stats: + for i in range(0, 24): + hours_stats[i] = {'pages':0, 'hits':0, 'bandwidth':0} + days_stats = month_stats.get('days_stats', {}) + if not days_stats: + for i in range(0, 7): + days_stats[i] = {'pages':0, 'hits':0, 'bandwidth':0} + + for super_hit in stats.values(): + if super_hit['robot']: continue + for r in super_hit['requests'][::-1]: + if not self.iwla.isValidForCurrentAnalysis(r): + break + + if not self.iwla.hasBeenViewed(r): continue + + key = r['is_page'] and 'pages' or 'hits' + + t = r['time_decoded'] + + hours_stats[t.tm_hour][key] += 1 + hours_stats[t.tm_hour]['bandwidth'] += int(r['body_bytes_sent']) + + days_stats[t.tm_wday][key] += 1 + days_stats[t.tm_wday]['bandwidth'] += int(r['body_bytes_sent']) + + month_stats['hours_stats'] = hours_stats + month_stats['days_stats'] = days_stats diff --git a/plugins/post_analysis/operating_systems.py b/plugins/post_analysis/operating_systems.py new file mode 100644 index 0000000..4fc1f73 --- /dev/null +++ b/plugins/post_analysis/operating_systems.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +# +# Copyright Grégory Soutadé 2015 + +# This file is part of iwla + +# iwla is free software: you can redistribute it and/or modify +# it under the terms of the 
GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# iwla is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with iwla. If not, see . +# + +import re + +from iwla import IWLA +from iplugin import IPlugin + +import awstats_data + +""" +Post analysis hook + +Detect operating systems from requests + +Plugin requirements : + None + +Conf values needed : + None + +Output files : + None + +Statistics creation : +visits : + remote_addr => + operating_system + +month_stats : + operating_systems => + operating_system => count + + os_families => + family => count + +Statistics update : + None + +Statistics deletion : + None +""" + +class IWLAPostAnalysisOperatingSystems(IPlugin): + def __init__(self, iwla): + super(IWLAPostAnalysisOperatingSystems, self).__init__(iwla) + self.API_VERSION = 1 + + def load(self): + self.operating_systems = [] + self.os_family = {} + + for hashid in awstats_data.operating_systems: + hashid_re = re.compile(r'.*%s.*' % (hashid), re.IGNORECASE) + + if hashid in awstats_data.operating_systems_hashid.keys(): + self.operating_systems.append((hashid_re, awstats_data.operating_systems_hashid[hashid])) + + for (name, family) in awstats_data.operating_systems_family.items(): + name_re = re.compile(r'.*%s.*' % (name)) + self.os_family[name_re] = family + + return True + + def hook(self): + stats = self.iwla.getValidVisitors() + month_stats = self.iwla.getMonthStats() + + operating_systems = month_stats.get('operating_systems', {}) + + os_stats = {} + family_stats = {} + + for (k, super_hit) in stats.items(): + if not 'operating_system' in super_hit: + for r in super_hit['requests'][::-1]: + 
user_agent = r['http_user_agent'] + if not user_agent: continue + + os_name = 'unknown' + for (hashid_re, operating_system) in self.operating_systems: + if hashid_re.match(user_agent): + os_name = operating_system + break + super_hit['operating_system'] = os_name + break + else: + os_name = super_hit['operating_system'] + + os_family = '' + if os_name != 'unknown': + for (name_re, family) in self.os_family.items(): + if name_re.match(os_name): + os_family = family + break + + if not os_name in os_stats.keys(): + os_stats[os_name] = 1 + else: + os_stats[os_name] += 1 + + if os_family: + if not os_family in family_stats.keys(): + family_stats[os_family] = 1 + else: + family_stats[os_family] += 1 + + month_stats['operating_systems'] = os_stats + month_stats['os_families'] = family_stats diff --git a/plugins/post_analysis/referers.py b/plugins/post_analysis/referers.py index 64ec66f..619963f 100644 --- a/plugins/post_analysis/referers.py +++ b/plugins/post_analysis/referers.py @@ -46,16 +46,16 @@ Statistics creation : Statistics update : month_stats : referers => - pages - hits + pages => count + hits => count robots_referers => - pages - hits + pages => count + hits => count search_engine_referers => - pages - hits + pages => count + hits => count key_phrases => - phrase + phrase => count Statistics deletion : None diff --git a/plugins/post_analysis/top_downloads.py b/plugins/post_analysis/top_downloads.py index 7b28c33..3d82e55 100644 --- a/plugins/post_analysis/top_downloads.py +++ b/plugins/post_analysis/top_downloads.py @@ -18,8 +18,6 @@ # along with iwla. If not, see . 
# -import re - from iwla import IWLA from iplugin import IPlugin @@ -43,7 +41,7 @@ Statistics creation : Statistics update : month_stats: top_downloads => - uri + uri => count Statistics deletion : None @@ -53,19 +51,14 @@ class IWLAPostAnalysisTopDownloads(IPlugin): def __init__(self, iwla): super(IWLAPostAnalysisTopDownloads, self).__init__(iwla) self.API_VERSION = 1 - self.conf_requires = ['multimedia_files', 'viewed_http_codes'] def hook(self): - stats = self.iwla.getCurrentVisists() + stats = self.iwla.getValidVisitors() month_stats = self.iwla.getMonthStats() - multimedia_files = self.iwla.getConfValue('multimedia_files') - viewed_http_codes = self.iwla.getConfValue('viewed_http_codes') - top_downloads = month_stats.get('top_downloads', {}) for (k, super_hit) in stats.items(): - if super_hit['robot']: continue for r in super_hit['requests'][::-1]: if not self.iwla.isValidForCurrentAnalysis(r): break @@ -75,13 +68,8 @@ class IWLAPostAnalysisTopDownloads(IPlugin): uri = r['extract_request']['extract_uri'].lower() - isMultimedia = False - for ext in multimedia_files: - if uri.endswith(ext): - isMultimedia = True - break - - if isMultimedia: continue + if self.iwla.isMultimediaFile(uri): + continue uri = "%s%s" % (r.get('server_name', ''), r['extract_request']['extract_uri']) diff --git a/plugins/post_analysis/top_hits.py b/plugins/post_analysis/top_hits.py index 64446c7..8006aa7 100644 --- a/plugins/post_analysis/top_hits.py +++ b/plugins/post_analysis/top_hits.py @@ -41,7 +41,7 @@ Statistics creation : Statistics update : month_stats: top_hits => - uri + uri => count Statistics deletion : None diff --git a/plugins/post_analysis/top_pages.py b/plugins/post_analysis/top_pages.py index a5d086c..37db81d 100644 --- a/plugins/post_analysis/top_pages.py +++ b/plugins/post_analysis/top_pages.py @@ -43,7 +43,7 @@ Statistics creation : Statistics update : month_stats: top_pages => - uri + uri => count Statistics deletion : None diff --git 
a/plugins/pre_analysis/page_to_hit.py b/plugins/pre_analysis/page_to_hit.py index a3919c4..282f53f 100644 --- a/plugins/pre_analysis/page_to_hit.py +++ b/plugins/pre_analysis/page_to_hit.py @@ -19,6 +19,7 @@ # import re +import logging from iwla import IWLA from iplugin import IPlugin @@ -58,14 +59,13 @@ class IWLAPreAnalysisPageToHit(IPlugin): def load(self): # Page to hit self.ph_regexps = self.iwla.getConfValue('page_to_hit_conf', []) - if not self.ph_regexps: return False self.ph_regexps = map(lambda(r): re.compile(r), self.ph_regexps) # Hit to page self.hp_regexps = self.iwla.getConfValue('hit_to_page_conf', []) - if not self.hp_regexps: return False self.hp_regexps = map(lambda(r): re.compile(r), self.hp_regexps) + self.logger = logging.getLogger(self.__class__.__name__) return True def hook(self): @@ -87,7 +87,7 @@ class IWLAPreAnalysisPageToHit(IPlugin): # Page to hit for regexp in self.ph_regexps: if regexp.match(uri): - #print '%s is a hit' % (uri ) + self.logger.debug('%s changed from page to hit' % (uri)) request['is_page'] = False super_hit['viewed_pages'] -= 1 super_hit['viewed_hits'] += 1 @@ -96,7 +96,7 @@ class IWLAPreAnalysisPageToHit(IPlugin): # Hit to page for regexp in self.hp_regexps: if regexp.match(uri): - #print '%s is a page' % (uri ) + self.logger.debug('%s changed from hit to page' % (uri)) request['is_page'] = True super_hit['viewed_pages'] += 1 super_hit['viewed_hits'] -= 1 diff --git a/plugins/pre_analysis/robots.py b/plugins/pre_analysis/robots.py index 662ce57..d84087d 100644 --- a/plugins/pre_analysis/robots.py +++ b/plugins/pre_analysis/robots.py @@ -19,6 +19,8 @@ # import re +import logging +import inspect from iwla import IWLA from iplugin import IPlugin @@ -59,20 +61,37 @@ class IWLAPreAnalysisRobots(IPlugin): def load(self): self.awstats_robots = map(lambda (x) : re.compile(('.*%s.*') % (x), re.IGNORECASE), awstats_data.robots) - + self.robot_re = re.compile(r'.*bot.*', re.IGNORECASE) + self.crawl_re = re.compile(r'.*crawl.*', 
re.IGNORECASE) + self.logger = logging.getLogger(self.__class__.__name__) return True + def _setRobot(self, k, super_hit): + callerframerecord = inspect.stack()[1] + frame = callerframerecord[0] + info = inspect.getframeinfo(frame) + + self.logger.debug('%s is a robot (caller %s:%d)' % (k, info.function, info.lineno)) + super_hit['robot'] = 1 + # Basic rule to detect robots def hook(self): hits = self.iwla.getCurrentVisists() for (k, super_hit) in hits.items(): - if super_hit['robot']: continue + if super_hit['robot']: + self.logger.debug('%s is a robot' % (k)) + continue isRobot = False referers = 0 first_page = super_hit['requests'][0] - if not self.iwla.isValidForCurrentAnalysis(first_page): continue + + if self.robot_re.match(first_page['http_user_agent']) or\ + self.crawl_re.match(first_page['http_user_agent']): + self.logger.debug(first_page['http_user_agent']) + self._setRobot(k, super_hit) + continue for r in self.awstats_robots: if r.match(first_page['http_user_agent']): @@ -80,7 +99,8 @@ class IWLAPreAnalysisRobots(IPlugin): break if isRobot: - super_hit['robot'] = 1 + self.logger.debug(first_page['http_user_agent']) + self._setRobot(k, super_hit) continue # 1) no pages view --> robot @@ -90,13 +110,14 @@ class IWLAPreAnalysisRobots(IPlugin): # 2) pages without hit --> robot if not super_hit['viewed_hits']: - super_hit['robot'] = 1 + self.logger.debug(super_hit) + self._setRobot(k, super_hit) continue for hit in super_hit['requests']: # 3) /robots.txt read - if hit['extract_request']['http_uri'] == '/robots.txt': - isRobot = True + if hit['extract_request']['http_uri'].endswith('/robots.txt'): + self._setRobot(k, super_hit) break # 4) Any referer for hits @@ -104,10 +125,10 @@ class IWLAPreAnalysisRobots(IPlugin): referers += 1 if isRobot: - super_hit['robot'] = 1 + self._setRobot(k, super_hit) continue if not super_hit['viewed_pages'] and \ (super_hit['viewed_hits'] and not referers): - super_hit['robot'] = 1 + self._setRobot(k, super_hit) continue diff 
--git a/resources/css/iwla.css b/resources/css/iwla.css index 71b652b..add0e57 100644 --- a/resources/css/iwla.css +++ b/resources/css/iwla.css @@ -69,6 +69,9 @@ td:first-child .iwla_weekend { background : #ECECEC; } .iwla_curday { font-weight: bold; } .iwla_others { color: #668; } +.iwla_update { background : orange; } +.iwla_new { background : green } + .iwla_graph_table { margin-left:auto; @@ -85,3 +88,5 @@ table.iwla_graph_table td { text-align:center; } + +iframe {outline:none; border:0px; width:100%; height:500px; display:block;} \ No newline at end of file diff --git a/resources/icon/browser/abilon.png b/resources/icon/browser/abilon.png new file mode 100644 index 0000000..0581c91 Binary files /dev/null and b/resources/icon/browser/abilon.png differ diff --git a/resources/icon/browser/adobe.png b/resources/icon/browser/adobe.png new file mode 100644 index 0000000..0830fac Binary files /dev/null and b/resources/icon/browser/adobe.png differ diff --git a/resources/icon/browser/akregator.png b/resources/icon/browser/akregator.png new file mode 100644 index 0000000..aa321e9 Binary files /dev/null and b/resources/icon/browser/akregator.png differ diff --git a/resources/icon/browser/alcatel.png b/resources/icon/browser/alcatel.png new file mode 100644 index 0000000..706f61d Binary files /dev/null and b/resources/icon/browser/alcatel.png differ diff --git a/resources/icon/browser/amaya.png b/resources/icon/browser/amaya.png new file mode 100644 index 0000000..4bfb97d Binary files /dev/null and b/resources/icon/browser/amaya.png differ diff --git a/resources/icon/browser/amigavoyager.png b/resources/icon/browser/amigavoyager.png new file mode 100644 index 0000000..eef57d5 Binary files /dev/null and b/resources/icon/browser/amigavoyager.png differ diff --git a/resources/icon/browser/analogx.png b/resources/icon/browser/analogx.png new file mode 100644 index 0000000..654d581 Binary files /dev/null and b/resources/icon/browser/analogx.png differ diff --git 
a/resources/icon/browser/android.png b/resources/icon/browser/android.png new file mode 100644 index 0000000..d12f8cb Binary files /dev/null and b/resources/icon/browser/android.png differ diff --git a/resources/icon/browser/apt.png b/resources/icon/browser/apt.png new file mode 100644 index 0000000..29a66ed Binary files /dev/null and b/resources/icon/browser/apt.png differ diff --git a/resources/icon/browser/avant.png b/resources/icon/browser/avant.png new file mode 100644 index 0000000..1143e9f Binary files /dev/null and b/resources/icon/browser/avant.png differ diff --git a/resources/icon/browser/aweb.png b/resources/icon/browser/aweb.png new file mode 100644 index 0000000..cf50596 Binary files /dev/null and b/resources/icon/browser/aweb.png differ diff --git a/resources/icon/browser/bpftp.png b/resources/icon/browser/bpftp.png new file mode 100644 index 0000000..3ab2cae Binary files /dev/null and b/resources/icon/browser/bpftp.png differ diff --git a/resources/icon/browser/bytel.png b/resources/icon/browser/bytel.png new file mode 100644 index 0000000..078fc8e Binary files /dev/null and b/resources/icon/browser/bytel.png differ diff --git a/resources/icon/browser/chimera.png b/resources/icon/browser/chimera.png new file mode 100644 index 0000000..c93232f Binary files /dev/null and b/resources/icon/browser/chimera.png differ diff --git a/resources/icon/browser/chrome.png b/resources/icon/browser/chrome.png new file mode 100644 index 0000000..84dc2de Binary files /dev/null and b/resources/icon/browser/chrome.png differ diff --git a/resources/icon/browser/cyberdog.png b/resources/icon/browser/cyberdog.png new file mode 100644 index 0000000..b94533a Binary files /dev/null and b/resources/icon/browser/cyberdog.png differ diff --git a/resources/icon/browser/da.png b/resources/icon/browser/da.png new file mode 100644 index 0000000..e4fa7bf Binary files /dev/null and b/resources/icon/browser/da.png differ diff --git a/resources/icon/browser/dillo.png 
b/resources/icon/browser/dillo.png new file mode 100644 index 0000000..ba666da Binary files /dev/null and b/resources/icon/browser/dillo.png differ diff --git a/resources/icon/browser/doris.png b/resources/icon/browser/doris.png new file mode 100644 index 0000000..ace3c9b Binary files /dev/null and b/resources/icon/browser/doris.png differ diff --git a/resources/icon/browser/dreamcast.png b/resources/icon/browser/dreamcast.png new file mode 100644 index 0000000..409d0dd Binary files /dev/null and b/resources/icon/browser/dreamcast.png differ diff --git a/resources/icon/browser/ecatch.png b/resources/icon/browser/ecatch.png new file mode 100644 index 0000000..9c85e2c Binary files /dev/null and b/resources/icon/browser/ecatch.png differ diff --git a/resources/icon/browser/encompass.png b/resources/icon/browser/encompass.png new file mode 100644 index 0000000..e9277cc Binary files /dev/null and b/resources/icon/browser/encompass.png differ diff --git a/resources/icon/browser/epiphany.png b/resources/icon/browser/epiphany.png new file mode 100644 index 0000000..dc87718 Binary files /dev/null and b/resources/icon/browser/epiphany.png differ diff --git a/resources/icon/browser/ericsson.png b/resources/icon/browser/ericsson.png new file mode 100644 index 0000000..8c182c5 Binary files /dev/null and b/resources/icon/browser/ericsson.png differ diff --git a/resources/icon/browser/feeddemon.png b/resources/icon/browser/feeddemon.png new file mode 100644 index 0000000..a840200 Binary files /dev/null and b/resources/icon/browser/feeddemon.png differ diff --git a/resources/icon/browser/feedreader.png b/resources/icon/browser/feedreader.png new file mode 100644 index 0000000..72928c6 Binary files /dev/null and b/resources/icon/browser/feedreader.png differ diff --git a/resources/icon/browser/firefox.png b/resources/icon/browser/firefox.png new file mode 100644 index 0000000..d2090b7 Binary files /dev/null and b/resources/icon/browser/firefox.png differ diff --git 
a/resources/icon/browser/flashget.png b/resources/icon/browser/flashget.png new file mode 100644 index 0000000..ce099d1 Binary files /dev/null and b/resources/icon/browser/flashget.png differ diff --git a/resources/icon/browser/flock.png b/resources/icon/browser/flock.png new file mode 100644 index 0000000..4247ba9 Binary files /dev/null and b/resources/icon/browser/flock.png differ diff --git a/resources/icon/browser/fpexpress.png b/resources/icon/browser/fpexpress.png new file mode 100644 index 0000000..2111c71 Binary files /dev/null and b/resources/icon/browser/fpexpress.png differ diff --git a/resources/icon/browser/fresco.png b/resources/icon/browser/fresco.png new file mode 100644 index 0000000..3321eba Binary files /dev/null and b/resources/icon/browser/fresco.png differ diff --git a/resources/icon/browser/freshdownload.png b/resources/icon/browser/freshdownload.png new file mode 100644 index 0000000..b06cb88 Binary files /dev/null and b/resources/icon/browser/freshdownload.png differ diff --git a/resources/icon/browser/frontpage.png b/resources/icon/browser/frontpage.png new file mode 100644 index 0000000..fdb3212 Binary files /dev/null and b/resources/icon/browser/frontpage.png differ diff --git a/resources/icon/browser/galeon.png b/resources/icon/browser/galeon.png new file mode 100644 index 0000000..92d0a22 Binary files /dev/null and b/resources/icon/browser/galeon.png differ diff --git a/resources/icon/browser/getright.png b/resources/icon/browser/getright.png new file mode 100644 index 0000000..cb70df8 Binary files /dev/null and b/resources/icon/browser/getright.png differ diff --git a/resources/icon/browser/gnome.png b/resources/icon/browser/gnome.png new file mode 100644 index 0000000..0b388b6 Binary files /dev/null and b/resources/icon/browser/gnome.png differ diff --git a/resources/icon/browser/gnus.png b/resources/icon/browser/gnus.png new file mode 100644 index 0000000..ddb59a7 Binary files /dev/null and b/resources/icon/browser/gnus.png differ 
diff --git a/resources/icon/browser/gozilla.png b/resources/icon/browser/gozilla.png new file mode 100644 index 0000000..eb1578e Binary files /dev/null and b/resources/icon/browser/gozilla.png differ diff --git a/resources/icon/browser/hotjava.png b/resources/icon/browser/hotjava.png new file mode 100644 index 0000000..6bd83da Binary files /dev/null and b/resources/icon/browser/hotjava.png differ diff --git a/resources/icon/browser/httrack.png b/resources/icon/browser/httrack.png new file mode 100644 index 0000000..7559701 Binary files /dev/null and b/resources/icon/browser/httrack.png differ diff --git a/resources/icon/browser/ibrowse.png b/resources/icon/browser/ibrowse.png new file mode 100644 index 0000000..8ecac94 Binary files /dev/null and b/resources/icon/browser/ibrowse.png differ diff --git a/resources/icon/browser/icab.png b/resources/icon/browser/icab.png new file mode 100644 index 0000000..c30ade7 Binary files /dev/null and b/resources/icon/browser/icab.png differ diff --git a/resources/icon/browser/icecat.png b/resources/icon/browser/icecat.png new file mode 100644 index 0000000..c3bd099 Binary files /dev/null and b/resources/icon/browser/icecat.png differ diff --git a/resources/icon/browser/iceweasel.png b/resources/icon/browser/iceweasel.png new file mode 100644 index 0000000..90f3881 Binary files /dev/null and b/resources/icon/browser/iceweasel.png differ diff --git a/resources/icon/browser/java.png b/resources/icon/browser/java.png new file mode 100644 index 0000000..5d9ce9f Binary files /dev/null and b/resources/icon/browser/java.png differ diff --git a/resources/icon/browser/jetbrains_omea.png b/resources/icon/browser/jetbrains_omea.png new file mode 100644 index 0000000..81f2418 Binary files /dev/null and b/resources/icon/browser/jetbrains_omea.png differ diff --git a/resources/icon/browser/kmeleon.png b/resources/icon/browser/kmeleon.png new file mode 100644 index 0000000..a787f6c Binary files /dev/null and b/resources/icon/browser/kmeleon.png 
differ diff --git a/resources/icon/browser/konqueror.png b/resources/icon/browser/konqueror.png new file mode 100644 index 0000000..3fb315d Binary files /dev/null and b/resources/icon/browser/konqueror.png differ diff --git a/resources/icon/browser/leechget.png b/resources/icon/browser/leechget.png new file mode 100644 index 0000000..76b752d Binary files /dev/null and b/resources/icon/browser/leechget.png differ diff --git a/resources/icon/browser/lg.png b/resources/icon/browser/lg.png new file mode 100644 index 0000000..c009af4 Binary files /dev/null and b/resources/icon/browser/lg.png differ diff --git a/resources/icon/browser/lotusnotes.png b/resources/icon/browser/lotusnotes.png new file mode 100644 index 0000000..31ce8ff Binary files /dev/null and b/resources/icon/browser/lotusnotes.png differ diff --git a/resources/icon/browser/lynx.png b/resources/icon/browser/lynx.png new file mode 100644 index 0000000..4d98861 Binary files /dev/null and b/resources/icon/browser/lynx.png differ diff --git a/resources/icon/browser/macweb.png b/resources/icon/browser/macweb.png new file mode 100644 index 0000000..8f4322a Binary files /dev/null and b/resources/icon/browser/macweb.png differ diff --git a/resources/icon/browser/mediaplayer.png b/resources/icon/browser/mediaplayer.png new file mode 100644 index 0000000..0536ebf Binary files /dev/null and b/resources/icon/browser/mediaplayer.png differ diff --git a/resources/icon/browser/motorola.png b/resources/icon/browser/motorola.png new file mode 100644 index 0000000..72604c9 Binary files /dev/null and b/resources/icon/browser/motorola.png differ diff --git a/resources/icon/browser/mozilla.png b/resources/icon/browser/mozilla.png new file mode 100644 index 0000000..6008a31 Binary files /dev/null and b/resources/icon/browser/mozilla.png differ diff --git a/resources/icon/browser/mplayer.png b/resources/icon/browser/mplayer.png new file mode 100644 index 0000000..1eb7839 Binary files /dev/null and 
b/resources/icon/browser/mplayer.png differ diff --git a/resources/icon/browser/msie.png b/resources/icon/browser/msie.png new file mode 100644 index 0000000..6dbe4d0 Binary files /dev/null and b/resources/icon/browser/msie.png differ diff --git a/resources/icon/browser/msie_large.png b/resources/icon/browser/msie_large.png new file mode 100644 index 0000000..0ff2362 Binary files /dev/null and b/resources/icon/browser/msie_large.png differ diff --git a/resources/icon/browser/multizilla.png b/resources/icon/browser/multizilla.png new file mode 100644 index 0000000..22f26b1 Binary files /dev/null and b/resources/icon/browser/multizilla.png differ diff --git a/resources/icon/browser/ncsa_mosaic.png b/resources/icon/browser/ncsa_mosaic.png new file mode 100644 index 0000000..0236f08 Binary files /dev/null and b/resources/icon/browser/ncsa_mosaic.png differ diff --git a/resources/icon/browser/neon.png b/resources/icon/browser/neon.png new file mode 100644 index 0000000..d404c26 Binary files /dev/null and b/resources/icon/browser/neon.png differ diff --git a/resources/icon/browser/netnewswire.png b/resources/icon/browser/netnewswire.png new file mode 100644 index 0000000..ed465f0 Binary files /dev/null and b/resources/icon/browser/netnewswire.png differ diff --git a/resources/icon/browser/netpositive.png b/resources/icon/browser/netpositive.png new file mode 100644 index 0000000..9c53455 Binary files /dev/null and b/resources/icon/browser/netpositive.png differ diff --git a/resources/icon/browser/netscape.png b/resources/icon/browser/netscape.png new file mode 100644 index 0000000..7c9921e Binary files /dev/null and b/resources/icon/browser/netscape.png differ diff --git a/resources/icon/browser/netscape_large.png b/resources/icon/browser/netscape_large.png new file mode 100644 index 0000000..7bd913a Binary files /dev/null and b/resources/icon/browser/netscape_large.png differ diff --git a/resources/icon/browser/netshow.png b/resources/icon/browser/netshow.png new file 
mode 100644 index 0000000..8abaacf Binary files /dev/null and b/resources/icon/browser/netshow.png differ diff --git a/resources/icon/browser/newsfire.png b/resources/icon/browser/newsfire.png new file mode 100644 index 0000000..9d180b1 Binary files /dev/null and b/resources/icon/browser/newsfire.png differ diff --git a/resources/icon/browser/newsgator.png b/resources/icon/browser/newsgator.png new file mode 100644 index 0000000..51e24fe Binary files /dev/null and b/resources/icon/browser/newsgator.png differ diff --git a/resources/icon/browser/newzcrawler.png b/resources/icon/browser/newzcrawler.png new file mode 100644 index 0000000..dc9b222 Binary files /dev/null and b/resources/icon/browser/newzcrawler.png differ diff --git a/resources/icon/browser/nokia.png b/resources/icon/browser/nokia.png new file mode 100644 index 0000000..3147b98 Binary files /dev/null and b/resources/icon/browser/nokia.png differ diff --git a/resources/icon/browser/notavailable.png b/resources/icon/browser/notavailable.png new file mode 100644 index 0000000..05ff318 Binary files /dev/null and b/resources/icon/browser/notavailable.png differ diff --git a/resources/icon/browser/omniweb.png b/resources/icon/browser/omniweb.png new file mode 100644 index 0000000..6f07f16 Binary files /dev/null and b/resources/icon/browser/omniweb.png differ diff --git a/resources/icon/browser/opera.png b/resources/icon/browser/opera.png new file mode 100644 index 0000000..9b70988 Binary files /dev/null and b/resources/icon/browser/opera.png differ diff --git a/resources/icon/browser/panasonic.png b/resources/icon/browser/panasonic.png new file mode 100644 index 0000000..329958b Binary files /dev/null and b/resources/icon/browser/panasonic.png differ diff --git a/resources/icon/browser/pdaphone.png b/resources/icon/browser/pdaphone.png new file mode 100644 index 0000000..4f4f68d Binary files /dev/null and b/resources/icon/browser/pdaphone.png differ diff --git a/resources/icon/browser/philips.png 
b/resources/icon/browser/philips.png new file mode 100644 index 0000000..3f57816 Binary files /dev/null and b/resources/icon/browser/philips.png differ diff --git a/resources/icon/browser/phoenix.png b/resources/icon/browser/phoenix.png new file mode 100644 index 0000000..d133ef2 Binary files /dev/null and b/resources/icon/browser/phoenix.png differ diff --git a/resources/icon/browser/pluck.png b/resources/icon/browser/pluck.png new file mode 100644 index 0000000..ef37504 Binary files /dev/null and b/resources/icon/browser/pluck.png differ diff --git a/resources/icon/browser/pulpfiction.png b/resources/icon/browser/pulpfiction.png new file mode 100644 index 0000000..b915740 Binary files /dev/null and b/resources/icon/browser/pulpfiction.png differ diff --git a/resources/icon/browser/real.png b/resources/icon/browser/real.png new file mode 100644 index 0000000..9ca3fbf Binary files /dev/null and b/resources/icon/browser/real.png differ diff --git a/resources/icon/browser/rss.png b/resources/icon/browser/rss.png new file mode 100644 index 0000000..7a139ea Binary files /dev/null and b/resources/icon/browser/rss.png differ diff --git a/resources/icon/browser/rssbandit.png b/resources/icon/browser/rssbandit.png new file mode 100644 index 0000000..e70b987 Binary files /dev/null and b/resources/icon/browser/rssbandit.png differ diff --git a/resources/icon/browser/rssowl.png b/resources/icon/browser/rssowl.png new file mode 100644 index 0000000..6185d26 Binary files /dev/null and b/resources/icon/browser/rssowl.png differ diff --git a/resources/icon/browser/rssreader.png b/resources/icon/browser/rssreader.png new file mode 100644 index 0000000..fc8deb4 Binary files /dev/null and b/resources/icon/browser/rssreader.png differ diff --git a/resources/icon/browser/rssxpress.png b/resources/icon/browser/rssxpress.png new file mode 100644 index 0000000..a4c4e02 Binary files /dev/null and b/resources/icon/browser/rssxpress.png differ diff --git a/resources/icon/browser/safari.png 
b/resources/icon/browser/safari.png new file mode 100644 index 0000000..683f2ea Binary files /dev/null and b/resources/icon/browser/safari.png differ diff --git a/resources/icon/browser/sagem.png b/resources/icon/browser/sagem.png new file mode 100644 index 0000000..4b05c81 Binary files /dev/null and b/resources/icon/browser/sagem.png differ diff --git a/resources/icon/browser/samsung.png b/resources/icon/browser/samsung.png new file mode 100644 index 0000000..8d14913 Binary files /dev/null and b/resources/icon/browser/samsung.png differ diff --git a/resources/icon/browser/seamonkey.png b/resources/icon/browser/seamonkey.png new file mode 100644 index 0000000..7242ed2 Binary files /dev/null and b/resources/icon/browser/seamonkey.png differ diff --git a/resources/icon/browser/sharp.png b/resources/icon/browser/sharp.png new file mode 100644 index 0000000..ee552bd Binary files /dev/null and b/resources/icon/browser/sharp.png differ diff --git a/resources/icon/browser/sharpreader.png b/resources/icon/browser/sharpreader.png new file mode 100644 index 0000000..845966b Binary files /dev/null and b/resources/icon/browser/sharpreader.png differ diff --git a/resources/icon/browser/shrook.png b/resources/icon/browser/shrook.png new file mode 100644 index 0000000..193cb33 Binary files /dev/null and b/resources/icon/browser/shrook.png differ diff --git a/resources/icon/browser/siemens.png b/resources/icon/browser/siemens.png new file mode 100644 index 0000000..9d82628 Binary files /dev/null and b/resources/icon/browser/siemens.png differ diff --git a/resources/icon/browser/sony.png b/resources/icon/browser/sony.png new file mode 100644 index 0000000..8c182c5 Binary files /dev/null and b/resources/icon/browser/sony.png differ diff --git a/resources/icon/browser/staroffice.png b/resources/icon/browser/staroffice.png new file mode 100644 index 0000000..b9d7778 Binary files /dev/null and b/resources/icon/browser/staroffice.png differ diff --git 
a/resources/icon/browser/subversion.png b/resources/icon/browser/subversion.png new file mode 100644 index 0000000..d732495 Binary files /dev/null and b/resources/icon/browser/subversion.png differ diff --git a/resources/icon/browser/teleport.png b/resources/icon/browser/teleport.png new file mode 100644 index 0000000..dceba90 Binary files /dev/null and b/resources/icon/browser/teleport.png differ diff --git a/resources/icon/browser/trium.png b/resources/icon/browser/trium.png new file mode 100644 index 0000000..e6d07f6 Binary files /dev/null and b/resources/icon/browser/trium.png differ diff --git a/resources/icon/browser/unknown.png b/resources/icon/browser/unknown.png new file mode 100644 index 0000000..895b771 Binary files /dev/null and b/resources/icon/browser/unknown.png differ diff --git a/resources/icon/browser/w3c.png b/resources/icon/browser/w3c.png new file mode 100644 index 0000000..d7e90d9 Binary files /dev/null and b/resources/icon/browser/w3c.png differ diff --git a/resources/icon/browser/webcopier.png b/resources/icon/browser/webcopier.png new file mode 100644 index 0000000..9a2bffb Binary files /dev/null and b/resources/icon/browser/webcopier.png differ diff --git a/resources/icon/browser/webreaper.png b/resources/icon/browser/webreaper.png new file mode 100644 index 0000000..c806913 Binary files /dev/null and b/resources/icon/browser/webreaper.png differ diff --git a/resources/icon/browser/webtv.png b/resources/icon/browser/webtv.png new file mode 100644 index 0000000..07dc458 Binary files /dev/null and b/resources/icon/browser/webtv.png differ diff --git a/resources/icon/browser/webzip.png b/resources/icon/browser/webzip.png new file mode 100644 index 0000000..e79bd25 Binary files /dev/null and b/resources/icon/browser/webzip.png differ diff --git a/resources/icon/browser/winxbox.png b/resources/icon/browser/winxbox.png new file mode 100644 index 0000000..85087e4 Binary files /dev/null and b/resources/icon/browser/winxbox.png differ diff --git 
a/resources/icon/browser/wizz.png b/resources/icon/browser/wizz.png new file mode 100644 index 0000000..f79d1d7 Binary files /dev/null and b/resources/icon/browser/wizz.png differ diff --git a/resources/icon/os/aix.png b/resources/icon/os/aix.png new file mode 100644 index 0000000..90581d1 Binary files /dev/null and b/resources/icon/os/aix.png differ diff --git a/resources/icon/os/amigaos.png b/resources/icon/os/amigaos.png new file mode 100644 index 0000000..8efdcec Binary files /dev/null and b/resources/icon/os/amigaos.png differ diff --git a/resources/icon/os/apple.png b/resources/icon/os/apple.png new file mode 100644 index 0000000..c3c1fe0 Binary files /dev/null and b/resources/icon/os/apple.png differ diff --git a/resources/icon/os/atari.png b/resources/icon/os/atari.png new file mode 100644 index 0000000..d51836b Binary files /dev/null and b/resources/icon/os/atari.png differ diff --git a/resources/icon/os/beos.png b/resources/icon/os/beos.png new file mode 100644 index 0000000..bab61ff Binary files /dev/null and b/resources/icon/os/beos.png differ diff --git a/resources/icon/os/blackberry.png b/resources/icon/os/blackberry.png new file mode 100644 index 0000000..4f4f68d Binary files /dev/null and b/resources/icon/os/blackberry.png differ diff --git a/resources/icon/os/bsd.png b/resources/icon/os/bsd.png new file mode 100644 index 0000000..6bd3d29 Binary files /dev/null and b/resources/icon/os/bsd.png differ diff --git a/resources/icon/os/bsddflybsd.png b/resources/icon/os/bsddflybsd.png new file mode 100644 index 0000000..3b59862 Binary files /dev/null and b/resources/icon/os/bsddflybsd.png differ diff --git a/resources/icon/os/bsdfreebsd.png b/resources/icon/os/bsdfreebsd.png new file mode 100644 index 0000000..3b59862 Binary files /dev/null and b/resources/icon/os/bsdfreebsd.png differ diff --git a/resources/icon/os/bsdi.png b/resources/icon/os/bsdi.png new file mode 100644 index 0000000..6bd3d29 Binary files /dev/null and b/resources/icon/os/bsdi.png 
differ diff --git a/resources/icon/os/bsdkfreebsd.png b/resources/icon/os/bsdkfreebsd.png new file mode 100644 index 0000000..3b59862 Binary files /dev/null and b/resources/icon/os/bsdkfreebsd.png differ diff --git a/resources/icon/os/bsdnetbsd.png b/resources/icon/os/bsdnetbsd.png new file mode 100644 index 0000000..3b59862 Binary files /dev/null and b/resources/icon/os/bsdnetbsd.png differ diff --git a/resources/icon/os/bsdopenbsd.png b/resources/icon/os/bsdopenbsd.png new file mode 100644 index 0000000..7d672e5 Binary files /dev/null and b/resources/icon/os/bsdopenbsd.png differ diff --git a/resources/icon/os/commodore.png b/resources/icon/os/commodore.png new file mode 100644 index 0000000..6dd71bd Binary files /dev/null and b/resources/icon/os/commodore.png differ diff --git a/resources/icon/os/cpm.png b/resources/icon/os/cpm.png new file mode 100644 index 0000000..caa00a1 Binary files /dev/null and b/resources/icon/os/cpm.png differ diff --git a/resources/icon/os/debian.png b/resources/icon/os/debian.png new file mode 100644 index 0000000..29a66ed Binary files /dev/null and b/resources/icon/os/debian.png differ diff --git a/resources/icon/os/digital.png b/resources/icon/os/digital.png new file mode 100644 index 0000000..4b946cb Binary files /dev/null and b/resources/icon/os/digital.png differ diff --git a/resources/icon/os/dos.png b/resources/icon/os/dos.png new file mode 100644 index 0000000..ce04e4d Binary files /dev/null and b/resources/icon/os/dos.png differ diff --git a/resources/icon/os/dreamcast.png b/resources/icon/os/dreamcast.png new file mode 100644 index 0000000..573d542 Binary files /dev/null and b/resources/icon/os/dreamcast.png differ diff --git a/resources/icon/os/freebsd.png b/resources/icon/os/freebsd.png new file mode 100644 index 0000000..3b59862 Binary files /dev/null and b/resources/icon/os/freebsd.png differ diff --git a/resources/icon/os/gnu.png b/resources/icon/os/gnu.png new file mode 100644 index 0000000..8469ec6 Binary files 
/dev/null and b/resources/icon/os/gnu.png differ diff --git a/resources/icon/os/hpux.png b/resources/icon/os/hpux.png new file mode 100644 index 0000000..a1d554a Binary files /dev/null and b/resources/icon/os/hpux.png differ diff --git a/resources/icon/os/ibm.png b/resources/icon/os/ibm.png new file mode 100644 index 0000000..67b103d Binary files /dev/null and b/resources/icon/os/ibm.png differ diff --git a/resources/icon/os/imode.png b/resources/icon/os/imode.png new file mode 100644 index 0000000..4c68317 Binary files /dev/null and b/resources/icon/os/imode.png differ diff --git a/resources/icon/os/inferno.png b/resources/icon/os/inferno.png new file mode 100644 index 0000000..72928c6 Binary files /dev/null and b/resources/icon/os/inferno.png differ diff --git a/resources/icon/os/ios.png b/resources/icon/os/ios.png new file mode 100644 index 0000000..3e9ca49 Binary files /dev/null and b/resources/icon/os/ios.png differ diff --git a/resources/icon/os/iphone.png b/resources/icon/os/iphone.png new file mode 100644 index 0000000..3e9ca49 Binary files /dev/null and b/resources/icon/os/iphone.png differ diff --git a/resources/icon/os/irix.png b/resources/icon/os/irix.png new file mode 100644 index 0000000..9722f69 Binary files /dev/null and b/resources/icon/os/irix.png differ diff --git a/resources/icon/os/j2me.png b/resources/icon/os/j2me.png new file mode 100644 index 0000000..72928c6 Binary files /dev/null and b/resources/icon/os/j2me.png differ diff --git a/resources/icon/os/java.png b/resources/icon/os/java.png new file mode 100644 index 0000000..5d9ce9f Binary files /dev/null and b/resources/icon/os/java.png differ diff --git a/resources/icon/os/kfreebsd.png b/resources/icon/os/kfreebsd.png new file mode 100644 index 0000000..3b59862 Binary files /dev/null and b/resources/icon/os/kfreebsd.png differ diff --git a/resources/icon/os/linux.png b/resources/icon/os/linux.png new file mode 100644 index 0000000..33dace8 Binary files /dev/null and 
b/resources/icon/os/linux.png differ diff --git a/resources/icon/os/linuxandroid.png b/resources/icon/os/linuxandroid.png new file mode 100644 index 0000000..07d266f Binary files /dev/null and b/resources/icon/os/linuxandroid.png differ diff --git a/resources/icon/os/linuxasplinux.png b/resources/icon/os/linuxasplinux.png new file mode 100644 index 0000000..33dace8 Binary files /dev/null and b/resources/icon/os/linuxasplinux.png differ diff --git a/resources/icon/os/linuxcentos.png b/resources/icon/os/linuxcentos.png new file mode 100644 index 0000000..c2541d1 Binary files /dev/null and b/resources/icon/os/linuxcentos.png differ diff --git a/resources/icon/os/linuxdebian.png b/resources/icon/os/linuxdebian.png new file mode 100644 index 0000000..97d8214 Binary files /dev/null and b/resources/icon/os/linuxdebian.png differ diff --git a/resources/icon/os/linuxfedora.png b/resources/icon/os/linuxfedora.png new file mode 100644 index 0000000..00a02c6 Binary files /dev/null and b/resources/icon/os/linuxfedora.png differ diff --git a/resources/icon/os/linuxgentoo.png b/resources/icon/os/linuxgentoo.png new file mode 100644 index 0000000..f00eeff Binary files /dev/null and b/resources/icon/os/linuxgentoo.png differ diff --git a/resources/icon/os/linuxmandr.png b/resources/icon/os/linuxmandr.png new file mode 100644 index 0000000..5dcf361 Binary files /dev/null and b/resources/icon/os/linuxmandr.png differ diff --git a/resources/icon/os/linuxpclinuxos.png b/resources/icon/os/linuxpclinuxos.png new file mode 100644 index 0000000..33dace8 Binary files /dev/null and b/resources/icon/os/linuxpclinuxos.png differ diff --git a/resources/icon/os/linuxredhat.png b/resources/icon/os/linuxredhat.png new file mode 100644 index 0000000..00e6095 Binary files /dev/null and b/resources/icon/os/linuxredhat.png differ diff --git a/resources/icon/os/linuxsuse.png b/resources/icon/os/linuxsuse.png new file mode 100644 index 0000000..26cb28a Binary files /dev/null and 
b/resources/icon/os/linuxsuse.png differ diff --git a/resources/icon/os/linuxubuntu.png b/resources/icon/os/linuxubuntu.png new file mode 100644 index 0000000..7454e09 Binary files /dev/null and b/resources/icon/os/linuxubuntu.png differ diff --git a/resources/icon/os/linuxvine.png b/resources/icon/os/linuxvine.png new file mode 100644 index 0000000..33dace8 Binary files /dev/null and b/resources/icon/os/linuxvine.png differ diff --git a/resources/icon/os/linuxzenwalk.png b/resources/icon/os/linuxzenwalk.png new file mode 100644 index 0000000..33dace8 Binary files /dev/null and b/resources/icon/os/linuxzenwalk.png differ diff --git a/resources/icon/os/mac.png b/resources/icon/os/mac.png new file mode 100644 index 0000000..03f56f4 Binary files /dev/null and b/resources/icon/os/mac.png differ diff --git a/resources/icon/os/macintosh.png b/resources/icon/os/macintosh.png new file mode 100644 index 0000000..03f56f4 Binary files /dev/null and b/resources/icon/os/macintosh.png differ diff --git a/resources/icon/os/macosx.png b/resources/icon/os/macosx.png new file mode 100644 index 0000000..777f350 Binary files /dev/null and b/resources/icon/os/macosx.png differ diff --git a/resources/icon/os/netbsd.png b/resources/icon/os/netbsd.png new file mode 100644 index 0000000..3b59862 Binary files /dev/null and b/resources/icon/os/netbsd.png differ diff --git a/resources/icon/os/netware.png b/resources/icon/os/netware.png new file mode 100644 index 0000000..79b5c10 Binary files /dev/null and b/resources/icon/os/netware.png differ diff --git a/resources/icon/os/next.png b/resources/icon/os/next.png new file mode 100644 index 0000000..f4da2ae Binary files /dev/null and b/resources/icon/os/next.png differ diff --git a/resources/icon/os/openbsd.png b/resources/icon/os/openbsd.png new file mode 100644 index 0000000..7d672e5 Binary files /dev/null and b/resources/icon/os/openbsd.png differ diff --git a/resources/icon/os/os2.png b/resources/icon/os/os2.png new file mode 100644 index 
0000000..4dcb675 Binary files /dev/null and b/resources/icon/os/os2.png differ diff --git a/resources/icon/os/osf.png b/resources/icon/os/osf.png new file mode 100644 index 0000000..18836fc Binary files /dev/null and b/resources/icon/os/osf.png differ diff --git a/resources/icon/os/palmos.png b/resources/icon/os/palmos.png new file mode 100644 index 0000000..4f4f68d Binary files /dev/null and b/resources/icon/os/palmos.png differ diff --git a/resources/icon/os/psp.png b/resources/icon/os/psp.png new file mode 100644 index 0000000..6734f7d Binary files /dev/null and b/resources/icon/os/psp.png differ diff --git a/resources/icon/os/qnx.png b/resources/icon/os/qnx.png new file mode 100644 index 0000000..8ffcce6 Binary files /dev/null and b/resources/icon/os/qnx.png differ diff --git a/resources/icon/os/riscos.png b/resources/icon/os/riscos.png new file mode 100644 index 0000000..c20b34d Binary files /dev/null and b/resources/icon/os/riscos.png differ diff --git a/resources/icon/os/sco.png b/resources/icon/os/sco.png new file mode 100644 index 0000000..55cc4ca Binary files /dev/null and b/resources/icon/os/sco.png differ diff --git a/resources/icon/os/sunos.png b/resources/icon/os/sunos.png new file mode 100644 index 0000000..e37e602 Binary files /dev/null and b/resources/icon/os/sunos.png differ diff --git a/resources/icon/os/syllable.png b/resources/icon/os/syllable.png new file mode 100644 index 0000000..72928c6 Binary files /dev/null and b/resources/icon/os/syllable.png differ diff --git a/resources/icon/os/symbian.png b/resources/icon/os/symbian.png new file mode 100644 index 0000000..c9b5984 Binary files /dev/null and b/resources/icon/os/symbian.png differ diff --git a/resources/icon/os/unix.png b/resources/icon/os/unix.png new file mode 100644 index 0000000..995c967 Binary files /dev/null and b/resources/icon/os/unix.png differ diff --git a/resources/icon/os/unknown.png b/resources/icon/os/unknown.png new file mode 100644 index 0000000..895b771 Binary files 
/dev/null and b/resources/icon/os/unknown.png differ diff --git a/resources/icon/os/vms.png b/resources/icon/os/vms.png new file mode 100644 index 0000000..58568f6 Binary files /dev/null and b/resources/icon/os/vms.png differ diff --git a/resources/icon/os/webtv.png b/resources/icon/os/webtv.png new file mode 100644 index 0000000..07dc458 Binary files /dev/null and b/resources/icon/os/webtv.png differ diff --git a/resources/icon/os/wii.png b/resources/icon/os/wii.png new file mode 100644 index 0000000..9d44c99 Binary files /dev/null and b/resources/icon/os/wii.png differ diff --git a/resources/icon/os/win.png b/resources/icon/os/win.png new file mode 100644 index 0000000..a3e9a96 Binary files /dev/null and b/resources/icon/os/win.png differ diff --git a/resources/icon/os/win16.png b/resources/icon/os/win16.png new file mode 100644 index 0000000..a3e9a96 Binary files /dev/null and b/resources/icon/os/win16.png differ diff --git a/resources/icon/os/win2000.png b/resources/icon/os/win2000.png new file mode 100644 index 0000000..a3e9a96 Binary files /dev/null and b/resources/icon/os/win2000.png differ diff --git a/resources/icon/os/win2003.png b/resources/icon/os/win2003.png new file mode 100644 index 0000000..247caed Binary files /dev/null and b/resources/icon/os/win2003.png differ diff --git a/resources/icon/os/win2008.png b/resources/icon/os/win2008.png new file mode 100644 index 0000000..247caed Binary files /dev/null and b/resources/icon/os/win2008.png differ diff --git a/resources/icon/os/win7.png b/resources/icon/os/win7.png new file mode 100644 index 0000000..8001539 Binary files /dev/null and b/resources/icon/os/win7.png differ diff --git a/resources/icon/os/win95.png b/resources/icon/os/win95.png new file mode 100644 index 0000000..a3e9a96 Binary files /dev/null and b/resources/icon/os/win95.png differ diff --git a/resources/icon/os/win98.png b/resources/icon/os/win98.png new file mode 100644 index 0000000..a3e9a96 Binary files /dev/null and 
b/resources/icon/os/win98.png differ diff --git a/resources/icon/os/wince.png b/resources/icon/os/wince.png new file mode 100644 index 0000000..a3e9a96 Binary files /dev/null and b/resources/icon/os/wince.png differ diff --git a/resources/icon/os/winlong.png b/resources/icon/os/winlong.png new file mode 100644 index 0000000..8001539 Binary files /dev/null and b/resources/icon/os/winlong.png differ diff --git a/resources/icon/os/winme.png b/resources/icon/os/winme.png new file mode 100644 index 0000000..a3e9a96 Binary files /dev/null and b/resources/icon/os/winme.png differ diff --git a/resources/icon/os/winnt.png b/resources/icon/os/winnt.png new file mode 100644 index 0000000..a3e9a96 Binary files /dev/null and b/resources/icon/os/winnt.png differ diff --git a/resources/icon/os/winunknown.png b/resources/icon/os/winunknown.png new file mode 100644 index 0000000..a3e9a96 Binary files /dev/null and b/resources/icon/os/winunknown.png differ diff --git a/resources/icon/os/winvista.png b/resources/icon/os/winvista.png new file mode 100644 index 0000000..247caed Binary files /dev/null and b/resources/icon/os/winvista.png differ diff --git a/resources/icon/os/winxbox.png b/resources/icon/os/winxbox.png new file mode 100644 index 0000000..85087e4 Binary files /dev/null and b/resources/icon/os/winxbox.png differ diff --git a/resources/icon/os/winxp.png b/resources/icon/os/winxp.png new file mode 100644 index 0000000..247caed Binary files /dev/null and b/resources/icon/os/winxp.png differ diff --git a/resources/icon/other/vh.png b/resources/icon/other/vh.png new file mode 100644 index 0000000..13e52f9 Binary files /dev/null and b/resources/icon/other/vh.png differ diff --git a/resources/icon/other/vk.png b/resources/icon/other/vk.png new file mode 100644 index 0000000..ac1bc63 Binary files /dev/null and b/resources/icon/other/vk.png differ diff --git a/resources/icon/other/vp.png b/resources/icon/other/vp.png new file mode 100644 index 0000000..8ebf702 Binary files 
/dev/null and b/resources/icon/other/vp.png differ diff --git a/resources/icon/other/vu.png b/resources/icon/other/vu.png new file mode 100644 index 0000000..7d39356 Binary files /dev/null and b/resources/icon/other/vu.png differ diff --git a/resources/icon/other/vv.png b/resources/icon/other/vv.png new file mode 100644 index 0000000..d5b7cdd Binary files /dev/null and b/resources/icon/other/vv.png differ diff --git a/tools/extract_doc.py b/tools/extract_doc.py index eee534b..6b39063 100755 --- a/tools/extract_doc.py +++ b/tools/extract_doc.py @@ -1,12 +1,30 @@ #!/usr/bin/env python import sys +import re + +excludes = [] +# excludes = [r'.*_diff.py'] filename = sys.argv[1] +printName = False + +if filename == '-p': + filename = sys.argv[2] + printName = True if filename.endswith('__init__.py'): sys.exit(0) +for e in excludes: + if re.match(e, filename): + sys.stderr.write('\tSkip %s\n' % (filename)) + sys.exit(0) + +if printName: + sys.stdout.write(' * %s\n' % (filename)) + sys.exit(0) + package_name = filename.replace('/', '.').replace('.py', '') sys.stdout.write('%s' % (package_name)) sys.stdout.write('\n') diff --git a/tools/extract_docs.sh b/tools/extract_docs.sh index e556330..539eb84 100755 --- a/tools/extract_docs.sh +++ b/tools/extract_docs.sh @@ -6,11 +6,20 @@ TARGET_MD="docs/index.md" rm -f "${MODULES_TARGET}" +echo "Generate plugins index" +python tools/extract_doc.py -p iwla.py > "${MODULES_TARGET}" +for p in `find plugins -name '*.py' | sort | sed ':a;N;$!ba;s/\n/ /g'`; do + python tools/extract_doc.py -p $p >> "${MODULES_TARGET}" +done +echo "\n" >> "${MODULES_TARGET}" + echo "Generate doc from iwla.py" -python tools/extract_doc.py iwla.py > "${MODULES_TARGET}" +python tools/extract_doc.py iwla.py >> "${MODULES_TARGET}" echo "Generate plugins documentation" -find plugins -name '*.py' -exec python tools/extract_doc.py \{\} \; >> "${MODULES_TARGET}" +for p in `find plugins -name '*.py' | sort` ; do + python tools/extract_doc.py $p >> "${MODULES_TARGET}" 
+done echo "Generate ${TARGET_MD}" cat "${MAIN_MD}" "${MODULES_TARGET}" > "${TARGET_MD}" diff --git a/tools/iwla_convert.pl b/tools/iwla_convert.pl index b5c9587..696e230 100755 --- a/tools/iwla_convert.pl +++ b/tools/iwla_convert.pl @@ -1,12 +1,13 @@ #!/usr/bin/perl -my $awstats_lib_root = './'; -my @awstats_libs = ('search_engines.pm', 'robots.pm'); +my $awstats_lib_root = '/usr/share/awstats/lib/'; +# my $awstats_lib_root = './'; +my @awstats_libs = ('search_engines.pm', 'robots.pm', 'operating_systems.pm', 'browsers.pm'); -# my $awstats_lib_root = '/usr/share/awstats/lib/'; # my @awstats_libs = ('browsers.pm', 'browsers_phone.pm', 'mime.pm', 'referer_spam.pm', 'search_engines.pm', 'operating_systems.pm', 'robots.pm', 'worms.pm'); foreach $lib (@awstats_libs) {require $awstats_lib_root . $lib;} +require './tools/own_search_engines.pm'; sub dumpList { my @list = @{$_[0]}; @@ -51,6 +52,8 @@ sub dumpHash { # Robots open($FIC,">", "awstats_data.py") or die $!; +print $FIC "#This file was automatically generated by iwla_convert.pl. 
Do not edit manually.\n\n"; + print $FIC "robots = ["; dumpList(\@RobotsSearchIDOrder_list1, $FIC, 1); dumpList(\@RobotsSearchIDOrder_list2, $FIC, 0); @@ -62,6 +65,7 @@ print $FIC "]\n\n"; print $FIC "search_engines_2 = ["; dumpList(\@SearchEnginesSearchIDOrder_list2, $FIC, 1); +dumpList(\@Own_SearchEnginesSearchIDOrder, $FIC, 0); print $FIC "]\n\n"; print $FIC "not_search_engines_keys = {"; @@ -70,10 +74,36 @@ print $FIC "}\n\n"; print $FIC "search_engines_hashid = {"; dumpHash(\%SearchEnginesHashID, $FIC, 1); +dumpHash(\%Own_SearchEnginesHashID, $FIC, 0); print $FIC "}\n\n"; print $FIC "search_engines_knwown_url = {"; dumpHash(\%SearchEnginesKnownUrl, $FIC, 1); +dumpHash(\%Own_SearchEnginesKnownUrl, $FIC, 0); +print $FIC "}\n\n"; + +print $FIC "operating_systems = ["; +dumpList(\@OSSearchIDOrder, $FIC, 1); +print $FIC "]\n\n"; + +print $FIC "operating_systems_hashid = {"; +dumpHash(\%OSHashID, $FIC, 1); +print $FIC "}\n\n"; + +print $FIC "operating_systems_family = {"; +dumpHash(\%OSFamily, $FIC, 1); +print $FIC "}\n\n"; + +print $FIC "browsers = ["; +dumpList(\@BrowsersSearchIDOrder, $FIC, 1); +print $FIC "]\n\n"; + +print $FIC "browsers_hashid = {"; +dumpHash(\%BrowsersHashIDLib, $FIC, 1); +print $FIC "}\n\n"; + +print $FIC "browsers_icons = {"; +dumpHash(\%BrowsersHashIcon, $FIC, 1); print $FIC "}\n\n"; close($FIC); diff --git a/tools/own_search_engines.pm b/tools/own_search_engines.pm new file mode 100644 index 0000000..ff758a6 --- /dev/null +++ b/tools/own_search_engines.pm @@ -0,0 +1,35 @@ +@Own_SearchEnginesSearchIDOrder=( +'jwss\.cc', +'lemoteur\.orange\.fr', +'windowssearch\.com', +'qwant\.com', +'wow\.com', +'searches\.omiga-plus\.com', +'buenosearch\.com', +'searches\.vi-view\.com' +); + +%Own_SearchEnginesHashID = ( + 'jwss\.cc', 'jws', + 'lemoteur\.orange\.fr', 'Orange', + 'windowssearch\.com', 'Windows Search', + 'qwant\.com', 'Qwant', + 'wow\.com', 'WOW', + 'searches\.omiga-plus\.com', 'Omiga-plus', + 'buenosearch\.com', 'Bueno Search', + 
'searches\.vi-view\.com', 'vi-view', + 'www.sfr\.fr\/recherche\/google', 'google' + ); + +%Own_SearchEnginesKnownUrl=( + 'jws','q=', + 'Orange', 'kw=', + 'Windows Search', 'q=', + 'Qwant', 'q=', + 'WOW', 'q=', + 'Omiga-plus', 'q=', + 'Bueno Search', 'q=', + 'vi-view', 'q=' + ); + +