# iwla/robots.py
#
# Repository viewer metadata: 3 lines, 11 KiB, Python.
# (Viewer links: Raw / Normal View / History.)
#
# Revision date: 2014-11-18 20:18:53 +01:00
awstats_robots = ['.*appie.*', '.*architext.*', '.*jeeves.*', '.*bjaaland.*', '.*contentmatch.*', '.*ferret.*', '.*googlebot.*', '.*google\-sitemaps.*', '.*gulliver.*', '.*virus[_+ ]detector.*', '.*harvest.*', '.*htdig.*', '.*linkwalker.*', '.*lilina.*', '.*lycos[_+ ].*', '.*moget.*', '.*muscatferret.*', '.*myweb.*', '.*nomad.*', '.*scooter.*', '.*slurp.*', '.*^voyager\/.*', '.*weblayers.*', '.*antibot.*', '.*bruinbot.*', '.*digout4u.*', '.*echo!.*', '.*fast\-webcrawler.*', '.*ia_archiver\-web\.archive\.org.*', '.*ia_archiver.*', '.*jennybot.*', '.*mercator.*', '.*netcraft.*', '.*msnbot\-media.*', '.*msnbot.*', '.*petersnews.*', '.*relevantnoise\.com.*', '.*unlost_web_crawler.*', '.*voila.*', '.*webbase.*', '.*webcollage.*', '.*cfetch.*', '.*zyborg.*', '.*wisenutbot.*', '.*[^a]fish.*', '.*abcdatos.*', '.*acme\.spider.*', '.*ahoythehomepagefinder.*', '.*alkaline.*', '.*anthill.*', '.*arachnophilia.*', '.*arale.*', '.*araneo.*', '.*aretha.*', '.*ariadne.*', '.*powermarks.*', '.*arks.*', '.*aspider.*', '.*atn\.txt.*', '.*atomz.*', '.*auresys.*', '.*backrub.*', '.*bbot.*', '.*bigbrother.*', '.*blackwidow.*', '.*blindekuh.*', '.*bloodhound.*', '.*borg\-bot.*', '.*brightnet.*', '.*bspider.*', '.*cactvschemistryspider.*', '.*calif[^r].*', '.*cassandra.*', '.*cgireader.*', '.*checkbot.*', '.*christcrawler.*', '.*churl.*', '.*cienciaficcion.*', '.*collective.*', '.*combine.*', '.*conceptbot.*', '.*coolbot.*', '.*core.*', '.*cosmos.*', '.*cruiser.*', '.*cusco.*', '.*cyberspyder.*', '.*desertrealm.*', '.*deweb.*', '.*dienstspider.*', '.*digger.*', '.*diibot.*', '.*direct_hit.*', '.*dnabot.*', '.*download_express.*', '.*dragonbot.*', '.*dwcp.*', '.*e\-collector.*', '.*ebiness.*', '.*elfinbot.*', '.*emacs.*', '.*emcspider.*', '.*esther.*', '.*evliyacelebi.*', '.*fastcrawler.*', '.*feedcrawl.*', '.*fdse.*', '.*felix.*', '.*fetchrover.*', '.*fido.*', '.*finnish.*', '.*fireball.*', '.*fouineur.*', '.*francoroute.*', '.*freecrawl.*', '.*funnelweb.*', '.*gama.*', '.*gazz.*', 
'.*gcreep.*', '.*getbot.*', '.*geturl.*', '.*golem.*', '.*gougou.*', '.*grapnel.*', '.*griffon.*', '.*gromit.*', '.*gulperbot.*', '.*hambot.*', '.*havindex.*', '.*hometown.*', '.*htmlgobble.*', '.*hyperdecontextualizer.*', '.*iajabot.*', '.*iaskspider.*', '.*hl_ftien_spider.*', '.*sogou.*', '.*iconoclast.*', '.*ilse.*', '.*imagelock.*', '.*incywincy.*', '.*informant.*', '.*infoseek.*', '.*infoseeksidewinder.*', '.*infospider.*', '.*inspectorwww.*', '.*intelliagent.*', '.*irobot.*', '.*iron33.*', '.*israelisearch.*', '.*javabee.*', '.*jbot.*', '.*jcrawler.*', '.*jobo.*', '.*jobot.*', '.*joebot.*', '.*jubii.*', '.*jumpstation.*', '.*kapsi.*', '.*katipo.*', '.*kilroy.*', '.*ko[_+ ]yappo[_+ ]robot.*', '.*kummhttp.*', '.*labelgrabber\.txt.*', '.*larbin.*', '.*legs.*', '.*linkidator.*', '.*linkscan.*', '.*lockon.*', '.*logo_gif.*', '.*macworm.*', '.*magpie.*', '.*marvin.*', '.*mattie.*', '.*mediafox.*', '.*merzscope.*', '.*meshexplorer.*', '.*mindcrawler.*', '.*mnogosearch.*', '.*momspider.*', '.*monster.*', '.*motor.*', '.*muncher.*', '.*mwdsearch.*', '.*ndspider.*', '.*nederland\.zoek.*', '.*netcarta.*', '.*netmechanic.*', '.*netscoop.*', '.*newscan\-online.*', '.*nhse.*', '.*northstar.*', '.*nzexplorer.*', '.*objectssearch.*', '.*occam.*', '.*octopus.*', '.*openfind.*', '.*orb_search.*', '.*packrat.*', '.*pageboy.*', '.*parasite.*', '.*patric.*', '.*pegasus.*', '.*perignator.*', '.*perlcrawler.*', '.*phantom.*', '.*phpdig.*', '.*piltdownman.*', '.*pimptrain.*', '.*pioneer.*', '.*pitkow.*', '.*pjspider.*', '.*plumtreewebaccessor.*', '.*poppi.*', '.*portalb.*', '.*psbot.*', '.*python.*', '.*raven.*', '.*rbse.*', '.*resumerobot.*', '.*rhcs.*', '.*road_runner.*', '.*robbie.*', '.*robi.*', '.*robocrawl.*', '.*robofox.*', '.*robozilla.*', '.*roverbot.*', '.*rules.*', '.*safetynetrobot.*', '.*search\-info.*', '.*search_au.*', '.*searchprocess.*', '.*senrigan.*', '.*sgscout.*', '.*shaggy.*', '.*shaihulud.*', '.*sift.*', '.*simbot.*', '.*site\-valet.*', '.*sitetech.*', 
'.*skymob.*', '.*slcrawler.*', '.*smartspider.*', '.*snooper.*', '.*solbot.*', '.*speedy.*', '.*spider[_+ ]monk