Merge branch 'dev' of soutade.fr:iwla into dev
This commit is contained in:
commit
4759207f7c
|
@ -1,8 +1,11 @@
|
||||||
v0.4 (02/05/2016)
|
v0.4 (20/08/2016)
|
||||||
** User **
|
** User **
|
||||||
|
Remove crawlers from feed parsers
|
||||||
|
Add display only switch (-p)
|
||||||
** Dev **
|
** Dev **
|
||||||
** Bugs **
|
** Bugs **
|
||||||
for robots, we have to use not_viewed_pages (feeds plugin)
|
for robots, we have to use not_viewed_pages (feeds plugin)
|
||||||
|
gz files were not generated due to bad time comparison
|
||||||
|
|
||||||
v0.3 (12/04/2016)
|
v0.3 (12/04/2016)
|
||||||
** User **
|
** User **
|
||||||
|
|
|
@ -34,7 +34,8 @@ display_hooks = []
|
||||||
|
|
||||||
# Extensions that are considered as a HTML page (or result) in opposite to hits
|
# Extensions that are considered as a HTML page (or result) in opposite to hits
|
||||||
pages_extensions = ['/', 'htm', 'html', 'xhtml', 'py', 'pl', 'rb', 'php']
|
pages_extensions = ['/', 'htm', 'html', 'xhtml', 'py', 'pl', 'rb', 'php']
|
||||||
# HTTP codes that are cosidered OK
|
|
||||||
|
# HTTP codes that are considered OK
|
||||||
viewed_http_codes = [200, 304]
|
viewed_http_codes = [200, 304]
|
||||||
|
|
||||||
# If False, doesn't count visitors that don't GET a page but resources only (images, rss...)
|
# If False, doesn't count visitors that don't GET a page but resources only (images, rss...)
|
||||||
|
|
13
iwla.py
13
iwla.py
|
@ -565,7 +565,7 @@ class IWLA(object):
|
||||||
self._generateDisplayDaysStats()
|
self._generateDisplayDaysStats()
|
||||||
self._callPlugins(conf.DISPLAY_HOOK_DIRECTORY)
|
self._callPlugins(conf.DISPLAY_HOOK_DIRECTORY)
|
||||||
self._generateDisplayWholeMonthStats()
|
self._generateDisplayWholeMonthStats()
|
||||||
build_time = time.localtime()
|
build_time = time.mktime(time.localtime())
|
||||||
self.display.build(conf.DISPLAY_ROOT)
|
self.display.build(conf.DISPLAY_ROOT)
|
||||||
self._compressFiles(build_time, conf.DISPLAY_ROOT)
|
self._compressFiles(build_time, conf.DISPLAY_ROOT)
|
||||||
|
|
||||||
|
@ -608,6 +608,10 @@ class IWLA(object):
|
||||||
|
|
||||||
duplicated_stats['nb_visitors'] = stats['nb_visitors'] = len(self.valid_visitors.keys())
|
duplicated_stats['nb_visitors'] = stats['nb_visitors'] = len(self.valid_visitors.keys())
|
||||||
|
|
||||||
|
if args.display_only:
|
||||||
|
self._generateDisplay()
|
||||||
|
return
|
||||||
|
|
||||||
self._callPlugins(conf.POST_HOOK_DIRECTORY)
|
self._callPlugins(conf.POST_HOOK_DIRECTORY)
|
||||||
|
|
||||||
path = self.getDBFilename(cur_time)
|
path = self.getDBFilename(cur_time)
|
||||||
|
@ -632,6 +636,9 @@ class IWLA(object):
|
||||||
self._generateDisplay()
|
self._generateDisplay()
|
||||||
|
|
||||||
def _generateDayStats(self):
|
def _generateDayStats(self):
|
||||||
|
if args.display_only:
|
||||||
|
return
|
||||||
|
|
||||||
visits = self.current_analysis['visits']
|
visits = self.current_analysis['visits']
|
||||||
cur_time = self.meta_infos['last_time']
|
cur_time = self.meta_infos['last_time']
|
||||||
|
|
||||||
|
@ -835,6 +842,10 @@ if __name__ == '__main__':
|
||||||
default=False,
|
default=False,
|
||||||
help='Don\'t compress databases (bigger but faster, not compatible with compressed databases)')
|
help='Don\'t compress databases (bigger but faster, not compatible with compressed databases)')
|
||||||
|
|
||||||
|
parser.add_argument('-p', '--display-only', dest='display_only', action='store_true',
|
||||||
|
default=False,
|
||||||
|
help='Only generate display')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# Load user conf
|
# Load user conf
|
||||||
|
|
|
@ -76,7 +76,9 @@ class IWLADisplayFeeds(IPlugin):
|
||||||
table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'All feeds parsers'), [self.iwla._(u'Host'), self.iwla._(u'Pages'), self.iwla._(u'Hits')])
|
table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'All feeds parsers'), [self.iwla._(u'Host'), self.iwla._(u'Pages'), self.iwla._(u'Hits')])
|
||||||
table.setColsCSSClass(['', 'iwla_page', 'iwla_hit'])
|
table.setColsCSSClass(['', 'iwla_page', 'iwla_hit'])
|
||||||
for super_hit in hits.values():
|
for super_hit in hits.values():
|
||||||
if not super_hit['feed_parser']: continue
|
if not super_hit.get('feed_parser', False): continue
|
||||||
|
if super_hit['feed_parser'] == IWLAPostAnalysisFeeds.BAD_FEED_PARSER:
|
||||||
|
continue
|
||||||
nb_feeds_parsers += 1
|
nb_feeds_parsers += 1
|
||||||
address = super_hit['remote_addr']
|
address = super_hit['remote_addr']
|
||||||
if display_visitor_ip and\
|
if display_visitor_ip and\
|
||||||
|
|
|
@ -55,6 +55,7 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
||||||
NOT_A_FEED_PARSER = 0
|
NOT_A_FEED_PARSER = 0
|
||||||
FEED_PARSER = 1
|
FEED_PARSER = 1
|
||||||
MERGED_FEED_PARSER = 2
|
MERGED_FEED_PARSER = 2
|
||||||
|
BAD_FEED_PARSER = 3
|
||||||
|
|
||||||
def __init__(self, iwla):
|
def __init__(self, iwla):
|
||||||
super(IWLAPostAnalysisFeeds, self).__init__(iwla)
|
super(IWLAPostAnalysisFeeds, self).__init__(iwla)
|
||||||
|
@ -71,6 +72,9 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
||||||
for f in feeds:
|
for f in feeds:
|
||||||
self.feeds_re.append(re.compile(r'.*%s.*' % (f)))
|
self.feeds_re.append(re.compile(r'.*%s.*' % (f)))
|
||||||
|
|
||||||
|
self.bad_feeds_re = []
|
||||||
|
self.bad_feeds_re.append(re.compile(r'.*crawl.*'))
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit):
|
def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit):
|
||||||
|
@ -94,7 +98,17 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
||||||
self.merge_one_hit_only_feeds_parsers:
|
self.merge_one_hit_only_feeds_parsers:
|
||||||
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
|
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
|
||||||
|
|
||||||
if not isFeedParser is None: continue
|
if isFeedParser:
|
||||||
|
if hit['feed_parser'] == self.BAD_FEED_PARSER: continue
|
||||||
|
if not hit.get('feed_name_analysed', False) and\
|
||||||
|
hit.get('dns_name_replaced', False):
|
||||||
|
hit['feed_name_analysed'] = True
|
||||||
|
addr = hit.get('remote_addr', None)
|
||||||
|
for r in self.bad_feeds_re:
|
||||||
|
if r.match(addr):
|
||||||
|
hit['feed_parser'] = self.BAD_FEED_PARSER
|
||||||
|
return
|
||||||
|
return
|
||||||
|
|
||||||
isFeedParser = self.NOT_A_FEED_PARSER
|
isFeedParser = self.NOT_A_FEED_PARSER
|
||||||
uri = hit['requests'][0]['extract_request']['extract_uri'].lower()
|
uri = hit['requests'][0]['extract_request']['extract_uri'].lower()
|
||||||
|
|
|
@ -67,7 +67,7 @@ class IWLAPostAnalysisReverseDNS(IPlugin):
|
||||||
hits = self.iwla.getCurrentVisits()
|
hits = self.iwla.getCurrentVisits()
|
||||||
for (k, hit) in hits.items():
|
for (k, hit) in hits.items():
|
||||||
if hit.get('dns_analysed', False): continue
|
if hit.get('dns_analysed', False): continue
|
||||||
if not hit['feed_parser'] and\
|
if not hit.get('feed_parser', False) and\
|
||||||
not self.iwla.isValidVisitor(hit):
|
not self.iwla.isValidVisitor(hit):
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user