Speed up search

This commit is contained in:
Grégory Soutadé 2012-12-24 19:18:19 +01:00
parent efff6d8a47
commit b7c4cf4e2f

View File

@@ -23,6 +23,7 @@ class Search:
'£', '%', 'µ', '*', ',', '?', ';', '.', '/', '£', '%', 'µ', '*', ',', '?', ';', '.', '/',
':', '!', '§', '', '²') ':', '!', '§', '', '²')
# Imported from generator.py
def _addReport(self, string, color=''): def _addReport(self, string, color=''):
if color != '': if color != '':
self.report = self.report + '<span style="color:' + color + '">' self.report = self.report + '<span style="color:' + color + '">'
@@ -59,24 +60,12 @@ class Search:
return hashtable return hashtable
def _remove_reg(self, content, reg):
found = re.search(reg, content)
while found != None:
#print str(found.start()) + ' ' + str(found.end())
# print content[found.start(0):found.end(0)]
# print "============================================="
content = content[:found.start(0)].lstrip() + ' ' + content[found.end(0):]
found = re.search(reg, content)
return content
def _strip_accents(self, s): def _strip_accents(self, s):
return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn')) return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'))
def _remove_tag(self, content): def _remove_tag(self, content):
content = self._remove_reg(content, self.htmlreg) content = self.htmlreg.sub('', content)
content = self._remove_reg(content, self.numreg) content = self.numreg.sub('', content)
content = content.replace('\n', '') content = content.replace('\n', '')
content = content.replace('\r', '') content = content.replace('\r', '')
@@ -85,7 +74,7 @@ class Search:
for c in self.replace_by_space: for c in self.replace_by_space:
content = content.replace(c, ' ') content = content.replace(c, ' ')
content = self._remove_reg(content, self.tagreg) content = self.tagreg.sub('', content)
content = self.pat.sub(' ', content) content = self.pat.sub(' ', content)
@@ -115,16 +104,16 @@ class Search:
hashtable[word][1] = weight + word_weight hashtable[word][1] = weight + word_weight
def _index_file(self, hashtable, filename, index): def _index_file(self, hashtable, filename, index):
f = open(filename, 'r')
content = f.read()
f.close()
try: try:
post = Post.objects.get(pk=index) post = Post.objects.get(pk=index)
if post.published == False: return if post.published == False: return
except: except:
return return
f = open(filename, 'r')
content = f.read()
f.close()
self._indexContent(hashtable, index, content, 1) self._indexContent(hashtable, index, content, 1)
self._indexContent(hashtable, index, post.title.encode('utf-8'), 5) self._indexContent(hashtable, index, post.title.encode('utf-8'), 5)
@@ -164,8 +153,10 @@ class Search:
if hashtable is None: return if hashtable is None: return
for k, v in hashtable.items(): for k, v in hashtable.items():
if post in v: # For tuples in values
v.remove(post) for t in v:
if post == v[0]:
v.remove(t)
if saveDatabase: if saveDatabase:
self._saveDatabase(blog, hashtable) self._saveDatabase(blog, hashtable)