Speed up search
This commit is contained in:
parent
efff6d8a47
commit
b7c4cf4e2f
33
search.py
33
search.py
|
@ -23,6 +23,7 @@ class Search:
|
|||
'£', '%', 'µ', '*', ',', '?', ';', '.', '/',
|
||||
':', '!', '§', '€', '²')
|
||||
|
||||
# Imported from generator.py
|
||||
def _addReport(self, string, color=''):
|
||||
if color != '':
|
||||
self.report = self.report + '<span style="color:' + color + '">'
|
||||
|
@ -59,24 +60,12 @@ class Search:
|
|||
|
||||
return hashtable
|
||||
|
||||
def _remove_reg(self, content, reg):
|
||||
found = re.search(reg, content)
|
||||
while found != None:
|
||||
#print str(found.start()) + ' ' + str(found.end())
|
||||
# print content[found.start(0):found.end(0)]
|
||||
# print "============================================="
|
||||
content = content[:found.start(0)].lstrip() + ' ' + content[found.end(0):]
|
||||
|
||||
found = re.search(reg, content)
|
||||
|
||||
return content
|
||||
|
||||
def _strip_accents(self, s):
|
||||
return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'))
|
||||
|
||||
def _remove_tag(self, content):
|
||||
content = self._remove_reg(content, self.htmlreg)
|
||||
content = self._remove_reg(content, self.numreg)
|
||||
content = self.htmlreg.sub('', content)
|
||||
content = self.numreg.sub('', content)
|
||||
|
||||
content = content.replace('\n', '')
|
||||
content = content.replace('\r', '')
|
||||
|
@ -85,7 +74,7 @@ class Search:
|
|||
for c in self.replace_by_space:
|
||||
content = content.replace(c, ' ')
|
||||
|
||||
content = self._remove_reg(content, self.tagreg)
|
||||
content = self.tagreg.sub('', content)
|
||||
|
||||
content = self.pat.sub(' ', content)
|
||||
|
||||
|
@ -115,16 +104,16 @@ class Search:
|
|||
hashtable[word][1] = weight + word_weight
|
||||
|
||||
def _index_file(self, hashtable, filename, index):
|
||||
f = open(filename, 'r')
|
||||
content = f.read()
|
||||
f.close()
|
||||
|
||||
try:
|
||||
post = Post.objects.get(pk=index)
|
||||
if post.published == False: return
|
||||
except:
|
||||
return
|
||||
|
||||
f = open(filename, 'r')
|
||||
content = f.read()
|
||||
f.close()
|
||||
|
||||
self._indexContent(hashtable, index, content, 1)
|
||||
self._indexContent(hashtable, index, post.title.encode('utf-8'), 5)
|
||||
|
||||
|
@ -164,8 +153,10 @@ class Search:
|
|||
if hashtable is None: return
|
||||
|
||||
for k, v in hashtable.items():
|
||||
if post in v:
|
||||
v.remove(post)
|
||||
# For tuples in values
|
||||
for t in v:
|
||||
if post == v[0]:
|
||||
v.remove(t)
|
||||
|
||||
if saveDatabase:
|
||||
self._saveDatabase(blog, hashtable)
|
||||
|
|
Loading…
Reference in New Issue
Block a user