Speed up search
This commit is contained in:
parent
efff6d8a47
commit
b7c4cf4e2f
33
search.py
33
search.py
|
@ -23,6 +23,7 @@ class Search:
|
||||||
'£', '%', 'µ', '*', ',', '?', ';', '.', '/',
|
'£', '%', 'µ', '*', ',', '?', ';', '.', '/',
|
||||||
':', '!', '§', '€', '²')
|
':', '!', '§', '€', '²')
|
||||||
|
|
||||||
|
# Imported from generator.py
|
||||||
def _addReport(self, string, color=''):
|
def _addReport(self, string, color=''):
|
||||||
if color != '':
|
if color != '':
|
||||||
self.report = self.report + '<span style="color:' + color + '">'
|
self.report = self.report + '<span style="color:' + color + '">'
|
||||||
|
@ -59,24 +60,12 @@ class Search:
|
||||||
|
|
||||||
return hashtable
|
return hashtable
|
||||||
|
|
||||||
def _remove_reg(self, content, reg):
|
|
||||||
found = re.search(reg, content)
|
|
||||||
while found != None:
|
|
||||||
#print str(found.start()) + ' ' + str(found.end())
|
|
||||||
# print content[found.start(0):found.end(0)]
|
|
||||||
# print "============================================="
|
|
||||||
content = content[:found.start(0)].lstrip() + ' ' + content[found.end(0):]
|
|
||||||
|
|
||||||
found = re.search(reg, content)
|
|
||||||
|
|
||||||
return content
|
|
||||||
|
|
||||||
def _strip_accents(self, s):
|
def _strip_accents(self, s):
|
||||||
return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'))
|
return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'))
|
||||||
|
|
||||||
def _remove_tag(self, content):
|
def _remove_tag(self, content):
|
||||||
content = self._remove_reg(content, self.htmlreg)
|
content = self.htmlreg.sub('', content)
|
||||||
content = self._remove_reg(content, self.numreg)
|
content = self.numreg.sub('', content)
|
||||||
|
|
||||||
content = content.replace('\n', '')
|
content = content.replace('\n', '')
|
||||||
content = content.replace('\r', '')
|
content = content.replace('\r', '')
|
||||||
|
@ -85,7 +74,7 @@ class Search:
|
||||||
for c in self.replace_by_space:
|
for c in self.replace_by_space:
|
||||||
content = content.replace(c, ' ')
|
content = content.replace(c, ' ')
|
||||||
|
|
||||||
content = self._remove_reg(content, self.tagreg)
|
content = self.tagreg.sub('', content)
|
||||||
|
|
||||||
content = self.pat.sub(' ', content)
|
content = self.pat.sub(' ', content)
|
||||||
|
|
||||||
|
@ -115,16 +104,16 @@ class Search:
|
||||||
hashtable[word][1] = weight + word_weight
|
hashtable[word][1] = weight + word_weight
|
||||||
|
|
||||||
def _index_file(self, hashtable, filename, index):
|
def _index_file(self, hashtable, filename, index):
|
||||||
f = open(filename, 'r')
|
|
||||||
content = f.read()
|
|
||||||
f.close()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
post = Post.objects.get(pk=index)
|
post = Post.objects.get(pk=index)
|
||||||
if post.published == False: return
|
if post.published == False: return
|
||||||
except:
|
except:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
f = open(filename, 'r')
|
||||||
|
content = f.read()
|
||||||
|
f.close()
|
||||||
|
|
||||||
self._indexContent(hashtable, index, content, 1)
|
self._indexContent(hashtable, index, content, 1)
|
||||||
self._indexContent(hashtable, index, post.title.encode('utf-8'), 5)
|
self._indexContent(hashtable, index, post.title.encode('utf-8'), 5)
|
||||||
|
|
||||||
|
@ -164,8 +153,10 @@ class Search:
|
||||||
if hashtable is None: return
|
if hashtable is None: return
|
||||||
|
|
||||||
for k, v in hashtable.items():
|
for k, v in hashtable.items():
|
||||||
if post in v:
|
# For tuples in values
|
||||||
v.remove(post)
|
for t in v:
|
||||||
|
if post == v[0]:
|
||||||
|
v.remove(t)
|
||||||
|
|
||||||
if saveDatabase:
|
if saveDatabase:
|
||||||
self._saveDatabase(blog, hashtable)
|
self._saveDatabase(blog, hashtable)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user