Speed up search

2012-12-24 19:18:19 +01:00
parent efff6d8a47
commit b7c4cf4e2f
1 changed file with 12 additions and 21 deletions
--- a/search.py
+++ b/search.py
@@ -23,6 +23,7 @@ class Search:
                                 '£', '%', 'µ', '*', ',', '?', ';', '.', '/',
                                 ':', '!', '§', '€', '²')
    # Imported from generator.py
    def _addReport(self, string, color=''):
        if color != '':
            self.report = self.report + '<span style="color:' + color + '">'
@@ -59,24 +60,12 @@ class Search:
        return hashtable
    def _remove_reg(self, content, reg):
        found = re.search(reg, content)
        while found != None:
        #print str(found.start()) + ' ' + str(found.end())
            # print content[found.start(0):found.end(0)]
            # print "============================================="
            content = content[:found.start(0)].lstrip() + ' ' + content[found.end(0):]
            found = re.search(reg, content)
        return content
    def _strip_accents(self, s):
        return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'))
    def _remove_tag(self, content):
-        content = self._remove_reg(content, self.htmlreg)
+        content = self.htmlreg.sub('', content)
-        content = self._remove_reg(content, self.numreg)
+        content = self.numreg.sub('', content)
        content = content.replace('\n', '')
        content = content.replace('\r', '')
@@ -85,7 +74,7 @@ class Search:
        for c in self.replace_by_space:
            content = content.replace(c, ' ')
-        content = self._remove_reg(content, self.tagreg)
+        content = self.tagreg.sub('', content)
        content = self.pat.sub(' ', content)
@@ -115,16 +104,16 @@ class Search:
                hashtable[word][1] = weight + word_weight
    def _index_file(self, hashtable, filename, index):
        """Index the file content and the title of one published post.

        The ``Post`` with primary key ``index`` is looked up first. Nothing
        is indexed (and ``filename`` is never opened) when that lookup fails
        or the post is not published. Otherwise the text read from
        ``filename`` and the UTF-8 encoded post title are both passed to
        ``_indexContent`` together with ``hashtable`` and ``index``; the
        last argument is 1 for the content and 5 for the title (presumably
        the word weight -- confirm against ``_indexContent``).
        """
        # Resolve the post before touching the file so skipped posts cost
        # no file I/O at all.
        try:
            post = Post.objects.get(pk=index)
            # Kept as an equality test so a None value is still indexed,
            # exactly as before.
            if post.published == False: return
        except Exception:
            # Best effort: a post that cannot be loaded is simply not
            # indexed. Unlike a bare ``except``, this still lets
            # KeyboardInterrupt and SystemExit propagate.
            return
        # Read the file once; the context manager closes the handle even if
        # read() raises.
        with open(filename, 'r') as f:
            content = f.read()
        self._indexContent(hashtable, index, content, 1)
        self._indexContent(hashtable, index, post.title.encode('utf-8'), 5)
@@ -164,8 +153,10 @@ class Search:
        if hashtable is None: return
        for k, v in hashtable.items():
-            if post in v:
+            # For tuples in values
-                v.remove(post)
+            for t in v:
                if post == v[0]:
                    v.remove(t)
        if saveDatabase:
            self._saveDatabase(blog, hashtable)