From b7c4cf4e2fb102ccc3eae0577bb9ea365934a83b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gr=C3=A9gory=20Soutad=C3=A9?= <soutade@gmail.com>
Date: Mon, 24 Dec 2012 19:18:19 +0100
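Subject: [PATCH] Speed up search

Replace the _remove_reg() helper, which re-ran re.search() on the whole
content after every match, with a single sub() call on each compiled
pattern. Note that matches are now replaced by an empty string instead of
a single space.

In _index_file(), look up the post before reading the file, so nothing is
read from disk when the post is missing or unpublished.

When removing a post from the hashtable, scan the tuples stored in each
value instead of testing membership of the post directly.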
---
 search.py | 33 ++++++++++++---------------------
 1 file changed, 12 insertions(+), 21 deletions(-)
diff --git a/search.py b/search.py
index b551203..ec74e5d 100644
--- a/search.py
+++ b/search.py
@@ -23,6 +23,7 @@ class Search:
                                  '£', '%', 'µ', '*', ',', '?', ';', '.', '/',
                                  ':', '!', '§', '€', '²')
 
+    # Imported from generator.py
     def _addReport(self, string, color=''):
         if color != '':
             self.report = self.report + '<span style="color:' + color + '">'
@@ -59,24 +60,12 @@ class Search:
 
         return hashtable
 
-    def _remove_reg(self, content, reg):
-        found = re.search(reg, content)
-        while found != None:
-        #print str(found.start()) + ' ' + str(found.end())
-            # print content[found.start(0):found.end(0)]
-            # print "============================================="
-            content = content[:found.start(0)].lstrip() + ' ' + content[found.end(0):]
-
-            found = re.search(reg, content)
-
-        return content
-
     def _strip_accents(self, s):
         return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'))
 
     def _remove_tag(self, content):
-        content = self._remove_reg(content, self.htmlreg)
-        content = self._remove_reg(content, self.numreg)
+        content = self.htmlreg.sub('', content)
+        content = self.numreg.sub('', content)
 
         content = content.replace('\n', '')
         content = content.replace('\r', '')
@@ -85,7 +74,7 @@ class Search:
         for c in self.replace_by_space:
             content = content.replace(c, ' ')
         
-        content = self._remove_reg(content, self.tagreg)
+        content = self.tagreg.sub('', content)
 
         content = self.pat.sub(' ', content)
 
@@ -115,16 +104,16 @@ class Search:
                 hashtable[word][1] = weight + word_weight
 
     def _index_file(self, hashtable, filename, index):
-        f = open(filename, 'r')
-        content = f.read()
-        f.close()
-
         try:
             post = Post.objects.get(pk=index)
             if post.published == False: return
         except:
             return
 
+        f = open(filename, 'r')
+        content = f.read()
+        f.close()
+
         self._indexContent(hashtable, index, content, 1)
         self._indexContent(hashtable, index, post.title.encode('utf-8'), 5)
         
@@ -164,8 +153,10 @@ class Search:
         if hashtable is None: return
 
         for k, v in hashtable.items():
-            if post in v:
-                v.remove(post)
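+            # Values hold tuples, so scan each entry. NOTE(review): the test below reads v[0], not t[0], and removes while iterating -- confirm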
+            for t in v:
+                if post == v[0]:
+                    v.remove(t)
 
         if saveDatabase:
             self._saveDatabase(blog, hashtable)