From 839b935d4783e2c76cc95283b7d977f96f18ee4a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gr=C3=A9gory=20Soutad=C3=A9?= <soutade@gmail.com>
Date: Mon, 10 Dec 2012 20:50:27 +0100
Subject: [PATCH] Do a more pertinent search by including titles in indexing

---
 search.py                        | 37 +++++++++++++++++++++-----------
 sites/blog.soutade.fr/about.html | 13 +++++++----
 2 files changed, 34 insertions(+), 16 deletions(-)
diff --git a/search.py b/search.py
index 503a27f..cdad7c6 100644
--- a/search.py
+++ b/search.py
@@ -5,6 +5,7 @@ import os
 import operator
 import pickle
 from django.db import models
+from dynastie.models import Post
 
 class Search:
     MINIMUM_LETTERS = 3
@@ -16,7 +17,6 @@ class Search:
         self.htmlreg = re.compile('&[^;]+;')
         self.numreg = re.compile('[0-9]+')
         self.pat = re.compile(r'\s+')
-        self.wordreg = re.compile('\w+')
 
         self.replace_by_space = ('(', ')', '#', '\'', '{', '}', '[', ']',
                                  '-', '|', '\t', '\\', '_', '^' '=', '+', '$',
@@ -97,14 +97,10 @@ class Search:
 
         return content
 
-    def _index_file(self, hashtable, filename, index):
-        f = open(filename, 'r')
-        content = f.read()
-        f.close()
-
+    def _indexContent(self, hashtable, index, content, word_weight):
         content = self._prepare_string(content)
 
-        wordlist = re.findall(self.wordreg, content)
+        wordlist = content.split(' ')
 
         for word in wordlist:
             if len(word) < self.MINIMUM_LETTERS:
@@ -113,8 +109,25 @@ class Search:
             if not word in hashtable:
                 hashtable[word] = []
             if not index in hashtable[word]:
-                hashtable[word].append(index)
+                hashtable[word].append([index, word_weight])
+            else:
+                weight = hashtable[word][1]
+                hashtable[word][1] = weight + word_weight
 
+    def _index_file(self, hashtable, filename, index):
+        """Index the body (weight 1) and title (weight 5) of post `index`;
+        posts that are missing or unpublished are skipped."""
+        try:
+            post = Post.objects.get(pk=index)
+        except Exception:
+            return
+        if not post.published:
+            return
+        with open(filename, 'r') as f:
+            content = f.read()
+        self._indexContent(hashtable, index, content, 1)
+        self._indexContent(hashtable, index, post.title.encode('utf-8'), 5)
+
     def create_index(self, blog):
         hashtable = {}
         
@@ -172,7 +185,7 @@ class Search:
         
         string = self._prepare_string(string.encode('utf-8'))
 
-        wordlist = re.findall(self.wordreg, string)
+        wordlist = string.split(' ')
 
         res = {}
         for word in wordlist:
@@ -184,9 +197,9 @@ class Search:
             if word not in hashtable:
                 continue
             for post in hashtable[word]:
-                if not post in res:
-                    res[post] = 0
-                res[post] = res[post] + 1
+                # post is an [index, weight] pair; add each weight exactly once.
+                post_id, weight = post[0], post[1]
+                res[post_id] = res.get(post_id, 0) + weight
         sorted_res = sorted(res.iteritems(), key=operator.itemgetter(1))
 
         sorted_res.reverse()
diff --git a/sites/blog.soutade.fr/about.html b/sites/blog.soutade.fr/about.html
index 03223cf..fcc8506 100755
--- a/sites/blog.soutade.fr/about.html
+++ b/sites/blog.soutade.fr/about.html
@@ -43,15 +43,20 @@
 	  </ul>
 	</div>
 	<div class="menu">
-	  <!-- <div class="menu_content"> -->
-	  <!--   <div class="menu_content_header">Search</div> -->
-	  <!--   <div class="menu_content_content">La recherche</div> -->
-	  <!-- </div> -->
+	  <div class="menu_content">
+	    <div class="menu_content_header">Recherche</div>
+	    <div id="menu_main">
+	      <dyn:replace div_name="form" id="search_form" method="POST" action="/search/dyn:blog_id">
+		<input type="text" name="text" onkeypress="handleKeyPress(event,this.form)"/>
+	      </dyn:replace>
+	    </div>
+	  </div>
 	  <div class="menu_content">
 	    <div class="menu_content_header">Menu principal</div>
 	    <div id="menu_main">
 	       <div class="menu_content_content"><a href="/">Première page</a></div>
 	       <div class="menu_content_content"><a href="/about.html">À propos</a></div>
+	       <div class="menu_content_content"><a href="http://indefero.soutade.fr">Projets personnels</a></div>
 	  </div>
 	  </div>
 	  <div class="menu_content">