Make search results more relevant by also indexing post titles
This commit is contained in:
		
							
								
								
									
										37
									
								
								search.py
									
									
									
									
									
								
							
							
						
						
									
										37
									
								
								search.py
									
									
									
									
									
								
							| @@ -5,6 +5,7 @@ import os | ||||
| import operator | ||||
| import pickle | ||||
| from django.db import models | ||||
| from dynastie.models import Post | ||||
|  | ||||
| class Search: | ||||
|     MINIMUM_LETTERS = 3 | ||||
| @@ -16,7 +17,6 @@ class Search: | ||||
|         self.htmlreg = re.compile('&[^;]+;') | ||||
|         self.numreg = re.compile('[0-9]+') | ||||
|         self.pat = re.compile(r'\s+') | ||||
|         self.wordreg = re.compile('\w+') | ||||
|  | ||||
|         self.replace_by_space = ('(', ')', '#', '\'', '{', '}', '[', ']', | ||||
|                                  '-', '|', '\t', '\\', '_', '^' '=', '+', '$', | ||||
| @@ -97,14 +97,10 @@ class Search: | ||||
|  | ||||
|         return content | ||||
|  | ||||
|     def _index_file(self, hashtable, filename, index): | ||||
|         f = open(filename, 'r') | ||||
|         content = f.read() | ||||
|         f.close() | ||||
|  | ||||
|     def _indexContent(self, hashtable, index, content, word_weight): | ||||
|         content = self._prepare_string(content) | ||||
|  | ||||
|         wordlist = re.findall(self.wordreg, content) | ||||
|         wordlist = content.split(' ') | ||||
|  | ||||
|         for word in wordlist: | ||||
|             if len(word) < self.MINIMUM_LETTERS: | ||||
| @@ -113,8 +109,25 @@ class Search: | ||||
|             if not word in hashtable: | ||||
|                 hashtable[word] = [] | ||||
|             if not index in hashtable[word]: | ||||
|                 hashtable[word].append(index) | ||||
|                 hashtable[word].append([index, word_weight]) | ||||
|             else: | ||||
|                 weight = hashtable[word][1] | ||||
|                 hashtable[word][1] = weight + word_weight | ||||
|  | ||||
|     def _index_file(self, hashtable, filename, index): | ||||
|         f = open(filename, 'r') | ||||
|         content = f.read() | ||||
|         f.close() | ||||
|  | ||||
|         try: | ||||
|             post = Post.objects.get(pk=index) | ||||
|             if post.published == False: return | ||||
|         except: | ||||
|             return | ||||
|  | ||||
|         self._indexContent(hashtable, index, content, 1) | ||||
|         self._indexContent(hashtable, index, post.title.encode('utf-8'), 5) | ||||
|          | ||||
|     def create_index(self, blog): | ||||
|         hashtable = {} | ||||
|          | ||||
| @@ -172,7 +185,7 @@ class Search: | ||||
|          | ||||
|         string = self._prepare_string(string.encode('utf-8')) | ||||
|  | ||||
|         wordlist = re.findall(self.wordreg, string) | ||||
|         wordlist = string.split(' ') | ||||
|  | ||||
|         res = {} | ||||
|         for word in wordlist: | ||||
| @@ -184,9 +197,9 @@ class Search: | ||||
|             if word not in hashtable: | ||||
|                 continue | ||||
|             for post in hashtable[word]: | ||||
|                 if not post in res: | ||||
|                     res[post] = 0 | ||||
|                 res[post] = res[post] + 1 | ||||
|                 if not post[0] in res: | ||||
|                     res[post[0]] = post[1] | ||||
|                 res[post[0]] += post[1] | ||||
|         sorted_res = sorted(res.iteritems(), key=operator.itemgetter(1)) | ||||
|  | ||||
|         sorted_res.reverse() | ||||
|   | ||||
| @@ -43,15 +43,20 @@ | ||||
| 	  </ul> | ||||
| 	</div> | ||||
| 	<div class="menu"> | ||||
| 	  <!-- <div class="menu_content"> --> | ||||
| 	  <!--   <div class="menu_content_header">Search</div> --> | ||||
| 	  <!--   <div class="menu_content_content">La recherche</div> --> | ||||
| 	  <!-- </div> --> | ||||
| 	  <div class="menu_content"> | ||||
| 	    <div class="menu_content_header">Recherche</div> | ||||
| 	    <div id="menu_main"> | ||||
| 	      <dyn:replace div_name="form" id="search_form" method="POST" action="/search/dyn:blog_id"> | ||||
| 		<input type="text" name="text" onkeypress="handleKeyPress(event,this.form)"/> | ||||
| 	      </dyn:replace> | ||||
| 	    </div> | ||||
| 	  </div> | ||||
| 	  <div class="menu_content"> | ||||
| 	    <div class="menu_content_header">Menu principal</div> | ||||
| 	    <div id="menu_main"> | ||||
| 	       <div class="menu_content_content"><a href="/">Première page</a></div> | ||||
| 	       <div class="menu_content_content"><a href="/about.html">À propos</a></div> | ||||
| 	       <div class="menu_content_content"><a href="http://indefero.soutade.fr">Projets personnels</a></div> | ||||
| 	  </div> | ||||
| 	  </div> | ||||
| 	  <div class="menu_content"> | ||||
|   | ||||
		Reference in New Issue
	
	Block a user