Improve search relevance by also indexing post titles (weighted higher than body text)
This commit is contained in:
parent
2f1f38ca5c
commit
839b935d47
37
search.py
37
search.py
|
@ -5,6 +5,7 @@ import os
|
|||
import operator
|
||||
import pickle
|
||||
from django.db import models
|
||||
from dynastie.models import Post
|
||||
|
||||
class Search:
|
||||
MINIMUM_LETTERS = 3
|
||||
|
@ -16,7 +17,6 @@ class Search:
|
|||
self.htmlreg = re.compile('&[^;]+;')
|
||||
self.numreg = re.compile('[0-9]+')
|
||||
self.pat = re.compile(r'\s+')
|
||||
self.wordreg = re.compile('\w+')
|
||||
|
||||
self.replace_by_space = ('(', ')', '#', '\'', '{', '}', '[', ']',
|
||||
'-', '|', '\t', '\\', '_', '^' '=', '+', '$',
|
||||
|
@ -97,14 +97,10 @@ class Search:
|
|||
|
||||
return content
|
||||
|
||||
def _index_file(self, hashtable, filename, index):
|
||||
f = open(filename, 'r')
|
||||
content = f.read()
|
||||
f.close()
|
||||
|
||||
def _indexContent(self, hashtable, index, content, word_weight):
|
||||
content = self._prepare_string(content)
|
||||
|
||||
wordlist = re.findall(self.wordreg, content)
|
||||
wordlist = content.split(' ')
|
||||
|
||||
for word in wordlist:
|
||||
if len(word) < self.MINIMUM_LETTERS:
|
||||
|
@ -113,8 +109,25 @@ class Search:
|
|||
if not word in hashtable:
|
||||
hashtable[word] = []
|
||||
if not index in hashtable[word]:
|
||||
hashtable[word].append(index)
|
||||
hashtable[word].append([index, word_weight])
|
||||
else:
|
||||
weight = hashtable[word][1]
|
||||
hashtable[word][1] = weight + word_weight
|
||||
|
||||
def _index_file(self, hashtable, filename, index):
    """Index one post's file content and title into ``hashtable``.

    Reads ``filename``, looks up the ``Post`` whose primary key is
    ``index`` and, when that post can be loaded and is not unpublished,
    passes the file content (weight 1) and the post title (weight 5) to
    ``_indexContent``. Returns without indexing anything when the lookup
    fails or the post is unpublished.
    """
    # The context manager closes the file even if read() raises.
    with open(filename, 'r') as f:
        content = f.read()

    try:
        post = Post.objects.get(pk=index)
        if post.published == False:
            return
    except Exception:
        # Best-effort: skip any post that cannot be loaded. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt and SystemExit through.
        return

    self._indexContent(hashtable, index, content, 1)
    # Title words are given a higher weight than words from the file content.
    self._indexContent(hashtable, index, post.title.encode('utf-8'), 5)
|
||||
|
||||
def create_index(self, blog):
|
||||
hashtable = {}
|
||||
|
||||
|
@ -172,7 +185,7 @@ class Search:
|
|||
|
||||
string = self._prepare_string(string.encode('utf-8'))
|
||||
|
||||
wordlist = re.findall(self.wordreg, string)
|
||||
wordlist = string.split(' ')
|
||||
|
||||
res = {}
|
||||
for word in wordlist:
|
||||
|
@ -184,9 +197,9 @@ class Search:
|
|||
if word not in hashtable:
|
||||
continue
|
||||
for post in hashtable[word]:
|
||||
if not post in res:
|
||||
res[post] = 0
|
||||
res[post] = res[post] + 1
|
||||
if not post[0] in res:
|
||||
res[post[0]] = post[1]
|
||||
res[post[0]] += post[1]
|
||||
sorted_res = sorted(res.iteritems(), key=operator.itemgetter(1))
|
||||
|
||||
sorted_res.reverse()
|
||||
|
|
|
@ -43,15 +43,20 @@
|
|||
</ul>
|
||||
</div>
|
||||
<div class="menu">
|
||||
<!-- <div class="menu_content"> -->
|
||||
<!-- <div class="menu_content_header">Search</div> -->
|
||||
<!-- <div class="menu_content_content">La recherche</div> -->
|
||||
<!-- </div> -->
|
||||
<div class="menu_content">
|
||||
<div class="menu_content_header">Recherche</div>
|
||||
<div id="menu_main">
|
||||
<dyn:replace div_name="form" id="search_form" method="POST" action="/search/dyn:blog_id">
|
||||
<input type="text" name="text" onkeypress="handleKeyPress(event,this.form)"/>
|
||||
</dyn:replace>
|
||||
</div>
|
||||
</div>
|
||||
<div class="menu_content">
|
||||
<div class="menu_content_header">Menu principal</div>
|
||||
<div id="menu_main">
|
||||
<div class="menu_content_content"><a href="/">Première page</a></div>
|
||||
<div class="menu_content_content"><a href="/about.html">À propos</a></div>
|
||||
<div class="menu_content_content"><a href="http://indefero.soutade.fr">Projets personnels</a></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="menu_content">
|
||||
|
|
Loading…
Reference in New Issue
Block a user