Make search results more relevant by including post titles in the index
This commit is contained in:
parent
2f1f38ca5c
commit
839b935d47
37
search.py
37
search.py
|
@ -5,6 +5,7 @@ import os
|
||||||
import operator
|
import operator
|
||||||
import pickle
|
import pickle
|
||||||
from django.db import models
|
from django.db import models
|
||||||
|
from dynastie.models import Post
|
||||||
|
|
||||||
class Search:
|
class Search:
|
||||||
MINIMUM_LETTERS = 3
|
MINIMUM_LETTERS = 3
|
||||||
|
@ -16,7 +17,6 @@ class Search:
|
||||||
self.htmlreg = re.compile('&[^;]+;')
|
self.htmlreg = re.compile('&[^;]+;')
|
||||||
self.numreg = re.compile('[0-9]+')
|
self.numreg = re.compile('[0-9]+')
|
||||||
self.pat = re.compile(r'\s+')
|
self.pat = re.compile(r'\s+')
|
||||||
self.wordreg = re.compile('\w+')
|
|
||||||
|
|
||||||
self.replace_by_space = ('(', ')', '#', '\'', '{', '}', '[', ']',
|
self.replace_by_space = ('(', ')', '#', '\'', '{', '}', '[', ']',
|
||||||
'-', '|', '\t', '\\', '_', '^' '=', '+', '$',
|
'-', '|', '\t', '\\', '_', '^' '=', '+', '$',
|
||||||
|
@ -97,14 +97,10 @@ class Search:
|
||||||
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
def _index_file(self, hashtable, filename, index):
|
def _indexContent(self, hashtable, index, content, word_weight):
|
||||||
f = open(filename, 'r')
|
|
||||||
content = f.read()
|
|
||||||
f.close()
|
|
||||||
|
|
||||||
content = self._prepare_string(content)
|
content = self._prepare_string(content)
|
||||||
|
|
||||||
wordlist = re.findall(self.wordreg, content)
|
wordlist = content.split(' ')
|
||||||
|
|
||||||
for word in wordlist:
|
for word in wordlist:
|
||||||
if len(word) < self.MINIMUM_LETTERS:
|
if len(word) < self.MINIMUM_LETTERS:
|
||||||
|
@ -113,7 +109,24 @@ class Search:
|
||||||
if not word in hashtable:
|
if not word in hashtable:
|
||||||
hashtable[word] = []
|
hashtable[word] = []
|
||||||
if not index in hashtable[word]:
|
if not index in hashtable[word]:
|
||||||
hashtable[word].append(index)
|
hashtable[word].append([index, word_weight])
|
||||||
|
else:
|
||||||
|
weight = hashtable[word][1]
|
||||||
|
hashtable[word][1] = weight + word_weight
|
||||||
|
|
||||||
|
def _index_file(self, hashtable, filename, index):
    """Index the text of one file together with the title of its post.

    hashtable -- word index; it is handed to _indexContent, which fills it
    filename  -- path of the file whose content is read and indexed
    index     -- primary key used to load the matching Post

    Nothing is indexed when the post cannot be loaded or when its
    ``published`` attribute compares equal to False.
    """
    # The context manager closes the file even when read() raises.
    with open(filename, 'r') as f:
        content = f.read()

    try:
        post = Post.objects.get(pk=index)
        # Comparison deliberately kept as "== False": only a value equal
        # to False causes the post to be skipped.
        if post.published == False:
            return
    except Exception:
        # Best effort: a post that cannot be loaded is silently skipped.
        # Unlike a bare "except:", KeyboardInterrupt and SystemExit still
        # propagate.
        return

    # Body words are indexed with weight 1, title words with weight 5.
    self._indexContent(hashtable, index, content, 1)
    self._indexContent(hashtable, index, post.title.encode('utf-8'), 5)
|
||||||
|
|
||||||
def create_index(self, blog):
|
def create_index(self, blog):
|
||||||
hashtable = {}
|
hashtable = {}
|
||||||
|
@ -172,7 +185,7 @@ class Search:
|
||||||
|
|
||||||
string = self._prepare_string(string.encode('utf-8'))
|
string = self._prepare_string(string.encode('utf-8'))
|
||||||
|
|
||||||
wordlist = re.findall(self.wordreg, string)
|
wordlist = string.split(' ')
|
||||||
|
|
||||||
res = {}
|
res = {}
|
||||||
for word in wordlist:
|
for word in wordlist:
|
||||||
|
@ -184,9 +197,9 @@ class Search:
|
||||||
if word not in hashtable:
|
if word not in hashtable:
|
||||||
continue
|
continue
|
||||||
for post in hashtable[word]:
|
for post in hashtable[word]:
|
||||||
if not post in res:
|
if not post[0] in res:
|
||||||
res[post] = 0
|
res[post[0]] = post[1]
|
||||||
res[post] = res[post] + 1
|
res[post[0]] += post[1]
|
||||||
sorted_res = sorted(res.iteritems(), key=operator.itemgetter(1))
|
sorted_res = sorted(res.iteritems(), key=operator.itemgetter(1))
|
||||||
|
|
||||||
sorted_res.reverse()
|
sorted_res.reverse()
|
||||||
|
|
|
@ -43,15 +43,20 @@
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
<div class="menu">
|
<div class="menu">
|
||||||
<!-- <div class="menu_content"> -->
|
<div class="menu_content">
|
||||||
<!-- <div class="menu_content_header">Search</div> -->
|
<div class="menu_content_header">Recherche</div>
|
||||||
<!-- <div class="menu_content_content">La recherche</div> -->
|
<div id="menu_main">
|
||||||
<!-- </div> -->
|
<dyn:replace div_name="form" id="search_form" method="POST" action="/search/dyn:blog_id">
|
||||||
|
<input type="text" name="text" onkeypress="handleKeyPress(event,this.form)"/>
|
||||||
|
</dyn:replace>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
<div class="menu_content">
|
<div class="menu_content">
|
||||||
<div class="menu_content_header">Menu principal</div>
|
<div class="menu_content_header">Menu principal</div>
|
||||||
<div id="menu_main">
|
<div id="menu_main">
|
||||||
<div class="menu_content_content"><a href="/">Première page</a></div>
|
<div class="menu_content_content"><a href="/">Première page</a></div>
|
||||||
<div class="menu_content_content"><a href="/about.html">À propos</a></div>
|
<div class="menu_content_content"><a href="/about.html">À propos</a></div>
|
||||||
|
<div class="menu_content_content"><a href="http://indefero.soutade.fr">Projets personnels</a></div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="menu_content">
|
<div class="menu_content">
|
||||||
|
|
Loading…
Reference in New Issue
Block a user