First work on search
This commit is contained in:
parent
0bc3089d05
commit
0b61fa7ff2
192
search.py
Normal file
192
search.py
Normal file
|
@ -0,0 +1,192 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import re
|
||||
import unicodedata
|
||||
import os
|
||||
import operator
|
||||
import pickle
|
||||
from django.db import models
|
||||
|
||||
class Search:
    """Tiny full-text search index over the post files of a blog.

    The index is a dict mapping a lower-cased, accent-stripped word to the
    list of post ids (ints) whose file contains that word.  It is pickled to
    ``<blog.src_path>/_search.db``; posts are read from
    ``<blog.src_path>/_post/<post id>``.

    The only thing required from ``blog`` is a ``src_path`` string attribute.
    Post ids may be given either as ints or as numeric strings.
    """

    # Words shorter than this are neither indexed nor searched.
    MINIMUM_LETTERS = 3

    def __init__(self):
        # HTML report accumulated by _addReport() and returned by create_index().
        self.report = ''

        self.tagreg = re.compile(r'<[^>]+>')     # HTML tags
        self.htmlreg = re.compile(r'&[^;]+;')    # HTML entities
        self.numreg = re.compile(r'[0-9]+')      # runs of digits
        self.pat = re.compile(r'\s+')            # runs of whitespace
        # re.UNICODE so that Python 2 and Python 3 split words the same way.
        self.wordreg = re.compile(r'\w+', re.UNICODE)

        # Punctuation turned into a word separator before tokenizing.
        # Unicode literals: the text is decoded before these are applied
        # (the non-ASCII ones rely on the file's utf-8 coding declaration).
        self.replace_by_space = (
            u'(', u')', u'#', u'\'', u'{', u'}', u'[', u']',
            u'-', u'|', u'\t', u'\\', u'_', u'^', u'=', u'+', u'$',
            u'£', u'%', u'µ', u'*', u',', u'?', u';', u'.', u'/',
            u':', u'!', u'§', u'€', u'²',
        )

    # ------------------------------------------------------------------
    # Reporting
    # ------------------------------------------------------------------

    def _addReport(self, string, color=''):
        """Append one HTML line to ``self.report``, optionally colored."""
        line = '<b>' + self.__class__.__name__ + '</b> : ' + string
        if color != '':
            line = '<span style="color:' + color + '">' + line + '</span>'
        self.report = self.report + line + '<br/>\n'

    def _addWarning(self, string):
        """Append a yellow line to the report."""
        self._addReport(string, 'yellow')

    def _addError(self, string):
        """Append a red line to the report."""
        self._addReport(string, 'red')

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------

    def _saveDatabase(self, blog, hashtable):
        """Pickle ``hashtable`` to ``<blog.src_path>/_search.db``."""
        # Binary mode: pickle output is bytes.
        with open(blog.src_path + '/_search.db', 'wb') as f:
            pickle.dump(hashtable, f)

    def _loadDatabase(self, blog):
        """Return the stored index, or None when no index file exists."""
        filename = blog.src_path + '/_search.db'

        if not os.path.exists(filename):
            return None

        # NOTE(security): pickle executes arbitrary code on load; this file
        # must only ever be one written by _saveDatabase().
        with open(filename, 'rb') as f:
            return pickle.load(f)

    # ------------------------------------------------------------------
    # Text normalization
    # ------------------------------------------------------------------

    def _to_text(self, content):
        """Decode UTF-8 bytes to text; text input is returned unchanged."""
        if isinstance(content, bytes):
            return content.decode('utf8')
        return content

    def _remove_reg(self, content, reg):
        """Replace every match of ``reg`` in ``content`` by one space."""
        return re.sub(reg, ' ', content)

    def _strip_accents(self, s):
        """Return ``s`` without combining marks (e.g. 'é' becomes 'e')."""
        return ''.join((c for c in unicodedata.normalize('NFD', s)
                        if unicodedata.category(c) != 'Mn'))

    def _remove_tag(self, content):
        """Strip entities, digits, punctuation and HTML tags from ``content``.

        Returns text in which words are separated by single spaces.
        """
        content = self._to_text(content)

        content = self._remove_reg(content, self.htmlreg)
        content = self._remove_reg(content, self.numreg)

        # Line breaks separate words: they become spaces, otherwise the last
        # word of a line would be glued to the first word of the next one.
        content = content.replace(u'\n', u' ')
        content = content.replace(u'\r', u' ')
        content = content.replace(u'"', u'')

        for c in self.replace_by_space:
            content = content.replace(c, u' ')

        # Tags are removed after the punctuation pass ('<' and '>' are not
        # part of it, so tags are still recognizable here).
        content = self._remove_reg(content, self.tagreg)

        return self.pat.sub(u' ', content)

    def _prepare_string(self, content):
        """Return ``content`` as accent-free text ready for tokenizing.

        Accepts UTF-8 encoded bytes or already decoded text.
        """
        return self._strip_accents(self._remove_tag(content))

    # ------------------------------------------------------------------
    # Index maintenance
    # ------------------------------------------------------------------

    def _index_file(self, hashtable, filename, index):
        """Add every word of ``filename`` to ``hashtable`` under id ``index``."""
        with open(filename, 'rb') as f:
            content = f.read()

        content = self._prepare_string(content)

        for word in self.wordreg.findall(content):
            if len(word) < self.MINIMUM_LETTERS:
                continue
            posts = hashtable.setdefault(word.lower(), [])
            if index not in posts:
                posts.append(index)

    def _remove_from_table(self, hashtable, post_id):
        """Drop ``post_id`` from every word entry of ``hashtable`` in place."""
        # Iterate over a copy: entries left empty are deleted so that they do
        # not stop the prefix fallback of search() on a word with no post.
        for word, posts in list(hashtable.items()):
            if post_id in posts:
                posts.remove(post_id)
                if not posts:
                    del hashtable[word]

    def create_index(self, blog):
        """Rebuild the whole index from ``<blog.src_path>/_post``.

        Only files whose name consists solely of digits are indexed.
        Returns the accumulated HTML report.
        """
        hashtable = {}

        root = blog.src_path + '/_post'

        if os.path.isdir(root):
            for post in os.listdir(root):
                # Not a post number
                if not re.match(r'[0-9]+\Z', post):
                    continue
                self._index_file(hashtable, root + '/' + post, int(post))

        self._saveDatabase(blog, hashtable)

        self._addReport('Search generated @ ' + blog.src_path + '/_search.db')

        return self.report

    def _index_post(self, blog, post, saveDatabase=True):
        """Add one post to the stored index.

        When no index exists yet the whole index is rebuilt instead and the
        report of create_index() is returned; otherwise returns None.
        """
        hashtable = self._loadDatabase(blog)

        if hashtable is None:
            return self.create_index(blog)

        # ``post`` may be an int (model id) or a string (URL parameter).
        filename = blog.src_path + '/_post/' + str(post)
        self._index_file(hashtable, filename, int(post))

        if saveDatabase:
            self._saveDatabase(blog, hashtable)

    def _remove_post(self, blog, post, saveDatabase=True):
        """Remove one post from the stored index (no-op without an index)."""
        hashtable = self._loadDatabase(blog)

        if hashtable is None:
            return

        # Ids are stored as ints, whatever type the caller passes.
        self._remove_from_table(hashtable, int(post))

        if saveDatabase:
            self._saveDatabase(blog, hashtable)

    def index_post(self, blog, post):
        """Index a newly created post and save the index."""
        return self._index_post(blog, post, True)

    def delete_post(self, blog, post):
        """Remove a post from the index and save the index."""
        return self._remove_post(blog, post, True)

    def edit_post(self, blog, post, saveDatabase=True):
        """Re-index a modified post.

        The removal and the re-indexing are applied to the same in-memory
        table, so words no longer present in the post really disappear.
        """
        hashtable = self._loadDatabase(blog)

        if hashtable is None:
            # No index yet: a full rebuild covers this post too.
            self.create_index(blog)
            return

        post_id = int(post)
        self._remove_from_table(hashtable, post_id)
        self._index_file(hashtable, blog.src_path + '/_post/' + str(post),
                         post_id)

        if saveDatabase:
            self._saveDatabase(blog, hashtable)

    # ------------------------------------------------------------------
    # Query
    # ------------------------------------------------------------------

    def search(self, blog, string):
        """Return ``[(post id, hits), ...]`` for ``string``, best match first.

        Each query word of at least MINIMUM_LETTERS letters is looked up; when
        it is unknown, trailing letters are dropped one at a time (down to
        MINIMUM_LETTERS) until an indexed word is found.  ``hits`` is the
        number of query words matching the post.  Returns an empty list when
        nothing matches or when no index exists.
        """
        hashtable = self._loadDatabase(blog)

        if not hashtable:
            return []

        string = self._prepare_string(string)

        res = {}
        for word in self.wordreg.findall(string):
            if len(word) < self.MINIMUM_LETTERS:
                continue
            word = word.lower()
            while word not in hashtable and len(word) > self.MINIMUM_LETTERS:
                word = word[:-1]
            for post in hashtable.get(word, ()):
                res[post] = res.get(post, 0) + 1

        return sorted(res.items(), key=operator.itemgetter(1), reverse=True)
|
|
@ -10,7 +10,7 @@
|
|||
</form>
|
||||
{% endif %}
|
||||
<br/><br/>
|
||||
<a href="/post/add/{{ blog.id }}">Add a post</a> <a href="/generate/{{ blog.id }}">Generate blog</a><br/>
|
||||
<a href="/post/add/{{ blog.id }}">Add a post</a> <a href="/generate/{{ blog.id }}">Generate blog</a> <a href="/search/generate/{{ blog.id }}">Generate search index</a><br/><br/>
|
||||
{% if report|length == 0 %}
|
||||
<b style="color:red">Any engine selected</b><br/><br/>
|
||||
{% else %}
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
</form>
|
||||
{% endif %}
|
||||
<br/><br/>
|
||||
<a href="/post/add/{{ blog.id }}">Add a post</a> <a href="/generate/{{ blog.id }}">Generate blog</a>
|
||||
<a href="/post/add/{{ blog.id }}">Add a post</a> <a href="/generate/{{ blog.id }}">Generate blog</a> <a href="/search/generate/{{ blog.id }}">Generate search index</a>
|
||||
<br/><br/>
|
||||
{% if posts|length == 0 %}
|
||||
<br/><br/>
|
||||
|
|
7
urls.py
7
urls.py
|
@ -31,9 +31,10 @@ urlpatterns = patterns('',
|
|||
url(r'^comment/add/(\d+)/(\d+)$', 'dynastie.views.add_comment', name='add_comment'),
|
||||
url(r'^comment/edit/(\d+)$', 'dynastie.views.edit_comment', name='edit_comment'),
|
||||
url(r'^comment/delete/(\d+)$','dynastie.views.delete_comment',name='delete_comment'),
|
||||
url(r'^tag/(\d+)$', 'dynastie.views.tag', name='tag'),
|
||||
url(r'^tag/edit/(\d+)$', 'dynastie.views.edit_tag', name='edit_tag'),
|
||||
url(r'^tag/delete/(\d+)$', 'dynastie.views.delete_tag', name='delete_tag'),
|
||||
url(r'^tag/(\d+)$', 'dynastie.views.tag', name='tag'),
|
||||
url(r'^tag/edit/(\d+)$', 'dynastie.views.edit_tag', name='edit_tag'),
|
||||
url(r'^tag/delete/(\d+)$', 'dynastie.views.delete_tag', name='delete_tag'),
|
||||
url(r'^search/generate/(\d+)$', 'dynastie.views.generate_search',name='generate_search'),
|
||||
# url(r'^dynastie/', include('dynastie.foo.urls')),
|
||||
|
||||
# Uncomment the admin/doc line below to enable admin documentation:
|
||||
|
|
37
views.py
37
views.py
|
@ -12,6 +12,7 @@ from django.core.mail import EmailMultiAlternatives
|
|||
|
||||
from dynastie.models import *
|
||||
from dynastie.forms import *
|
||||
from dynastie.search import *
|
||||
|
||||
from django.template.defaultfilters import register
|
||||
from django.template import Variable, VariableDoesNotExist
|
||||
|
@ -399,7 +400,7 @@ def edit_blog(request, blog_id):
|
|||
|
||||
@login_required
|
||||
def add_post(request, blog_id):
|
||||
(b,p) = have_I_right(request, blog_id)
|
||||
(b,_) = have_I_right(request, blog_id)
|
||||
|
||||
if request.method == 'POST': # If the form has been submitted...
|
||||
if 'add' in request.POST:
|
||||
|
@ -410,6 +411,8 @@ def add_post(request, blog_id):
|
|||
if form.is_valid(): # All validation rules pass
|
||||
form = form.save()
|
||||
form.createPost(content, request.POST['text_tags'])
|
||||
s = Search()
|
||||
s.index_post(b, form.id)
|
||||
# Process the data in form.cleaned_data
|
||||
# ...
|
||||
return HttpResponseRedirect('/blog/' + blog_id) # Redirect after POST
|
||||
|
@ -437,6 +440,8 @@ def edit_post(request, post_id):
|
|||
post.remove()
|
||||
form.save()
|
||||
post.createPost(request.POST['content'], request.POST['text_tags'])
|
||||
s = Search()
|
||||
s.edit_post(b, post_id)
|
||||
# Process the data in form.cleaned_data
|
||||
# ...
|
||||
return HttpResponseRedirect('/blog/' + str(blog_id)) # Redirect after POST
|
||||
|
@ -469,17 +474,16 @@ def edit_post(request, post_id):
|
|||
def delete_post(request, post_id):
    """Delete a post, remove it from the search index and go back to its blog.

    ``have_I_right`` returns the blog and the post for ``post_id``
    (presumably after checking the requesting user's rights -- confirm
    against its definition).
    """
    (b, post) = have_I_right(request, None, post_id)

    # Remove the post from the search index.  edit_post() must not be used
    # here: it would index the post again right before it is deleted.
    s = Search()
    s.delete_post(b, post_id)

    post.delete()

    return HttpResponseRedirect('/blog/' + str(b.id))
|
||||
|
||||
@login_required
|
||||
def generate(request, blog_id):
|
||||
def _generate(request, blog_id, report):
|
||||
b,_ = have_I_right(request, blog_id)
|
||||
|
||||
b.create_paths()
|
||||
report = b.generate()
|
||||
|
||||
count = Post.objects.filter(blog=b).count()
|
||||
nb_pages = int(count/50)
|
||||
posts = Post.objects.filter(blog=b).order_by('-creation_date')[0:50]
|
||||
|
@ -501,6 +505,27 @@ def generate(request, blog_id):
|
|||
|
||||
return render(request, 'templates/generate.html', c)
|
||||
|
||||
@login_required
def generate(request, blog_id):
    """Generate the blog and render the generation page with its report."""
    blog, _ = have_I_right(request, blog_id)

    # Output paths are created before the generation runs.
    blog.create_paths()

    return _generate(request, blog_id, blog.generate())
|
||||
|
||||
@login_required
def generate_search(request, blog_id):
    """Rebuild the blog's search index and render the generation page."""
    blog, _ = have_I_right(request, blog_id)

    # Output paths are created before the index is written.
    blog.create_paths()

    index_report = Search().create_index(blog)

    return _generate(request, blog_id, index_report)
|
||||
|
||||
@login_required
|
||||
def preview(request, blog_id):
|
||||
from dynastie.generators import post
|
||||
|
|
Loading…
Reference in New Issue
Block a user