diff --git a/search.py b/search.py
new file mode 100644
index 0000000..0d77d66
--- /dev/null
+++ b/search.py
@@ -0,0 +1,192 @@
+# -*- coding: utf-8 -*-
+import re
+import unicodedata
+import os
+import operator
+import pickle
+from django.db import models
+
+class Search:
+    """Word index over the post files of a blog, persisted with pickle.
+
+    The index is a dict mapping a lower-cased, accent-stripped word to the
+    list of post numbers whose file contains that word.  It is stored in
+    ``<blog.src_path>/_search.db`` and built from the files found in
+    ``<blog.src_path>/_post``, each of which is named by its post number.
+    """
+
+    # Shortest word that is indexed.  search() uses the same limit, both to
+    # ignore short query words and as the floor when it shortens a query
+    # word looking for an indexed prefix.
+    MINIMUM_LETTERS = 3
+
+    def __init__(self):
+        # Text accumulated by _addReport() and returned by create_index().
+        self.report = ''
+
+        self.tagreg = re.compile('<[^>]+>')     # markup tags
+        self.htmlreg = re.compile('&[^;]+;')    # character entities
+        self.numreg = re.compile('[0-9]+')      # runs of digits
+        self.pat = re.compile(r'\s+')           # runs of whitespace
+        self.wordreg = re.compile(r'\w+')       # indexable words
+
+        # Characters turned into a word separator before tokenising.
+        # '^' and '=' are separate entries: without a comma between them
+        # the adjacent literals concatenate into the single string '^='.
+        self.replace_by_space = ('(', ')', '#', '\'', '{', '}', '[', ']',
+                                 '-', '|', '\t', '\\', '_', '^', '=', '+', '$',
+                                 '£', '%', 'µ', '*', ',', '?', ';', '.', '/',
+                                 ':', '!', '§', '€', '²')
+
+    def _addReport(self, string, color=''):
+        """Append one line to the report, prefixed with the class name.
+
+        NOTE(review): the empty literals appended when ``color`` is set, and
+        the line terminator, look like markup that was stripped from this
+        copy of the file -- confirm against the repository.
+        """
+        if color != '':
+            self.report = self.report + ''
+        self.report = self.report + '' + self.__class__.__name__ + ' : '
+        self.report = self.report + string
+        if color != '':
+            self.report = self.report + ''
+        self.report = self.report + '\n'
+
+    def _addWarning(self, string):
+        """Append ``string`` to the report as a warning ('yellow')."""
+        self._addReport(string, 'yellow')
+
+    def _addError(self, string):
+        """Append ``string`` to the report as an error ('red')."""
+        self._addReport(string, 'red')
+
+    def _saveDatabase(self, blog, hashtable):
+        """Pickle ``hashtable`` into ``<blog.src_path>/_search.db``."""
+        # Binary mode, to match the 'rb' used by _loadDatabase().
+        with open(blog.src_path + '/_search.db', 'wb') as f:
+            pickle.dump(hashtable, f)
+
+    def _loadDatabase(self, blog):
+        """Return the index stored for ``blog``, or None if there is none."""
+        filename = blog.src_path + '/_search.db'
+
+        if not os.path.exists(filename):
+            return None
+
+        # NOTE: unpickling can run arbitrary code; this file must only ever
+        # be one written by _saveDatabase().
+        with open(filename, 'rb') as f:
+            return pickle.load(f)
+
+    def _remove_reg(self, content, reg):
+        """Replace every match of ``reg`` in ``content`` by a single space.
+
+        Leading whitespace of the text that precedes a match is dropped as
+        a side effect of each replacement.
+        """
+        found = re.search(reg, content)
+        while found is not None:
+            content = content[:found.start(0)].lstrip() + ' ' + content[found.end(0):]
+            found = re.search(reg, content)
+
+        return content
+
+    def _strip_accents(self, s):
+        """Return ``s`` NFD-normalised with its combining marks removed."""
+        return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'))
+
+    def _remove_tag(self, content):
+        """Reduce ``content`` to words separated by single spaces.
+
+        Entities, digit runs, the characters of ``replace_by_space`` and
+        markup tags are replaced by a space; double quotes are deleted.
+        """
+        content = self._remove_reg(content, self.htmlreg)
+        content = self._remove_reg(content, self.numreg)
+
+        # A line break separates words: deleting it would glue the last
+        # word of a line to the first word of the next one.
+        content = content.replace('\n', ' ')
+        content = content.replace('\r', ' ')
+        content = content.replace('"', '')
+
+        for c in self.replace_by_space:
+            content = content.replace(c, ' ')
+
+        # Tags go last; the substitutions above leave '<' and '>' alone.
+        content = self._remove_reg(content, self.tagreg)
+
+        content = self.pat.sub(' ', content)
+
+        return content
+
+    def _prepare_string(self, content):
+        """Return ``content`` cleaned by _remove_tag() and without accents.
+
+        ``content`` may be a UTF-8 byte string or a unicode string; the
+        result is a unicode string.
+        """
+        # The cleaning step works on UTF-8 bytes (its separators are byte
+        # strings) and unicode() refuses to decode text that is already
+        # unicode, so bring unicode input back to bytes first.
+        if isinstance(content, unicode):
+            content = content.encode('utf8')
+
+        content = self._remove_tag(content)
+        content = self._strip_accents(unicode(content, 'utf8'))
+
+        return content
+
+    def _index_file(self, hashtable, filename, index):
+        """Record in ``hashtable`` that post ``index`` holds the words of ``filename``."""
+        with open(filename, 'r') as f:
+            content = f.read()
+
+        content = self._prepare_string(content)
+
+        for word in re.findall(self.wordreg, content):
+            if len(word) < self.MINIMUM_LETTERS:
+                continue
+            posts = hashtable.setdefault(word.lower(), [])
+            if index not in posts:
+                posts.append(index)
+
+    def _forget_post(self, hashtable, post):
+        """Remove post ``post`` from every word entry of ``hashtable``."""
+        # Entries are ints (see the int(post) calls that feed _index_file),
+        # while callers may hand over the number as a string.
+        number = int(post)
+        for posts in hashtable.values():
+            if number in posts:
+                posts.remove(number)
+
+    def create_index(self, blog):
+        """Rebuild the whole index of ``blog``, save it and return the report."""
+        hashtable = {}
+
+        root = blog.src_path + '/_post'
+
+        if os.path.exists(root):
+            for post in os.listdir(root):
+                # Only entries whose whole name is a number are posts; a
+                # name that merely contains a digit would break int() below.
+                found = self.numreg.match(post)
+                if found is None or found.end() != len(post):
+                    continue
+                self._index_file(hashtable, root + '/' + post, int(post))
+
+        self._saveDatabase(blog, hashtable)
+
+        self._addReport('Search generated @ ' + blog.src_path + '/_search.db')
+
+        return self.report
+
+    def _index_post(self, blog, post, saveDatabase=True):
+        """Add the words of post ``post`` to the stored index of ``blog``.
+
+        ``post`` is the post number, as an int or a numeric string.  When no
+        index exists yet, the whole index is built instead and the report of
+        create_index() is returned; otherwise nothing is returned.
+        """
+        hashtable = self._loadDatabase(blog)
+
+        if hashtable is None:
+            return self.create_index(blog)
+
+        filename = blog.src_path + '/_post/' + str(post)
+        self._index_file(hashtable, filename, int(post))
+
+        if saveDatabase:
+            self._saveDatabase(blog, hashtable)
+
+    def _remove_post(self, blog, post, saveDatabase=True):
+        """Remove post ``post`` from the stored index of ``blog``, if any."""
+        hashtable = self._loadDatabase(blog)
+
+        if hashtable is None:
+            return
+
+        self._forget_post(hashtable, post)
+
+        if saveDatabase:
+            self._saveDatabase(blog, hashtable)
+
+    def index_post(self, blog, post):
+        """Index a newly created post and save the index."""
+        return self._index_post(blog, post, True)
+
+    def delete_post(self, blog, post):
+        """Remove a post from the index and save the index."""
+        return self._remove_post(blog, post, True)
+
+    def edit_post(self, blog, post, saveDatabase=True):
+        """Replace the index entries of post ``post`` by its current words.
+
+        ``saveDatabase`` is kept for interface compatibility; the updated
+        index is always written, since an unsaved edit would just be lost.
+        """
+        hashtable = self._loadDatabase(blog)
+
+        if hashtable is None:
+            # Nothing to edit yet: build the index from scratch.
+            self.create_index(blog)
+            return
+
+        # Both steps work on the same in-memory table.  Removing from one
+        # loaded copy and re-indexing into a freshly loaded one would drop
+        # the removal and leave the post's old words in the index.
+        self._forget_post(hashtable, post)
+        self._index_file(hashtable, blog.src_path + '/_post/' + str(post), int(post))
+
+        self._saveDatabase(blog, hashtable)
+
+    def search(self, blog, string):
+        """Return the posts of ``blog`` matching the words of ``string``.
+
+        The result is a list of ``(post number, number of matching query
+        words)`` pairs, best match first; it is empty when nothing matches
+        or when the blog has no index yet.  A query word that is not indexed
+        is shortened from the right until an indexed word is found or only
+        MINIMUM_LETTERS letters remain.
+        """
+        hashtable = self._loadDatabase(blog)
+        if hashtable is None:
+            return []
+
+        string = self._prepare_string(string)
+
+        res = {}
+        for word in re.findall(self.wordreg, string):
+            # Same limit as the indexer, so every indexed word can be found.
+            if len(word) < self.MINIMUM_LETTERS:
+                continue
+            word = word.lower()
+            while word not in hashtable and len(word) > self.MINIMUM_LETTERS:
+                word = word[:-1]
+            if word not in hashtable:
+                continue
+            for post in hashtable[word]:
+                res[post] = res.get(post, 0) + 1
+
+        # list.reverse() works in place and returns None, so the ordering
+        # is requested from sorted() directly.
+        return sorted(res.items(), key=operator.itemgetter(1), reverse=True)
diff --git a/templates/generate.html b/templates/generate.html
index 9ef2591..ea6f2e6 100644
--- a/templates/generate.html
+++ b/templates/generate.html
@@ -10,7 +10,7 @@
{% endif %}
-Add a post Generate blog
+Add a post Generate blog Generate search index
{% if report|length == 0 %}
Any engine selected
{% else %}
diff --git a/templates/view_blog.html b/templates/view_blog.html
index 4fac7c0..3b98a8d 100644
--- a/templates/view_blog.html
+++ b/templates/view_blog.html
@@ -10,7 +10,7 @@
{% endif %}
-Add a post Generate blog
+Add a post Generate blog Generate search index
{% if posts|length == 0 %}
diff --git a/urls.py b/urls.py
index 7883b20..e2b26d6 100644
--- a/urls.py
+++ b/urls.py
@@ -31,9 +31,10 @@ urlpatterns = patterns('',
url(r'^comment/add/(\d+)/(\d+)$', 'dynastie.views.add_comment', name='add_comment'),
url(r'^comment/edit/(\d+)$', 'dynastie.views.edit_comment', name='edit_comment'),
url(r'^comment/delete/(\d+)$','dynastie.views.delete_comment',name='delete_comment'),
- url(r'^tag/(\d+)$', 'dynastie.views.tag', name='tag'),
- url(r'^tag/edit/(\d+)$', 'dynastie.views.edit_tag', name='edit_tag'),
- url(r'^tag/delete/(\d+)$', 'dynastie.views.delete_tag', name='delete_tag'),
+ url(r'^tag/(\d+)$', 'dynastie.views.tag', name='tag'),
+ url(r'^tag/edit/(\d+)$', 'dynastie.views.edit_tag', name='edit_tag'),
+ url(r'^tag/delete/(\d+)$', 'dynastie.views.delete_tag', name='delete_tag'),
+ url(r'^search/generate/(\d+)$', 'dynastie.views.generate_search',name='generate_search'),
# url(r'^dynastie/', include('dynastie.foo.urls')),
# Uncomment the admin/doc line below to enable admin documentation:
diff --git a/views.py b/views.py
index 96c766d..dbbefb0 100644
--- a/views.py
+++ b/views.py
@@ -12,6 +12,7 @@ from django.core.mail import EmailMultiAlternatives
from dynastie.models import *
from dynastie.forms import *
+from dynastie.search import *
from django.template.defaultfilters import register
from django.template import Variable, VariableDoesNotExist
@@ -399,7 +400,7 @@ def edit_blog(request, blog_id):
@login_required
def add_post(request, blog_id):
- (b,p) = have_I_right(request, blog_id)
+ (b,_) = have_I_right(request, blog_id)
if request.method == 'POST': # If the form has been submitted...
if 'add' in request.POST:
@@ -410,6 +411,8 @@ def add_post(request, blog_id):
if form.is_valid(): # All validation rules pass
form = form.save()
form.createPost(content, request.POST['text_tags'])
+ s = Search()
+ s.index_post(b, form.id)
# Process the data in form.cleaned_data
# ...
return HttpResponseRedirect('/blog/' + blog_id) # Redirect after POST
@@ -437,6 +440,8 @@ def edit_post(request, post_id):
post.remove()
form.save()
post.createPost(request.POST['content'], request.POST['text_tags'])
+ s = Search()
+ s.edit_post(b, post_id)
# Process the data in form.cleaned_data
# ...
return HttpResponseRedirect('/blog/' + str(blog_id)) # Redirect after POST
@@ -469,17 +474,16 @@ def edit_post(request, post_id):
def delete_post(request, post_id):
(b, post) = have_I_right(request, None, post_id)
+ # Take the post out of the search index before it is deleted.
+ # Search.edit_post() is the wrong call here: it indexes the post's file
+ # again instead of only removing the post from the index.
+ s = Search()
+ s.delete_post(b, post_id)
+
post.delete()
return HttpResponseRedirect('/blog/' + str(b.id))
-@login_required
-def generate(request, blog_id):
+def _generate(request, blog_id, report):
b,_ = have_I_right(request, blog_id)
- b.create_paths()
- report = b.generate()
-
count = Post.objects.filter(blog=b).count()
nb_pages = int(count/50)
posts = Post.objects.filter(blog=b).order_by('-creation_date')[0:50]
@@ -501,6 +505,27 @@ def generate(request, blog_id):
return render(request, 'templates/generate.html', c)
+@login_required
+def generate(request, blog_id):
+    """Run the blog's generate() and render its report through _generate().
+
+    NOTE(review): have_I_right() is presumably the access check that also
+    returns the blog object -- confirm against its definition.
+    """
+    blog, _unused = have_I_right(request, blog_id)
+
+    # Paths are created before generation, in that order.
+    blog.create_paths()
+    generation_report = blog.generate()
+
+    return _generate(request, blog_id, generation_report)
+
+@login_required
+def generate_search(request, blog_id):
+    """Rebuild the blog's search index and render the report through _generate().
+
+    NOTE(review): have_I_right() is presumably the access check that also
+    returns the blog object -- confirm against its definition.
+    """
+    blog, _unused = have_I_right(request, blog_id)
+
+    # Paths are created before the index is built, in that order.
+    blog.create_paths()
+
+    index_report = Search().create_index(blog)
+
+    return _generate(request, blog_id, index_report)
+
@login_required
def preview(request, blog_id):
from dynastie.generators import post