First work on search
This commit is contained in:
		
							
								
								
									
										192
									
								
								search.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										192
									
								
								search.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,192 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| import re | ||||
| import unicodedata | ||||
| import os | ||||
| import operator | ||||
| import pickle | ||||
| from django.db import models | ||||
|  | ||||
class Search:
    """Tiny inverted-index search engine for the posts of a blog.

    The index is a plain dict mapping a lower-cased, accent-stripped word to
    the list of post ids (ints) whose source file contains that word.  It is
    pickled to ``<blog.src_path>/_search.db`` and built from the files found
    in ``<blog.src_path>/_post``, each of which is named after its post id.

    ``blog`` arguments only need a ``src_path`` attribute.
    """

    # Words shorter than this are neither indexed nor searched.
    MINIMUM_LETTERS = 3

    def __init__(self):
        # HTML report accumulated by _addReport and returned by create_index.
        self.report = ''

        self.tagreg = re.compile(r'<[^>]+>')
        # Only genuine entities (&amp; &#233; &#xE9;): a looser pattern would
        # swallow ordinary text sitting between a stray '&' and the next ';'.
        self.htmlreg = re.compile(r'&#?\w+;')
        self.numreg = re.compile(r'[0-9]+')
        self.pat = re.compile(r'\s+', re.UNICODE)
        self.wordreg = re.compile(r'\w+', re.UNICODE)

        # Punctuation treated as a word separator.  Text literals (u'') so
        # that they can be applied to decoded text on every Python version.
        self.replace_by_space = (
            u'(', u')', u'#', u'\'', u'{', u'}', u'[', u']',
            u'-', u'|', u'\t', u'\\', u'_', u'^', u'=', u'+', u'$',
            u'£', u'%', u'µ', u'*', u',', u'?', u';', u'.', u'/',
            u':', u'!', u'§', u'€', u'²',
        )

    # ------------------------------------------------------------------
    # Reporting
    # ------------------------------------------------------------------
    def _addReport(self, string, color=''):
        """Append one HTML line to ``self.report``, optionally coloured."""
        line = '<b>' + self.__class__.__name__ + '</b> : ' + string
        if color != '':
            line = '<span style="color:' + color + '">' + line + '</span>'
        self.report = self.report + line + '<br/>\n'

    def _addWarning(self, string):
        """Append a yellow line to the report."""
        self._addReport(string, 'yellow')

    def _addError(self, string):
        """Append a red line to the report."""
        self._addReport(string, 'red')

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------
    def _database_path(self, blog):
        """Return the path of the pickled index of ``blog``."""
        return os.path.join(blog.src_path, '_search.db')

    def _post_path(self, blog, post):
        """Return the path of the source file of post ``post`` (int or str)."""
        return os.path.join(blog.src_path, '_post', str(post))

    def _saveDatabase(self, blog, hashtable):
        """Pickle ``hashtable`` to the index file of ``blog``."""
        # Pickles are binary data: write them in 'wb', matching the 'rb' read.
        with open(self._database_path(blog), 'wb') as f:
            pickle.dump(hashtable, f)

    def _loadDatabase(self, blog):
        """Return the index of ``blog``, or None when it was never built."""
        filename = self._database_path(blog)

        if not os.path.exists(filename):
            return None

        # NOTE: unpickling executes arbitrary code; the index file must only
        # ever be written by this class, never taken from an untrusted source.
        with open(filename, 'rb') as f:
            return pickle.load(f)

    # ------------------------------------------------------------------
    # Text normalisation
    # ------------------------------------------------------------------
    def _remove_reg(self, content, reg):
        """Return ``content`` with every match of ``reg`` replaced by a space."""
        return re.sub(reg, ' ', content)

    def _strip_accents(self, s):
        """Return the text ``s`` without its combining accent marks."""
        decomposed = unicodedata.normalize('NFD', s)
        return ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')

    def _remove_tag(self, content):
        """Strip markup, numbers and punctuation from the text ``content``.

        The result only contains words separated by single spaces.
        """
        # Tags first: attribute values may contain '&...;' sequences that
        # must not be mistaken for entities while the tag is still intact.
        content = self._remove_reg(content, self.tagreg)
        content = self._remove_reg(content, self.htmlreg)
        content = self._remove_reg(content, self.numreg)

        content = content.replace('"', '')

        for c in self.replace_by_space:
            content = content.replace(c, ' ')

        # Line breaks are whitespace too: collapsing them here (rather than
        # deleting them) keeps the words on both sides of a break separate.
        return self.pat.sub(' ', content)

    def _prepare_string(self, content):
        """Normalise raw text (UTF-8 bytes or already decoded) for indexing."""
        if isinstance(content, bytes):
            content = content.decode('utf8')

        content = self._remove_tag(content)

        return self._strip_accents(content)

    # ------------------------------------------------------------------
    # Index maintenance
    # ------------------------------------------------------------------
    def _index_file(self, hashtable, filename, index):
        """Add every word of the file ``filename`` to ``hashtable``.

        ``index`` is the post id recorded for each word found.
        """
        # Read bytes and decode explicitly so the result does not depend on
        # the platform's default encoding.
        with open(filename, 'rb') as f:
            content = self._prepare_string(f.read())

        for word in self.wordreg.findall(content):
            if len(word) < self.MINIMUM_LETTERS:
                continue
            postings = hashtable.setdefault(word.lower(), [])
            if index not in postings:
                postings.append(index)

    @staticmethod
    def _discard_post(hashtable, post_id):
        """Remove ``post_id`` from every posting list of ``hashtable``.

        Words left without any post are dropped so that prefix matching in
        ``search`` does not stop on an empty entry.
        """
        # Iterate over a snapshot of the keys: entries are deleted on the way.
        for word in list(hashtable):
            postings = hashtable[word]
            if post_id in postings:
                postings.remove(post_id)
                if not postings:
                    del hashtable[word]

    def create_index(self, blog):
        """Rebuild the whole index of ``blog`` and return the HTML report."""
        hashtable = {}

        root = os.path.join(blog.src_path, '_post')

        if os.path.isdir(root):
            for post in os.listdir(root):
                # Post files are named after their numeric id; anything else
                # (backups, sub-directories, ...) is ignored.
                if not re.match(r'[0-9]+\Z', post):
                    continue
                filename = os.path.join(root, post)
                if not os.path.isfile(filename):
                    continue
                self._index_file(hashtable, filename, int(post))

        self._saveDatabase(blog, hashtable)

        self._addReport('Search generated @ ' + blog.src_path + '/_search.db')

        return self.report

    def _index_post(self, blog, post, saveDatabase=True):
        """Add post ``post`` (int or digit string) to the index of ``blog``.

        When no index exists yet the whole index is rebuilt instead and the
        report of ``create_index`` is returned; otherwise returns None.
        """
        hashtable = self._loadDatabase(blog)

        if hashtable is None:
            return self.create_index(blog)

        self._index_file(hashtable, self._post_path(blog, post), int(post))

        if saveDatabase:
            self._saveDatabase(blog, hashtable)

    def _remove_post(self, blog, post, saveDatabase=True):
        """Remove post ``post`` (int or digit string) from the index."""
        hashtable = self._loadDatabase(blog)

        if hashtable is None:
            return

        # Postings hold ints, while ids coming from URLs are strings.
        self._discard_post(hashtable, int(post))

        if saveDatabase:
            self._saveDatabase(blog, hashtable)

    def index_post(self, blog, post):
        """Index a newly created post and persist the index."""
        return self._index_post(blog, post, True)

    def delete_post(self, blog, post):
        """Forget a deleted post and persist the index."""
        return self._remove_post(blog, post, True)

    def edit_post(self, blog, post, saveDatabase=True):
        """Re-index a modified post.

        The old words are removed and the new ones added on the same
        in-memory table, so words no longer present in the post disappear
        from the index.  The table is written back when ``saveDatabase`` is
        true.
        """
        hashtable = self._loadDatabase(blog)

        if hashtable is None:
            # No index yet: a full build picks up the edited post as well.
            self.create_index(blog)
            return

        post_id = int(post)
        self._discard_post(hashtable, post_id)
        self._index_file(hashtable, self._post_path(blog, post), post_id)

        if saveDatabase:
            self._saveDatabase(blog, hashtable)

    # ------------------------------------------------------------------
    # Querying
    # ------------------------------------------------------------------
    def search(self, blog, string):
        """Return the posts of ``blog`` matching the words of ``string``.

        The result is a list of ``(post_id, score)`` tuples, best score
        first (ties ordered by ascending post id); the score is the number
        of query words found in the post.  A word missing from the index is
        shortened letter by letter, down to ``MINIMUM_LETTERS``, until a
        known word is reached.  An empty list is returned when the blog has
        no index or nothing matches.
        """
        hashtable = self._loadDatabase(blog)
        if not hashtable:
            return []

        string = self._prepare_string(string)

        res = {}
        for word in self.wordreg.findall(string):
            if len(word) < self.MINIMUM_LETTERS:
                continue
            word = word.lower()
            while word not in hashtable and len(word) > self.MINIMUM_LETTERS:
                word = word[:-1]
            for post in hashtable.get(word, ()):
                res[post] = res.get(post, 0) + 1

        return sorted(res.items(), key=lambda item: (-item[1], item[0]))
| @@ -10,7 +10,7 @@ | ||||
| </form> | ||||
| {% endif %} | ||||
| <br/><br/> | ||||
| <a href="/post/add/{{ blog.id }}">Add a post</a> <a href="/generate/{{ blog.id }}">Generate blog</a><br/> | ||||
| <a href="/post/add/{{ blog.id }}">Add a post</a> <a href="/generate/{{ blog.id }}">Generate blog</a> <a href="/search/generate/{{ blog.id }}">Generate search index</a><br/><br/> | ||||
| {% if report|length == 0 %}  | ||||
| <b style="color:red">Any engine selected</b><br/><br/> | ||||
| {% else %} | ||||
|   | ||||
| @@ -10,7 +10,7 @@ | ||||
| </form> | ||||
| {% endif %} | ||||
| <br/><br/> | ||||
| <a href="/post/add/{{ blog.id }}">Add a post</a> <a href="/generate/{{ blog.id }}">Generate blog</a> | ||||
| <a href="/post/add/{{ blog.id }}">Add a post</a> <a href="/generate/{{ blog.id }}">Generate blog</a> <a href="/search/generate/{{ blog.id }}">Generate search index</a> | ||||
| <br/><br/> | ||||
| {% if posts|length == 0 %} | ||||
| <br/><br/> | ||||
|   | ||||
							
								
								
									
										7
									
								
								urls.py
									
									
									
									
									
								
							
							
						
						
									
										7
									
								
								urls.py
									
									
									
									
									
								
							| @@ -31,9 +31,10 @@ urlpatterns = patterns('', | ||||
|     url(r'^comment/add/(\d+)/(\d+)$',   'dynastie.views.add_comment',   name='add_comment'), | ||||
|     url(r'^comment/edit/(\d+)$',  'dynastie.views.edit_comment',  name='edit_comment'), | ||||
|     url(r'^comment/delete/(\d+)$','dynastie.views.delete_comment',name='delete_comment'), | ||||
|     url(r'^tag/(\d+)$',      'dynastie.views.tag',     name='tag'), | ||||
|     url(r'^tag/edit/(\d+)$', 'dynastie.views.edit_tag', name='edit_tag'), | ||||
|     url(r'^tag/delete/(\d+)$', 'dynastie.views.delete_tag', name='delete_tag'), | ||||
|     url(r'^tag/(\d+)$',           'dynastie.views.tag',          name='tag'), | ||||
|     url(r'^tag/edit/(\d+)$',      'dynastie.views.edit_tag',     name='edit_tag'), | ||||
|     url(r'^tag/delete/(\d+)$',    'dynastie.views.delete_tag',   name='delete_tag'), | ||||
|     url(r'^search/generate/(\d+)$',    'dynastie.views.generate_search',name='generate_search'), | ||||
|     # url(r'^dynastie/', include('dynastie.foo.urls')), | ||||
|  | ||||
|     # Uncomment the admin/doc line below to enable admin documentation: | ||||
|   | ||||
							
								
								
									
										37
									
								
								views.py
									
									
									
									
									
								
							
							
						
						
									
										37
									
								
								views.py
									
									
									
									
									
								
							| @@ -12,6 +12,7 @@ from django.core.mail import EmailMultiAlternatives | ||||
|  | ||||
| from dynastie.models import * | ||||
| from dynastie.forms import * | ||||
| from dynastie.search import * | ||||
|  | ||||
| from django.template.defaultfilters import register | ||||
| from django.template import Variable, VariableDoesNotExist | ||||
| @@ -399,7 +400,7 @@ def edit_blog(request, blog_id): | ||||
|  | ||||
| @login_required | ||||
| def add_post(request, blog_id): | ||||
|     (b,p) = have_I_right(request, blog_id) | ||||
|     (b,_) = have_I_right(request, blog_id) | ||||
|  | ||||
|     if request.method == 'POST': # If the form has been submitted... | ||||
|         if 'add' in request.POST: | ||||
| @@ -410,6 +411,8 @@ def add_post(request, blog_id): | ||||
|             if form.is_valid(): # All validation rules pass | ||||
|                 form = form.save() | ||||
|                 form.createPost(content, request.POST['text_tags']) | ||||
|                 s = Search() | ||||
|                 s.index_post(b, form.id) | ||||
|             # Process the data in form.cleaned_data | ||||
|             # ... | ||||
|                 return HttpResponseRedirect('/blog/' + blog_id) # Redirect after POST | ||||
| @@ -437,6 +440,8 @@ def edit_post(request, post_id): | ||||
|                     post.remove() | ||||
|                 form.save() | ||||
|                 post.createPost(request.POST['content'], request.POST['text_tags']) | ||||
|                 s = Search() | ||||
|                 s.edit_post(b, post_id) | ||||
|             # Process the data in form.cleaned_data | ||||
|             # ... | ||||
|                 return HttpResponseRedirect('/blog/' + str(blog_id)) # Redirect after POST | ||||
| @@ -469,17 +474,16 @@ def edit_post(request, post_id): | ||||
def delete_post(request, post_id):
    """Delete a post and drop it from the blog's search index.

    The blog and the post are obtained from have_I_right(); the view then
    redirects to the page of the blog.
    """
    (b, post) = have_I_right(request, None, post_id)

    # Remove the post from the index (it was previously re-indexed through
    # edit_post right before being deleted).  The index stores integer ids,
    # while post_id is captured from the URL, hence the conversion.
    s = Search()
    s.delete_post(b, int(post_id))

    post.delete()

    return HttpResponseRedirect('/blog/' + str(b.id))
|  | ||||
| @login_required | ||||
| def generate(request, blog_id): | ||||
| def _generate(request, blog_id, report): | ||||
|     b,_ = have_I_right(request, blog_id) | ||||
|  | ||||
|     b.create_paths() | ||||
|     report = b.generate() | ||||
|  | ||||
|     count = Post.objects.filter(blog=b).count() | ||||
|     nb_pages = int(count/50) | ||||
|     posts = Post.objects.filter(blog=b).order_by('-creation_date')[0:50] | ||||
| @@ -501,6 +505,27 @@ def generate(request, blog_id): | ||||
|      | ||||
|     return render(request, 'templates/generate.html', c) | ||||
|  | ||||
@login_required
def generate(request, blog_id):
    """Generate the blog and render the resulting report through _generate()."""
    blog, _unused = have_I_right(request, blog_id)
    blog.create_paths()
    generation_report = blog.generate()
    return _generate(request, blog_id, generation_report)
|  | ||||
@login_required
def generate_search(request, blog_id):
    """Rebuild the blog's search index and render the report through _generate()."""
    blog, _unused = have_I_right(request, blog_id)
    blog.create_paths()
    index_report = Search().create_index(blog)
    return _generate(request, blog_id, index_report)
|  | ||||
| @login_required | ||||
| def preview(request, blog_id): | ||||
|     from dynastie.generators import post | ||||
|   | ||||
		Reference in New Issue
	
	Block a user