Browse Source

fix #645: search filters

Rafał Pitoń 8 years ago
parent
commit
14a3ba3e6c

+ 1 - 0
docs/SUMMARY.md

@@ -19,6 +19,7 @@
 * [Extending pages](./ExtendingPages.md)
 * [Validating registrations](./ValidatingRegistrations.md)
 * [Validators](./Validators.md)
+* [Search filters](./SearchFilters.md)
 * [Template tags](./TemplateTags.md)
 * [Shortcuts](./Shortcuts.md)
 * [Thread store](./ThreadStore.md)

+ 23 - 0
docs/SearchFilters.md

@@ -0,0 +1,23 @@
+Search filters
+==============
+
+Misago implements a small feature that allows forum administrators to define custom search filters, which may be used to improve the accuracy of forum searches without the need to develop custom dictionaries.
+
+Consider a scenario in which a community is run for a computer game. This community will eventually develop jargon of its own that will likely confuse a search engine backed by the language's dictionary.
+
+In Misago this situation may be easily solved with custom search filters, which both post contents and search queries are passed through before they are sent to the search engine. Those filters are simple Python functions that take a string as their only argument, do something with it, and then return the changed string, like this:
+
+
+```python
+def my_search_filter(search):
+    """very basic filter that lets search engine understand what MMM stands for"""
+    return search.replace('MMM', 'Marines, Medics and Marauders')
+```
+
+Misago is made aware of search filters by specifying paths to the callables in the `MISAGO_POST_SEARCH_FILTERS` setting, like this:
+
+```python
+MISAGO_POST_SEARCH_FILTERS = [
+    'myforumsearch.filters.my_search_filter',
+]
+```

+ 5 - 0
docs/settings/Core.md

@@ -152,6 +152,11 @@ Max age, in days, of notifications stored in database. Notifications older than
 Limit of attachments that may be uploaded in single post. Lower limits may hamper image-heavy forums, but help keep memory usage by posting process. 
 
 
+## `MISAGO_POST_SEARCH_FILTERS`
+
+List of post search filters that are used to normalize search queries and documents used by the forum search engine.
+
+
 ## `MISAGO_POST_VALIDATORS`
 
 List of post validators used to validate posts.

+ 5 - 0
misago/conf/defaults.py

@@ -34,6 +34,11 @@ MISAGO_MARKUP_EXTENSIONS = []
 MISAGO_POST_VALIDATORS = []
 
 
+# Post search filters
+
+MISAGO_POST_SEARCH_FILTERS = []
+
+
 # Posting middlewares
 # https://misago.readthedocs.io/en/latest/developers/posting_process.html
 

+ 2 - 0
misago/core/testproject/searchfilters.py

@@ -0,0 +1,2 @@
+def test_filter(search):
+    """Search filter registered by the test settings: expands the "MMM" acronym."""
+    expansion = u'Marines, Marauders and Medics'
+    return search.replace(u'MMM', expansion)

+ 16 - 0
misago/threads/filtersearch.py

@@ -0,0 +1,16 @@
+from django.utils.module_loading import import_string
+
+from misago.conf import settings
+
+
+# Dotted import paths of the filter callables, read from the
+# MISAGO_POST_SEARCH_FILTERS setting.
+filters_list = settings.MISAGO_POST_SEARCH_FILTERS
+# Each path is passed to import_string once, when this module is imported;
+# the resulting list is the default filter chain used by filter_search().
+SEARCH_FILTERS = list(map(import_string, filters_list))
+
+
+def filter_search(search, filters=None):
+    """Pass ``search`` through a chain of search filters and return the result.
+
+    ``filters`` is an iterable of callables, each taking the current string
+    and returning a replacement. When it is omitted (or empty) the
+    module-level ``SEARCH_FILTERS`` are used instead. A filter that returns
+    a falsy value (for example ``None``) leaves the string unchanged.
+    """
+    active_filters = filters if filters else SEARCH_FILTERS
+
+    result = search
+    for apply_filter in active_filters:
+        filtered = apply_filter(result)
+        if filtered:
+            result = filtered
+
+    return result

+ 3 - 2
misago/threads/models/post.py

@@ -12,6 +12,7 @@ from misago.conf import settings
 from misago.core.utils import parse_iso8601_string
 from misago.markup import finalise_markup
 from misago.threads.checksums import is_post_valid, update_post_checksum
+from misago.threads.filtersearch import filter_search
 
 
 @python_2_unicode_compatible
@@ -166,9 +167,9 @@ class Post(models.Model):
 
     def set_search_document(self, thread_title=None):
         if thread_title:
-            self.search_document = '\n\n'.join([thread_title, self.original])
+            self.search_document = filter_search('\n\n'.join([thread_title, self.original]))
         else:
-            self.search_document = self.original
+            self.search_document = filter_search(self.original)
 
     def update_search_vector(self):
         self.search_vector = SearchVector(

+ 9 - 3
misago/threads/search.py

@@ -5,13 +5,13 @@ from misago.conf import settings
 from misago.core.shortcuts import paginate, pagination_dict
 from misago.search import SearchProvider
 
+from .filtersearch import filter_search
 from .models import Post, Thread
 from .permissions import exclude_invisible_threads
 from .serializers import FeedSerializer
 from .utils import add_categories_to_items
 from .viewmodels import ThreadsRootCategory
 
-
 class SearchThreads(SearchProvider):
     name = _("Threads")
     url = 'threads'
@@ -56,8 +56,14 @@ class SearchThreads(SearchProvider):
 
 
 def search_threads(request, query, visible_threads):
-    search_query = SearchQuery(query, config=settings.MISAGO_SEARCH_CONFIG)
-    search_vector = SearchVector('search_document', config=settings.MISAGO_SEARCH_CONFIG)
+    search_query = SearchQuery(
+        filter_search(query),
+        config=settings.MISAGO_SEARCH_CONFIG,
+    )
+    search_vector = SearchVector(
+        'search_document',
+        config=settings.MISAGO_SEARCH_CONFIG,
+    )
 
     return Post.objects.select_related('thread', 'poster').filter(
         is_event=False,

+ 31 - 0
misago/threads/tests/test_search.py

@@ -175,6 +175,37 @@ class SearchApiTests(AuthenticatedUserTestCase):
                 self.assertEqual(len(results), 1)
                 self.assertEqual(results[0]['id'], post.id)
 
+    def test_filtered_query(self):
+        """search filters are applied to both indexed posts and search queries"""
+        thread = testutils.post_thread(self.category)
+        post = testutils.reply_thread(
+            thread,
+            message="You just do MMM in 4th minute and its pwnt",
+        )
+
+        self.index_post(post)
+
+        # The test filter expands "MMM" in the stored search document and in
+        # the query, so the acronym and words from its expansion must both
+        # find the post.
+        for query in ('MMM', 'Marines Medics'):
+            response = self.client.get('%s?q=%s' % (self.api_link, query))
+            self.assertEqual(response.status_code, 200)
+
+            # Parse the JSON of *this* response; asserting against the
+            # payload of an earlier request would leave this query untested.
+            response_json = response.json()
+            self.assertIn('threads', [p['id'] for p in response_json])
+
+            for provider in response_json:
+                if provider['id'] == 'threads':
+                    results = provider['results']['results']
+                    self.assertEqual(len(results), 1)
+                    self.assertEqual(results[0]['id'], post.id)
+
 
 class SearchProviderApiTests(SearchApiTests):
     def setUp(self):

+ 6 - 0
runtests.py

@@ -89,6 +89,12 @@ MISAGO_SEARCH_CONFIG = 'english'
 MISAGO_POST_VALIDATORS = [
     'misago.core.testproject.validators.test_post_validator',
 ]
+
+
+# Register test post search filter
+MISAGO_POST_SEARCH_FILTERS = [
+    'misago.core.testproject.searchfilters.test_filter',
+]
 """
 
     if os.environ.get('TRAVIS'):