Browse Source

fix #843: Forum search improvements: GIN index and dataset trimming

Rafał Pitoń 8 years ago
parent
commit
6424cb4705

+ 17 - 17
misago/core/pgutils.py

@@ -45,23 +45,6 @@ DROP INDEX %(index_name)s
         return message % formats
 
 
-def batch_update(queryset, step=50):
-    """util because psycopg2 iterators aren't memory effective in Dj<1.11"""
-    paginator = Paginator(queryset.order_by('pk'), step)
-    for page_number in paginator.page_range:
-        for obj in paginator.page(page_number).object_list:
-            yield obj
-
-
-def batch_delete(queryset, step=50):
-    """another util cos paginator goes bobbins when you are deleting"""
-    queryset_exists = True
-    while queryset_exists:
-        for obj in queryset[:step]:
-            yield obj
-        queryset_exists = queryset.exists()
-
-
 class CreatePartialCompositeIndex(CreatePartialIndex):
     CREATE_SQL = """
 CREATE INDEX %(index_name)s ON %(table)s (%(fields)s)
@@ -94,3 +77,20 @@ DROP INDEX %(index_name)s
         message = ("Create PostgreSQL partial composite index on fields %s in %s for %s")
         formats = (', '.join(self.fields), self.model_name, self.values)
         return message % formats
+
+
+def batch_update(queryset, step=50):
+    """Yield every object of ``queryset``, fetching ``step`` objects per page.
+
+    Utility because psycopg2 iterators aren't memory effective in Django < 1.11.
+    The queryset is ordered by ``pk`` and walked page by page through a
+    ``Paginator``, so only one page of objects is requested at a time.
+    """
+    paginator = Paginator(queryset.order_by('pk'), step)
+    for page_number in paginator.page_range:
+        for obj in paginator.page(page_number).object_list:
+            yield obj
+
+
+def batch_delete(queryset, step=50):
+    """Yield objects of ``queryset`` in slices of ``step`` until none remain.
+
+    Separate utility because the paginator misbehaves while rows are being
+    deleted. Every pass takes the first ``step`` objects again and the loop
+    only stops once ``queryset.exists()`` is false, so the caller is expected
+    to delete each yielded object (or otherwise make it stop matching the
+    queryset); if it does not, iteration never ends.
+    """
+    queryset_exists = True
+    while queryset_exists:
+        # always slice from the start: previously yielded rows should be gone
+        for obj in queryset[:step]:
+            yield obj
+        queryset_exists = queryset.exists()

+ 22 - 0
misago/threads/migrations/0005_index_search_document.py

@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.1 on 2017-05-21 17:52
+from __future__ import unicode_literals
+
+import django.contrib.postgres.indexes
+from django.contrib.postgres.operations import BtreeGinExtension
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    """Add a GIN index on the ``search_vector`` field of the ``post`` model."""
+
+    dependencies = [
+        ('misago_threads', '0004_update_settings'),
+    ]
+
+    operations = [
+        # Runs before the index is created; presumably installs PostgreSQL's
+        # btree_gin extension -- confirm against the Django postgres docs.
+        BtreeGinExtension(),
+        migrations.AddIndex(
+            model_name='post',
+            index=django.contrib.postgres.indexes.GinIndex(fields=['search_vector'], name='misago_thre_search__b472a2_gin'),
+        ),
+    ]

+ 5 - 0
misago/threads/models/post.py

@@ -2,6 +2,7 @@ from __future__ import unicode_literals
 
 import copy
 
+from django.contrib.postgres.indexes import GinIndex
 from django.contrib.postgres.fields import JSONField
 from django.contrib.postgres.search import SearchVector, SearchVectorField
 from django.db import models
@@ -88,6 +89,10 @@ class Post(models.Model):
     search_vector = SearchVectorField()
 
     class Meta:
+        indexes = [
+            GinIndex(fields=['search_vector'])
+        ]
+
         index_together = [
             ('thread', 'id'),  # speed up threadview for team members
             ('is_event', 'is_hidden'),

+ 23 - 6
misago/threads/search.py

@@ -13,6 +13,9 @@ from .utils import add_categories_to_items
 from .viewmodels import ThreadsRootCategory
 
 
+# Maximum number of matching posts that search_threads() will rank: when a
+# query matches more posts than this, only the HITS_CEILING newest ones
+# (highest ids) are kept before ranking.
+HITS_CEILING = settings.MISAGO_POSTS_PER_PAGE * 5
+
+
 class SearchThreads(SearchProvider):
     name = _("Threads")
     icon = 'forum'
@@ -39,13 +42,18 @@ class SearchThreads(SearchProvider):
         )
         paginator = pagination_dict(list_page)
 
-        posts = list(list_page.object_list)
+        posts = []
         threads = []
+        if paginator['count']:
+            posts = list(list_page.object_list.select_related(
+                'thread', 'poster', 'poster__rank'
+            ))
 
-        for post in posts:
-            threads.append(post.thread)
+            threads = []
+            for post in posts:
+                threads.append(post.thread)
 
-        add_categories_to_items(root_category.unwrap(), threads_categories, posts + threads)
+            add_categories_to_items(root_category.unwrap(), threads_categories, posts + threads)
 
         results = {
             'results': FeedSerializer(posts, many=True, context={
@@ -67,10 +75,19 @@ def search_threads(request, query, visible_threads):
         config=settings.MISAGO_SEARCH_CONFIG,
     )
 
-    return Post.objects.select_related('thread', 'poster', 'poster__rank').filter(
+    queryset = Post.objects.filter(
         is_event=False,
         is_hidden=False,
         is_unapproved=False,
         thread_id__in=visible_threads.values('id'),
         search_vector=search_query,
-    ).annotate(rank=SearchRank(search_vector, search_query)).order_by('-rank', '-id')
+    )
+
+    if queryset[:HITS_CEILING + 1].count() > HITS_CEILING:
+        queryset = queryset.order_by('-id')[:HITS_CEILING]
+
+    return Post.objects.filter(
+        id__in=queryset.values('id'),
+    ).annotate(
+        rank=SearchRank(search_vector, search_query),
+    ).order_by('-rank', '-id')