Просмотр исходного кода

Replace batch_ postgresql utilities with single chunk_queryset utility

Rafał Pitoń 7 лет назад
Родитель
Commit
d10f40479c

+ 9 - 15
misago/core/pgutils.py

@@ -96,18 +96,12 @@ class PgPartialIndex(Index):
         return ' WHERE {}'.format(' AND '.join(sorted(clauses)))
 
 
-def batch_update(queryset, step=50):
-    """util because psycopg2 iterators aren't memory effective in Dj<1.11"""
-    paginator = Paginator(queryset.order_by('pk'), step)
-    for page_number in paginator.page_range:
-        for obj in paginator.page(page_number).object_list:
-            yield obj
-
-
-def batch_delete(queryset, step=50):
-    """another util cos paginator goes bobbins when you are deleting"""
-    queryset_exists = True
-    while queryset_exists:
-        for obj in queryset[:step]:
-            yield obj
-        queryset_exists = queryset.exists()
+def chunk_queryset(queryset, chunk_size=20):
+    ordered_queryset = queryset.order_by('-pk') # bias to newest items first
+    chunk = ordered_queryset[:chunk_size]
+    while chunk:
+        last_pk = None
+        for item in chunk:
+            last_pk = item.pk
+            yield item
+        chunk = ordered_queryset.filter(pk__lt=last_pk)[:chunk_size]

+ 38 - 0
misago/core/tests/test_chunk_queryset.py

@@ -0,0 +1,38 @@
+from django.test import TestCase
+
+from misago.core.models import CacheVersion
+from misago.core.pgutils import chunk_queryset
+
+
+class ChunkQuerysetTest(TestCase):
+    def setUp(self):
+        # clear table
+        CacheVersion.objects.all().delete()
+
+        # create 100 items
+        items_ids = []
+        for _ in range(100):
+            obj = CacheVersion.objects.create(cache='nomatter')
+            items_ids.append(obj.id)
+        self.items_ids = list(reversed(items_ids))
+
+    def test_chunk_queryset(self):
+        """chunk_queryset utility chunks queryset but returns all items"""
+        chunked_ids = []
+
+        with self.assertNumQueries(21):
+            queryset = CacheVersion.objects.all()
+            for obj in chunk_queryset(queryset, chunk_size=5):
+                chunked_ids.append(obj.id)
+
+        self.assertEqual(chunked_ids, self.items_ids)
+            
+    def test_chunk_shrinking_queryset(self):
+        """chunk_queryset utility chunks queryset in delete action"""
+        with self.assertNumQueries(121):
+            queryset = CacheVersion.objects.all()
+            for obj in chunk_queryset(queryset, chunk_size=5):
+                obj.delete()
+
+        self.assertEqual(CacheVersion.objects.count(), 0)
+            

+ 2 - 2
misago/threads/management/commands/clearattachments.py

@@ -6,7 +6,7 @@ from django.utils import timezone
 
 from misago.conf import settings
 from misago.core.management.progressbar import show_progress
-from misago.core.pgutils import batch_update
+from misago.core.pgutils import chunk_queryset
 from misago.threads.models import Attachment
 
 
@@ -36,7 +36,7 @@ class Command(BaseCommand):
         synchronized_count = 0
         show_progress(self, synchronized_count, attachments_to_sync)
         start_time = time.time()
-        for attachment in batch_update(queryset):
+        for attachment in chunk_queryset(queryset):
             attachment.delete()
 
             synchronized_count += 1

+ 2 - 2
misago/threads/management/commands/rebuildpostssearch.py

@@ -3,7 +3,7 @@ import time
 from django.core.management.base import BaseCommand
 
 from misago.core.management.progressbar import show_progress
-from misago.core.pgutils import batch_update
+from misago.core.pgutils import chunk_queryset
 from misago.threads.models import Post
 
 
@@ -29,7 +29,7 @@ class Command(BaseCommand):
         start_time = time.time()
 
         queryset = Post.objects.select_related('thread').filter(is_event=False)
-        for post in batch_update(queryset):
+        for post in chunk_queryset(queryset):
             if post.id == post.thread.first_post_id:
                 post.set_search_document(post.thread.title)
             else:

+ 2 - 2
misago/threads/management/commands/synchronizethreads.py

@@ -3,7 +3,7 @@ import time
 from django.core.management.base import BaseCommand
 
 from misago.core.management.progressbar import show_progress
-from misago.core.pgutils import batch_update
+from misago.core.pgutils import chunk_queryset
 from misago.threads.models import Thread
 
 
@@ -27,7 +27,7 @@ class Command(BaseCommand):
         synchronized_count = 0
         show_progress(self, synchronized_count, threads_to_sync)
         start_time = time.time()
-        for thread in batch_update(Thread.objects.all()):
+        for thread in chunk_queryset(Thread.objects.all()):
             thread.synchronize()
             thread.save()
 

+ 9 - 9
misago/threads/signals.py

@@ -5,7 +5,7 @@ from django.dispatch import Signal, receiver
 
 from misago.categories.models import Category
 from misago.categories.signals import delete_category_content, move_category_content
-from misago.core.pgutils import batch_delete, batch_update
+from misago.core.pgutils import chunk_queryset
 from misago.core.utils import ANONYMOUS_IP
 from misago.users.signals import anonymize_user_content, delete_user_content, username_changed
 
@@ -93,18 +93,18 @@ def delete_user_threads(sender, **kwargs):
     recount_categories = set()
     recount_threads = set()
 
-    for post in sender.liked_post_set.iterator():
+    for post in sender.liked_post_set.all():
         cleaned_likes = list(filter(lambda i: i['id'] != sender.id, post.last_likes))
         if cleaned_likes != post.last_likes:
             post.last_likes = cleaned_likes
             post.save(update_fields=['last_likes'])
             
-    for thread in batch_delete(sender.thread_set.all(), 50):
+    for thread in chunk_queryset(sender.thread_set.all()):
         recount_categories.add(thread.category_id)
         with transaction.atomic():
             thread.delete()
 
-    for post in batch_delete(sender.post_set.all(), 50):
+    for post in chunk_queryset(sender.post_set.all()):
         recount_categories.add(post.category_id)
         recount_threads.add(post.thread_id)
         with transaction.atomic():
@@ -112,7 +112,7 @@ def delete_user_threads(sender, **kwargs):
 
     if recount_threads:
         changed_threads_qs = Thread.objects.filter(id__in=recount_threads)
-        for thread in batch_update(changed_threads_qs, 50):
+        for thread in chunk_queryset(changed_threads_qs):
             thread.synchronize()
             thread.save()
 
@@ -140,15 +140,15 @@ def anonymize_user_in_events(sender, **kwargs):
         is_event=True,
         event_type__in=ANONYMIZABLE_EVENTS,
         event_context__user__id=sender.id,
-    ).iterator()
+    )
 
-    for event in queryset:
+    for event in chunk_queryset(queryset):
         anonymize_event(sender, event)
 
 
 @receiver([anonymize_user_content])
 def anonymize_user_in_likes(sender, **kwargs):
-    for post in sender.liked_post_set.iterator():
+    for post in chunk_queryset(sender.liked_post_set):
         anonymize_post_last_likes(sender, post)
 
 
@@ -207,7 +207,7 @@ def update_usernames(sender, **kwargs):
 @receiver(pre_delete, sender=get_user_model())
 def remove_unparticipated_private_threads(sender, **kwargs):
     threads_qs = kwargs['instance'].privatethread_set.all()
-    for thread in batch_update(threads_qs, 50):
+    for thread in chunk_queryset(threads_qs):
         if thread.participants.count() == 1:
             with transaction.atomic():
                 thread.delete()

+ 2 - 2
misago/users/management/commands/synchronizeusers.py

@@ -5,7 +5,7 @@ from django.core.management.base import BaseCommand
 
 from misago.categories.models import Category
 from misago.core.management.progressbar import show_progress
-from misago.core.pgutils import batch_update
+from misago.core.pgutils import chunk_queryset
 
 
 UserModel = get_user_model()
@@ -31,7 +31,7 @@ class Command(BaseCommand):
         synchronized_count = 0
         show_progress(self, synchronized_count, users_to_sync)
         start_time = time.time()
-        for user in batch_update(UserModel.objects.all()):
+        for user in chunk_queryset(UserModel.objects.all()):
             user.threads = user.thread_set.filter(
                 category__in=categories,
                 is_hidden=False,

+ 2 - 2
misago/users/views/admin/users.py

@@ -10,7 +10,7 @@ from misago.admin.views import generic
 from misago.categories.models import Category
 from misago.conf import settings
 from misago.core.mail import mail_users
-from misago.core.pgutils import batch_update
+from misago.core.pgutils import chunk_queryset
 from misago.threads.models import Thread
 from misago.users.avatars.dynamic import set_avatar as set_dynamic_avatar
 from misago.users.forms.admin import (
@@ -386,7 +386,7 @@ class DeletePostsStep(DeletionStep):
 
         if recount_categories:
             changed_threads_qs = Thread.objects.filter(id__in=recount_threads)
-            for thread in batch_update(changed_threads_qs, 50):
+            for thread in chunk_queryset(changed_threads_qs, 50):
                 thread.synchronize()
                 thread.save()