Browse Source

Basic data archiver for planned types

Rafał Pitoń 7 years ago
parent
commit
b7a2690440

+ 38 - 1
misago/threads/signals.py

@@ -2,11 +2,13 @@ from django.contrib.auth import get_user_model
 from django.db import transaction
 from django.db.models.signals import pre_delete
 from django.dispatch import Signal, receiver
+from django.utils.translation import ugettext as _
 
 from misago.categories.models import Category
 from misago.categories.signals import delete_category_content, move_category_content
 from misago.core.pgutils import chunk_queryset
-from misago.users.signals import anonymize_user_content, delete_user_content, username_changed
+from misago.users.signals import (
+    anonymize_user_content, archive_user_data, delete_user_content, username_changed)
 
 from .anonymize import ANONYMIZABLE_EVENTS, anonymize_event, anonymize_post_last_likes
 from .models import Attachment, Poll, PollVote, Post, PostEdit, PostLike, Thread
@@ -121,6 +123,41 @@ def delete_user_threads(sender, **kwargs):
             category.save()
 
 
+@receiver(archive_user_data)
+def archive_user_attachments(sender, data_archiver=None, **kwargs):
+    """Add each of the user's attachments (image, else plain file) to the archive."""
+    attachments = data_archiver.create_collection('attachment')
+    for upload in chunk_queryset(sender.attachment_set.order_by('id')):
+        attachments.write_model_file(upload.image or upload.file)
+
+
+@receiver(archive_user_data)
+def archive_user_posts(sender, data_archiver=None, **kwargs):
+    """Write one data file per user post, named after `posted_on`, holding `parsed`."""
+    posts = data_archiver.create_collection('post')
+    for entry in chunk_queryset(sender.post_set.order_by('id')):
+        posts.write_data_file(entry.posted_on, entry.parsed)
+
+
+@receiver(archive_user_data)
+def archive_user_posts_edits(sender, data_archiver=None, **kwargs):
+    """Write one data file per post edit, named after `edited_on`, holding `edited_from`."""
+    edits = data_archiver.create_collection('post_edit')
+    for edit in chunk_queryset(sender.postedit_set.order_by('id')):
+        edits.write_data_file(edit.edited_on, edit.edited_from)
+
+
+@receiver(archive_user_data)
+def archive_user_polls(sender, data_archiver=None, **kwargs):
+    """Write one data file per user poll with its question and choice labels."""
+    polls = data_archiver.create_collection('poll')
+    for item in chunk_queryset(sender.poll_set.order_by('id')):
+        labels = [choice['label'] for choice in item.choices]
+        polls.write_data_file(
+            item.posted_on, {_("Question"): item.question, _("Choices"): labels}
+        )
+
+
 @receiver(anonymize_user_content)
 def anonymize_user_in_events(sender, **kwargs):
     queryset = Post.objects.filter(

+ 4 - 2
misago/users/datacollector.py → misago/users/dataarchiver.py

@@ -5,11 +5,13 @@ import yaml
 
 from django.utils import timezone
 from django.utils.crypto import get_random_string
+from misago.core.utils import slugify
 
 
 class DataWriter(object):
     def write_data_file(self, name, data):
-        file_path = os.path.join(self.data_dir_path, '{}.txt'.format(name))
+        clean_name = slugify(str(name))
+        file_path = os.path.join(self.data_dir_path, '{}.txt'.format(clean_name))
         with open(file_path, 'w+') as fp:
             yaml.safe_dump(data, fp, default_flow_style=False, allow_unicode=True)
             return file_path
@@ -35,7 +37,7 @@ class DataCollection(DataWriter):
         os.mkdir(data_dir_path)
 
 
-class DataCollector(DataWriter):
+class DataArchiver(DataWriter):
     def __init__(self, user, working_dir_path):
         self.user = user
         self.working_dir_path = working_dir_path

+ 9 - 19
misago/users/management/commands/prepareuserdatadownloads.py

@@ -5,8 +5,9 @@ from django.core.management.base import BaseCommand
 
 from misago.conf import settings
 from misago.core.pgutils import chunk_queryset
-from misago.users.datacollector import DataCollector
+from misago.users.dataarchiver import DataArchiver
 from misago.users.models import DataDownload
+from misago.users.signals import archive_user_data
 
 
 logger = logging.getLogger('misago.users.datadownloads')
@@ -29,29 +30,18 @@ class Command(BaseCommand):
         queryset = DataDownload.objects.select_related('user')
         queryset = queryset.filter(status=DataDownload.STATUS_PENDING)
         for data_download in chunk_queryset(queryset):
-            data_collector = DataCollector(data_download.user, working_dir)
+            user = data_download.user
+            data_archiver = DataArchiver(user, working_dir)
             try:
-                collect_user_data(data_download.user, data_collector)
-                data_collector.create_archive()
-                data_download.save()
+                archive_user_data.send(user, data_archiver=data_archiver)
+                data_archiver.create_archive()
+                #data_download.save()
             except Exception as e:
                 print(e)
                 logger.exception(e)
-            data_collector.delete_tmp_dir()
+            # data_archiver.delete_archive()
+            data_archiver.delete_tmp_dir()
 
             downloads_prepared += 1
 
         self.stdout.write("Data downloads prepared: {}".format(downloads_prepared))
-
-
-def collect_user_data(user, data_collector):
-    data_collector.write_json_file('details', {
-        'username': user.username,
-        'email': user.email,
-    })
-
-    avatars = data_collector.create_collection('avatars')
-    avatars.write_file(user.avatar_tmp)
-    avatars.write_file(user.avatar_src)
-    for avatar in user.avatar_set.iterator():
-        avatars.write_file(avatar.image)

+ 54 - 0
misago/users/signals.py

@@ -4,20 +4,74 @@ from django.contrib.auth import get_user_model
 from django.db.models import Q
 from django.dispatch import Signal, receiver
 from django.utils import timezone
+from django.utils.translation import ugettext as _
 
 from misago.conf import settings
+from misago.core.pgutils import chunk_queryset
 
 from .models import AuditTrail
+from .profilefields import profilefields
 
 
 UserModel = get_user_model()
 
 anonymize_user_content = Signal()
+archive_user_data = Signal()
 delete_user_content = Signal()
 remove_old_ips = Signal()
 username_changed = Signal()
 
 
+@receiver(archive_user_data)
+def archive_user_details(sender, data_archiver=None, **kwargs):
+    """Write the user's username, e-mail, join date and join IP to the 'details' file."""
+    details = {_('Username'): sender.username, _('E-mail'): sender.email}
+    details[_('Joined on')] = sender.joined_on
+    # a falsy join IP is exported as a literal placeholder instead
+    details[_('Joined from ip')] = sender.joined_from_ip or 'unavailable'
+    data_archiver.write_data_file('details', details)
+
+
+@receiver(archive_user_data)
+def archive_user_profile_fields(sender, data_archiver=None, **kwargs):
+    """Write the user's truthy profile field values, keyed by field label, if any exist."""
+    filled = {}
+    for group in profilefields.get_fields_groups():
+        for field in group['fields']:
+            value = sender.profile_fields.get(field.fieldname)
+            if value:
+                filled[str(field.label)] = value
+    if filled:
+        data_archiver.write_data_file('profile_fields', filled)
+
+
+@receiver(archive_user_data)
+def archive_user_avatar(sender, data_archiver=None, **kwargs):
+    """Add the user's avatar_tmp and avatar_src files, then every avatar_set image."""
+    avatars = data_archiver.create_collection('avatar')
+    images = [avatar.image for avatar in sender.avatar_set.iterator()]
+    for upload in [sender.avatar_tmp, sender.avatar_src] + images:
+        avatars.write_model_file(upload)
+
+
+@receiver(archive_user_data)
+def archive_user_audit_trail(sender, data_archiver=None, **kwargs):
+    """Write one data file per audit trail entry, named after `created_at`, with its IP."""
+    trail = data_archiver.create_collection('audit_trail')
+    for record in chunk_queryset(sender.audittrail_set.order_by('id')):
+        trail.write_data_file(record.created_at, record.ip_address)
+
+
+@receiver(archive_user_data)
+def archive_user_name_history(sender, data_archiver=None, **kwargs):
+    """Write one data file per username change with the new and old usernames."""
+    history = data_archiver.create_collection('name_history')
+    for change in sender.namechanges.order_by('id').iterator():
+        names = {_("New username"): change.new_username}
+        names[_("Old username")] = change.old_username
+        history.write_data_file(change.changed_on, names)
+
+
 @receiver(username_changed)
 def handle_name_change(sender, **kwargs):
     sender.user_renames.update(changed_by_username=sender.username)

+ 42 - 42
misago/users/tests/test_datacollector.py → misago/users/tests/test_dataarchiver.py

@@ -4,7 +4,7 @@ import os
 from django.core.files import File
 
 from misago.conf import settings
-from misago.users.datacollector import DataCollector
+from misago.users.dataarchiver import DataArchiver
 from misago.users.testutils import AuthenticatedUserTestCase
 
 
@@ -14,16 +14,16 @@ TEST_AVATAR_PATH = os.path.join(TESTFILES_DIR, 'avatar.png')
 DATA_DOWNLOADS_WORKING_DIR = settings.MISAGO_USER_DATA_DOWNLOADS_WORKING_DIR
 
 
-class DataCollectorTests(AuthenticatedUserTestCase):
+class DataArchiverTests(AuthenticatedUserTestCase):
     def test_init_with_dirs(self):
         """data collector initializes with valid tmp directories"""
-        data_collector = DataCollector(self.user, DATA_DOWNLOADS_WORKING_DIR)
-        self.assertTrue(os.path.exists(data_collector.tmp_dir_path))
-        self.assertTrue(os.path.exists(data_collector.data_dir_path))
+        data_archiver = DataArchiver(self.user, DATA_DOWNLOADS_WORKING_DIR)
+        self.assertTrue(os.path.exists(data_archiver.tmp_dir_path))
+        self.assertTrue(os.path.exists(data_archiver.data_dir_path))
 
         data_downloads_working_dir = str(DATA_DOWNLOADS_WORKING_DIR)
-        tmp_dir_path = str(data_collector.tmp_dir_path)
-        data_dir_path = str(data_collector.data_dir_path)
+        tmp_dir_path = str(data_archiver.tmp_dir_path)
+        data_dir_path = str(data_archiver.data_dir_path)
         
         self.assertTrue(tmp_dir_path.startswith(data_downloads_working_dir))
         self.assertTrue(data_dir_path.startswith(data_downloads_working_dir))
@@ -33,13 +33,13 @@ class DataCollectorTests(AuthenticatedUserTestCase):
 
     def test_write_data_file(self):
         """write_data_file creates new data file in data_dir_path"""
-        data_collector = DataCollector(self.user, DATA_DOWNLOADS_WORKING_DIR)
+        data_archiver = DataArchiver(self.user, DATA_DOWNLOADS_WORKING_DIR)
 
         data_to_write = {'hello': "I am test!", 'nice': u"łał!"}
-        data_file_path = data_collector.write_data_file("testfile", data_to_write)
+        data_file_path = data_archiver.write_data_file("testfile", data_to_write)
         self.assertTrue(os.path.isfile(data_file_path))
 
-        valid_output_path = os.path.join(data_collector.data_dir_path, 'testfile.txt')
+        valid_output_path = os.path.join(data_archiver.data_dir_path, 'testfile.txt')
         self.assertEqual(data_file_path, valid_output_path)
 
         with open(data_file_path, 'r') as fp:
@@ -52,38 +52,38 @@ class DataCollectorTests(AuthenticatedUserTestCase):
             self.user.avatar_tmp = File(avatar)
             self.user.save()
 
-        data_collector = DataCollector(self.user, DATA_DOWNLOADS_WORKING_DIR)
-        file_path = data_collector.write_model_file(self.user.avatar_tmp)
+        data_archiver = DataArchiver(self.user, DATA_DOWNLOADS_WORKING_DIR)
+        file_path = data_archiver.write_model_file(self.user.avatar_tmp)
         
         self.assertTrue(os.path.isfile(file_path))
     
-        data_dir_path = str(data_collector.data_dir_path)
+        data_dir_path = str(data_archiver.data_dir_path)
         self.assertTrue(str(file_path).startswith(data_dir_path))
 
     def test_write_model_file_empty(self):
         """write_model_file is noop if model file field is none"""
-        data_collector = DataCollector(self.user, DATA_DOWNLOADS_WORKING_DIR)
-        file_path = data_collector.write_model_file(self.user.avatar_tmp)
+        data_archiver = DataArchiver(self.user, DATA_DOWNLOADS_WORKING_DIR)
+        file_path = data_archiver.write_model_file(self.user.avatar_tmp)
         
         self.assertIsNone(file_path)
-        self.assertFalse(os.listdir(data_collector.data_dir_path))
+        self.assertFalse(os.listdir(data_archiver.data_dir_path))
 
     def test_create_collection(self):
         """create_collection creates new directory for collection"""
-        data_collector = DataCollector(self.user, DATA_DOWNLOADS_WORKING_DIR)
-        collection = data_collector.create_collection('collection')
+        data_archiver = DataArchiver(self.user, DATA_DOWNLOADS_WORKING_DIR)
+        collection = data_archiver.create_collection('collection')
 
-        data_dir_path = str(data_collector.data_dir_path)
+        data_dir_path = str(data_archiver.data_dir_path)
         collection_dir_path = str(collection.data_dir_path)
         self.assertNotEqual(data_dir_path, collection_dir_path)
         self.assertTrue(collection_dir_path.startswith(data_dir_path))
 
         self.assertTrue(os.path.exists(collection.data_dir_path))
 
-    def test_collect_write_data_file(self):
+    def test_collection_write_data_file(self):
         """write_data_file creates new data file in collection data_dir_path"""
-        data_collector = DataCollector(self.user, DATA_DOWNLOADS_WORKING_DIR)
-        collection = data_collector.create_collection('collection')
+        data_archiver = DataArchiver(self.user, DATA_DOWNLOADS_WORKING_DIR)
+        collection = data_archiver.create_collection('collection')
 
         data_to_write = {'hello': "I am test!", 'nice': u"łał!"}
         data_file_path = collection.write_data_file("testfile", data_to_write)
@@ -96,14 +96,14 @@ class DataCollectorTests(AuthenticatedUserTestCase):
             saved_data = fp.read().strip().splitlines()
             self.assertEqual(saved_data, ["hello: I am test!", u"nice: łał!"])
 
-    def test_collect_write_model_file(self):
+    def test_collection_write_model_file(self):
         """write_model_file includes model file in collection data_dir_path"""
         with open(TEST_AVATAR_PATH, 'rb') as avatar:
             self.user.avatar_tmp = File(avatar)
             self.user.save()
 
-        data_collector = DataCollector(self.user, DATA_DOWNLOADS_WORKING_DIR)
-        collection = data_collector.create_collection('collection')
+        data_archiver = DataArchiver(self.user, DATA_DOWNLOADS_WORKING_DIR)
+        collection = data_archiver.create_collection('collection')
 
         file_path = collection.write_model_file(self.user.avatar_tmp)
         
@@ -112,10 +112,10 @@ class DataCollectorTests(AuthenticatedUserTestCase):
         data_dir_path = str(collection.data_dir_path)
         self.assertTrue(str(file_path).startswith(data_dir_path))
 
-    def test_collect_write_model_file_empty(self):
+    def test_collection_write_model_file_empty(self):
         """write_model_file is noop if model file field is none"""
-        data_collector = DataCollector(self.user, DATA_DOWNLOADS_WORKING_DIR)
-        collection = data_collector.create_collection('collection')
+        data_archiver = DataArchiver(self.user, DATA_DOWNLOADS_WORKING_DIR)
+        collection = data_archiver.create_collection('collection')
 
         file_path = collection.write_model_file(self.user.avatar_tmp)
         
@@ -124,36 +124,36 @@ class DataCollectorTests(AuthenticatedUserTestCase):
 
     def test_create_archive(self):
         """create_archive creates zip file from collected data"""
-        data_collector = DataCollector(self.user, DATA_DOWNLOADS_WORKING_DIR)
+        data_archiver = DataArchiver(self.user, DATA_DOWNLOADS_WORKING_DIR)
         
         data_to_write = {'hello': "I am test!", 'nice': u"łał!"}
-        data_collector.write_data_file("testfile", data_to_write)
+        data_archiver.write_data_file("testfile", data_to_write)
 
         with open(TEST_AVATAR_PATH, 'rb') as avatar:
             self.user.avatar_tmp = File(avatar)
             self.user.save()
-        data_collector.write_model_file(self.user.avatar_tmp)
+        data_archiver.write_model_file(self.user.avatar_tmp)
 
-        archive_path = data_collector.create_archive()
-        self.assertEqual(data_collector.archive_path, archive_path)
+        archive_path = data_archiver.create_archive()
+        self.assertEqual(data_archiver.archive_path, archive_path)
         self.assertTrue(os.path.isfile(archive_path))
 
     def test_delete_archive(self):
         """delete_archive deletes zip file"""
-        data_collector = DataCollector(self.user, DATA_DOWNLOADS_WORKING_DIR)
-        archive_path = data_collector.create_archive()
-        data_collector.delete_archive()
+        data_archiver = DataArchiver(self.user, DATA_DOWNLOADS_WORKING_DIR)
+        archive_path = data_archiver.create_archive()
+        data_archiver.delete_archive()
         self.assertFalse(os.path.isfile(archive_path))
 
     def test_delete_archive_none(self):
         """delete_archive is noop if zip file doesnt exist"""
-        data_collector = DataCollector(self.user, DATA_DOWNLOADS_WORKING_DIR)
-        self.assertIsNone(data_collector.archive_path)
-        data_collector.delete_archive()
+        data_archiver = DataArchiver(self.user, DATA_DOWNLOADS_WORKING_DIR)
+        self.assertIsNone(data_archiver.archive_path)
+        data_archiver.delete_archive()
 
     def test_delete_tmp_dir(self):
         """delete_tmp_dir delete's directory but leaves archive"""
-        data_collector = DataCollector(self.user, DATA_DOWNLOADS_WORKING_DIR)
-        tmp_dir_path = data_collector.tmp_dir_path
-        data_collector.delete_tmp_dir()
+        data_archiver = DataArchiver(self.user, DATA_DOWNLOADS_WORKING_DIR)
+        tmp_dir_path = data_archiver.tmp_dir_path
+        data_archiver.delete_tmp_dir()
         self.assertFalse(os.path.exists(tmp_dir_path))