Browse Source

Get fake forum history generation working, add shortcut to dev

rafalp 6 years ago
parent
commit
873265fa03

+ 10 - 0
dev

@@ -90,6 +90,7 @@ intro() {
     echo "    ${BOLD}run${NORMAL}               runs \"docker-compose run --rm misago\"."
     echo "    ${BOLD}psql${NORMAL}              runs psql connected to development database."
     echo "    ${BOLD}pyfmt${NORMAL}             runs isort + black on python code."
+    echo "    ${BOLD}fakedata${NORMAL}          populates database with testing data."
     echo
 }
 
@@ -263,6 +264,13 @@ psql_in_docker() {
     PGPASSWORD=$POSTGRES_PASSWORD psql --username $POSTGRES_USER --host $POSTGRES_HOST $POSTGRES_DB
 }
 
+# Shortcut for creating small dev forum.
+# Runs three management commands, each in a one-off "misago" container that is
+# removed afterwards (--rm): two createfakecategories passes, then createfakehistory.
+create_fake_data() {
+    # NOTE(review): presumably "7" and "12" are category counts and "1" is the
+    # parent category for the second pass - confirm against createfakecategories.
+    docker-compose run --rm misago python manage.py createfakecategories 7
+    docker-compose run --rm misago python manage.py createfakecategories 12 1
+    # 600 is the history length in days (createfakehistory's "length" argument).
+    docker-compose run --rm misago python manage.py createfakehistory 600
+}
+
 # Command dispatcher
 if [[ $1 ]]; then
     if [[ $1 = "init" ]]; then
@@ -309,6 +317,8 @@ if [[ $1 ]]; then
     elif [[ $1 = "pyfmt" ]]; then
         isort -rc misago
         black devproject misago
+    elif [[ $1 = "fakedata" ]]; then
+        create_fake_data
     else
         invalid_argument $1
     fi

+ 0 - 155
misago/faker/management/commands/createfakeforumhistory.py

@@ -1,155 +0,0 @@
-import random
-import time
-from datetime import timedelta
-
-from django.contrib.auth import get_user_model
-from django.core.management.base import BaseCommand
-from django.utils import timezone
-from faker import Factory
-
-from ....categories.models import Category
-from ....threads.checksums import update_post_checksum
-from ....threads.models import Post, Thread
-from ....users.models import Rank
-from ...englishcorpus import EnglishCorpus
-from ...users import (
-    get_fake_inactive_user,
-    get_fake_admin_activated_user,
-    get_fake_banned_user,
-    get_fake_user,
-)
-
-User = get_user_model()
-
-corpus = EnglishCorpus()
-corpus_short = EnglishCorpus(max_length=150)
-
-USER = 0
-THREAD = 1
-POST = 2
-ACTIONS = [USER, THREAD, POST, POST, POST]
-
-
-class Command(BaseCommand):
-    help = "Creates fake forum history reaching specified period."
-
-    def add_arguments(self, parser):
-        parser.add_argument(
-            "length",
-            help="generated history length (in days)",
-            nargs="?",
-            type=int,
-            default=5,
-        )
-
-    def handle(self, *args, **options):
-        history_length = options["length"]
-        fake = Factory.create()
-
-        categories = list(Category.objects.all_categories())
-        ranks = list(Rank.objects.all())
-
-        message = "Creating fake forum history for %s days...\n"
-        self.stdout.write(message % history_length)
-
-        start_time = time.time()
-
-        self.move_existing_users_to_past(history_length)
-
-        start_timestamp = timezone.now()
-        for days_ago in reversed(range(history_length)):
-            date = start_timestamp - timedelta(days=days_ago)
-            for date_variation in get_random_date_variations(date, 0, 20):
-                action = random.choice(ACTIONS)
-                if action == USER:
-                    self.create_fake_user(fake, date_variation, ranks)
-                elif action == THREAD:
-                    self.create_fake_thread(fake, date_variation, categories)
-                elif action == POST:
-                    self.create_fake_post(fake, date_variation)
-
-        total_time = time.time() - start_time
-        total_humanized = time.strftime("%H:%M:%S", time.gmtime(total_time))
-        message = "\n\nSuccessfully created fake history for %s days in %s"
-        self.stdout.write(message % (history_length, total_humanized))
-
-    def move_existing_users_to_past(self, history_length):
-        for user in User.objects.all():
-            user.joined_on -= timedelta(days=history_length)
-            user.save(update_fields=["joined_on"])
-            user.audittrail_set.all().delete()
-
-    def create_fake_user(self, fake, date, ranks):
-        # There's 40% chance user has registered on this day
-        if random.randint(1, 100) > 25:
-            return
-
-        # Pick random rank for next user
-        rank = random.choice(ranks)
-
-        # There's 10% chance user is inactive
-        if random.randint(0, 100) > 90:
-            user = get_fake_inactive_user(fake, rank)
-
-        # There's another 10% chance user is admin-activated
-        elif random.randint(0, 100) > 90:
-            user = get_fake_admin_activated_user(fake, rank)
-
-        # And further chance user is banned
-        elif random.randint(0, 100) > 90:
-            user = get_fake_banned_user(fake, rank)
-
-        # User is active
-        else:
-            user = get_fake_user(fake, rank)
-
-        user.joined_on = date
-        user.save(update_fields=["joined_on"])
-        user.audittrail_set.all().delete()
-
-        self.write_event(date, "%s has joined" % user)
-
-    def create_fake_thread(self, fake, date, categories):
-        user = self.get_random_user(date)
-        category = random.choice(categories)
-
-        thread_is_unapproved = random.randint(0, 100) > 90
-        thread_is_hidden = random.randint(0, 100) > 90
-        thread_is_closed = random.randint(0, 100) > 90
-
-        thread = Thread(
-            category=category,
-            started_on=datetime,
-            starter_name="-",
-            starter_slug="-",
-            last_post_on=datetime,
-            last_poster_name="-",
-            last_poster_slug="-",
-            replies=0,
-            is_unapproved=thread_is_unapproved,
-            is_hidden=thread_is_hidden,
-            is_closed=thread_is_closed,
-        )
-        thread.set_title(corpus_short.random_sentence())
-        thread.save()
-
-        self.write_event(date, '%s has started "%s" thread' % (user, "TODO"))
-
-    def create_fake_post(self, fake, date):
-        user = self.get_random_user(date)
-        self.write_event(date, '%s has replied to "%s" thread' % (user, "TODO"))
-
-    def get_random_user(self, date):
-        return User.objects.filter(joined_on__lt=date).order_by("?").first()
-
-    def write_event(self, date, event):
-        formatted_date = date.strftime("%Y-%m-%d %H:%M")
-        self.stdout.write("%s: %s" % (formatted_date, event))
-
-
-def get_random_date_variations(date, min, max):
-    variations = []
-    for _ in range(random.randint(min, max)):
-        random_offset = timedelta(minutes=random.randint(1, 1200))
-        variations.append(date - random_offset)
-    return sorted(variations)

+ 271 - 0
misago/faker/management/commands/createfakehistory.py

@@ -0,0 +1,271 @@
+import random
+import time
+from datetime import timedelta
+
+from django.contrib.auth import get_user_model
+from django.core.management.base import BaseCommand
+from django.utils import timezone
+from faker import Factory
+
+from ....categories.models import Category
+from ....core.pgutils import chunk_queryset
+from ....threads.checksums import update_post_checksum
+from ....threads.models import Post, Thread
+from ....users.models import Rank
+from ...posts import get_fake_hidden_post, get_fake_post, get_fake_unapproved_post
+from ...threads import (
+    get_fake_closed_thread,
+    get_fake_hidden_thread,
+    get_fake_thread,
+    get_fake_unapproved_thread,
+)
+from ...users import (
+    get_fake_admin_activated_user,
+    get_fake_banned_user,
+    get_fake_deleted_user,
+    get_fake_inactive_user,
+    get_fake_user,
+)
+
+User = get_user_model()
+
+
+class Command(BaseCommand):
+    help = "Creates fake forum history reaching specified period."
+
+    def add_arguments(self, parser):
+        """Register the optional positional ``length`` argument (days, default 5)."""
+        length_options = {
+            "nargs": "?",
+            "type": int,
+            "default": 5,
+            "help": "generated history length (in days)",
+        }
+        parser.add_argument("length", **length_options)
+
+    def handle(self, *args, **options):
+        """Generate fake forum activity for the requested number of days.
+
+        Reads the history length (in days) from ``options["length"]``, moves
+        existing users into the past, simulates every day from the oldest one
+        up to today, then synchronizes threads and categories and reports the
+        total elapsed time.
+        """
+        history_length = options["length"]
+        fake = Factory.create()
+
+        categories = list(Category.objects.all_categories())
+        ranks = list(Rank.objects.all())
+
+        message = "Creating fake forum history for %s days...\n"
+        self.stdout.write(message % history_length)
+
+        start_time = time.time()
+
+        self.move_existing_users_to_past(history_length)
+
+        start_timestamp = timezone.now()
+        # Walk from the oldest day (history_length - 1 days ago) up to today.
+        for days_ago in reversed(range(history_length)):
+            date = start_timestamp - timedelta(days=days_ago)
+            # Between 0 and 50 events per day, in chronological order.
+            for date_variation in get_random_date_variations(date, 0, 50):
+                # randint is inclusive on both ends, so this is roughly:
+                # 21% user registration attempts, 29% threads, 50% replies.
+                action = random.randint(0, 100)
+                if action >= 80:
+                    self.create_fake_user(fake, date_variation, ranks)
+                elif action > 50:
+                    self.create_fake_thread(fake, date_variation, categories)
+                else:
+                    self.create_fake_post(fake, date_variation)
+
+                # Roughly 20% of events are also accompanied by a follow.
+                # NOTE(review): this passes the day's base ``date`` instead of
+                # ``date_variation`` used by the calls above - confirm intended.
+                if random.randint(0, 100) > 80:
+                    self.create_fake_follow(date)
+
+        self.synchronize_threads()
+        self.synchronize_categories()
+
+        total_time = time.time() - start_time
+        total_humanized = time.strftime("%H:%M:%S", time.gmtime(total_time))
+        message = "\n\nSuccessfully created fake history for %s days in %s"
+        self.stdout.write(message % (history_length, total_humanized))
+
+    def move_existing_users_to_past(self, history_length):
+        """Backdate every existing user by ``history_length`` days.
+
+        Each user's ``joined_on`` is moved earlier and saved, and everything
+        in the user's ``audittrail_set`` is deleted.
+        """
+        shift = timedelta(days=history_length)
+        for existing_user in User.objects.all():
+            existing_user.joined_on = existing_user.joined_on - shift
+            existing_user.save(update_fields=["joined_on"])
+            existing_user.audittrail_set.all().delete()
+
+    def create_fake_user(self, fake, date, ranks):
+        """Maybe register a fake user who joined on ``date``.
+
+        Most calls return without creating anyone (see the first check). When
+        a user is created, its kind (inactive, admin-activated, banned,
+        deleted or regular) is drawn at random, its ``joined_on`` is set to
+        ``date``, its audit trail is cleared and the event is logged.
+        """
+        # Only about 25% of calls register a user; the rest do nothing
+        if random.randint(1, 100) > 25:
+            return
+
+        # Pick random rank for next user
+        rank = random.choice(ranks)
+
+        # Each check below draws a fresh random number, so every "10%" applies
+        # only to the users that were not picked by an earlier branch.
+
+        # There's ~10% chance user is inactive
+        if random.randint(0, 100) > 90:
+            user = get_fake_inactive_user(fake, rank)
+
+        # There's another ~10% chance user is admin-activated
+        elif random.randint(0, 100) > 90:
+            user = get_fake_admin_activated_user(fake, rank)
+
+        # And further ~10% chance user is banned
+        elif random.randint(0, 100) > 90:
+            user = get_fake_banned_user(fake, rank)
+
+        # Or (~10% again) deleted their account
+        elif random.randint(0, 100) > 90:
+            user = get_fake_deleted_user(fake, rank)
+
+        # User is active
+        else:
+            user = get_fake_user(fake, rank)
+
+        # Backdate the registration and drop audit entries created along the way
+        user.joined_on = date
+        user.save(update_fields=["joined_on"])
+        user.audittrail_set.all().delete()
+
+        self.write_event(date, "%s has joined" % user)
+
+    def create_fake_thread(self, fake, date, categories):
+        """Start a fake thread in a random category, dated at ``date``.
+
+        The thread kind (closed, hidden, unapproved or regular) is drawn at
+        random. The thread's first post and the thread itself are then
+        backdated to ``date`` and the event is logged.
+        """
+        category = random.choice(categories)
+
+        # 10% chance thread poster is anonymous
+        # (starter may also end up None when no eligible user exists yet)
+        if random.randint(0, 100) > 90:
+            starter = None
+        else:
+            starter = self.get_random_user(date)
+
+        # There's 10% chance thread is closed
+        if random.randint(0, 100) > 90:
+            thread = get_fake_closed_thread(fake, category, starter)
+
+        # There's further 5% chance thread is hidden
+        elif random.randint(0, 100) > 95:
+            # 10% of hidden threads have no user recorded as the hiding one
+            if random.randint(0, 100) > 90:
+                hidden_by = None
+            else:
+                hidden_by = self.get_random_user(date)
+
+            thread = get_fake_hidden_thread(fake, category, starter, hidden_by)
+
+        # And further 5% chance thread is unapproved
+        elif random.randint(0, 100) > 95:
+            thread = get_fake_unapproved_thread(fake, category, starter)
+
+        # Default, standard thread
+        else:
+            thread = get_fake_thread(fake, category, starter)
+
+        # Backdate the first post and refresh its checksum.
+        # NOTE(review): relies on update_post_checksum returning the new
+        # checksum value - confirm against misago.threads.checksums.
+        thread.first_post.posted_on = date
+        thread.first_post.updated_on = date
+        thread.first_post.checksum = update_post_checksum(thread.first_post)
+        thread.first_post.save(update_fields=["checksum", "posted_on", "updated_on"])
+
+        # Only started_on is saved here; handle() calls synchronize_threads()
+        # after all events have been generated.
+        thread.started_on = date
+        thread.save(update_fields=["started_on"])
+
+        self.write_event(
+            date, '%s has started "%s" thread' % (thread.first_post.poster_name, thread)
+        )
+
+    def create_fake_post(self, fake, date):
+        """Add a fake reply, dated at ``date``, to a random older thread.
+
+        Does nothing when no thread started before ``date`` exists. The post
+        kind (unapproved, hidden or regular) is drawn at random; the post is
+        then backdated to ``date`` and the event is logged.
+        """
+        thread = self.get_random_thread(date)
+        if not thread:
+            return
+
+        # 10% chance poster is anonymous
+        if random.randint(0, 100) > 90:
+            poster = None
+        else:
+            poster = self.get_random_user(date)
+
+        # There's ~10% chance post is unapproved
+        # NOTE(review): this comment used to say 5%, but the check is "> 90";
+        # confirm which of the two was intended.
+        if random.randint(0, 100) > 90:
+            post = get_fake_unapproved_post(fake, thread, poster)
+
+        # There's further 5% chance post is hidden
+        elif random.randint(0, 100) > 95:
+            # 10% of hidden posts have no user recorded as the hiding one
+            if random.randint(0, 100) > 90:
+                hidden_by = None
+            else:
+                hidden_by = self.get_random_user(date)
+
+            post = get_fake_hidden_post(fake, thread, poster, hidden_by)
+
+        # Default, standard post
+        else:
+            post = get_fake_post(fake, thread, poster)
+
+        # Backdate the post and refresh its checksum.
+        # NOTE(review): relies on update_post_checksum returning the new
+        # checksum value - confirm against misago.threads.checksums.
+        post.posted_on = date
+        post.updated_on = date
+        post.checksum = update_post_checksum(post)
+        post.save(update_fields=["checksum", "posted_on", "updated_on"])
+
+        self.write_event(
+            date, '%s has replied to "%s" thread' % (post.poster_name, thread)
+        )
+
+    def create_fake_follow(self, date):
+        """Make one random user follow another random user.
+
+        Both users are drawn with ``get_random_user(date)``. Nothing happens
+        when either draw yields no user or when both draws yield the same
+        user. The event is logged even if the follow already existed.
+        """
+        user_a = self.get_random_user(date)
+        user_b = self.get_random_user(date)
+
+        # Both users must exist before either of them is dereferenced below.
+        if not (user_a and user_b) or user_a == user_b:
+            return
+
+        if not user_a.is_following(user_b):
+            user_a.follows.add(user_b)
+
+        self.write_event(date, "%s followed %s" % (user_a, user_b))
+
+    def get_random_post(self, date):
+        """Return a random post posted before ``date``, or ``None`` if there is none."""
+        # The parameter must be named ``date``: the query below reads it, and a
+        # differently named parameter makes every call fail with NameError.
+        return Post.objects.filter(posted_on__lt=date).order_by("?").first()
+
+    def get_random_thread(self, date):
+        """Return a random thread started before ``date``, or ``None``.
+
+        The thread's category is fetched in the same query.
+        """
+        older_threads = Thread.objects.filter(started_on__lt=date)
+        shuffled = older_threads.select_related("category").order_by("?")
+        return shuffled.first()
+
+    def get_random_user(self, date):
+        """Return a random user who joined before ``date`` and needs no activation.
+
+        Returns ``None`` when no such user exists.
+        """
+        candidates = User.objects.filter(
+            requires_activation=User.ACTIVATION_NONE, joined_on__lt=date
+        )
+        return candidates.order_by("?").first()
+
+    def write_event(self, date, event):
+        """Write ``event`` to stdout, prefixed with ``date`` as ``YYYY-MM-DD HH:MM``."""
+        line = "%s: %s" % (date.strftime("%Y-%m-%d %H:%M"), event)
+        self.stdout.write(line)
+
+    def synchronize_threads(self):
+        """Call ``synchronize()`` on every thread, save it, and report the timing."""
+        self.stdout.write("\nSynchronizing threads...")
+        began_at = time.time()
+
+        for thread in chunk_queryset(Thread.objects.all()):
+            thread.synchronize()
+            thread.save()
+
+        elapsed = time.strftime("%H:%M:%S", time.gmtime(time.time() - began_at))
+
+        summary = "Synchronized %s threads in %s"
+        self.stdout.write(summary % (Thread.objects.count(), elapsed))
+
+    def synchronize_categories(self):
+        """Call ``synchronize()`` on every category, save it, and report the timing."""
+        self.stdout.write("\nSynchronizing categories...")
+        began_at = time.time()
+
+        for category in Category.objects.all():
+            category.synchronize()
+            category.save()
+
+        elapsed = time.strftime("%H:%M:%S", time.gmtime(time.time() - began_at))
+
+        summary = "Synchronized %s categories in %s"
+        self.stdout.write(summary % (Category.objects.count(), elapsed))
+
+
+def get_random_date_variations(date, min, max):
+    """Return a sorted list of random timestamps shortly before ``date``.
+
+    Between ``min`` and ``max`` (inclusive) timestamps are produced, each one
+    1 to 1200 minutes earlier than ``date``.
+    """
+    how_many = random.randint(min, max)
+    return sorted(
+        date - timedelta(minutes=random.randint(1, 1200)) for _ in range(how_many)
+    )

+ 87 - 0
misago/faker/posts.py

@@ -0,0 +1,87 @@
+import random
+
+from django.utils import timezone
+
+from ..threads.checksums import update_post_checksum
+from ..threads.models import Post
+from .englishcorpus import EnglishCorpus
+from .users import get_fake_username
+
+PLACEKITTEN_URL = "https://placekitten.com/g/%s/%s"
+
+corpus = EnglishCorpus()
+
+
+def get_fake_post(fake, thread, poster=None):
+    """Create and return a post in ``thread`` with randomly generated content.
+
+    ``poster`` is the authoring user, or ``None`` for an anonymous post, in
+    which case a fake username is stored as the poster name. The post is
+    dated at the current time and its checksum is updated and saved.
+    """
+    original, parsed = get_fake_post_content(fake)
+    posted_on = timezone.now()
+
+    # Store the name as a plain string rather than handing the user object
+    # itself to the ``poster_name`` field.
+    poster_name = poster.username if poster else get_fake_username(fake)
+
+    post = Post.objects.create(
+        category=thread.category,
+        thread=thread,
+        poster=poster,
+        poster_name=poster_name,
+        original=original,
+        parsed=parsed,
+        posted_on=posted_on,
+        updated_on=posted_on,
+    )
+    update_post_checksum(post)
+    post.save(update_fields=["checksum"])
+
+    return post
+
+
+def get_fake_unapproved_post(fake, thread, poster=None):
+    """Create a fake post via ``get_fake_post`` and flag it as unapproved."""
+    unapproved_post = get_fake_post(fake, thread, poster)
+    unapproved_post.is_unapproved = True
+    unapproved_post.save(update_fields=["is_unapproved"])
+    return unapproved_post
+
+
+def get_fake_hidden_post(fake, thread, poster=None, hidden_by=None):
+    """Create a fake post via ``get_fake_post`` and mark it as hidden.
+
+    When ``hidden_by`` is given, that user is recorded as the one who hid
+    the post; otherwise a fake first name (and its lowercased form as the
+    slug) is recorded instead.
+    """
+    post = get_fake_post(fake, thread, poster)
+    post.is_hidden = True
+
+    if hidden_by:
+        post.hidden_by = hidden_by
+        post.hidden_by_name = hidden_by.username
+        post.hidden_by_slug = hidden_by.slug
+    else:
+        post.hidden_by_name = fake.first_name()
+        post.hidden_by_slug = post.hidden_by_name.lower()
+
+    # ``is_hidden`` has to be listed here: save() with update_fields persists
+    # only the named fields, so the flag set above would otherwise be lost.
+    post.save(
+        update_fields=["is_hidden", "hidden_by", "hidden_by_name", "hidden_by_slug"]
+    )
+
+    return post
+
+
+def get_fake_post_content(fake):
+    """Generate random post content.
+
+    Returns a ``(raw, parsed)`` tuple: the source text with paragraphs
+    separated by blank lines, and the matching HTML with one ``<p>`` element
+    per paragraph. ``fake`` is accepted but not used by this function.
+    """
+    raw = []
+    parsed = []
+
+    # ~10% of posts are long (up to 20 paragraphs), the rest have 1 to 5
+    if random.randint(0, 100) > 90:
+        paragraphs_to_make = random.randint(1, 20)
+    else:
+        paragraphs_to_make = random.randint(1, 5)
+
+    for _ in range(paragraphs_to_make):
+        # ~5% of paragraphs are a placeholder cat image of random dimensions
+        if random.randint(0, 100) > 95:
+            cat_width = random.randint(1, 16) * random.choice([100, 90, 80])
+            cat_height = random.randint(1, 12) * random.choice([100, 90, 80])
+
+            cat_url = PLACEKITTEN_URL % (cat_width, cat_height)
+
+            # NOTE(review): "!(url)" is presumably the forum's image markup - confirm
+            raw.append("!(%s)" % cat_url)
+            parsed.append('<p><img src="%s" alt=""/></p>' % cat_url)
+        else:
+            # ~5% of text paragraphs are long (up to 20 sentences)
+            if random.randint(0, 100) > 95:
+                sentences_to_make = random.randint(1, 20)
+            else:
+                sentences_to_make = random.randint(1, 7)
+            raw.append(" ".join(corpus.random_sentences(sentences_to_make)))
+            # The paragraph text is wrapped in <p> as-is, without escaping
+            parsed.append("<p>%s</p>" % raw[-1])
+
+    return "\n\n".join(raw), "\n".join(parsed)

+ 35 - 28
misago/faker/threads.py

@@ -1,46 +1,53 @@
 from django.utils import timezone
 
-from ...englishcorpus import EnglishCorpus
+from ..threads.models import Thread
+from .englishcorpus import EnglishCorpus
+from .posts import get_fake_hidden_post, get_fake_post, get_fake_unapproved_post
 
-PLACEKITTEN_URL = "https://placekitten.com/g/%s/%s"
-
-corpus = EnglishCorpus()
 corpus_short = EnglishCorpus(max_length=150)
 
 
-def fake_thread(fake, category, starter):
-    thread = Thread(
-        category=category,
-        started_on=timezone.now(),
-        starter_name="-",
-        starter_slug="-",
-        last_post_on=timezone.now(),
-        last_poster_name="-",
-        last_poster_slug="-",
-        replies=0,
-    )
-    thread.set_title(corpus_short.random_sentence())
-    thread.save()
-
+def get_fake_thread(fake, category, starter):
+    """Create a thread in ``category`` with a regular fake first post.
+
+    The first post is created with ``starter`` as its poster and saved on
+    the thread as ``first_post``.
+    """
+    thread = create_fake_thread(fake, category, starter)
+    thread.first_post = get_fake_post(fake, thread, starter)
+    thread.save(update_fields=["first_post"])
     return thread
 
 
-def fake_closed_thread(fake, category, starter):
-    thread = fake_thread(fake, category, starter)
+def get_fake_closed_thread(fake, category, starter):
+    """Create a regular fake thread via ``get_fake_thread`` and mark it closed."""
+    thread = get_fake_thread(fake, category, starter)
     thread.is_closed = True
     thread.save(update_fields=["is_closed"])
     return thread
 
 
-def fake_hidden_thread(fake, category, starter):
-    thread = fake_thread(fake, category, starter)
-    thread.is_hidden = True
-    thread.save(update_fields=["is_hidden"])
+def get_fake_hidden_thread(fake, category, starter, hidden_by=None):
+    """Create a thread in ``category`` whose first post is a hidden fake post.
+
+    ``hidden_by`` is passed on to ``get_fake_hidden_post``. Only
+    ``first_post`` is saved on the thread here.
+    """
+    # NOTE(review): the thread's own hidden flag is not set in this function;
+    # presumably a later thread synchronization derives it - confirm.
+    thread = create_fake_thread(fake, category, starter)
+    thread.first_post = get_fake_hidden_post(fake, thread, starter, hidden_by)
+    thread.save(update_fields=["first_post"])
+    return thread
+
+
+def get_fake_unapproved_thread(fake, category, starter):
+    """Create a thread in ``category`` whose first post is an unapproved fake post.
+
+    Only ``first_post`` is saved on the thread here.
+    """
+    # NOTE(review): the thread's own unapproved flag is not set in this function;
+    # presumably a later thread synchronization derives it - confirm.
+    thread = create_fake_thread(fake, category, starter)
+    thread.first_post = get_fake_unapproved_post(fake, thread, starter)
+    thread.save(update_fields=["first_post"])
     return thread
 
 
-def fake_unapproved_thread(fake, category, starter):
-    thread = fake_thread(fake, category, starter)
-    thread.is_hidden = True
-    thread.save(update_fields=["is_hidden"])
+def create_fake_thread(fake, category, starter):
+    """Create and save a bare thread in ``category`` with a random title.
+
+    The thread is dated at the current time, has zero replies and uses "-"
+    placeholders for the starter and last poster names and slugs. ``fake``
+    and ``starter`` are accepted but not used by this function.
+    """
+    started_on = timezone.now()
+    thread = Thread(
+        category=category,
+        started_on=started_on,
+        starter_name="-",
+        starter_slug="-",
+        last_post_on=started_on,
+        last_poster_name="-",
+        last_poster_slug="-",
+        replies=0,
+    )
+    # Title is a random sentence from the short corpus (max_length=150)
+    thread.set_title(corpus_short.random_sentence())
+    thread.save()
+
     return thread

+ 22 - 1
misago/faker/users.py

@@ -1,3 +1,4 @@
+import hashlib
 import random
 
 from django.contrib.auth import get_user_model
@@ -9,16 +10,20 @@ from .utils import retry_on_db_error
 
 User = get_user_model()
 
+AVATAR_SIZES = (400, 200, 100)
+GRAVATAR_URL = "https://www.gravatar.com/avatar/%s?s=%s&d=retro"
 PASSWORD = "password"
 
 
 @retry_on_db_error
 def get_fake_user(fake, rank=None, requires_activation=User.ACTIVATION_NONE):
+    """Create a test user with a fake username, fake email and Gravatar avatars.
+
+    The email is stored lowercased, the password is the module-level
+    ``PASSWORD``, and ``rank`` / ``requires_activation`` are passed through
+    to ``create_test_user``.
+    """
     username = get_fake_username(fake)
+    email = fake.email()
     return create_test_user(
         username,
-        fake.email(),
+        email.lower(),
         PASSWORD,
+        avatars=get_fake_avatars(email),
         rank=rank,
         requires_activation=requires_activation,
     )
@@ -38,6 +43,13 @@ def get_fake_admin_activated_user(fake, rank=None):
     return get_fake_user(fake, rank=rank, requires_activation=User.ACTIVATION_ADMIN)
 
 
+def get_fake_deleted_user(fake, rank=None):
+    """Create a fake user via ``get_fake_user`` and save it with ``is_active=False``."""
+    deleted_user = get_fake_user(fake, rank=rank)
+    deleted_user.is_active = False
+    deleted_user.save(update_fields=["is_active"])
+    return deleted_user
+
+
 def get_fake_username(fake):
     possible_usernames = [
         fake.first_name(),
@@ -48,3 +60,12 @@ def get_fake_username(fake):
     ]
 
     return random.choice(possible_usernames)
+
+
+def get_fake_avatars(email):
+    """Build Gravatar avatar entries for ``email``, one per size in ``AVATAR_SIZES``.
+
+    The hash in each URL is the MD5 hex digest of the lowercased, UTF-8
+    encoded email. Returns a list of ``{"size": ..., "url": ...}`` dicts.
+    """
+    normalized = email.lower().encode("utf-8")
+    email_hash = hashlib.md5(normalized).hexdigest()
+
+    avatars = []
+    for size in AVATAR_SIZES:
+        avatars.append({"size": size, "url": GRAVATAR_URL % (email_hash, size)})
+    return avatars