createfakehistory.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. import random
  2. import time
  3. from datetime import timedelta
  4. from django.contrib.auth import get_user_model
  5. from django.core.management.base import BaseCommand
  6. from django.utils import timezone
  7. from faker import Factory
  8. from ....categories.models import Category
  9. from ....core.pgutils import chunk_queryset
  10. from ....threads.checksums import update_post_checksum
  11. from ....threads.models import Thread
  12. from ....users.models import Rank
  13. from ...posts import get_fake_hidden_post, get_fake_post, get_fake_unapproved_post
  14. from ...threads import (
  15. get_fake_closed_thread,
  16. get_fake_hidden_thread,
  17. get_fake_thread,
  18. get_fake_unapproved_thread,
  19. )
  20. from ...users import (
  21. get_fake_admin_activated_user,
  22. get_fake_banned_user,
  23. get_fake_deleted_user,
  24. get_fake_inactive_user,
  25. get_fake_user,
  26. )
  27. User = get_user_model()
  28. class Command(BaseCommand):
  29. help = "Creates fake forum history reaching specified period."
  30. def add_arguments(self, parser):
  31. parser.add_argument(
  32. "length",
  33. help="generated history length (in days)",
  34. nargs="?",
  35. type=int,
  36. default=5,
  37. )
  38. def handle(self, *args, **options): # pylint: disable=too-many-locals
  39. history_length = options["length"]
  40. fake = Factory.create()
  41. categories = list(Category.objects.all_categories())
  42. ranks = list(Rank.objects.all())
  43. message = "Creating fake forum history for %s days...\n"
  44. self.stdout.write(message % history_length)
  45. start_time = time.time()
  46. self.move_existing_users_to_past(history_length)
  47. start_timestamp = timezone.now()
  48. for days_ago in reversed(range(history_length)):
  49. date = start_timestamp - timedelta(days=days_ago)
  50. for date_variation in get_random_date_variations(date, 0, 50):
  51. action = random.randint(0, 100)
  52. if action >= 80:
  53. self.create_fake_user(fake, date_variation, ranks)
  54. elif action > 50:
  55. self.create_fake_thread(fake, date_variation, categories)
  56. else:
  57. self.create_fake_post(fake, date_variation)
  58. if random.randint(0, 100) > 80:
  59. self.create_fake_follow(date)
  60. self.synchronize_threads()
  61. self.synchronize_categories()
  62. total_time = time.time() - start_time
  63. total_humanized = time.strftime("%H:%M:%S", time.gmtime(total_time))
  64. message = "\n\nSuccessfully created fake history for %s days in %s"
  65. self.stdout.write(message % (history_length, total_humanized))
  66. def move_existing_users_to_past(self, history_length):
  67. for user in User.objects.all():
  68. user.joined_on -= timedelta(days=history_length)
  69. user.save(update_fields=["joined_on"])
  70. user.audittrail_set.all().delete()
  71. def create_fake_user(self, fake, date, ranks):
  72. # There's 40% chance user has registered on this day
  73. if random.randint(1, 100) > 25:
  74. return
  75. # Pick random rank for next user
  76. rank = random.choice(ranks)
  77. # There's 10% chance user is inactive
  78. if random.randint(0, 100) > 90:
  79. user = get_fake_inactive_user(fake, rank)
  80. # There's another 10% chance user is admin-activated
  81. elif random.randint(0, 100) > 90:
  82. user = get_fake_admin_activated_user(fake, rank)
  83. # And further chance user is banned
  84. elif random.randint(0, 100) > 90:
  85. user = get_fake_banned_user(fake, rank)
  86. # Or deleted their account
  87. elif random.randint(0, 100) > 90:
  88. user = get_fake_deleted_user(fake, rank)
  89. # User is active
  90. else:
  91. user = get_fake_user(fake, rank)
  92. user.joined_on = date
  93. user.save(update_fields=["joined_on"])
  94. user.audittrail_set.all().delete()
  95. self.write_event(date, "%s has joined" % user)
  96. def create_fake_thread(self, fake, date, categories):
  97. category = random.choice(categories)
  98. # 10% chance thread poster is anonymous
  99. if random.randint(0, 100) > 90:
  100. starter = None
  101. else:
  102. starter = self.get_random_user(date)
  103. # There's 10% chance thread is closed
  104. if random.randint(0, 100) > 90:
  105. thread = get_fake_closed_thread(fake, category, starter)
  106. # There's further 5% chance thread is hidden
  107. elif random.randint(0, 100) > 95:
  108. if random.randint(0, 100) > 90:
  109. hidden_by = None
  110. else:
  111. hidden_by = self.get_random_user(date)
  112. thread = get_fake_hidden_thread(fake, category, starter, hidden_by)
  113. # And further 5% chance thread is unapproved
  114. elif random.randint(0, 100) > 95:
  115. thread = get_fake_unapproved_thread(fake, category, starter)
  116. # Default, standard thread
  117. else:
  118. thread = get_fake_thread(fake, category, starter)
  119. thread.first_post.posted_on = date
  120. thread.first_post.updated_on = date
  121. thread.first_post.checksum = update_post_checksum(thread.first_post)
  122. thread.first_post.save(update_fields=["checksum", "posted_on", "updated_on"])
  123. thread.started_on = date
  124. thread.save(update_fields=["started_on"])
  125. self.write_event(
  126. date, '%s has started "%s" thread' % (thread.first_post.poster_name, thread)
  127. )
  128. def create_fake_post(self, fake, date):
  129. thread = self.get_random_thread(date)
  130. if not thread:
  131. return
  132. # 10% chance poster is anonymous
  133. if random.randint(0, 100) > 90:
  134. poster = None
  135. else:
  136. poster = self.get_random_user(date)
  137. # There's 5% chance post is unapproved
  138. if random.randint(0, 100) > 90:
  139. post = get_fake_unapproved_post(fake, thread, poster)
  140. # There's further 5% chance post is hidden
  141. elif random.randint(0, 100) > 95:
  142. if random.randint(0, 100) > 90:
  143. hidden_by = None
  144. else:
  145. hidden_by = self.get_random_user(date)
  146. post = get_fake_hidden_post(fake, thread, poster, hidden_by)
  147. # Default, standard post
  148. else:
  149. post = get_fake_post(fake, thread, poster)
  150. post.posted_on = date
  151. post.updated_on = date
  152. post.checksum = update_post_checksum(post)
  153. post.save(update_fields=["checksum", "posted_on", "updated_on"])
  154. self.write_event(
  155. date, '%s has replied to "%s" thread' % (post.poster_name, thread)
  156. )
  157. def create_fake_follow(self, date):
  158. user_a = self.get_random_user(date)
  159. user_b = self.get_random_user(date)
  160. if not (user_a or user_b) or user_a == user_b:
  161. return
  162. if not user_a.is_following(user_b):
  163. user_a.follows.add(user_b)
  164. self.write_event(date, "%s followed %s" % (user_a, user_b))
  165. def get_random_thread(self, date):
  166. return (
  167. Thread.objects.filter(started_on__lt=date)
  168. .select_related("category")
  169. .order_by("?")
  170. .first()
  171. )
  172. def get_random_user(self, date):
  173. return (
  174. User.objects.filter(
  175. joined_on__lt=date, requires_activation=User.ACTIVATION_NONE
  176. )
  177. .order_by("?")
  178. .first()
  179. )
  180. def write_event(self, date, event):
  181. formatted_date = date.strftime("%Y-%m-%d %H:%M")
  182. self.stdout.write("%s: %s" % (formatted_date, event))
  183. def synchronize_threads(self):
  184. self.stdout.write("\nSynchronizing threads...")
  185. start_time = time.time()
  186. for thread in chunk_queryset(Thread.objects.all()):
  187. thread.synchronize()
  188. thread.save()
  189. total_time = time.time() - start_time
  190. total_humanized = time.strftime("%H:%M:%S", time.gmtime(total_time))
  191. message = "Synchronized %s threads in %s"
  192. self.stdout.write(message % (Thread.objects.count(), total_humanized))
  193. def synchronize_categories(self):
  194. self.stdout.write("\nSynchronizing categories...")
  195. start_time = time.time()
  196. for category in Category.objects.all():
  197. category.synchronize()
  198. category.save()
  199. total_time = time.time() - start_time
  200. total_humanized = time.strftime("%H:%M:%S", time.gmtime(total_time))
  201. message = "Synchronized %s categories in %s"
  202. self.stdout.write(message % (Category.objects.count(), total_humanized))
  203. def get_random_date_variations(date, min_date, max_date):
  204. variations = []
  205. for _ in range(random.randint(min_date, max_date)):
  206. random_offset = timedelta(minutes=random.randint(1, 1200))
  207. variations.append(date - random_offset)
  208. return sorted(variations)