createfakehistory.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. import random
  2. import time
  3. from datetime import timedelta
  4. from django.contrib.auth import get_user_model
  5. from django.core.management.base import BaseCommand
  6. from django.utils import timezone
  7. from faker import Factory
  8. from ....categories.models import Category
  9. from ....core.pgutils import chunk_queryset
  10. from ....threads.checksums import update_post_checksum
  11. from ....threads.models import Thread
  12. from ....users.models import Rank
  13. from ...posts import get_fake_hidden_post, get_fake_post, get_fake_unapproved_post
  14. from ...threads import (
  15. get_fake_closed_thread,
  16. get_fake_hidden_thread,
  17. get_fake_thread,
  18. get_fake_unapproved_thread,
  19. )
  20. from ...users import (
  21. get_fake_admin_activated_user,
  22. get_fake_banned_user,
  23. get_fake_deleted_user,
  24. get_fake_inactive_user,
  25. get_fake_user,
  26. )
# Resolve the user model once at import time via Django's get_user_model(),
# so the command works with whichever user model the project has configured.
User = get_user_model()
  28. class Command(BaseCommand):
  29. help = "Creates fake forum history reaching specified period."
  30. def add_arguments(self, parser):
  31. parser.add_argument(
  32. "length",
  33. help="generated history length (in days)",
  34. nargs="?",
  35. type=int,
  36. default=5,
  37. )
  38. parser.add_argument(
  39. "max_actions",
  40. help="number of items generate for a single day",
  41. nargs="?",
  42. type=int,
  43. default=50,
  44. )
  45. def handle(self, *args, **options): # pylint: disable=too-many-locals
  46. history_length = options["length"]
  47. max_actions = options["max_actions"]
  48. fake = Factory.create()
  49. categories = list(Category.objects.all_categories())
  50. ranks = list(Rank.objects.all())
  51. message = "Creating fake forum history for %s days...\n"
  52. self.stdout.write(message % history_length)
  53. start_time = time.time()
  54. self.move_existing_users_to_past(history_length)
  55. start_timestamp = timezone.now()
  56. for days_ago in reversed(range(history_length)):
  57. date = start_timestamp - timedelta(days=days_ago)
  58. for date_variation in get_random_date_variations(date, 0, max_actions):
  59. action = random.randint(0, 100)
  60. if action >= 80:
  61. self.create_fake_user(fake, date_variation, ranks)
  62. elif action > 50:
  63. self.create_fake_thread(fake, date_variation, categories)
  64. else:
  65. self.create_fake_post(fake, date_variation)
  66. if random.randint(0, 100) > 80:
  67. self.create_fake_follow(date)
  68. self.synchronize_threads()
  69. self.synchronize_categories()
  70. total_time = time.time() - start_time
  71. total_humanized = time.strftime("%H:%M:%S", time.gmtime(total_time))
  72. message = "\n\nSuccessfully created fake history for %s days in %s"
  73. self.stdout.write(message % (history_length, total_humanized))
  74. def move_existing_users_to_past(self, history_length):
  75. for user in User.objects.all():
  76. user.joined_on -= timedelta(days=history_length)
  77. user.save(update_fields=["joined_on"])
  78. user.audittrail_set.all().delete()
  79. def create_fake_user(self, fake, date, ranks):
  80. # Pick random rank for next user
  81. rank = random.choice(ranks)
  82. # There's 10% chance user is inactive
  83. if random.randint(0, 100) > 90:
  84. user = get_fake_inactive_user(fake, rank)
  85. # There's another 10% chance user is admin-activated
  86. elif random.randint(0, 100) > 90:
  87. user = get_fake_admin_activated_user(fake, rank)
  88. # And further chance user is banned
  89. elif random.randint(0, 100) > 90:
  90. user = get_fake_banned_user(fake, rank)
  91. # Or deleted their account
  92. elif random.randint(0, 100) > 90:
  93. user = get_fake_deleted_user(fake, rank)
  94. # User is active
  95. else:
  96. user = get_fake_user(fake, rank)
  97. user.joined_on = date
  98. user.save(update_fields=["joined_on"])
  99. user.audittrail_set.all().delete()
  100. self.write_event(date, "%s has joined" % user)
  101. def create_fake_thread(self, fake, date, categories):
  102. category = random.choice(categories)
  103. # 10% chance thread poster is anonymous
  104. if random.randint(0, 100) > 90:
  105. starter = None
  106. else:
  107. starter = self.get_random_user(date)
  108. # There's 10% chance thread is closed
  109. if random.randint(0, 100) > 90:
  110. thread = get_fake_closed_thread(fake, category, starter)
  111. # There's further 5% chance thread is hidden
  112. elif random.randint(0, 100) > 95:
  113. if random.randint(0, 100) > 90:
  114. hidden_by = None
  115. else:
  116. hidden_by = self.get_random_user(date)
  117. thread = get_fake_hidden_thread(fake, category, starter, hidden_by)
  118. # And further 5% chance thread is unapproved
  119. elif random.randint(0, 100) > 95:
  120. thread = get_fake_unapproved_thread(fake, category, starter)
  121. # Default, standard thread
  122. else:
  123. thread = get_fake_thread(fake, category, starter)
  124. thread.first_post.posted_on = date
  125. thread.first_post.updated_on = date
  126. thread.first_post.checksum = update_post_checksum(thread.first_post)
  127. thread.first_post.save(update_fields=["checksum", "posted_on", "updated_on"])
  128. thread.started_on = date
  129. thread.save(update_fields=["started_on"])
  130. self.write_event(
  131. date, '%s has started "%s" thread' % (thread.first_post.poster_name, thread)
  132. )
  133. def create_fake_post(self, fake, date):
  134. thread = self.get_random_thread(date)
  135. if not thread:
  136. return
  137. # 10% chance poster is anonymous
  138. if random.randint(0, 100) > 90:
  139. poster = None
  140. else:
  141. poster = self.get_random_user(date)
  142. # There's 5% chance post is unapproved
  143. if random.randint(0, 100) > 90:
  144. post = get_fake_unapproved_post(fake, thread, poster)
  145. # There's further 5% chance post is hidden
  146. elif random.randint(0, 100) > 95:
  147. if random.randint(0, 100) > 90:
  148. hidden_by = None
  149. else:
  150. hidden_by = self.get_random_user(date)
  151. post = get_fake_hidden_post(fake, thread, poster, hidden_by)
  152. # Default, standard post
  153. else:
  154. post = get_fake_post(fake, thread, poster)
  155. post.posted_on = date
  156. post.updated_on = date
  157. post.checksum = update_post_checksum(post)
  158. post.save(update_fields=["checksum", "posted_on", "updated_on"])
  159. self.write_event(
  160. date, '%s has replied to "%s" thread' % (post.poster_name, thread)
  161. )
  162. def create_fake_follow(self, date):
  163. user_a = self.get_random_user(date)
  164. user_b = self.get_random_user(date)
  165. if not (user_a or user_b) or user_a == user_b:
  166. return
  167. if not user_a.is_following(user_b):
  168. user_a.follows.add(user_b)
  169. self.write_event(date, "%s followed %s" % (user_a, user_b))
  170. def get_random_thread(self, date):
  171. return (
  172. Thread.objects.filter(started_on__lt=date)
  173. .select_related("category")
  174. .order_by("?")
  175. .first()
  176. )
  177. def get_random_user(self, date):
  178. return (
  179. User.objects.filter(
  180. joined_on__lt=date, requires_activation=User.ACTIVATION_NONE
  181. )
  182. .order_by("?")
  183. .first()
  184. )
  185. def write_event(self, date, event):
  186. formatted_date = date.strftime("%Y-%m-%d %H:%M")
  187. self.stdout.write("%s: %s" % (formatted_date, event))
  188. def synchronize_threads(self):
  189. self.stdout.write("\nSynchronizing threads...")
  190. start_time = time.time()
  191. for thread in chunk_queryset(Thread.objects.all()):
  192. thread.synchronize()
  193. thread.save()
  194. total_time = time.time() - start_time
  195. total_humanized = time.strftime("%H:%M:%S", time.gmtime(total_time))
  196. message = "Synchronized %s threads in %s"
  197. self.stdout.write(message % (Thread.objects.count(), total_humanized))
  198. def synchronize_categories(self):
  199. self.stdout.write("\nSynchronizing categories...")
  200. start_time = time.time()
  201. for category in Category.objects.all_categories():
  202. category.synchronize()
  203. category.save()
  204. total_time = time.time() - start_time
  205. total_humanized = time.strftime("%H:%M:%S", time.gmtime(total_time))
  206. message = "Synchronized %s categories in %s"
  207. self.stdout.write(message % (Category.objects.count(), total_humanized))
  208. def get_random_date_variations(date, min_date, max_date):
  209. variations = []
  210. for _ in range(random.randint(min_date, max_date)):
  211. random_offset = timedelta(minutes=random.randint(1, 1200))
  212. variations.append(date - random_offset)
  213. return sorted(variations)