scrape_hexpm.sh 1.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. #!/usr/bin/sh
  2. # This script will scrape packages from hex.pm that are believed to be
  3. # Erlang packages. We do this by first walking through all packages and
  4. # then getting the most recent release of each package. If the build tools
  5. # listed contain "rebar3" or "make" we keep them and write the package
  6. # name and version to a file.
  7. #
  8. # This script should only be run occasionally to refresh the file
  9. # containing the list of packages.
  10. #
  11. # @todo Some of the projects fetched are Elixir despite indicating
  12. # "rebar3" or "make". We should ignore them here once identified
  13. # so they don't make it to the output.
  14. # @todo Probably better to only check "rebar3" since many Elixir
  15. # projects include "make".
  16. NUM=1
  17. while true; do
  18. echo "# Packages page $NUM"
  19. PAGE=$(curl -s "https://hex.pm/api/packages?sort=name&page=$NUM")
  20. if [ "$PAGE" = "[]" ]; then exit 0; fi
  21. PACKAGES=$(echo $PAGE | jq -r "map({name: .name, url: .releases[0].url})")
  22. echo $PACKAGES | jq -r '.[] | [.name, .url] | join(" ")' | while read -r NAMEURL; do
  23. NAME=$(echo $NAMEURL | awk '{print $1;}')
  24. URL=$(echo $NAMEURL | awk '{print $2;}')
  25. VERSION=$(curl -s "$URL" | jq 'select(.meta.build_tools | index("rebar3") or index("make")) | .version | tostring')
  26. VERSION=$(echo $VERSION | tr -d '"')
  27. if [ -n "$VERSION" ]; then
  28. echo "$NAME $VERSION"
  29. fi
  30. done
  31. NUM=$(expr $NUM + 1)
  32. sleep 10
  33. done