Lua script to generate a Node.js cache from a package-lock.json

This is a Lua script that generates a Node.js cache from a package-lock.json. Why is this useful? Well, you can generate a cache without it by deleting node_modules and then running npm install --no-save --cache path/to/some/dir. That will cache all the downloads in the directory you gave it. But it won’t cache anything it doesn’t download. This means that if you install esbuild, for example, it will only cache the binary executable for your CPU architecture and not the ones for other architectures.

If that’s fine for you, then just do that instead. But I’m trying to generate a node module cache to use when building a Node package in a network-isolated sandbox (a Gentoo package build). I don’t want to generate a bunch of different tar files on different CPU architectures; I just want the one, so I want all the different esbuild binaries in there, and npm will just use the right one. All of them are listed in package-lock.json:

        "@esbuild/android-arm": "0.17.19",
        "@esbuild/android-arm64": "0.17.19",
        "@esbuild/android-x64": "0.17.19",
        "@esbuild/darwin-arm64": "0.17.19",
        "@esbuild/darwin-x64": "0.17.19",
        "@esbuild/freebsd-arm64": "0.17.19",
        "@esbuild/freebsd-x64": "0.17.19",
        "@esbuild/linux-arm": "0.17.19",
        "@esbuild/linux-arm64": "0.17.19",
        "@esbuild/linux-ia32": "0.17.19",
        "@esbuild/linux-loong64": "0.17.19",
        "@esbuild/linux-mips64el": "0.17.19",
        "@esbuild/linux-ppc64": "0.17.19",
        "@esbuild/linux-riscv64": "0.17.19",
        "@esbuild/linux-s390x": "0.17.19",
        "@esbuild/linux-x64": "0.17.19",
        "@esbuild/netbsd-x64": "0.17.19",
        "@esbuild/openbsd-x64": "0.17.19",
        "@esbuild/sunos-x64": "0.17.19",
        "@esbuild/win32-arm64": "0.17.19",
        "@esbuild/win32-ia32": "0.17.19",
        "@esbuild/win32-x64": "0.17.19"

There is an entry for each one of these specifying the source. So this Lua script just traverses the package-lock.json and adds the tarball URL from each resolved field to the custom cache, so I can tar it up. It reads $PWD/package-lock.json and writes $PWD/node-modules-cache/.

You need Lua 5.3 or newer (the script uses the integer bitwise operators), with subproc, luaposix, and lunajson installed as Lua packages. You also need openssl, base64, and npm on your $PATH.

#!/usr/bin/env lua
--[[
	builds a node-modules-cache/ dir from a package-lock.json

	requires lua packages:
		- lunajson
		- subproc
		- luaposix

	requires commandline tools:
		- openssl
		- base64
		- npm
]]

local lunajson = require('lunajson')
local subproc = require('subproc')
local posix = require('posix')
local posix_stdio = require('posix.stdio')

-- config as necessary
local outdir = 'node-modules-cache'

-- Diagnostic logger: stringifies `arg`, writes it to stderr followed by a
-- newline, and flushes so the message shows up immediately.
local function dbg(arg)
	local line = tostring(arg) .. '\n'
	local err_stream = io.stderr
	err_stream:write(line)
	err_stream:flush()
end

--[[
	base16(data) -> string

	Encodes a binary string as lowercase hexadecimal, two digits per input
	byte. The byte -> hex lookup table is built once, when the enclosing
	closure is created.
]]
local base16 = (function()
	-- lut[b] is the two-digit lowercase hex representation of byte value b
	local lut = { }
	for byte = 0, 255 do
		lut[byte] = string.format('%02x', byte)
	end

	return function(data)
		-- collect the pieces and join once; repeated `..` in a loop copies
		-- the growing string on every iteration
		local parts = { }
		for i = 1, #data do
			parts[i] = lut[data:byte(i)]
		end
		return table.concat(parts)
	end
end)()

--[[
	Checks a file against an integrity string of the form
	"<algo>-<base64 digest>" (the format of the `integrity` fields this script
	reads from package-lock.json).

	The digest is computed by piping `openssl <algo> -binary <path>` through
	`base64 -w0` and comparing the output with the expected digest.

	hash: integrity string, e.g. "sha512-...."
	path: path of the file to check

	Returns true when the digests match, false otherwise (which includes the
	case where the pipeline produced no output, e.g. because the file is
	missing). Raises an error if `hash` does not have the expected form or if
	reading from the pipeline fails.
]]
local function integrity_check_file(hash, path)
	local algo, expected = hash:match('^([^-]+)-(.+)$')
	assert(algo, 'malformed integrity string: ' .. tostring(hash))

	local pfd = posix.popen_pipeline({
		{'openssl', algo, '-binary', path},
		{'base64', '-w0'}
	}, 'r')

	-- Read straight from the pipe descriptor until EOF (empty read) so that
	-- the pipeline can afterwards be closed with posix.pclose, which also
	-- waits for the child processes. Closing only the descriptor would leave
	-- both children behind as zombies on every call.
	local chunks = { }
	while true do
		local chunk, err = posix.read(pfd.fd, 4096)
		if not chunk then
			posix.pclose(pfd)
			error('reading digest of ' .. path .. ' failed: ' .. tostring(err))
		end
		if #chunk == 0 then
			break
		end
		chunks[#chunks + 1] = chunk
	end
	posix.pclose(pfd)

	return expected == table.concat(chunks)
end

--[[
	files are stored in the cache based on their integrity hash. This function
	takes an integrity hash and generates the path within the cache to where npm
	will put the file

	integrity: string of the form "<algo>-<base64 digest>"

	Returns the path relative to the cache directory:
	"_cacache/content-v2/<algo>/<xx>/<yy>/<rest>", where xx, yy and rest are
	the first two, next two and remaining hex digits of the decoded digest.

	Raises an error if `integrity` does not have the expected form, if reading
	from the decoding pipeline fails, or if the decoded digest is too short to
	be split into the three path components.
]]
local function cacache_path(integrity)
	local algo, hash = integrity:match('^([^-]+)-(.+)$')
	assert(algo, 'malformed integrity string: ' .. tostring(integrity))

	-- convert hash to binary by feeding it to `openssl base64 -d`; the first
	-- pipeline stage is a Lua function run in a child process that just
	-- prints the base64 text
	local pfd = posix.popen_pipeline({
		function()
			print(hash)
		end,
		{'openssl', 'base64', '-d'}
	}, 'r')

	-- Read from the pipe descriptor until EOF (empty read), then close the
	-- pipeline with posix.pclose so the child processes are waited for
	-- instead of being left behind as zombies.
	local chunks = { }
	while true do
		local chunk, err = posix.read(pfd.fd, 4096)
		if not chunk then
			posix.pclose(pfd)
			error('decoding integrity hash failed: ' .. tostring(err))
		end
		if #chunk == 0 then
			break
		end
		chunks[#chunks + 1] = chunk
	end
	posix.pclose(pfd)

	local hash_b16 = base16(table.concat(chunks))

	-- 2 levels of dirs
	local d1, d2, fname = hash_b16:match('^(..)(..)(.+)$')
	-- the match fails when the decoder produced (almost) nothing, e.g.
	-- because the digest was not valid base64
	assert(d1, 'could not decode integrity hash: ' .. integrity)

	return '_cacache/content-v2/' .. algo .. '/' .. d1 .. '/' .. d2 .. '/' .. fname
end


-- Main script: read package-lock.json from the current directory and make
-- sure every package that has a `resolved` URL is present, with the right
-- hash, in the npm cache under `outdir`.

-- assert() turns a missing/unreadable lock file into a readable error
-- instead of an "attempt to index a nil value" crash
local lock_file = assert(io.open('package-lock.json', 'r'))
local lock = lunajson.decode(assert(lock_file:read('a')))
lock_file:close()

-- The loop below iterates the lock file's "packages" table; fail early with a
-- clear message when the lock file does not have one.
assert(type(lock.packages) == 'table', 'package-lock.json has no "packages" table')

subproc('mkdir', '-p', outdir)

for pkgname, pkg in pairs(lock.packages) do
	dbg('evaluating ' .. pkgname)
	if pkg.resolved then
		-- the cache location is derived from the integrity hash, so an entry
		-- without one cannot be located or verified
		if not pkg.integrity then
			error('package ' .. pkgname .. ' has a resolved URL but no integrity hash')
		end

		local outfile = outdir .. '/' .. cacache_path(pkg.integrity)

		local needs_download = false

		-- download when the cached file is missing or fails verification
		local _, _, ecode = subproc('test', '-f', outfile)
		if ecode ~= 0 then
			dbg('outfile ' .. outfile .. ' does not exist.')
			needs_download = true
		elseif not integrity_check_file(pkg.integrity, outfile) then
			dbg('outfile ' .. outfile .. ' has the wrong hash.')
			needs_download = true
		end

		if needs_download then
			dbg('downloading ' .. pkg.resolved)
			print(subproc('npm', 'cache', '--cache', outdir, 'add', pkg.resolved))
			-- verify what npm stored; this also catches a failed download,
			-- since a missing file never matches
			dbg('checking hash of ' .. outfile)
			if integrity_check_file(pkg.integrity, outfile) then
				dbg('hash is correct')
			else
				dbg('hash is wrong')
				error('integrity check failed for ' .. pkg.resolved .. ' (' .. outfile .. ')')
			end
		else
			dbg('already have local copy')
		end
	end
	dbg('=====')
end

-- remove the _logs directory from the cache directory so it does not end up
-- in the final archive
print(subproc('rm', '-rv', outdir .. '/' .. '_logs'))

hope that helps