Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Node script to copy projects from staging or prod #1816

Merged
merged 27 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
f41e5c7
Bash script to copy projects from staging or prod
rmunn May 21, 2024
67930fb
Fetch project assets with rsync and retry failures
rmunn May 22, 2024
93ba2d5
Clean up temp dir and remove debugging code
rmunn May 22, 2024
4bdf4f6
Also check for rsync and warn if not present
rmunn May 22, 2024
6578b4d
Remove one unnecessary output line
rmunn May 22, 2024
43d2956
Fix permission issues on copied files
rmunn May 22, 2024
c3d2447
Better solution for file ownership of copied files
rmunn May 22, 2024
e13eaa6
WIP converting script to Node.JS
rmunn May 22, 2024
0ddfe00
Finish converting script to Node.JS
rmunn May 23, 2024
0cbe33f
Rename backup script now that it works
rmunn May 23, 2024
6d9bd55
Remove no-longer-used query
rmunn May 23, 2024
6f9a9cf
Allow selecting qa or prod via 2nd command-line arg
rmunn May 23, 2024
23c9c9d
Fix kubectl port forward on production server
rmunn May 23, 2024
07ce62d
Only set up rsync if needed
rmunn May 23, 2024
ff0f165
Only include assets that are really there
rmunn May 24, 2024
f3e393b
Merge branch 'develop' into feat/backup-projects-to-local-mongodb
rmunn May 27, 2024
801f723
Remove now-completed TODO comments
rmunn Jun 10, 2024
80d90be
Stop using `echo -n` as it may cause Windows issues
rmunn Jun 10, 2024
a8dbffa
Make backup script slightly more cross-platform
rmunn Jun 11, 2024
04a3201
Mongo doesn't like `.insertMany([])`
rmunn Jun 11, 2024
d2ce947
Address one last TODO comment
rmunn Jun 11, 2024
038451e
Remove one unnecessary line of output
rmunn Jun 11, 2024
6bd5c1d
Better EDIT THIS section, add explanatory comments
rmunn Jun 11, 2024
d1aae7b
Explain how to select prod if CLI arg missing
rmunn Jun 11, 2024
b22a0dd
Clean up assets tarball when done
rmunn Jun 12, 2024
45d8294
Add note re drive letter bug in kubectl cp
rmunn Jun 12, 2024
1e3ced2
Rewrite path, to workaround kubectl interpreting Windows drive letter…
myieye Jun 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
260 changes: 260 additions & 0 deletions backup.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
// TODO: Rename to backup.mjs before committing
rmunn marked this conversation as resolved.
Show resolved Hide resolved

import { execSync, spawn } from "child_process";
import { existsSync, mkdtempSync, rmSync, statSync } from "fs";
import { MongoClient, ObjectId } from "mongodb";
import os from "os";
import path from "path";
import net from "net";

// ===== EDIT THIS =====

const stagingContext = "dallas-rke";
const prodContext = "aws-rke";

// Choose one, comment out the other
const context = stagingContext;
// const context = prodContext

// ===== END of EDIT THIS =====

// Create a temp dir reliably
const tempdir = mkdtempSync(path.join(os.tmpdir(), "lfbackup-"));
let portForwardProcess;
let localConn;
let remoteConn;

/**
 * Release everything this script acquired: the kubectl port-forward child
 * process, the temporary directory, and both Mongo connections.
 *
 * This function is registered as a listener for the process "exit" and
 * "uncaughtExceptionMonitor" events. Node does not wait for promises returned
 * by those listeners, so every step that must not be skipped (killing the
 * child process, deleting the temp dir) runs synchronously, before the first
 * `await`. Closing the Mongo connections is best-effort.
 *
 * @returns {Promise<void>} settles once both close attempts have settled
 */
async function cleanup() {
  // kill() only sends a signal and returns a boolean, so there is nothing to await
  if (portForwardProcess) portForwardProcess.kill();
  if (existsSync(tempdir)) {
    console.warn(`Cleaning up temporary directory ${tempdir}...`);
    rmSync(tempdir, { recursive: true, force: true });
  }
  // Start both closes immediately so that neither one waits on the other
  const closing = [localConn, remoteConn].filter(Boolean).map((conn) => conn.close());
  // allSettled: a failed close must not turn into an unhandled rejection during shutdown
  await Promise.allSettled(closing);
}

/**
 * Ask the operating system for a TCP port that is currently unused.
 *
 * A throwaway server listens on port 0 (which makes Node pick a free port),
 * the assigned port number is read back, and the server is closed again
 * before the number is handed to the caller.
 *
 * @returns {Promise<number>} the port that was assigned to the throwaway server
 */
async function randomFreePort() {
  const probe = net.createServer();
  const port = await new Promise((resolve) => {
    probe.listen(0, () => resolve(probe.address().port));
  });
  await new Promise((resolve) => {
    probe.close(() => resolve());
  });
  return port;
}

// Run cleanup() when the process exits and when an uncaught exception is reported
for (const eventName of ["exit", "uncaughtExceptionMonitor"]) {
  process.on(eventName, cleanup);
}

/**
 * Execute a command synchronously and capture what it prints to stdout.
 *
 * @param {string} cmd - command line handed to execSync
 * @returns {string} the command's stdout as a string, with trailing whitespace removed
 * @throws propagates any error thrown by execSync
 */
function run(cmd) {
  const stdout = String(execSync(cmd));
  return stdout.trimEnd();
}

/**
 * List the names of all contexts known to the local kubectl configuration.
 *
 * @returns {string[]} one entry per context name; empty when kubectl prints nothing
 */
function getContexts() {
  const stdout = run("kubectl config get-contexts -o name");
  // Accept both \n and \r\n line endings and drop blank lines, so that a stray
  // carriage return never ends up inside a context name
  return stdout
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
}

// Sanity check: both the staging and the prod context must be known to kubectl

const contexts = getContexts();
for (const [notFoundMessage, wanted] of [
  ["Staging context not found. Tried", stagingContext],
  ["Prod context not found. Tried", prodContext],
]) {
  if (contexts.includes(wanted)) continue;
  console.warn(notFoundMessage, wanted, "but did not find it in", contexts);
  console.warn("Might need to edit the top level of this file and try again");
  process.exit(1);
}

// Process args
//
// The single required argument is either a bare project ID or a URL whose
// path contains the ID after /app/lexicon/.

if (process.argv.length < 3) {
  console.warn("Please pass project ID or URL as argument, e.g. node backup.mjs 5dbf805650b51914727e06c4");
  process.exit(2);
}

const lexiconPrefix = "/app/lexicon/";
let projId;
const arg = process.argv[2];
if (URL.canParse(arg)) {
  const { pathname } = new URL(arg);
  if (pathname.startsWith(lexiconPrefix)) {
    // Keep only the first path segment after the prefix, so a trailing slash
    // or a deeper path does not end up inside the ID
    projId = pathname.substring(lexiconPrefix.length).split("/")[0];
  } else {
    projId = pathname; // Will probably fail, but worth a try
  }
} else {
  projId = arg;
}

projId = projId.trim();
console.log("Project ID:", projId);

// Start running

console.warn("Setting up kubectl port forwarding for remote Mongo...");
// Local end of the port forward: a port that randomFreePort() reported as free, rather than a hardcoded one
const remoteMongoPort = await randomFreePort();
rmunn marked this conversation as resolved.
Show resolved Hide resolved
// Forward the local port chosen above to port 27017 of svc/db in the selected context
portForwardProcess = spawn("kubectl", [`--context=${context}`, "port-forward", "svc/db", `${remoteMongoPort}:27017`], {
  stdio: "pipe",
});
// Resolves the first time kubectl writes anything to stdout, which is taken as the sign that forwarding is up
const portForwardingPromise = new Promise((resolve) => {
  portForwardProcess.stdout.on("data", () => resolve());
});
// Any output at all on stderr is treated as a fatal port-forwarding failure
portForwardProcess.stderr.on("data", (chunk) => {
  console.warn("Port forwarding failed:");
  console.warn(chunk.toString());
  console.warn("Exiting");
  process.exit(1);
});

console.warn("Setting up local Mongo connection...");

// The command prints the published address as HOST:PORT. Take whatever follows
// the LAST colon, so that a host part which itself contains colons (presumably
// possible for IPv6 addresses — confirm against Docker docs) cannot shift the port
// out of position.
const localMongoPort = run("docker compose port db 27017").split(":").at(-1).trim();
if (!/^\d+$/.test(localMongoPort)) {
  console.warn(`Could not determine the local Mongo port from "docker compose port db 27017" (got "${localMongoPort}")`);
  console.warn("Exiting");
  process.exit(1);
}
const localConnStr = `mongodb://admin:pass@localhost:${localMongoPort}/?authSource=admin`;
localConn = await MongoClient.connect(localConnStr);

// The local admin user becomes the owner of the copied project further down
const localAdmin = await localConn.db("scriptureforge").collection("users").findOne({ username: "admin" });
if (!localAdmin) {
  // findOne yields null when nothing matches; fail with an explanation instead of a TypeError
  console.warn('No user with username "admin" found in the local scriptureforge database');
  console.warn("Exiting");
  process.exit(1);
}
const adminId = localAdmin._id.toString();
console.log(`Local admin ID: ${adminId}`);
console.warn("If that doesn't look right, hit Ctrl+C NOW");

// Wait until kubectl has produced output on stdout before connecting through the forwarded port
await portForwardingPromise;
console.warn("Port forwarding is ready. Setting up remote Mongo connection...");

// The remote connection string carries no credentials, unlike the local one
const remoteConnStr = `mongodb://localhost:${remoteMongoPort}`;
remoteConn = await MongoClient.connect(remoteConnStr);
rmunn marked this conversation as resolved.
Show resolved Hide resolved
console.warn("Remote Mongo connection established. Fetching project record...");

// Get project record
const project = await remoteConn
  .db("scriptureforge")
  .collection("projects")
  .findOne({ _id: new ObjectId(projId) });
if (!project) {
  // findOne yields null when nothing matches; fail with an explanation instead of a TypeError
  console.warn(`No project with ID ${projId} found in remote Mongo (context ${context})`);
  console.warn("Exiting");
  process.exit(1);
}
console.log("Project code:", project.projectCode);

// The project's entries live in a separate database named after the project code
const dbname = `sf_${project.projectCode}`;
// In the local copy, the local admin is the only member and the owner
project.users = { [adminId]: { role: "project_manager" } };
project.ownerRef = new ObjectId(adminId);
console.warn(project.users);

// Mongo removed the .copyDatabase method in version 4.2, whose release notes said to just use mongodump/mongorestore if you want to do that

// The entry database is copied BEFORE the project record is written, so the
// local project record never exists without its entry database.
console.warn(`Copying ${dbname} database...`);
const collections = await remoteConn.db(dbname).collections();
for (const remoteColl of collections) {
  const name = remoteColl.collectionName;
  console.log(` Copying ${name} collection...`);
  const indexes = await remoteColl.indexes();
  const docs = await remoteColl.find().toArray();
  const localColl = localConn.db(dbname).collection(name);
  // Start from a clean slate: remove any previous local copy of this collection
  try {
    await localColl.drop();
  } catch (_) {} // Throws if collection doesn't exist, which is fine
  try {
    await localColl.dropIndexes();
  } catch (_) {} // Throws if collection doesn't exist, which is fine
  await localColl.createIndexes(indexes);
  // insertMany rejects an empty array, so skip it for empty collections
  if (docs.length > 0) {
    await localColl.insertMany(docs);
  }
  console.log(` ${docs.length} documents copied`);
}
console.warn(`${dbname} database successfully copied`);

console.warn("Copying project record...");
// upsert: replaces an existing local record with the same ID, or inserts a new one
await localConn
  .db("scriptureforge")
  .collection("projects")
  .findOneAndReplace({ _id: new ObjectId(projId) }, project, { upsert: true });

// NOTE: mongodump/mongorestore approach below can be revived once Kubernetes 1.30 is installed on client *and* server, so kubectl exec is finally reliable

// console.warn(`About to try fetching ${dbname} database from remote, will retry until success`);
// let done = false;
// while (!done) {
// try {
// console.warn(`Fetching ${dbname} database...`);
// execSync(
// `kubectl --context="${context}" exec -i deploy/db -- mongodump --archive -d "${dbname}" > ${tempdir}/dump`,
// );
// console.warn(`Uploading to local ${dbname} database...`);
// execSync(`docker exec -i lf-db mongorestore --archive --drop -d "${dbname}" ${localConnStr} < ${tempdir}/dump`);
// console.warn(`Successfully uploaded ${dbname} database`);
// done = true;
// } catch (err) {
// console.warn("mongodump failed, retrying...");
// }
// }

// Install rsync in the remote app container unless `which rsync` already finds it
console.warn("Setting up rsync on target container...");
execSync(
  `kubectl exec --context="${context}" -c app deploy/app -- bash -c "which rsync || (apt update && apt install rsync -y)"`,
);

// Pack the project's assets directory into a single tar file under /tmp in the remote container
console.warn("Creating assets tarball in remote...");
execSync(
  `kubectl --context="${context}" exec -c app deploy/app -- tar chf /tmp/assets-${dbname}.tar --owner=www-data --group=www-data -C "/var/www/html/assets/lexicon/${dbname}" .`,
);
// Fifth space-separated field of `ls -l` output, used below as the expected size of the download
const sizeStr = run(
  `kubectl --context="${context}" exec -c app deploy/app -- sh -c 'ls -l /tmp/assets-${dbname}.tar | cut -d" " -f5'`,
);
const correctSize = +sizeStr;
if (!/^\d+$/.test(sizeStr)) {
  // Without a numeric size, the later "downloaded fewer bytes than expected" comparison can never trigger
  console.warn(`Could not parse remote tarball size from "${sizeStr}"; a truncated download will not be detected`);
}
console.warn(`Assets tarball size is ${sizeStr}`);

console.warn("Getting name of remote app pod...");
const pod = run(
  `kubectl --context="${context}" get pod -o jsonpath="{.items[*]['metadata.name']}" -l app=app --field-selector "status.phase=Running"`,
);

// First attempt: plain kubectl cp. `failed` tells the code below whether the rsync fallback is needed.
console.warn("Trying to fetch assets tarball with kubectl cp...");
let failed;
try {
  execSync(`kubectl --context="${context}" cp ${pod}:/tmp/assets-${dbname}.tar ${tempdir}/assets-${dbname}.tar`);
  failed = false;
} catch (_) {
  console.warn("kubectl cp failed. Will try to continue with rsync...");
  failed = true;
}
if (!failed) {
  // A copy that exited cleanly is still rejected when it is shorter than the remote file
  const { size: localSize } = statSync(`${tempdir}/assets-${dbname}.tar`);
  failed = localSize < correctSize;
  if (failed) {
    console.warn(`Got only ${localSize} bytes instead of ${correctSize}. Will try to continue with rsync...`);
  }
}
// Fallback when kubectl cp did not deliver the complete tarball: run rsync, with
// kubectl exec as its remote shell, and keep retrying until it exits successfully
if (failed) {
  console.warn("\n===== IMPORTANT NOTE =====");
  console.warn(
    "This may (probably will) stall at 100%. You'll have to find the rsync process and kill it. Sorry about that.",
  );
  console.warn("===== IMPORTANT NOTE =====\n");
  for (;;) {
    try {
      execSync(
        `rsync -v --partial --info=progress2 --rsync-path="/tmp/" --rsh="kubectl --context=${context} exec -i -c app deploy/app -- " "rsync:/tmp/assets-${dbname}.tar" "${tempdir}/"`,
        { stdio: "inherit" }, // Allows us to see rsync progress
      );
      break;
    } catch (err) {
      console.warn(`Rsync failed with error: ${err}. Retrying...`);
    }
  }
}
console.warn("Uploading assets tarball to local...");
// Make sure the target directory exists in the local app container; a failing chown is ignored (`|| true`)
execSync(
  `docker exec lf-app mkdir -p "/var/www/html/assets/lexicon/${dbname}" ; docker exec lf-app chown www-data:www-data "/var/www/html/assets/lexicon/${dbname}" || true`,
);
// `docker cp -` reads a tar archive from stdin and extracts it into the target directory
execSync(`docker cp - lf-app:/var/www/html/assets/lexicon/${dbname}/ < ${tempdir}/assets-${dbname}.tar`);
console.warn("Assets successfully uploaded");

// Don't leave the tarball behind in the remote container's /tmp.
// Best-effort: the copy itself has already succeeded, so a failure here is only reported.
try {
  execSync(`kubectl --context="${context}" exec -c app deploy/app -- rm -f /tmp/assets-${dbname}.tar`);
} catch (err) {
  console.warn(`Could not remove /tmp/assets-${dbname}.tar from the remote container: ${err}`);
}

process.exit(0);
107 changes: 0 additions & 107 deletions backup.sh

This file was deleted.

Loading
Loading