Skip to content

Commit

Permalink
Git sparseCheckout function
Browse files Browse the repository at this point in the history
Description tbd
  • Loading branch information
adamziel committed Sep 14, 2024
1 parent db78ea4 commit e818ec8
Show file tree
Hide file tree
Showing 10 changed files with 2,945 additions and 4,398 deletions.
6,826 changes: 2,435 additions & 4,391 deletions package-lock.json

Large diffs are not rendered by default.

10 changes: 9 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@
"test": "node --expose-gc node_modules/nx/bin/nx run-many --all --target=test",
"fix-asyncify": "node packages/php-wasm/node/bin/rebuild-while-asyncify-functions-missing.mjs",
"typecheck": "nx run-many --all --target=typecheck",
"prepare": "husky install"
"prepare": "husky install",
"postinstall": "npm run clone-isomorphic-git",
"clone-isomorphic-git": "if [ -d \"node_modules/isomorphic-git\" ]; then echo 'Already cloned'; else git clone https://github.com/isomorphic-git/isomorphic-git.git node_modules/isomorphic-git; cd node_modules/isomorphic-git && git checkout bbcdda7d && cd ../../; fi"
},
"private": true,
"dependencies": {
Expand All @@ -61,20 +63,26 @@
"@types/react-transition-group": "4.4.11",
"@types/wicg-file-system-access": "2023.10.5",
"ajv": "8.12.0",
"async-lock": "1.4.1",
"axios": "1.6.1",
"classnames": "^2.3.2",
"comlink": "^4.4.1",
"crc-32": "1.2.2",
"diff3": "0.0.4",
"express": "4.19.2",
"file-saver": "^2.0.5",
"fs-extra": "11.1.1",
"ini": "4.1.2",
"octokit": "3.1.1",
"octokit-plugin-create-pull-request": "5.1.1",
"pako": "1.0.10",
"react": "^18.2.25",
"react-dom": "^18.2.25",
"react-modal": "^3.16.1",
"react-redux": "8.1.3",
"react-transition-group": "4.4.5",
"sha.js": "2.4.11",
"sha1": "1.1.1",
"unzipper": "0.10.11",
"vite-plugin-api": "1.0.4",
"xterm": "5.3.0",
Expand Down
27 changes: 27 additions & 0 deletions packages/playground/storage/src/lib/git-sparse-checkout.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import { lsRefs, sparseCheckout } from './git-sparse-checkout';

describe('lsRefs', () => {
it('should return the latest commit hash for a given ref', async () => {
const refs = await lsRefs(
'https://github.com/WordPress/wordpress-playground',
'refs/heads/trunk'
);
expect(refs).toEqual({
'refs/heads/trunk': expect.stringMatching(/^[a-f0-9]{40}$/),
});
});
});

describe('sparseCheckout', () => {
it('should retrieve the requested files from a git repo', async () => {
const files = await sparseCheckout(
'https://github.com/WordPress/wordpress-playground.git',
'refs/heads/trunk',
['README.md']
);
expect(files).toEqual({
'README.md': expect.any(Uint8Array),
});
expect(files['README.md'].length).toBeGreaterThan(0);
});
});
308 changes: 308 additions & 0 deletions packages/playground/storage/src/lib/git-sparse-checkout.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,308 @@
/*
* Import internal data parsers and structures from isomorphic-git. These
* exports are not available in the npm version of isomorphic-git, which is why
* we use one from the git repository.
*
* This file heavily relies on isomorphic-git internals to parse Git data formats
* such as PACK, trees, deltas, etc.
*/
import { GitPktLine } from 'isomorphic-git/src/models/GitPktLine.js';
import { GitTree } from 'isomorphic-git/src/models/GitTree.js';
import { GitAnnotatedTag } from 'isomorphic-git/src/models/GitAnnotatedTag.js';
import { GitCommit } from 'isomorphic-git/src/models/GitCommit.js';
import { GitPackIndex } from 'isomorphic-git/src/models/GitPackIndex.js';
import { collect } from 'isomorphic-git/src/internal-apis.js';
import { parseUploadPackResponse } from 'isomorphic-git/src/wire/parseUploadPackResponse.js';
import { ObjectTypeError } from 'isomorphic-git/src/errors/ObjectTypeError.js';
import { Buffer } from 'buffer';

/**
* A polyfill for the Buffer class. We need it because isomorphic-git uses it internally.
* The isomorphic-git version released via npm shipes a Buffer implementation, but we're
* using a version cloned from the git repository which assumes a global Buffer is available.
*/
if (typeof window !== 'undefined') {
window.Buffer = Buffer;
}

/**
* Downloads specific files from a git repository.
* It uses the git protocol over HTTP to fetch the files. It only uses
* three HTTP requests regardless of the number of paths requested.
*
* @param repoUrl The URL of the git repository.
* @param fullyQualifiedBranchName The full name of the branch to fetch from (e.g., 'refs/heads/main').
* @param filesPaths An array of all the file paths to fetch from the repository. Does **not** accept
* patterns, wildcards, directory paths. All files must be explicitly listed.
* @returns A record where keys are file paths and values are the retrieved file contents.
*/
export async function sparseCheckout(
repoUrl: string,
fullyQualifiedBranchName: string,
filesPaths: string[]
) {
const refs = await lsRefs(repoUrl, fullyQualifiedBranchName);
const commitHash = refs[fullyQualifiedBranchName];
const treesIdx = await fetchWithoutBlobs(repoUrl, commitHash);
const objects = await resolveObjects(treesIdx, commitHash, filesPaths);

const blobsIdx = await fetchObjects(
repoUrl,
filesPaths.map((path) => objects[path].oid)
);

const fetchedPaths: Record<string, any> = {};
await Promise.all(
filesPaths.map(async (path) => {
fetchedPaths[path] = await extractGitObjectFromIdx(
blobsIdx,
objects[path].oid
);
})
);
return fetchedPaths;
}

/**
* Retrieves a list of files on matching git branches.
*
* @param repoUrl The URL of the git repository. For example: https://github.com/WordPress/gutenberg.git
* @param fullyQualifiedBranchPrefix The prefix of the branch names to fetch. For example: refs/heads/my-feature-branch
* @returns A map of branch names to their corresponding commit hashes.
*/
export async function lsRefs(
repoUrl: string,
fullyQualifiedBranchPrefix: string
) {
const packbuffer = Buffer.from(
await collect([
GitPktLine.encode(`command=ls-refs\n`),
GitPktLine.encode(`agent=git/2.37.3\n`),
GitPktLine.encode(`object-format=sha1\n`),
GitPktLine.delim(),
GitPktLine.encode(`peel\n`),
GitPktLine.encode(`ref-prefix ${fullyQualifiedBranchPrefix}\n`),
GitPktLine.flush(),
])
);

const response = await fetch(repoUrl + '/git-upload-pack', {
method: 'POST',
headers: {
Accept: 'application/x-git-upload-pack-advertisement',
'content-type': 'application/x-git-upload-pack-request',
'Content-Length': `${packbuffer.length}`,
'Git-Protocol': 'version=2',
},
body: packbuffer,
});

const refs: Record<string, string> = {};
for await (const line of parseGitResponseLines(response)) {
const spaceAt = line.indexOf(' ');
const ref = line.slice(0, spaceAt);
const name = line.slice(spaceAt + 1, line.length - 1);
refs[name] = ref;
}
return refs;
}

async function fetchWithoutBlobs(repoUrl: string, commitHash: string) {
const packbuffer = Buffer.from(
await collect([
GitPktLine.encode(
`want ${commitHash} multi_ack_detailed no-done side-band-64k thin-pack ofs-delta agent=git/2.37.3 filter \n`
),
GitPktLine.encode(`filter blob:none\n`),
GitPktLine.encode(`shallow ${commitHash}\n`),
GitPktLine.encode(`deepen 1\n`),
GitPktLine.flush(),
GitPktLine.encode(`done\n`),
GitPktLine.encode(`done\n`),
])
);

const response = await fetch(repoUrl + '/git-upload-pack', {
method: 'POST',
headers: {
Accept: 'application/x-git-upload-pack-advertisement',
'content-type': 'application/x-git-upload-pack-request',
'Content-Length': `${packbuffer.length}`,
},
body: packbuffer,
});

const iterator = streamToIterator(response.body!);
const parsed = await parseUploadPackResponse(iterator);
const packfile = Buffer.from(await collect(parsed.packfile));
const idx = await GitPackIndex.fromPack({
pack: packfile,
});
const originalRead = idx.read as any;
idx.read = async function ({ oid, ...rest }: { oid: string }) {
const result = await originalRead.call(this, { oid, ...rest });
result.oid = oid;
return result;
};
return idx;
}

async function resolveObjects(
idx: GitPackIndex,
commitHash: string,
paths: string[]
) {
const commit = await idx.read({
oid: commitHash,
});
readObject(commit);

const rootTree = await idx.read({ oid: commit.object.tree });
readObject(rootTree);

// Resolve refs to fetch
const resolvedOids: Record<string, any> = {};
for (const path of paths) {
let currentObject = rootTree;
const segments = path.split('/');
for (const segment of segments) {
if (currentObject.type !== 'tree') {
throw new Error(`Path not found in the repo: ${path}`);
}

let found = false;
for (const item of currentObject.object) {
if (item.path === segment) {
try {
currentObject = await idx.read({ oid: item.oid });
readObject(currentObject);
} catch (e) {
currentObject = item;
}
found = true;
break;
}
}
if (!found) {
throw new Error(`Path not found in the repo: ${path}`);
}
}
resolvedOids[path] = currentObject;
}
return resolvedOids;
}

// Request oid for each resolvedRef
async function fetchObjects(url: string, objectHashes: string[]) {
const packbuffer = Buffer.from(
await collect([
...objectHashes.map((objectHash) =>
GitPktLine.encode(
`want ${objectHash} multi_ack_detailed no-done side-band-64k thin-pack ofs-delta agent=git/2.37.3 \n`
)
),
GitPktLine.flush(),
GitPktLine.encode(`done\n`),
])
);

const response = await fetch(url + '/git-upload-pack', {
method: 'POST',
headers: {
Accept: 'application/x-git-upload-pack-advertisement',
'content-type': 'application/x-git-upload-pack-request',
'Content-Length': `${packbuffer.length}`,
},
body: packbuffer,
});

const iterator = streamToIterator(response.body!);
const parsed = await parseUploadPackResponse(iterator);
const packfile = Buffer.from(await collect(parsed.packfile));
return await GitPackIndex.fromPack({
pack: packfile,
});
}

async function extractGitObjectFromIdx(idx: GitPackIndex, objectHash: string) {
const tree = await idx.read({ oid: objectHash });
readObject(tree);

if (tree.type === 'blob') {
return tree.object;
}

const files: Record<string, any> = {};
for (const { path, oid, type } of tree.object) {
if (type === 'blob') {
const object = await idx.read({ oid });
readObject(object);
files[path] = object.object;
} else if (type === 'tree') {
files[path] = await extractGitObjectFromIdx(idx, oid);
}
}
return files;
}

function readObject(result: any) {
if (!(result.object instanceof Buffer)) {
return;
}
switch (result.type) {
case 'commit':
result.object = GitCommit.from(result.object).parse();
break;
case 'tree':
result.object = GitTree.from(result.object).entries();
break;
case 'blob':
result.object = new Uint8Array(result.object);
result.format = 'content';
break;
case 'tag':
result.object = GitAnnotatedTag.from(result.object).parse();
break;
default:
throw new ObjectTypeError(
result.oid,
result.type,
'blob|commit|tag|tree'
);
}
}

async function* parseGitResponseLines(response: Response) {
const text = await response.text();
let at = 0;

while (at <= text.length) {
const lineLength = parseInt(text.substring(at, at + 4), 16);
if (lineLength === 0) {
break;
}
const line = text.substring(at + 4, at + lineLength);
yield line;
at += lineLength;
}
}

function streamToIterator(stream: any) {
// Use native async iteration if it's available.
if (stream[Symbol.asyncIterator]) {
return stream;
}
const reader = stream.getReader();
return {
next() {
return reader.read();
},
return() {
reader.releaseLock();
return {};
},
[Symbol.asyncIterator]() {
return this;
},
};
}
2 changes: 1 addition & 1 deletion packages/playground/storage/src/vitest-setup-file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
*
* @see tests.setupFiles in vite.config.ts
*/
import '@php-wasm/node-polyfills';
// import '@php-wasm/node-polyfills';
3 changes: 2 additions & 1 deletion packages/playground/storage/tsconfig.lib.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"vite/client",
"@nx/react/typings/cssmodule.d.ts",
"@nx/react/typings/image.d.ts",
"@types/wicg-file-system-access"
"@types/wicg-file-system-access",
"../../../types/isomorphic-git.d.ts"
]
},
"include": ["src/**/*.ts"],
Expand Down
Loading

0 comments on commit e818ec8

Please sign in to comment.