Skip to content

Commit 2fb8ea0

Browse files
Fix: Streaming packfile write/read (#79)
* add new API for FsClient to support streamable writes
* write packfile directly into file, without collecting data into memory first
* optimize code for disk reading for packfile instead of loading everything into memory
* check out files in parallel in batches
* increase batch size on checkout to 10
* improve FileSystemAccessApiFsClient.rename function
1 parent 3e36d28 commit 2fb8ea0

10 files changed

Lines changed: 567 additions & 88 deletions

File tree

src/clients/fs/FileSystemAccessApiFsClient.ts

Lines changed: 78 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ import {
1010
EncodingOptions,
1111
FsClient,
1212
RmOptions,
13-
StatsLike
13+
StatsLike,
14+
WritableStreamHandle
1415
} from '../../'
1516

1617
import { BasicStats } from './BasicStats'
@@ -249,9 +250,7 @@ export class FileSystemAccessApiFsClient implements FsClient {
249250

250251
const oldFilepathStat = await this.stat(oldPath)
251252
if (oldFilepathStat.isFile()) {
252-
const data = await this.readFile(oldPath)
253-
await this.writeFile(newPath, data)
254-
await this.rm(oldPath)
253+
await this.renameFile(oldPath, newPath)
255254
} else if (oldFilepathStat.isDirectory()) {
256255
await this.mkdir(newPath)
257256
const sourceFolder = await this.getDirectoryByPath(oldPath)
@@ -263,6 +262,40 @@ export class FileSystemAccessApiFsClient implements FsClient {
263262
}
264263
}
265264

265+
private async renameFile(oldPath: string, newPath: string): Promise<void> {
266+
const { folderPath: oldFolder, leafSegment: oldName } = this.getFolderPathAndLeafSegment(oldPath)
267+
const { folderPath: newFolder, leafSegment: newName } = this.getFolderPathAndLeafSegment(newPath)
268+
269+
const oldDir = await this.getDirectoryByPath(oldFolder)
270+
const fileHandle = await this.getEntry<'file'>(oldDir, oldName, 'file')
271+
if (!fileHandle) {
272+
throw new ENOENT(oldPath)
273+
}
274+
275+
// Strategy 1: Native move() — zero-copy rename, supported in Chrome and Safari OPFS.
276+
// Always pass (directory, newName) form — Safari doesn't support the move(newName) shorthand.
277+
if (typeof fileHandle.move === 'function') {
278+
const newDir = oldFolder === newFolder ? oldDir : await this.getDirectoryByPath(newFolder)
279+
await fileHandle.move(newDir, newName)
280+
return
281+
}
282+
283+
// Strategy 2: Streaming copy — read in chunks, write via stream. Never loads entire file.
284+
const CHUNK_SIZE = 1024 * 1024
285+
const file = await fileHandle.getFile()
286+
const writable = await this.createWritableStream(newPath)
287+
let offset = 0
288+
while (offset < file.size) {
289+
const end = Math.min(offset + CHUNK_SIZE, file.size)
290+
const blob = file.slice(offset, end)
291+
const chunk = new Uint8Array(await blob.arrayBuffer())
292+
await writable.write(chunk)
293+
offset = end
294+
}
295+
await writable.close()
296+
await this.rm(oldPath)
297+
}
298+
266299
/**
267300
* Symlinks are not supported in the current implementation.
268301
* @throws Error: symlinks are not supported.
@@ -279,6 +312,44 @@ export class FileSystemAccessApiFsClient implements FsClient {
279312
throw new Error('Symlinks are not supported.')
280313
}
281314

315+
public async createWritableStream(path: string): Promise<WritableStreamHandle> {
316+
const { folderPath, leafSegment } = this.getFolderPathAndLeafSegment(path)
317+
const targetDir = await this.getDirectoryByPath(folderPath)
318+
319+
const fileHandle = await targetDir.getFileHandle(leafSegment, { create: true })
320+
const writable = await fileHandle.createWritable()
321+
322+
return {
323+
write: async (data: Uint8Array) => {
324+
// FileSystemWritableFileStream.write() may write the entire underlying
325+
// ArrayBuffer instead of just the TypedArray view when byteOffset > 0.
326+
// This happens with Buffer.slice() which shares the backing memory.
327+
// Create a clean copy when the view doesn't cover the full buffer.
328+
if (data.byteOffset !== 0 || data.buffer.byteLength !== data.byteLength) {
329+
data = new Uint8Array(data)
330+
}
331+
await writable.write(data)
332+
},
333+
close: async () => {
334+
await writable.close()
335+
}
336+
}
337+
}
338+
339+
public async readFileSlice(path: string, start: number, end: number): Promise<Uint8Array> {
340+
const { folderPath, leafSegment } = this.getFolderPathAndLeafSegment(path)
341+
const targetDir = await this.getDirectoryByPath(folderPath)
342+
343+
const fileHandle = await this.getEntry<'file'>(targetDir, leafSegment, 'file')
344+
if (!fileHandle) {
345+
throw new ENOENT(path)
346+
}
347+
348+
const file = await fileHandle.getFile()
349+
const blob = file.slice(start, end)
350+
return new Uint8Array(await blob.arrayBuffer())
351+
}
352+
282353
/**
283354
* Return true if an entry exists, false if it doesn't exist.
284355
* Rethrows errors that aren't related to entry existence.
@@ -388,13 +459,14 @@ export class FileSystemAccessApiFsClient implements FsClient {
388459

389460
if (this.options.useSyncAccessHandle) {
390461
const accessHandle = await fileHandle.createSyncAccessHandle()
391-
const dataArray = typeof data === 'string' ? this.textEncoder.encode(data) : data
462+
const dataArray = typeof data === 'string' ? this.textEncoder.encode(data) : new Uint8Array(data)
392463
accessHandle.write(dataArray.buffer as ArrayBuffer, { at: 0 })
393464
await accessHandle.flush()
394465
await accessHandle.close()
395466
} else {
396467
const writable = await fileHandle.createWritable()
397-
await writable.write(typeof data === 'string' ? data : data.buffer as ArrayBuffer)
468+
const writeData = typeof data === 'string' ? data : new Uint8Array(data)
469+
await writable.write(writeData)
398470
await writable.close()
399471
}
400472
}, 'writeFile', name)

src/commands/checkout.ts

Lines changed: 64 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -219,71 +219,74 @@ export async function _checkout({
219219
)
220220

221221
await GitIndexManager.acquire({ fs, gitdir, cache }, async function(index) {
222-
await Promise.all(
223-
ops
224-
.filter(
225-
([method]) =>
226-
method === 'create' ||
227-
method === 'create-index' ||
228-
method === 'update' ||
229-
method === 'mkdir-index'
230-
)
231-
.map(async function([method, fullpath, oid, mode, chmod]) {
232-
const modeNum = Number(mode)
233-
const filepath = `${dir}/${fullpath}`
234-
try {
235-
if (method !== 'create-index' && method !== 'mkdir-index') {
236-
const { object } = await readObject({ fs, cache, gitdir, oid })
237-
if (chmod) {
238-
// Note: the mode option of fs.write only works when creating files,
239-
// not updating them. Since the `fs` plugin doesn't expose `chmod` this
240-
// is our only option.
241-
await fs.rm(filepath)
242-
}
243-
if (modeNum === 0o100644) {
244-
// regular file
245-
await fs.write(filepath, object)
246-
} else if (modeNum === 0o100755) {
247-
// executable file
248-
await fs.write(filepath, object, { mode: 0o777 })
249-
} else if (modeNum === 0o120000) {
250-
// symlink
251-
await fs.writelink(filepath, object)
252-
} else {
253-
throw new InternalError(
254-
`Invalid mode 0o${modeNum.toString(8)} detected in blob ${oid}`
255-
)
256-
}
257-
}
258-
259-
const stats = (await fs.lstat(filepath))!
260-
// We can't trust the executable bit returned by lstat on Windows,
261-
// so we need to preserve this value from the TREE.
262-
// TODO: Figure out how git handles this internally.
263-
if (modeNum === 0o100755) {
264-
stats.mode = 0o755
222+
const writeOps = ops.filter(
223+
([method]) =>
224+
method === 'create' ||
225+
method === 'create-index' ||
226+
method === 'update' ||
227+
method === 'mkdir-index'
228+
)
229+
// Process files in small batches to balance I/O concurrency with memory usage.
230+
// Full Promise.all would OOM on large packfiles; purely sequential loses I/O overlap.
231+
const BATCH_SIZE = 10
232+
for (let i = 0; i < writeOps.length; i += BATCH_SIZE) {
233+
const batch = writeOps.slice(i, i + BATCH_SIZE)
234+
await Promise.all(batch.map(async ([method, fullpath, oid, mode, chmod]) => {
235+
const modeNum = Number(mode)
236+
const filepath = `${dir}/${fullpath}`
237+
try {
238+
if (method !== 'create-index' && method !== 'mkdir-index') {
239+
const { object } = await readObject({ fs, cache, gitdir, oid })
240+
if (chmod) {
241+
// Note: the mode option of fs.write only works when creating files,
242+
// not updating them. Since the `fs` plugin doesn't expose `chmod` this
243+
// is our only option.
244+
await fs.rm(filepath)
265245
}
266-
// Submodules are present in the git index but use a unique mode different from trees
267-
if (method === 'mkdir-index') {
268-
stats.mode = 0o160000
246+
if (modeNum === 0o100644) {
247+
// regular file
248+
await fs.write(filepath, object)
249+
} else if (modeNum === 0o100755) {
250+
// executable file
251+
await fs.write(filepath, object, { mode: 0o777 })
252+
} else if (modeNum === 0o120000) {
253+
// symlink
254+
await fs.writelink(filepath, object)
255+
} else {
256+
throw new InternalError(
257+
`Invalid mode 0o${modeNum.toString(8)} detected in blob ${oid}`
258+
)
269259
}
270-
index.insert({
271-
filepath: fullpath,
272-
stats,
273-
oid,
260+
}
261+
262+
const stats = (await fs.lstat(filepath))!
263+
// We can't trust the executable bit returned by lstat on Windows,
264+
// so we need to preserve this value from the TREE.
265+
// TODO: Figure out how git handles this internally.
266+
if (modeNum === 0o100755) {
267+
stats.mode = 0o755
268+
}
269+
// Submodules are present in the git index but use a unique mode different from trees
270+
if (method === 'mkdir-index') {
271+
stats.mode = 0o160000
272+
}
273+
index.insert({
274+
filepath: fullpath,
275+
stats,
276+
oid,
277+
})
278+
if (onProgress) {
279+
await onProgress({
280+
phase: 'Updating workdir',
281+
loaded: ++count,
282+
total,
274283
})
275-
if (onProgress) {
276-
await onProgress({
277-
phase: 'Updating workdir',
278-
loaded: ++count,
279-
total,
280-
})
281-
}
282-
} catch (e) {
283-
console.log(e)
284284
}
285-
})
286-
)
285+
} catch (e) {
286+
console.log(e)
287+
}
288+
}))
289+
}
287290
})
288291
}
289292

0 commit comments

Comments
 (0)