feat(attachments): set up garbage collector for the attachments module

This commit is contained in:
Matthieu Haineault 2025-08-25 16:42:05 -04:00
parent fe32081ed9
commit 35e2e38811
2 changed files with 88 additions and 2 deletions

View File

@ -2,10 +2,18 @@ import { ScheduleModule } from "@nestjs/schedule";
import { PrismaService } from "src/prisma/prisma.service";
import { ArchivalAttachmentService } from "./services/archival-attachment.service";
import { Module } from "@nestjs/common";
import { GarbargeCollectorService } from "./services/garbage-collector.service";
/**
 * Nest module bundling the attachment archival service and the blob garbage
 * collector, and exposing both to importing modules.
 *
 * Each metadata key is declared exactly once: duplicated `providers` /
 * `exports` keys in the same object literal are rejected by the compiler and,
 * at runtime, only the last occurrence would be kept.
 */
@Module({
  // Scheduler support for the @Cron-decorated garbage collector method.
  imports: [ScheduleModule.forRoot()],
  providers: [
    PrismaService,
    ArchivalAttachmentService,
    GarbargeCollectorService,
  ],
  exports: [
    ArchivalAttachmentService,
    GarbargeCollectorService,
  ],
})
export class ArchivalAttachmentModule {}

View File

@ -0,0 +1,78 @@
import { Injectable, Logger } from "@nestjs/common";
import { Cron } from "@nestjs/schedule";
import { PrismaService } from 'src/prisma/prisma.service';
import * as path from 'node:path';
import { promises as fsp } from 'node:fs';
import { resolveAttachmentsRoot } from "src/config/attachment.config";
/** Schedule used when GC_CRON is not set: every day at 04:15. */
const DEFAULT_GC_CRON = '15 4 * * *';

/** Batch size used when GC_BATCH_SIZE is missing or invalid. */
const DEFAULT_GC_BATCH_SIZE = 500;

/**
 * Cron expression for the scheduled run.
 *
 * Resolved once at module load: decorator arguments are evaluated when the
 * class is defined, before any instance exists, so the expression cannot be
 * read from an instance field.
 * NOTE(review): GC_CRON must already be present in process.env when this file
 * is imported — confirm the env loading order of the application.
 */
const GC_CRON_EXPRESSION = process.env.GC_CRON || DEFAULT_GC_CRON;

/**
 * Parses the configured batch size.
 *
 * Anything that is not a positive integer (unset, empty, NaN, 0, negative,
 * fractional) falls back to the default; a batch size of 0 would otherwise
 * keep the collection loop from ever terminating.
 */
function resolveBatchSize(raw: string | undefined): number {
  const parsed = Number(raw);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : DEFAULT_GC_BATCH_SIZE;
}

/** True when the error says the path does not exist (nothing left to delete). */
function isMissingPathError(err: unknown): boolean {
  return (
    typeof err === 'object' &&
    err !== null &&
    (err as { code?: unknown }).code === 'ENOENT'
  );
}

/** Short, log-friendly description of an unknown thrown value. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Removes blobs that are no longer referenced (refcount <= 0): their file, the
 * sibling variant files named `<file>.<suffix>`, and finally their database row.
 *
 * NOTE(review): the class name is misspelled ("Garbarge"); it is kept as-is
 * because other files import it under this name.
 */
@Injectable()
export class GarbargeCollectorService {
  private readonly logger = new Logger(GarbargeCollectorService.name);

  // Maximum number of blob rows handled per round (env: GC_BATCH_SIZE).
  private readonly batch_size = resolveBatchSize(process.env.GC_BATCH_SIZE);

  // Root directory of the attachment storage.
  private readonly root = resolveAttachmentsRoot();

  constructor(private readonly prisma: PrismaService) {}

  /**
   * Cron entry point. Failures are logged instead of rethrown so that a
   * rejected promise never escapes the scheduler callback.
   */
  @Cron(GC_CRON_EXPRESSION)
  async runScheduled() {
    try {
      await this.collect();
    } catch (err: unknown) {
      this.logger.error(`Garbage Collecting failed: ${describeError(err)}`);
    }
  }

  /**
   * Runs garbage collection round by round.
   *
   * Stops when a round fetched fewer rows than a full batch (nothing left) or
   * removed nothing at all (only blobs whose files cannot be deleted remain),
   * which guarantees termination.
   *
   * @returns the total number of blob rows removed
   */
  async collect() {
    let total = 0;
    let round = 0;
    for (;;) {
      round++;
      const { fetched, removed } = await this.collectBatch();
      total += removed;
      this.logger.log(`Garbage Collector round #${round} removed ${removed}`);
      if (fetched < this.batch_size || removed === 0) break;
    }
    this.logger.log(`Garbage Collecting done: total removed ${total}`);
    return { removed: total };
  }

  /**
   * Handles a single batch of orphan blobs.
   *
   * A row is deleted only when its files were removed (or were already
   * missing); blobs whose files could not be deleted keep their row so they
   * are retried later instead of leaving untracked files on disk.
   *
   * @returns how many rows were fetched and how many were removed
   */
  private async collectBatch(): Promise<{ fetched: number; removed: number }> {
    const blobs = await this.prisma.blobs.findMany({
      where: { refcount: { lte: 0 } },
      select: { sha256: true, storage_path: true },
      take: this.batch_size,
    });
    if (blobs.length === 0) return { fetched: 0, removed: 0 };

    // cleaned[i] tells whether every file of blobs[i] is gone from disk.
    const cleaned = await Promise.all(
      blobs.map((blob) => this.removeBlobFiles(blob.storage_path)),
    );
    const hashes = blobs
      .filter((_blob: unknown, index: number) => cleaned[index] === true)
      .map((blob) => blob.sha256);

    if (hashes.length > 0) {
      await this.prisma.blobs.deleteMany({ where: { sha256: { in: hashes } } });
    }
    return { fetched: blobs.length, removed: hashes.length };
  }

  /**
   * Deletes the original file of a blob and all its variants, i.e. the regular
   * files in the same directory whose name starts with `<basename>.`.
   *
   * @returns true when nothing belonging to the blob is left on disk
   */
  private async removeBlobFiles(storage_path: string): Promise<boolean> {
    const absolute_path = path.join(this.root, storage_path);
    const originalGone = await this.deleteFileIfExists(absolute_path);

    const dir = path.dirname(absolute_path);
    const variantPrefix = path.basename(absolute_path) + '.';
    let variantsGone = true;
    try {
      const entries = await fsp.readdir(dir, { withFileTypes: true });
      const targets = entries
        .filter((entry) => entry.isFile() && entry.name.startsWith(variantPrefix))
        .map((entry) => path.join(dir, entry.name));
      const results = await Promise.all(
        targets.map((target) => this.deleteFileIfExists(target)),
      );
      variantsGone = !results.includes(false);
    } catch (err: unknown) {
      // A missing directory simply means there are no variants to delete.
      if (!isMissingPathError(err)) {
        this.logger.warn(`Garbage Collector could not list ${dir}: ${describeError(err)}`);
        variantsGone = false;
      }
    }
    return originalGone && variantsGone;
  }

  /**
   * Deletes a file, treating "already missing" as success.
   *
   * @returns true when the file no longer exists, false (after logging a
   * warning) when the deletion failed for any other reason
   */
  private async deleteFileIfExists(filePath: string): Promise<boolean> {
    try {
      await fsp.unlink(filePath);
      return true;
    } catch (err: unknown) {
      if (isMissingPathError(err)) return true;
      this.logger.warn(`Garbage Collector could not delete ${filePath}: ${describeError(err)}`);
      return false;
    }
  }
}