Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 113 additions & 17 deletions apps/sim/background/cleanup-logs.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import { db } from '@sim/db'
import { jobExecutionLogs, workflowExecutionLogs } from '@sim/db/schema'
import { jobExecutionLogs, pausedExecutions, workflowExecutionLogs } from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { task } from '@trigger.dev/sdk'
import { and, inArray, lt } from 'drizzle-orm'
import { and, eq, inArray, isNull, lt, notInArray, or, sql } from 'drizzle-orm'
import { type CleanupJobPayload, resolveCleanupScope } from '@/lib/billing/cleanup-dispatcher'
import {
batchDeleteByWorkspaceAndTimestamp,
chunkedBatchDelete,
type TableCleanupResult,
} from '@/lib/cleanup/batch-delete'
import { collectLargeValueKeys } from '@/lib/execution/payloads/large-execution-value'
import { snapshotService } from '@/lib/logs/execution/snapshot/service'
import { isUsingCloudStorage, StorageService } from '@/lib/uploads'
import { deleteFileMetadata } from '@/lib/uploads/server/metadata'
Expand All @@ -19,6 +20,38 @@ interface FileDeleteStats {
filesTotal: number
filesDeleted: number
filesDeleteFailed: number
largeValuesTotal: number
largeValuesDeleted: number
largeValuesDeleteFailed: number
}

const RESUMABLE_PAUSED_STATUSES = ['paused', 'partially_resumed', 'cancelling']

async function filterLargeValueKeysWithoutRetainedReferences(
keys: string[],
deletedLogIds: string[]
): Promise<string[]> {
if (keys.length === 0 || deletedLogIds.length === 0) return []

const unreferencedKeys: string[] = []
for (const key of Array.from(new Set(keys))) {
const [referencingLog] = await db
.select({ id: workflowExecutionLogs.id })
.from(workflowExecutionLogs)
.where(
and(
notInArray(workflowExecutionLogs.id, deletedLogIds),
sql`position(${key} in ${workflowExecutionLogs.executionData}::text) > 0`
)
)
.limit(1)

if (!referencingLog) {
unreferencedKeys.push(key)
}
}

return unreferencedKeys
}

async function deleteExecutionFiles(files: unknown, stats: FileDeleteStats): Promise<void> {
Expand All @@ -41,36 +74,103 @@ async function deleteExecutionFiles(files: unknown, stats: FileDeleteStats): Pro
)
}

async function deleteLargeValueStorageKeys(keys: string[], stats: FileDeleteStats): Promise<void> {
if (!isUsingCloudStorage() || keys.length === 0) return

const uniqueKeys = Array.from(new Set(keys))
stats.largeValuesTotal += uniqueKeys.length

await Promise.all(
uniqueKeys.map(async (key) => {
try {
await StorageService.deleteFile({ key, context: 'execution' })
await deleteFileMetadata(key)
stats.largeValuesDeleted++
} catch (error) {
stats.largeValuesDeleteFailed++
logger.error(`Failed to delete large execution value ${key}:`, { error })
}
})
)
}

async function cleanupWorkflowExecutionLogs(
workspaceIds: string[],
retentionDate: Date,
label: string
): Promise<TableCleanupResult & FileDeleteStats> {
const fileStats: FileDeleteStats = { filesTotal: 0, filesDeleted: 0, filesDeleteFailed: 0 }
const fileStats: FileDeleteStats = {
filesTotal: 0,
filesDeleted: 0,
filesDeleteFailed: 0,
largeValuesTotal: 0,
largeValuesDeleted: 0,
largeValuesDeleteFailed: 0,
}

const dbStats = await chunkedBatchDelete({
tableDef: workflowExecutionLogs,
workspaceIds,
tableName: `${label}/workflow_execution_logs`,
selectChunk: (chunkIds, limit) =>
db
.select({ id: workflowExecutionLogs.id, files: workflowExecutionLogs.files })
.select({
id: workflowExecutionLogs.id,
executionId: workflowExecutionLogs.executionId,
executionData: workflowExecutionLogs.executionData,
files: workflowExecutionLogs.files,
})
.from(workflowExecutionLogs)
.leftJoin(
pausedExecutions,
eq(pausedExecutions.executionId, workflowExecutionLogs.executionId)
)
.where(
and(
inArray(workflowExecutionLogs.workspaceId, chunkIds),
lt(workflowExecutionLogs.startedAt, retentionDate)
lt(workflowExecutionLogs.startedAt, retentionDate),
or(
isNull(pausedExecutions.status),
notInArray(pausedExecutions.status, RESUMABLE_PAUSED_STATUSES)
)
Comment thread
icecrasher321 marked this conversation as resolved.
)
)
Comment thread
icecrasher321 marked this conversation as resolved.
.limit(limit),
onBatch: async (rows) => {
for (const row of rows) await deleteExecutionFiles(row.files, fileStats)
const deletedLogIds = rows.map((row) => row.id)
const largeValueKeys = rows.flatMap((row) => collectLargeValueKeys(row.executionData))
const unreferencedLargeValueKeys = await filterLargeValueKeysWithoutRetainedReferences(
largeValueKeys,
deletedLogIds
)

for (const row of rows) {
await deleteExecutionFiles(row.files, fileStats)
}
await deleteLargeValueStorageKeys(unreferencedLargeValueKeys, fileStats)
},
})

return { ...dbStats, ...fileStats }
}

async function cleanupFreePlanOrphanedSnapshots(
payload: CleanupJobPayload,
retentionHours: number
): Promise<void> {
if (payload.plan !== 'free') {
return
}

try {
const retentionDays = Math.floor(retentionHours / 24)
const snapshotsCleaned = await snapshotService.cleanupOrphanedSnapshots(retentionDays + 1)
logger.info(`Cleaned up ${snapshotsCleaned} orphaned snapshots`)
} catch (snapshotError) {
logger.error('Error cleaning up orphaned snapshots:', { snapshotError })
}
}

export async function runCleanupLogs(payload: CleanupJobPayload): Promise<void> {
const startTime = Date.now()

Expand All @@ -82,12 +182,14 @@ export async function runCleanupLogs(payload: CleanupJobPayload): Promise<void>

const { workspaceIds, retentionHours, label } = scope

const retentionDate = new Date(Date.now() - retentionHours * 60 * 60 * 1000)

if (workspaceIds.length === 0) {
logger.info(`[${label}] No workspaces to process`)
await cleanupFreePlanOrphanedSnapshots(payload, retentionHours)
return
}

const retentionDate = new Date(Date.now() - retentionHours * 60 * 60 * 1000)
logger.info(
`[${label}] Cleaning ${workspaceIds.length} workspaces, cutoff: ${retentionDate.toISOString()}`
)
Expand All @@ -96,6 +198,9 @@ export async function runCleanupLogs(payload: CleanupJobPayload): Promise<void>
logger.info(
`[${label}] workflow_execution_logs files: ${workflowResults.filesDeleted}/${workflowResults.filesTotal} deleted, ${workflowResults.filesDeleteFailed} failed`
)
logger.info(
`[${label}] workflow_execution_logs large values: ${workflowResults.largeValuesDeleted}/${workflowResults.largeValuesTotal} deleted, ${workflowResults.largeValuesDeleteFailed} failed`
)
Comment thread
icecrasher321 marked this conversation as resolved.

await batchDeleteByWorkspaceAndTimestamp({
tableDef: jobExecutionLogs,
Expand All @@ -106,16 +211,7 @@ export async function runCleanupLogs(payload: CleanupJobPayload): Promise<void>
tableName: `${label}/job_execution_logs`,
})

// Snapshot cleanup runs only on the free job to avoid running it N times for N enterprise workspaces.
if (payload.plan === 'free') {
try {
const retentionDays = Math.floor(retentionHours / 24)
const snapshotsCleaned = await snapshotService.cleanupOrphanedSnapshots(retentionDays + 1)
logger.info(`Cleaned up ${snapshotsCleaned} orphaned snapshots`)
} catch (snapshotError) {
logger.error('Error cleaning up orphaned snapshots:', { snapshotError })
}
}
await cleanupFreePlanOrphanedSnapshots(payload, retentionHours)

const timeElapsed = (Date.now() - startTime) / 1000
logger.info(`[${label}] Job completed in ${timeElapsed.toFixed(2)}s`)
Expand Down
Loading
Loading