feat(campaigns/send): real SMTP error + auto-retry + one-click Renvoyer

The send worker used to write "SMTP send returned false (see hub logs)"
on every failure, forcing the operator to SSH into the box to find the
actual cause. Now we capture the real reason and surface it in the UI.

Three changes:

1. lib/email.js exposes getLastError() — a side-channel for the most
   recent nodemailer error message, cleared at the start of every
   sendEmail call. Legacy "if (await sendEmail(...))" callers stay on
   the false-return contract; only the campaign worker reads the
   side-channel for detailed error capture.

2. The worker now attempts each recipient up to 3 times (initial send +
   2 retries with 2s/5s backoff). Most "Unexpected socket close"-style
   transient Mailjet errors recover on the second attempt. We observed
   exactly this case for Myriam Bergevin in cmp-20260522-2d4605 — a
   single socket close interrupted 1 of 202 sends; auto-retry would
   have caught it. retry_count is now stored on the recipient.

3. POST /campaigns/:id/recipients/:row/retry resets a single failed
   row back to pending and re-fires the worker. Surfaced in the
   detail-page table as a small 🔁 button next to the error text on
   any row with status=failed. Useful when auto-retry exhausted its
   3 attempts on a one-off transient.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
louispaulb 2026-05-22 13:29:25 -04:00
parent d5ee57acf2
commit 6577bb79bc
4 changed files with 109 additions and 23 deletions

View File

@ -94,6 +94,16 @@ export function revokeGift (campaignId, rowIndex) {
)
}
/**
 * Ask the hub to re-attempt a single failed recipient of a campaign.
 *
 * Issues POST /campaigns/:id/recipients/:row/retry through hubFetch. Meant
 * for one-off failures that the worker's automatic retries did not recover
 * (e.g. a rare transient Mailjet socket close).
 *
 * @param {string} campaignId - campaign identifier (URI-encoded into the path)
 * @param {number} rowIndex - index of the recipient row to retry
 * @returns whatever hubFetch returns for the request
 */
export function retryRecipient (campaignId, rowIndex) {
  const safeId = encodeURIComponent(campaignId)
  const endpoint = `/campaigns/${safeId}/recipients/${rowIndex}/retry`
  const requestOptions = { method: 'POST' }
  return hubFetch(endpoint, requestOptions)
}
// Build the URL the browser hits to download the per-recipient CSV report
// (giftbit shortlink ↔ email ↔ name ↔ status). The hub serves it with the
// proper Content-Disposition so an <a download> click triggers a save.

View File

@ -105,7 +105,13 @@
</template>
<template v-slot:body-cell-error="props">
<q-td :props="props">
<span v-if="props.row.error" class="text-negative text-caption">{{ props.row.error }}</span>
<div v-if="props.row.error" class="row items-center q-gutter-xs">
<span class="text-negative text-caption" style="max-width:240px;word-break:break-word">{{ props.row.error }}</span>
<q-btn v-if="props.row.status === 'failed'" flat dense size="xs" color="primary"
icon="refresh" @click="retryFailedRow(props.row)">
<q-tooltip>Renvoyer ce destinataire (réessaie l'envoi SMTP avec 3 tentatives)</q-tooltip>
</q-btn>
</div>
</q-td>
</template>
</q-table>
@ -121,7 +127,7 @@
import { ref, computed, onMounted, onBeforeUnmount } from 'vue'
import { useRoute } from 'vue-router'
import { useQuasar } from 'quasar'
import { getCampaign, sendCampaign, campaignSseUrl, campaignReportCsvUrl } from 'src/api/campaigns'
import { getCampaign, sendCampaign, campaignSseUrl, campaignReportCsvUrl, retryRecipient } from 'src/api/campaigns'
const route = useRoute()
const $q = useQuasar()
@ -224,6 +230,20 @@ async function relaunch () {
}
}
/**
 * Re-send a single failed recipient from the detail table.
 *
 * The row's position in campaign.value.recipients is used as the row index
 * passed to retryRecipient(id, index). The hub then resets that row to
 * pending and re-fires the worker; the UI is expected to update through the
 * SSE channel, so no manual reload is needed.
 *
 * @param {object} row - recipient row object handed over by the table slot
 * @returns {Promise<void>}
 */
async function retryFailedRow (row) {
  const recipients = campaign.value?.recipients || []
  const rowIdx = recipients.indexOf(row)
  if (rowIdx < 0) {
    // The row is matched by object identity. If it is no longer part of the
    // loaded campaign, tell the operator instead of ignoring the click.
    $q.notify({ type: 'negative', message: 'Renvoi impossible : destinataire introuvable dans la campagne' })
    return
  }
  try {
    await retryRecipient(id, rowIdx)
    $q.notify({ type: 'positive', message: `Renvoi en cours pour ${row.firstname || row.email}` })
  } catch (e) {
    // Fall back to the raw rejection value so a non-Error rejection does not
    // render as "undefined".
    $q.notify({ type: 'negative', message: `Renvoi impossible : ${e?.message ?? e}` })
  }
}
onMounted(async () => {
await load()
// Auto-subscribe to SSE if still running (or about to run)

View File

@ -1156,31 +1156,48 @@ async function sendCampaignAsync (id) {
const customId = `${id}:${i}`
r.mailjet_custom_id = customId
// email.sendEmail returns the nodemailer info object on success
// (truthy, with .messageId), or `false` on failure (error logged in
// lib/email.js). It doesn't throw. We treat falsy = failed.
let sendRes
try {
sendRes = await email.sendEmail({
to,
subject: p.subject || 'Un cadeau pour toi, de la part de TARGO',
html,
from: p.from || cfg.MAIL_FROM,
headers: { 'X-MJ-CustomID': customId },
})
} catch (e) {
sendRes = false
r.error = String(e.message || e).slice(0, 500)
// Auto-retry: most SMTP failures in this campaign (we observed
// "Unexpected socket close" once per ~200 sends) are transient
// Mailjet connection hiccups. Retry up to 2 times with backoff
// before marking the recipient as failed. The retry doesn't
// require any operator action and adds at most 7s of backoff
// (2s + 5s) per failing recipient.
const RETRY_BACKOFF_MS = [2000, 5000]
const attempts = 1 + RETRY_BACKOFF_MS.length
let sendRes = null
let lastErrMessage = null
// Number of attempts actually made. Kept outside the loop because the
// loop counter is not visible after the loop and retry_count needs it.
let attemptsUsed = 0
for (let attempt = 1; attempt <= attempts; attempt++) {
  attemptsUsed = attempt
  try {
    sendRes = await email.sendEmail({
      to,
      subject: p.subject || 'Un cadeau pour toi, de la part de TARGO',
      html,
      from: p.from || cfg.MAIL_FROM,
      headers: { 'X-MJ-CustomID': customId },
    })
  } catch (e) {
    sendRes = false
    // Same 500-char cap as the side-channel path below, so a thrown error
    // cannot store an unbounded string on the recipient.
    lastErrMessage = String((e && e.message) || e).slice(0, 500)
  }
  // NOTE(review): success requires a defined messageId; a truthy result
  // without that property is treated as a failure and retried — confirm
  // the transport always sets it.
  if (sendRes && sendRes.messageId !== undefined) break // success
  // Falsy return — pick up the real reason from email.js side-channel
  const le = email.getLastError && email.getLastError()
  if (le) lastErrMessage = String(le.message || le).slice(0, 500)
  if (attempt < attempts) {
    const waitMs = RETRY_BACKOFF_MS[attempt - 1]
    log(`campaign ${id} recipient ${i} attempt ${attempt} failed (${lastErrMessage || 'unknown'}); retry in ${waitMs}ms`)
    await new Promise(rs => setTimeout(rs, waitMs))
  }
}
if (sendRes && sendRes.messageId !== undefined) {
  r.mailjet_uuid = sendRes.messageId || null // SMTP Message-ID for reference
  r.status = 'sent'
  r.sent_at = new Date().toISOString()
  r.error = null
  // Retries actually consumed: 0 means the first attempt succeeded.
  // (Using the constant `attempts` here would always record the maximum.)
  r.retry_count = attemptsUsed - 1
} else {
  r.status = 'failed'
  r.error = lastErrMessage || 'SMTP send failed (no detail available)'
  r.retry_count = RETRY_BACKOFF_MS.length
  log(`campaign ${id} recipient ${i} failed after ${attempts} attempts:`, r.error)
}
saveCampaign(campaign)
@ -2078,6 +2095,35 @@ async function handle (req, res, method, path) {
return json(res, 202, { id, status: 'sending' })
}
// POST /campaigns/:id/recipients/:row/retry — reset a single failed
// recipient back to "pending" and re-fire the worker so it picks the
// row up on the next pass. Used by the "Renvoyer" button in the UI
// for one-off transient failures that didn't recover via auto-retry.
//
// Responses: 404 unknown campaign or row index, 400 row is not "failed",
// 409 a worker is already registered for this campaign, 202 retry accepted.
const retryMatch = path.match(/^\/campaigns\/([^/]+)\/recipients\/(\d+)\/retry$/)
if (retryMatch && method === 'POST') {
  const id = retryMatch[1]
  const c = loadCampaign(id)
  if (!c) return json(res, 404, { error: 'not found' })
  const i = parseInt(retryMatch[2], 10)
  const r = (c.recipients || [])[i]
  if (!r) return json(res, 404, { error: 'recipient not found' })
  if (r.status !== 'failed') return json(res, 400, { error: `recipient status is "${r.status}", only "failed" can be retried` })
  if (activeWorkers.has(id)) return json(res, 409, { error: 'campaign worker already running' })
  r.status = 'pending'
  r.error = null
  // Clear the previous retry counter so the new attempt gets its own 3
  // attempts inside the worker. Keep mailjet_uuid in case it WAS partially
  // accepted by Mailjet — we'll overwrite on a successful resend.
  r.retry_count = 0
  // Also force the global campaign status back to 'sending' so the UI
  // counter strip refreshes.
  if (c.status === 'completed' || c.status === 'failed') c.status = 'sending'
  saveCampaign(c)
  sse.broadcast(`campaign:${id}`, 'recipient-update', { i, recipient: r })
  // Fire-and-forget: the response does not wait for the send. The worker is
  // an async function, so attach a rejection handler — otherwise a crash in
  // it would surface as an unhandled promise rejection.
  setImmediate(() => {
    sendCampaignAsync(id).catch(e => log(`campaign ${id} retry worker crashed:`, (e && e.message) || e))
  })
  return json(res, 202, { id, row: i, status: 'pending' })
}
// DELETE /campaigns/:id — remove the campaign JSON from disk. Mostly for
// cleaning up test/draft runs; the gifts themselves live on Giftbit and
// are unaffected. Refuses to delete while the send worker is active for

View File

@ -3,6 +3,14 @@ const cfg = require('./config')
const { log } = require('./helpers')
let _transporter = null
// Most recent send failure, exposed as a module-level side-channel.
// sendEmail resets it to null when a send starts and stores the failure
// before returning false, so a caller that needs the specific reason
// (e.g. the campaign worker, which shows it in the UI) reads it right
// after a falsy return.
// This is a single shared slot: fine for the worker's sequential loop,
// but overlapping sends would overwrite each other's value. If sends are
// ever parallelised, return a proper result tuple instead.
let _lastError = null

/**
 * Read the failure recorded by the most recent sendEmail call.
 *
 * @returns the stored failure value, or null when none is recorded
 */
function getLastError () {
  return _lastError
}
function getTransporter () {
if (_transporter) return _transporter
@ -40,8 +48,10 @@ function getTransporter () {
* @returns {Promise<object|false>} nodemailer info object (or { messageId: null }) on success — truthy; false on failure
*/
async function sendEmail (opts) {
_lastError = null
const transport = getTransporter()
if (!transport) {
_lastError = new Error('No transport available (SMTP not configured)')
log('Cannot send email — no transport available')
return false
}
@ -74,11 +84,11 @@ async function sendEmail (opts) {
// callers continue to work because the object is truthy.
return info || { messageId: null }
} catch (e) {
_lastError = e
log(`Email send failed to ${opts.to}: ${e.message}`)
// Legacy contract: return false on failure. New callers that need the
// error string should check `Promise.allSettled` style or wrap in try
// (we don't throw here to preserve existing `if (await sendEmail(...))`
// call sites). The error is logged above.
// error string should call email.getLastError() right after a falsy
// return — set above, cleared at the start of every send call.
return false
}
}
@ -129,4 +139,4 @@ async function sendQuotationEmail (opts) {
})
}
module.exports = { sendEmail, sendQuotationEmail }
module.exports = { sendEmail, sendQuotationEmail, getLastError }