diff --git a/app/api/cron/check-research/route.ts b/app/api/cron/check-research/route.ts index bd1b0cdb..8f4742d1 100644 --- a/app/api/cron/check-research/route.ts +++ b/app/api/cron/check-research/route.ts @@ -4,9 +4,8 @@ export const maxDuration = 60; import { type NextRequest } from 'next/server'; import { createClient, type SanityClient } from 'next-sanity'; import { apiVersion, dataset, projectId } from '@/sanity/lib/api'; -import { NotebookLMClient } from '@/lib/services/notebooklm/client'; -import { initAuth } from '@/lib/services/notebooklm/auth'; -import { ArtifactTypeCode, ArtifactStatus } from '@/lib/services/notebooklm/types'; +import { pollResearch, parseResearchReport } from '@/lib/services/gemini-research'; +import { generateInfographicsForTopic } from '@/lib/services/gemini-infographics'; import { generateWithGemini, stripCodeFences } from '@/lib/gemini'; import { getConfigValue } from '@/lib/config'; import type { ResearchPayload } from '@/lib/services/research'; @@ -20,8 +19,8 @@ interface PipelineDoc { _id: string; title: string; status: string; - researchNotebookId: string; - researchTaskId?: string; + researchInteractionId?: string; + researchNotebookId?: string; trendScore?: number; trendSources?: string; script?: { @@ -46,7 +45,6 @@ interface PipelineDoc { cta: string; }; researchData?: string; - infographicArtifactIds?: string[]; _updatedAt: string; } @@ -109,15 +107,6 @@ async function buildStuckThresholds(): Promise> { /** Max docs to process per status per run — keeps total time well under 60s */ const MAX_DOCS_PER_STATUS = 2; -/** Default infographic instructions — used when content_config singleton doesn't exist yet */ -const DEFAULT_INFOGRAPHIC_INSTRUCTIONS = [ - 'Create a high-level architecture overview diagram', - 'Create a comparison chart of key features and alternatives', - 'Create a step-by-step workflow diagram', - 'Create a timeline of key developments and milestones', - 'Create a pros and cons visual summary', -]; - // 
--------------------------------------------------------------------------- // Sanity Write Client // --------------------------------------------------------------------------- @@ -177,283 +166,158 @@ async function flagStuckDocs( } // --------------------------------------------------------------------------- -// Step 1: researching → research_complete +// Step 1: researching → research_complete (Gemini Deep Research polling) // --------------------------------------------------------------------------- async function stepResearching( doc: PipelineDoc, - nbClient: NotebookLMClient, sanity: SanityClient, ): Promise { - const notebookId = doc.researchNotebookId; - console.log(`[check-research] Step 1: Polling research for "${doc.title}" (notebook: ${notebookId})`); - - const pollResult = await nbClient.pollResearch(notebookId); - console.log(`[check-research] Research status: ${pollResult.status} (${pollResult.sources.length} sources)`); + // Use researchInteractionId (new Gemini) or fall back to researchNotebookId (legacy) + const interactionId = doc.researchInteractionId; - if (pollResult.status === 'in_progress') { - return { id: doc._id, title: doc.title, step: 'researching', outcome: 'still_in_progress' }; + if (!interactionId) { + // Legacy doc without interaction ID — skip to enriching with existing data + console.warn(`[check-research] No researchInteractionId for "${doc.title}" — skipping to enriching`); + await sanity.patch(doc._id).set({ status: 'enriching' }).commit(); + return { id: doc._id, title: doc.title, step: 'researching', outcome: 'no_interaction_skip_to_enriching' }; } - if (pollResult.status === 'no_research') { - console.warn(`[check-research] No research found for "${doc.title}" — moving to script_ready with existing script`); - await sanity.patch(doc._id).set({ status: 'script_ready' }).commit(); - return { id: doc._id, title: doc.title, step: 'researching', outcome: 'no_research_skip_to_script_ready' }; - } + console.log(`[check-research] 
Polling research for "${doc.title}" (interaction: ${interactionId})`); - // Research completed — import sources and save research data - const researchTaskId = pollResult.taskId || doc.researchTaskId || ''; - const researchSources = pollResult.sources; + const result = await pollResearch(interactionId); - if (researchSources.length > 0 && researchTaskId) { - console.log(`[check-research] Importing ${researchSources.length} research sources...`); - try { - await nbClient.importResearchSources(notebookId, researchTaskId, researchSources); - } catch (err) { - console.warn(`[check-research] Failed to import sources (non-fatal):`, err instanceof Error ? err.message : err); - } + if (result.status === 'in_progress') { + return { id: doc._id, title: doc.title, step: 'researching', outcome: 'still_in_progress' }; } - // Get summary/briefing - let briefing = pollResult.summary || ''; - if (!briefing) { - try { - briefing = await nbClient.getSummary(notebookId); - } catch (err) { - console.warn(`[check-research] Failed to get summary (non-fatal):`, err instanceof Error ? 
err.message : err); - } + if (result.status === 'failed' || result.status === 'not_found') { + console.error(`[check-research] Research ${result.status} for "${doc.title}": ${result.error}`); + await sanity.patch(doc._id).set({ + status: 'flagged', + flaggedReason: `Research ${result.status}: ${result.error || 'Unknown error'}`, + }).commit(); + return { id: doc._id, title: doc.title, step: 'researching', outcome: result.status, error: result.error }; } - // Save research data to Sanity for later steps - const researchData = { - briefing, - sources: researchSources, - taskId: researchTaskId, - completedAt: new Date().toISOString(), - }; + // Research completed — parse the report into structured data + const report = result.report || ''; + console.log(`[check-research] Research completed for "${doc.title}" (${report.length} chars)`); - await sanity - .patch(doc._id) - .set({ - status: 'research_complete', - researchTaskId, - researchData: JSON.stringify(researchData), - }) - .commit(); + const researchPayload = await parseResearchReport(doc.title, report); + + // Save research data and advance to research_complete + await sanity.patch(doc._id).set({ + status: 'research_complete', + researchData: JSON.stringify(researchPayload), + }).commit(); console.log(`[check-research] "${doc.title}" → research_complete`); return { id: doc._id, title: doc.title, step: 'researching', outcome: 'research_complete' }; } // --------------------------------------------------------------------------- -// Step 2: research_complete → infographics_generating +// Step 2: research_complete → enriching (Gemini Imagen infographics) // --------------------------------------------------------------------------- async function stepResearchComplete( doc: PipelineDoc, - nbClient: NotebookLMClient, sanity: SanityClient, ): Promise { - const notebookId = doc.researchNotebookId; - console.log(`[check-research] Step 2: Starting infographics for "${doc.title}"`); - - // Get source IDs for infographic 
generation - const sourceIds = await nbClient.getSourceIds(notebookId); - console.log(`[check-research] Found ${sourceIds.length} source IDs`); - - // Start all infographic generations (instructions from Sanity config) - const infographicInstructions = await getConfigValue( - "content_config", - "infographicInstructions", - DEFAULT_INFOGRAPHIC_INSTRUCTIONS, - ); - const artifactIds: string[] = []; - for (const instruction of infographicInstructions) { - try { - const result = await nbClient.generateInfographic(notebookId, { - sourceIds, - instructions: instruction, - language: 'en', - orientation: 1, // landscape - detailLevel: 2, // detailed - }); - if (result.taskId) { - artifactIds.push(result.taskId); - } - } catch (err) { - console.warn(`[check-research] Failed to start infographic (non-fatal):`, err instanceof Error ? err.message : err); - } - } - - console.log(`[check-research] Started ${artifactIds.length} infographic generations`); + console.log(`[check-research] Generating infographics for "${doc.title}"`); - if (artifactIds.length === 0) { - // No infographics started — skip to enriching - console.warn(`[check-research] No infographics started — skipping to enriching`); - await sanity - .patch(doc._id) - .set({ - status: 'enriching', - infographicArtifactIds: [], - }) - .commit(); - return { id: doc._id, title: doc.title, step: 'research_complete', outcome: 'skip_to_enriching_no_infographics' }; + // Parse research data for briefing context + let briefing = ''; + if (doc.researchData) { + try { + const data = JSON.parse(doc.researchData) as { briefing?: string }; + briefing = data.briefing || ''; + } catch { /* ignore */ } } - // Save artifact IDs and transition status - await sanity - .patch(doc._id) - .set({ - status: 'infographics_generating', - infographicArtifactIds: artifactIds, - }) - .commit(); - - console.log(`[check-research] "${doc.title}" → infographics_generating (${artifactIds.length} artifacts)`); - return { id: doc._id, title: doc.title, 
step: 'research_complete', outcome: 'infographics_generating' }; -} - -// --------------------------------------------------------------------------- -// Step 3: infographics_generating → enriching -// --------------------------------------------------------------------------- - -async function stepInfographicsGenerating( - doc: PipelineDoc, - nbClient: NotebookLMClient, - sanity: SanityClient, -): Promise { - const notebookId = doc.researchNotebookId; - const artifactIds = doc.infographicArtifactIds ?? []; - console.log(`[check-research] Step 3: Checking ${artifactIds.length} infographics for "${doc.title}"`); + try { + // Generate all infographics using Imagen 4 Fast + const batchResult = await generateInfographicsForTopic(doc.title, briefing); - if (artifactIds.length === 0) { - // No artifacts to wait for — skip to enriching - await sanity.patch(doc._id).set({ status: 'enriching' }).commit(); - return { id: doc._id, title: doc.title, step: 'infographics_generating', outcome: 'skip_to_enriching_no_artifacts' }; - } + console.log(`[check-research] Generated ${batchResult.results.length} infographics, ${batchResult.errors.length} failed`); - // List all artifacts ONCE (not per artifact) - const allArtifacts = await nbClient.listArtifacts(notebookId); - console.log(`[check-research] Found ${allArtifacts.length} total artifacts in notebook`); - - // Check if ALL our artifacts are completed - const ourArtifacts = allArtifacts.filter((a) => artifactIds.includes(a.id)); - const completed = ourArtifacts.filter((a) => a.statusCode === ArtifactStatus.COMPLETED); - const failed = ourArtifacts.filter( - (a) => a.statusCode !== ArtifactStatus.COMPLETED && a.statusCode !== 1 && a.statusCode !== 2, - ); - - console.log( - `[check-research] Infographic status: ${completed.length} completed, ${failed.length} failed, ${artifactIds.length - ourArtifacts.length} not found, ${ourArtifacts.length - completed.length - failed.length} still generating`, - ); - - // All done (completed 
or failed) — move to enriching - const allDone = completed.length + failed.length >= artifactIds.length || - ourArtifacts.length >= artifactIds.length && - ourArtifacts.every((a) => a.statusCode === ArtifactStatus.COMPLETED || a.statusCode >= 4); - - if (!allDone) { - // Still generating — wait for next run - return { id: doc._id, title: doc.title, step: 'infographics_generating', outcome: 'still_generating' }; - } - - // Download and upload infographics to Sanity - interface SanityImageRef { - _type: 'image'; - _key: string; - alt?: string; - asset: { _type: 'reference'; _ref: string }; - } + // Upload each generated image to Sanity + const infographicRefs: Array<{ + _type: 'image'; + _key: string; + alt?: string; + asset: { _type: 'reference'; _ref: string }; + }> = []; + const infographicUrls: string[] = []; - const infographicRefs: SanityImageRef[] = []; - const infographicUrls: string[] = []; + for (let i = 0; i < batchResult.results.length; i++) { + const imgResult = batchResult.results[i]; + try { + const buffer = Buffer.from(imgResult.imageBase64, 'base64'); + const filename = `infographic-${doc._id}-${i}.png`; - for (let i = 0; i < artifactIds.length; i++) { - const artifactId = artifactIds[i]; - try { - // Step 1: Get the auth-gated URL - const authUrl = await nbClient.getInfographicUrl(notebookId, artifactId); - if (!authUrl) { - console.warn(`[check-research] No URL for artifact ${artifactId}`); - continue; - } + const asset = await writeClient.assets.upload('image', buffer, { + filename, + contentType: imgResult.mimeType, + }); - // Step 2: Download PNG with NotebookLM auth cookies - const cookies = nbClient.getCookieHeader(); - const imageResponse = await fetch(authUrl, { - headers: { Cookie: cookies }, - redirect: 'follow', - }); + console.log(`[check-research] Uploaded infographic ${i + 1}: ${asset._id}`); - if (!imageResponse.ok) { - console.warn(`[check-research] Failed to download infographic ${artifactId}: ${imageResponse.status}`); - continue; 
- } + infographicRefs.push({ + _type: 'image', + _key: `infographic-${i}`, + alt: `Research infographic ${i + 1} for ${doc.title}`, + asset: { _type: 'reference', _ref: asset._id }, + }); - const contentType = imageResponse.headers.get('content-type') || ''; - if (!contentType.includes('image')) { - console.warn(`[check-research] Infographic ${artifactId} returned non-image: ${contentType}`); - continue; + // Build CDN URL for backward compat + const cdnUrl = `https://cdn.sanity.io/images/${projectId}/${dataset}/${asset._id.replace('image-', '').replace('-png', '.png').replace('-jpg', '.jpg')}`; + infographicUrls.push(cdnUrl); + } catch (err) { + console.warn(`[check-research] Failed to upload infographic ${i}:`, err instanceof Error ? err.message : err); } - - const arrayBuffer = await imageResponse.arrayBuffer(); - const buffer = Buffer.from(arrayBuffer); - console.log(`[check-research] Downloaded infographic ${i + 1}: ${buffer.length} bytes`); - - // Step 3: Upload to Sanity assets - const filename = `infographic-${doc._id}-${i}.png`; - const asset = await writeClient.assets.upload('image', buffer, { - filename, - contentType: 'image/png', - }); - - console.log(`[check-research] Uploaded to Sanity: ${asset._id}`); - - // Step 4: Build image reference for the array field - const artifact = ourArtifacts.find(a => a.id === artifactId); - infographicRefs.push({ - _type: 'image', - _key: artifactId.slice(0, 8), - alt: artifact?.title || `Research infographic ${i + 1}`, - asset: { _type: 'reference', _ref: asset._id }, - }); - - // Also store the Sanity CDN URL for researchData backward compat - const cdnUrl = `https://cdn.sanity.io/images/${process.env.NEXT_PUBLIC_SANITY_PROJECT_ID}/${process.env.NEXT_PUBLIC_SANITY_DATASET}/${asset._id.replace('image-', '').replace('-png', '.png').replace('-jpg', '.jpg')}`; - infographicUrls.push(cdnUrl); - - } catch (err) { - console.warn(`[check-research] Failed to process infographic ${artifactId}:`, err instanceof Error ? 
err.message : err); } - } - console.log(`[check-research] Processed ${infographicRefs.length} infographics (${infographicUrls.length} URLs)`); + // Update research data with infographic URLs + let researchData: Record = {}; + if (doc.researchData) { + try { researchData = JSON.parse(doc.researchData); } catch { /* ignore */ } + } + researchData.infographicUrls = infographicUrls; - // Parse existing research data and add infographic URLs - let researchData: Record = {}; - if (doc.researchData) { - try { - researchData = JSON.parse(doc.researchData) as Record; - } catch { - console.warn(`[check-research] Failed to parse existing researchData`); + const patchData: Record = { + status: 'enriching', + researchData: JSON.stringify(researchData), + }; + if (infographicRefs.length > 0) { + patchData.infographics = infographicRefs; } - } - researchData.infographicUrls = infographicUrls; - const patchData: Record = { - status: 'enriching', - researchData: JSON.stringify(researchData), - }; + await sanity.patch(doc._id).set(patchData).commit(); - // Add infographic image refs if we have any - if (infographicRefs.length > 0) { - patchData.infographics = infographicRefs; + console.log(`[check-research] "${doc.title}" → enriching (${infographicRefs.length} infographics)`); + return { id: doc._id, title: doc.title, step: 'research_complete', outcome: 'enriching' }; + } catch (err) { + // Infographic generation failed — skip to enriching without infographics + console.error(`[check-research] Infographic generation failed for "${doc.title}":`, err); + await sanity.patch(doc._id).set({ status: 'enriching' }).commit(); + return { id: doc._id, title: doc.title, step: 'research_complete', outcome: 'enriching_no_infographics', error: err instanceof Error ? 
err.message : String(err) }; } +} - await sanity.patch(doc._id).set(patchData).commit(); +// --------------------------------------------------------------------------- +// Step 3: infographics_generating → enriching (legacy migration handler) +// --------------------------------------------------------------------------- - console.log(`[check-research] "${doc.title}" → enriching (${infographicRefs.length} infographics, ${infographicUrls.length} URLs)`); - return { id: doc._id, title: doc.title, step: 'infographics_generating', outcome: 'enriching' }; +async function stepInfographicsGenerating( + doc: PipelineDoc, + sanity: SanityClient, +): Promise { + // Legacy migration: docs stuck in infographics_generating from old NotebookLM pipeline + // Just advance them to enriching — they may or may not have infographics + console.warn(`[check-research] Legacy doc "${doc.title}" in infographics_generating — advancing to enriching`); + await sanity.patch(doc._id).set({ status: 'enriching' }).commit(); + return { id: doc._id, title: doc.title, step: 'infographics_generating', outcome: 'legacy_advance_to_enriching' }; } // --------------------------------------------------------------------------- @@ -467,26 +331,18 @@ async function stepEnriching( console.log(`[check-research] Step 4: Enriching script for "${doc.title}"`); // Parse research data from Sanity - let researchData: { - briefing?: string; - sources?: Array<{ url: string; title: string }>; - infographicUrls?: string[]; - } = {}; + let researchData: Record = {}; if (doc.researchData) { try { - researchData = JSON.parse(doc.researchData) as typeof researchData; + researchData = JSON.parse(doc.researchData) as Record; } catch { console.warn(`[check-research] Failed to parse researchData for "${doc.title}"`); } } - const briefing = researchData.briefing ?? ''; - const sources = researchData.sources ?? []; - const infographicUrls = researchData.infographicUrls ?? 
[]; - // Build full research payload - const researchPayload = buildResearchPayload(doc, briefing, sources, infographicUrls); + const researchPayload = buildResearchPayload(doc, researchData); // Generate enriched script with Gemini let enrichedScript: EnrichedScript | null = null; @@ -822,10 +678,18 @@ function classifySourceType(url: string): 'youtube' | 'article' | 'docs' | 'unkn function buildResearchPayload( doc: PipelineDoc, - briefing: string, - sources: Array<{ url: string; title: string }>, - infographicUrls: string[], + researchData: Record, ): ResearchPayload { + // If researchData already has the full ResearchPayload shape, use it directly + if (researchData.topic && researchData.talkingPoints && researchData.sceneHints) { + return researchData as unknown as ResearchPayload; + } + + // Legacy format: extract from briefing + sources + const briefing = (researchData.briefing as string) ?? ''; + const sources = (researchData.sources as Array<{ url: string; title: string }>) ?? []; + const infographicUrls = (researchData.infographicUrls as string[]) ?? 
[]; + const talkingPoints = extractTalkingPoints(briefing); const codeExamples = extractCodeExamples(briefing); @@ -840,7 +704,7 @@ function buildResearchPayload( return { topic: doc.title, - notebookId: doc.researchNotebookId, + notebookId: doc.researchNotebookId || '', createdAt: doc._updatedAt, completedAt: new Date().toISOString(), sources: sources.map((s) => ({ @@ -877,10 +741,11 @@ export async function GET(request: NextRequest) { const sanity = getSanityWriteClient(); // Single query for all active pipeline statuses + // Include both researchInteractionId (new Gemini) and researchNotebookId (legacy) const docs = await sanity.fetch( - `*[_type == "automatedVideo" && status in ["researching", "research_complete", "infographics_generating", "enriching"] && defined(researchNotebookId)] { - _id, title, status, researchNotebookId, researchTaskId, trendScore, trendSources, - script, researchData, infographicArtifactIds, _updatedAt + `*[_type == "automatedVideo" && status in ["researching", "research_complete", "infographics_generating", "enriching"] && (defined(researchInteractionId) || defined(researchNotebookId))] { + _id, title, status, researchInteractionId, researchNotebookId, trendScore, trendSources, + script, researchData, _updatedAt }`, ); @@ -911,39 +776,49 @@ export async function GET(request: NextRequest) { `[check-research] Pipeline: ${researching.length} researching, ${researchComplete.length} research_complete, ${infographicsGenerating.length} infographics_generating, ${enriching.length} enriching`, ); - // Phase 2: Only init NotebookLM if needed - const needsNotebookLM = researching.length > 0 || researchComplete.length > 0 || infographicsGenerating.length > 0; - let nbClient: NotebookLMClient | null = null; - - if (needsNotebookLM) { - console.log('[check-research] Initializing NotebookLM client...'); - const auth = await initAuth(); - nbClient = new NotebookLMClient(auth); - } - - // Phase 3: Process each status group (max MAX_DOCS_PER_STATUS per 
group) + // Check enableDeepResearch toggle + const enableDeepResearch = await getConfigValue('pipeline_config', 'enableDeepResearch', false); // Step 1: researching → research_complete - for (const doc of researching.slice(0, MAX_DOCS_PER_STATUS)) { - try { - const result = await stepResearching(doc, nbClient!, sanity); - results.push(result); - } catch (err) { - console.error(`[check-research] Error in stepResearching for ${doc._id}:`, err); - results.push({ - id: doc._id, - title: doc.title, - step: 'researching', - outcome: 'error', - error: err instanceof Error ? err.message : String(err), - }); + if (!enableDeepResearch) { + // Deep research disabled — skip researching docs to enriching + for (const doc of researching.slice(0, MAX_DOCS_PER_STATUS)) { + try { + await sanity.patch(doc._id).set({ status: doc.script ? 'script_ready' : 'enriching' }).commit(); + results.push({ id: doc._id, title: doc.title, step: 'researching', outcome: 'deep_research_disabled_skip' }); + } catch (err) { + console.error(`[check-research] Error skipping researching doc ${doc._id}:`, err); + results.push({ + id: doc._id, + title: doc.title, + step: 'researching', + outcome: 'error', + error: err instanceof Error ? err.message : String(err), + }); + } + } + } else { + for (const doc of researching.slice(0, MAX_DOCS_PER_STATUS)) { + try { + const result = await stepResearching(doc, sanity); + results.push(result); + } catch (err) { + console.error(`[check-research] Error in stepResearching for ${doc._id}:`, err); + results.push({ + id: doc._id, + title: doc.title, + step: 'researching', + outcome: 'error', + error: err instanceof Error ? 
err.message : String(err), + }); + } } } - // Step 2: research_complete → infographics_generating + // Step 2: research_complete → enriching (infographics generated inline) for (const doc of researchComplete.slice(0, MAX_DOCS_PER_STATUS)) { try { - const result = await stepResearchComplete(doc, nbClient!, sanity); + const result = await stepResearchComplete(doc, sanity); results.push(result); } catch (err) { console.error(`[check-research] Error in stepResearchComplete for ${doc._id}:`, err); @@ -957,10 +832,10 @@ export async function GET(request: NextRequest) { } } - // Step 3: infographics_generating → enriching + // Step 3: infographics_generating → enriching (legacy migration only) for (const doc of infographicsGenerating.slice(0, MAX_DOCS_PER_STATUS)) { try { - const result = await stepInfographicsGenerating(doc, nbClient!, sanity); + const result = await stepInfographicsGenerating(doc, sanity); results.push(result); } catch (err) { console.error(`[check-research] Error in stepInfographicsGenerating for ${doc._id}:`, err); @@ -974,7 +849,7 @@ export async function GET(request: NextRequest) { } } - // Step 4: enriching → script_ready (no NotebookLM needed) + // Step 4: enriching → script_ready for (const doc of enriching.slice(0, MAX_DOCS_PER_STATUS)) { try { const result = await stepEnriching(doc, sanity); diff --git a/app/api/cron/ingest/route.ts b/app/api/cron/ingest/route.ts index 2ec77541..2a40ea68 100644 --- a/app/api/cron/ingest/route.ts +++ b/app/api/cron/ingest/route.ts @@ -7,8 +7,7 @@ import { writeClient } from "@/lib/sanity-write-client"; import { getConfigValue } from "@/lib/config"; import { discoverTrends, type TrendResult } from "@/lib/services/trend-discovery"; import type { ResearchPayload } from "@/lib/services/research"; -import { NotebookLMClient } from "@/lib/services/notebooklm/client"; -import { initAuth } from "@/lib/services/notebooklm/auth"; +import { submitResearch } from "@/lib/services/gemini-research"; // 
--------------------------------------------------------------------------- // Types @@ -469,11 +468,11 @@ async function createSanityDocuments( selectedTrend: TrendResult, qualityThreshold: number, research?: ResearchPayload, - researchMeta?: { notebookId: string; taskId: string }, + researchInteractionId?: string, ) { const isFlagged = criticResult.score < qualityThreshold; // When research is in-flight, status is "researching" (check-research cron will transition to script_ready) - const isResearching = !!researchMeta?.notebookId; + const isResearching = !!researchInteractionId; const status = isFlagged ? "flagged" : isResearching ? "researching" : "script_ready"; const contentIdea = await writeClient.create({ @@ -510,8 +509,7 @@ async function createSanityDocuments( }), trendScore: selectedTrend.score, trendSources: selectedTrend.signals.map(s => s.source).join(", "), - researchNotebookId: researchMeta?.notebookId ?? research?.notebookId, - ...(researchMeta?.taskId && { researchTaskId: researchMeta.taskId }), + researchInteractionId: researchInteractionId || undefined, }); console.log(`[CRON/ingest] Created automatedVideo: ${automatedVideo._id}`); @@ -548,9 +546,9 @@ export async function GET(request: NextRequest) { "systemInstruction", SYSTEM_INSTRUCTION_FALLBACK, ); - const enableNotebookLmResearch = await getConfigValue( + const enableDeepResearch = await getConfigValue( "pipeline_config", - "enableNotebookLmResearch", + "enableDeepResearch", false, ); const qualityThreshold = await getConfigValue( @@ -620,41 +618,21 @@ export async function GET(request: NextRequest) { console.log(`[CRON/ingest] Dedup: selected "${selectedTrend.topic}" (score: ${selectedTrend.score}, skipped ${skippedCount} topics)`); // Step 2: Optional deep research on selected topic (fire-and-forget) - // When research is enabled, we create a notebook and start research + // When research is enabled, we submit to Gemini Deep Research // but DON'T wait for it — the check-research cron will 
poll and enrich later - let researchMeta: { notebookId: string; taskId: string } | undefined; - if (enableNotebookLmResearch) { - console.log(`[CRON/ingest] Starting fire-and-forget research on: "${selectedTrend.topic}"...`); + let researchInteractionId: string | undefined; + if (enableDeepResearch) { + console.log(`[CRON/ingest] Starting Gemini Deep Research on: "${selectedTrend.topic}"...`); try { - const auth = await initAuth(); - const nbClient = new NotebookLMClient(auth); - - // Create notebook - const notebook = await nbClient.createNotebook(selectedTrend.topic); - const notebookId = notebook.id; - console.log(`[CRON/ingest] Created notebook: ${notebookId}`); - - // Add source URLs from trend signals const sourceUrls = (selectedTrend.signals ?? []) .map((s: { url?: string }) => s.url) .filter((u): u is string => !!u && u.startsWith("http")) .slice(0, 5); - for (const url of sourceUrls) { - await nbClient.addSource(notebookId, url).catch((err) => { - console.warn(`[CRON/ingest] Failed to add source ${url}:`, err); - }); - } - console.log(`[CRON/ingest] Added ${sourceUrls.length} source URLs to notebook`); - - // Start deep research (fire-and-forget — don't poll!) - const researchTask = await nbClient.startResearch(notebookId, selectedTrend.topic, "deep"); - const researchTaskId = researchTask?.taskId ?? ""; - console.log(`[CRON/ingest] Research started — taskId: ${researchTaskId}. check-research cron will poll.`); - - researchMeta = { notebookId, taskId: researchTaskId }; + researchInteractionId = await submitResearch(selectedTrend.topic, { sourceUrls }); + console.log(`[CRON/ingest] Deep Research submitted — interactionId: ${researchInteractionId}. 
check-research cron will poll.`); } catch (err) { - console.warn("[CRON/ingest] Research start failed, continuing without:", err); + console.warn("[CRON/ingest] Deep Research submission failed, continuing without:", err); } } @@ -692,7 +670,7 @@ export async function GET(request: NextRequest) { ); console.log("[CRON/ingest] Creating Sanity documents..."); - const result = await createSanityDocuments(script, criticResult, selectedTrend, qualityThreshold, undefined, researchMeta); + const result = await createSanityDocuments(script, criticResult, selectedTrend, qualityThreshold, undefined, researchInteractionId); console.log("[CRON/ingest] Done!", result); @@ -704,8 +682,8 @@ export async function GET(request: NextRequest) { trendCount: trends.length, trendScore: selectedTrend.score, skippedCount, - researchStarted: !!researchMeta, - researchNotebookId: researchMeta?.notebookId, + researchStarted: !!researchInteractionId, + researchInteractionId: researchInteractionId, }); } catch (err) { console.error("[CRON/ingest] Unexpected error:", err); diff --git a/lib/gemini.ts b/lib/gemini.ts index 0e3110da..7a30697f 100644 --- a/lib/gemini.ts +++ b/lib/gemini.ts @@ -1,7 +1,16 @@ -import { GoogleGenerativeAI } from "@google/generative-ai"; +import { GoogleGenAI } from "@google/genai"; import { getConfigValue } from "@/lib/config"; -const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY || ""); +let _ai: GoogleGenAI | null = null; + +/** Lazy-initialize the GoogleGenAI client (avoids crash at import time if GEMINI_API_KEY is missing). */ +function getAI(): GoogleGenAI { + if (!_ai) { + const apiKey = process.env.GEMINI_API_KEY || ""; + _ai = new GoogleGenAI({ apiKey }); + } + return _ai; +} /** * Generate text content using Gemini Flash. 
@@ -14,13 +23,13 @@ export async function generateWithGemini( systemInstruction?: string, ): Promise { const geminiModel = await getConfigValue("pipeline_config", "geminiModel", "gemini-2.0-flash"); - const model = genAI.getGenerativeModel({ + const ai = getAI(); + const response = await ai.models.generateContent({ model: geminiModel, - ...(systemInstruction && { systemInstruction }), + contents: prompt, + ...(systemInstruction && { config: { systemInstruction } }), }); - const result = await model.generateContent(prompt); - const response = result.response; - return response.text(); + return response.text ?? ""; } /** diff --git a/lib/services/gemini-infographics.ts b/lib/services/gemini-infographics.ts new file mode 100644 index 00000000..c7c8639f --- /dev/null +++ b/lib/services/gemini-infographics.ts @@ -0,0 +1,283 @@ +/** + * Gemini Infographic Generation Service + * + * Generates brand-consistent infographics using Google's Imagen 4 Fast model + * via the @google/genai SDK. Designed for the CodingCat.dev automated video + * pipeline — produces visual assets from research data for use in videos and + * blog posts. + * + * Pricing: Imagen 4 Fast — $0.02/image + * Supports seed-based reproducibility for brand consistency. + * + * @module lib/services/gemini-infographics + */ + +import { GoogleGenAI } from "@google/genai"; +import { getConfigValue } from "@/lib/config"; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +/** A single generated infographic result. */ +export interface InfographicResult { + /** Base64-encoded PNG image bytes. */ + imageBase64: string; + /** MIME type — always "image/png" for Imagen. */ + mimeType: string; + /** The prompt used to generate this image. */ + prompt: string; + /** Seed used (if provided), for reproducibility. */ + seed?: number; +} + +/** Options for a single infographic generation request. 
/** Options describing one infographic generation request. */
export interface InfographicRequest {
  /** Text prompt describing the infographic to generate. */
  prompt: string;
  /** Aspect ratio. Defaults to "16:9" for video pipeline use. */
  aspectRatio?: "1:1" | "3:4" | "4:3" | "9:16" | "16:9";
  /** Optional seed for reproducibility (not compatible with watermarks). */
  seed?: number;
  /** Negative prompt — what to avoid in the image. */
  negativePrompt?: string;
}

/** Batch-level options shared by every request in a batch run. */
export interface InfographicBatchOptions {
  /** Override the Imagen model (defaults to pipeline_config.infographicModel or "imagen-4-fast"). */
  model?: string;
  /**
   * Number of images per prompt (1–4). Defaults to 1.
   * NOTE(review): the batch function visible in this file only reads `model`;
   * this field looks unused — confirm before relying on it.
   */
  numberOfImages?: number;
}

/** Outcome of a batch run: successes and per-prompt failures side by side. */
export interface InfographicBatchResult {
  /** Successfully generated infographics. */
  results: InfographicResult[];
  /** Prompts that failed, with error messages. */
  errors: Array<{ prompt: string; error: string }>;
}

// ---------------------------------------------------------------------------
// Internal: lazy client
// ---------------------------------------------------------------------------

let _ai: GoogleGenAI | null = null;

/**
 * Return the module-wide GoogleGenAI client, creating it on first use.
 * The API key is read from GEMINI_API_KEY (empty string when unset).
 */
function getAI(): GoogleGenAI {
  if (_ai) return _ai;
  _ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY ?? "" });
  return _ai;
}

// ---------------------------------------------------------------------------
// Core: single image generation
// ---------------------------------------------------------------------------
/**
 * Generate a single infographic image with an Imagen model.
 *
 * Issues one `generateImages` call that requests exactly one image and returns
 * it base64-encoded together with the prompt (and seed, when supplied).
 *
 * NOTE(review): `seed` and `negativePrompt` are forwarded verbatim when set —
 * confirm the backend used by this API-key client accepts both options
 * (TODO verify against the SDK's GenerateImagesConfig documentation).
 *
 * @param request - Prompt and per-image options; aspect ratio defaults to "16:9".
 * @param model - Imagen model ID (defaults to "imagen-4-fast").
 * @returns InfographicResult with base64 image bytes.
 * @throws If the API call fails or the response carries no image bytes.
 */
export async function generateInfographic(
  request: InfographicRequest,
  model: string = "imagen-4-fast",
): Promise<InfographicResult> {
  const ai = getAI();

  const response = await ai.models.generateImages({
    model,
    prompt: request.prompt,
    config: {
      numberOfImages: 1,
      aspectRatio: request.aspectRatio ?? "16:9",
      // Optional knobs are only sent when the caller actually provided them.
      ...(request.seed !== undefined && { seed: request.seed }),
      ...(request.negativePrompt && { negativePrompt: request.negativePrompt }),
    },
  });

  const generated = response.generatedImages?.[0];
  if (!generated?.image?.imageBytes) {
    const reason = generated?.raiFilteredReason ?? "unknown";
    throw new Error(
      `Imagen returned no image for prompt "${request.prompt.slice(0, 80)}…" — RAI reason: ${reason}`,
    );
  }

  const imageBytes = generated.image.imageBytes;
  // imageBytes may be a Uint8Array or base64 string depending on SDK version
  const imageBase64 =
    typeof imageBytes === "string"
      ? imageBytes
      : Buffer.from(imageBytes).toString("base64");

  return {
    imageBase64,
    // Prefer the MIME type reported with the image; fall back to PNG when the
    // response does not carry one (previously this was hard-coded).
    mimeType: generated.image.mimeType ?? "image/png",
    prompt: request.prompt,
    ...(request.seed !== undefined && { seed: request.seed }),
  };
}

// ---------------------------------------------------------------------------
// Batch: generate multiple infographics
// ---------------------------------------------------------------------------
+ * + * @example + * ```ts + * const { results, errors } = await generateInfographicBatch([ + * { prompt: "A clean infographic showing React hooks lifecycle, dark theme, CodingCat.dev branding" }, + * { prompt: "Comparison chart: REST vs GraphQL vs tRPC, developer-friendly, purple accent" }, + * ]); + * console.log(`Generated ${results.length} images, ${errors.length} failed`); + * ``` + */ +export async function generateInfographicBatch( + requests: InfographicRequest[], + options: InfographicBatchOptions = {}, +): Promise { + const model = + options.model ?? + (await getConfigValue("pipeline_config", "infographicModel", "imagen-4-fast")); + + const results: InfographicResult[] = []; + const errors: Array<{ prompt: string; error: string }> = []; + + for (const request of requests) { + try { + const result = await generateInfographic(request, model); + results.push(result); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + errors.push({ prompt: request.prompt, error: message }); + } + } + + return { results, errors }; +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Build a brand-consistent infographic prompt for CodingCat.dev. + * + * Wraps a topic description with standard brand guidelines so all generated + * infographics share a consistent visual identity. + * + * @param topic - The subject matter (e.g. "React Server Components"). + * @param style - Visual style hint (default: "dark tech"). + * @returns A fully-formed Imagen prompt string. + */ +export function buildInfographicPrompt( + topic: string, + style: string = "dark tech", +): string { + return ( + `Create a professional, visually striking infographic about: ${topic}. ` + + `Style: ${style}, purple and teal accent colors, clean sans-serif typography, ` + + `CodingCat.dev brand aesthetic. 
` + + `Layout: structured sections with icons, data visualizations, and clear hierarchy. ` + + `No watermarks. High information density. Developer audience.` + ); +} + +// --------------------------------------------------------------------------- +// Default instructions (blueprint style from spec) +// --------------------------------------------------------------------------- + +/** Default infographic instructions if Sanity contentConfig is not set up */ +const DEFAULT_INSTRUCTIONS: string[] = [ + 'Create a technical architecture sketch using white "hand-drawn" ink lines on a deep navy blue background (#003366). Use rough-sketched server and database icons with visible "marker" strokes, handwritten labels in a casual font, and a subtle grid pattern. Style: blueprint meets whiteboard doodle.', + 'Create a comparison chart on a vibrant blue background (#004080) with hand-inked white headers and uneven, sketchy borders. Use white cross-hatching and doodle-style checkmarks to highlight feature differences. Include hand-drawn arrows and annotations. Style: technical chalkboard.', + 'Create a step-by-step workflow "blueprint" on a dark blue canvas (#003366). Use hand-drawn white arrows connecting rough-sketched boxes, simple "stick-figure" style worker avatars, and handwritten-style labels with a slight chalk texture. Add a subtle grid background. Style: engineering whiteboard.', + 'Create a hand-sketched timeline using a jagged white line on a royal blue background. Represent milestones with simple, iconic white doodles that look like they were quickly sketched during a brainstorming session. Use handwritten dates and labels. Style: notebook sketch on blue paper.', + 'Create a pros and cons summary with a "lo-fi" aesthetic. Use hand-drawn white thumbs-up/down icons and rough-sketched containers on a deep blue background (#003366). Add hand-drawn underlines and circled keywords. 
Style: high-contrast ink-on-blueprint with cyan accent highlights.', +]; + +// --------------------------------------------------------------------------- +// High-level API: generate all infographics for a topic +// --------------------------------------------------------------------------- + +/** + * Generate a deterministic seed from a topic + instruction index. + * Ensures the same topic always produces the same seed per instruction, + * enabling brand-consistent regeneration. + */ +function generateSeed(topic: string, index: number): number { + let hash = 0; + const str = `${topic}-infographic-${index}`; + for (let i = 0; i < str.length; i++) { + const char = str.charCodeAt(i); + hash = ((hash << 5) - hash) + char; + hash = hash & hash; + } + return Math.abs(hash) % 2147483647; +} + +/** + * Generate all infographics for a research topic using instructions + * from Sanity contentConfig. + * + * This is the main entry point for the check-research cron route. + * Reads infographic instructions from contentConfig.infographicInstructions, + * appends topic context to each, and generates images with deterministic + * seeds for brand consistency. + * + * @param topic - The research topic (e.g. "React Server Components") + * @param briefing - Optional research briefing text for additional context + * @returns InfographicBatchResult with generated images and any errors + */ +export async function generateInfographicsForTopic( + topic: string, + briefing?: string, +): Promise { + const instructions = await getConfigValue( + "content_config", "infographicInstructions", DEFAULT_INSTRUCTIONS + ); + + const model = await getConfigValue( + "pipeline_config", "infographicModel", "imagen-4-fast" + ); + + const contextSuffix = briefing + ? 
`\n\nTopic: ${topic}\nContext: ${briefing.slice(0, 500)}` + : `\n\nTopic: ${topic}`; + + const requests: InfographicRequest[] = instructions.map( + (instruction, index) => ({ + prompt: `${instruction}${contextSuffix}`, + seed: generateSeed(topic, index), + aspectRatio: "16:9" as const, + }) + ); + + console.log( + `[infographics] Generating ${requests.length} infographics for "${topic}" with ${model}` + ); + + const result = await generateInfographicBatch(requests, { model }); + + console.log( + `[infographics] Complete: ${result.results.length} generated, ${result.errors.length} failed` + ); + + return result; +} diff --git a/lib/services/gemini-research.ts b/lib/services/gemini-research.ts new file mode 100644 index 00000000..9f00d826 --- /dev/null +++ b/lib/services/gemini-research.ts @@ -0,0 +1,535 @@ +/** + * Gemini Deep Research Service + * + * Uses the Gemini Interactions API for autonomous web research. + * Replaces the NotebookLM-based research pipeline. + * + * Pipeline: + * topic → Gemini Deep Research → markdown report → structured ResearchPayload + * + * @module lib/services/gemini-research + */ + +import { GoogleGenAI, type Interactions } from "@google/genai"; +import { getConfigValue } from "@/lib/config"; +import { generateWithGemini, stripCodeFences } from "@/lib/gemini"; + +// Re-export types for backward compatibility +export type { + ResearchPayload, + ResearchSource, + CodeExample, + ComparisonData, + SceneHint, +} from "./research"; + +import type { + ResearchPayload, + ResearchSource, + CodeExample, + SceneHint, +} from "./research"; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export interface GeminiResearchConfig { + /** Timeout for research polling in ms (default: 1200000 = 20 min) */ + researchTimeout?: number; + /** Polling interval in ms (default: 15000 = 15s) */ + pollInterval?: number; + /** Source URLs 
from trend discovery (included in research prompt) */ + sourceUrls?: string[]; +} + +export interface ResearchStatus { + status: "in_progress" | "completed" | "failed" | "not_found"; + interactionId: string; + report?: string; + error?: string; +} + +// --------------------------------------------------------------------------- +// Lazy AI client init +// --------------------------------------------------------------------------- + +let _ai: GoogleGenAI | null = null; +function getAI(): GoogleGenAI { + if (!_ai) { + const apiKey = process.env.GEMINI_API_KEY || ""; + _ai = new GoogleGenAI({ apiKey }); + } + return _ai; +} + +// --------------------------------------------------------------------------- +// Default prompt template +// --------------------------------------------------------------------------- + +const DEFAULT_PROMPT_TEMPLATE = `Research comprehensively: "{topic}" + +Focus areas: +- What is it and why does it matter for web developers? +- How does it work technically? Include architecture details. +- Key features, capabilities, and limitations +- Comparison with alternatives (include specific metrics where possible) +- Real-world use cases and code examples +- Latest developments and future roadmap + +Target audience: Web developers who want to stay current with modern tools and frameworks. +Include code examples where relevant (TypeScript/JavaScript preferred). +Include specific version numbers, dates, and statistics where available.`; + +// --------------------------------------------------------------------------- +// Submit Research +// --------------------------------------------------------------------------- + +/** + * Submit a research query to Gemini Deep Research. + * Returns the interaction ID for polling. 
/**
 * Submit a research query to Gemini Deep Research.
 *
 * Builds the prompt from the configured template (every `{topic}` placeholder
 * is substituted), optionally appends starting reference URLs, and creates a
 * background interaction.
 *
 * @param topic - Research subject inserted into the prompt template.
 * @param config - Optional settings; only `sourceUrls` is read here.
 * @returns The interaction ID to pass to `pollResearch`.
 * @throws If the create call fails or returns no interaction ID.
 */
export async function submitResearch(
  topic: string,
  config?: GeminiResearchConfig,
): Promise<string> {
  const ai = getAI();
  const agent = await getConfigValue(
    "pipeline_config",
    "deepResearchAgent",
    "deep-research-pro-preview-12-2025",
  );
  const promptTemplate = await getConfigValue(
    "pipeline_config",
    "deepResearchPromptTemplate",
    DEFAULT_PROMPT_TEMPLATE,
  );

  // Build the research prompt. A replacer function is used (not a replacement
  // string) so that `$&`, `$1`, `$$`, … inside `topic` are inserted literally
  // instead of being interpreted as special replacement patterns.
  let prompt = promptTemplate.replace(/\{topic\}/g, () => topic);

  // Add source URLs if provided (gives the researcher starting points)
  const sourceUrls = config?.sourceUrls ?? [];
  if (sourceUrls.length > 0) {
    prompt += `\n\nStarting reference URLs:\n${sourceUrls.map((u) => `- ${u}`).join("\n")}`;
  }

  console.log(`[gemini-research] Submitting research for: "${topic}"`);
  console.log(`[gemini-research] Agent: ${agent}`);

  // Submit via Interactions API (background: true for async deep research)
  const interaction = await ai.interactions.create({
    agent,
    input: prompt,
    background: true,
    stream: false,
  });

  const interactionId = interaction.id;
  if (!interactionId) {
    throw new Error("[gemini-research] No interaction ID returned");
  }

  console.log(`[gemini-research] Interaction created: ${interactionId}`);
  return interactionId;
}

// ---------------------------------------------------------------------------
// Poll Research
// ---------------------------------------------------------------------------
/**
 * Check the status of a research interaction.
 *
 * Maps the interaction state onto `ResearchStatus`:
 * - "completed" → text outputs joined into `report` (undefined when empty)
 * - "failed" / "cancelled" / "incomplete" → "failed" with an error message
 * - anything else → "in_progress"
 * - a not-found error from the API → "not_found"
 *
 * @param interactionId - ID returned by `submitResearch`.
 * @returns The current status for the interaction.
 * @throws Re-throws any error that is not recognised as "not found".
 */
export async function pollResearch(
  interactionId: string,
): Promise<ResearchStatus> {
  const ai = getAI();

  try {
    const result = await ai.interactions.get(interactionId, { stream: false });

    if (result.status === "completed") {
      // Extract the report text from outputs
      const report = extractTextFromOutputs(result.outputs);

      return {
        status: "completed",
        interactionId,
        report: report || undefined,
      };
    }

    if (
      result.status === "failed" ||
      result.status === "cancelled" ||
      result.status === "incomplete"
    ) {
      return {
        status: "failed",
        interactionId,
        error: `Research ${result.status}`,
      };
    }

    // 'in_progress' or 'requires_action'
    return { status: "in_progress", interactionId };
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);

    // Prefer a numeric `status` on the error object when one is present;
    // otherwise fall back to the message. The message test is case-insensitive
    // ("Not Found", "NOT_FOUND") and requires 404 to stand alone, so an ID or
    // byte count that merely contains "404" is not mistaken for a 404.
    const httpStatus =
      typeof error === "object" && error !== null && "status" in error
        ? (error as { status?: unknown }).status
        : undefined;
    const isNotFound =
      httpStatus === 404 || /\b404\b|not[\s_-]?found/i.test(message);

    if (isNotFound) {
      return { status: "not_found", interactionId, error: message };
    }

    throw error; // Re-throw unexpected errors
  }
}

// ---------------------------------------------------------------------------
// Extract text from Interaction outputs
// ---------------------------------------------------------------------------
/**
 * Extract text content from an Interaction outputs array.
 *
 * Only items shaped like `{ type: "text", text: string }` contribute; all
 * other output kinds are ignored. Parts are joined with newlines and trimmed.
 *
 * @param outputs - The `outputs` field of an interaction (may be absent).
 * @returns The concatenated text, or "" when there is none.
 */
function extractTextFromOutputs(
  outputs: Interactions.Interaction["outputs"],
): string {
  if (!outputs || !Array.isArray(outputs)) return "";

  const textParts: string[] = [];
  for (const output of outputs) {
    // TextContent has type: 'text' and text: string
    if (
      output &&
      typeof output === "object" &&
      "type" in output &&
      output.type === "text" &&
      "text" in output
    ) {
      const textContent = output as Interactions.TextContent;
      if (textContent.text) {
        textParts.push(textContent.text);
      }
    }
  }

  return textParts.join("\n").trim();
}

// ---------------------------------------------------------------------------
// Parse Report → ResearchPayload
// ---------------------------------------------------------------------------

/**
 * Parse a markdown research report into a structured ResearchPayload.
 *
 * Sends the first 30,000 characters of the report to `generateWithGemini`
 * with a JSON-extraction prompt. Each field of the parsed JSON is validated
 * by shape and replaced with a safe default when missing. If generation or
 * JSON parsing throws, the regex-based fallback builds the payload instead.
 *
 * @param topic - Research topic recorded on the payload.
 * @param report - Markdown report text.
 * @returns A ResearchPayload (never throws for malformed model output).
 */
export async function parseResearchReport(
  topic: string,
  report: string,
): Promise<ResearchPayload> {
  const createdAt = new Date().toISOString();

  // Use Gemini Flash to extract structured data from the report
  const extractionPrompt = `You are a content analyst. Extract structured data from this research report for a web development video script.

RESEARCH REPORT:
${report.slice(0, 30000)}

Extract and return ONLY valid JSON (no markdown fences):
{
  "briefing": "A 2-3 paragraph executive summary of the key findings",
  "sources": [{"title": "Source title", "url": "https://...", "type": "article|docs|youtube|unknown"}],
  "talkingPoints": ["Key point 1", "Key point 2", ...],
  "codeExamples": [{"snippet": "code here", "language": "typescript", "context": "What this code demonstrates"}],
  "comparisonData": [{"leftLabel": "Option A", "rightLabel": "Option B", "rows": [{"left": "feature", "right": "feature"}]}],
  "sceneHints": [{"content": "Brief description", "suggestedSceneType": "narration|code|list|comparison|mockup", "reason": "Why this scene type"}]
}

Rules:
- Extract 5-8 talking points
- Extract ALL code examples from the report (up to 10)
- If the report compares technologies, create comparisonData
- Generate 6-10 scene hints covering the full report
- Sources should include URLs from citations in the report
- Keep the briefing concise but informative`;

  try {
    const raw = await generateWithGemini(extractionPrompt);
    const cleaned = stripCodeFences(raw);
    const parsed = JSON.parse(cleaned) as Record<string, unknown>;

    // Build the payload with safe defaults
    return {
      topic,
      notebookId: "", // No notebook — using Gemini Deep Research
      createdAt,
      completedAt: new Date().toISOString(),
      sources: Array.isArray(parsed.sources)
        ? (parsed.sources as ResearchSource[])
        : [],
      briefing:
        typeof parsed.briefing === "string"
          ? parsed.briefing
          : report.slice(0, 2000),
      talkingPoints: Array.isArray(parsed.talkingPoints)
        ? (parsed.talkingPoints as string[])
        : [],
      codeExamples: Array.isArray(parsed.codeExamples)
        ? (parsed.codeExamples as CodeExample[])
        : [],
      comparisonData: Array.isArray(parsed.comparisonData)
        ? parsed.comparisonData
        : undefined,
      sceneHints: Array.isArray(parsed.sceneHints)
        ? (parsed.sceneHints as SceneHint[])
        : [],
      infographicUrls: undefined, // Infographics handled separately
    };
  } catch (error) {
    console.error(
      "[gemini-research] Failed to parse report, using fallback extraction:",
      error,
    );

    // Fallback: regex-based extraction (same logic as research.ts helpers)
    return buildFallbackPayload(topic, report, createdAt);
  }
}

// ---------------------------------------------------------------------------
// Fallback extraction helpers (regex-based, from research.ts)
// ---------------------------------------------------------------------------

/** Substrings (matched against the lower-cased URL) that mark documentation. */
const DOCS_URL_MARKERS = [
  "/docs",
  "documentation",
  "developer.",
  "devdocs",
  "mdn",
  "spec.",
];

/** Substrings (matched against the lower-cased URL) that mark articles. */
const ARTICLE_URL_MARKERS = ["blog", "medium.com", "dev.to", "hashnode", "article"];

/**
 * Classify a source URL as youtube / docs / article / unknown.
 *
 * YouTube is detected from the hostname so that a page which merely mentions
 * "youtube" in its path (e.g. a blog post about the YouTube API) is no longer
 * misclassified. When the string cannot be parsed as a URL, the check falls
 * back to a substring match on the whole string.
 */
function classifySourceType(
  url: string,
): "youtube" | "article" | "docs" | "unknown" {
  if (!url) return "unknown";
  const lower = url.toLowerCase();

  let host: string | undefined;
  try {
    host = new URL(url).hostname.toLowerCase();
  } catch {
    host = undefined; // not an absolute URL — use the substring fallback below
  }

  const isYoutube =
    host !== undefined
      ? host === "youtu.be" || host.endsWith(".youtu.be") || host.includes("youtube")
      : lower.includes("youtube") || lower.includes("youtu.be");
  if (isYoutube) return "youtube";

  if (DOCS_URL_MARKERS.some((marker) => lower.includes(marker))) return "docs";
  if (ARTICLE_URL_MARKERS.some((marker) => lower.includes(marker))) return "article";
  return "unknown";
}

/**
 * Leading list marker: either the original "marker + . or )" form ("1.", "2)")
 * or a plain bullet ("-", "•", "*") followed by whitespace. The second
 * alternative is the fix — plain bullets used to be left on the text.
 */
const LEADING_LIST_MARKER = /^\s*(?:[-•*\d]+[.)]\s*|[-•*]\s+)/;

/**
 * Collect up to 8 talking points: every line that is longer than 20
 * characters once its leading list marker (if any) has been removed.
 */
function extractTalkingPoints(text: string): string[] {
  const points: string[] = [];

  for (const line of text.split("\n")) {
    const cleaned = line.replace(LEADING_LIST_MARKER, "").trim();
    if (cleaned.length > 20) {
      points.push(cleaned);
    }
  }

  return points.slice(0, 8);
}

/**
 * Extract fenced code blocks from markdown.
 *
 * The language tag may contain `+ # . -` (e.g. "c++", "objective-c") and may
 * be followed by extra info-string text; previously such an opening fence
 * failed to match, after which the closing fence was treated as an opener and
 * the prose up to the next fence was captured as "code". Blocks without a tag
 * default to "typescript". `context` is the last non-empty line that precedes
 * the block, or "Code example" when there is none.
 */
function extractCodeExamples(text: string): CodeExample[] {
  const examples: CodeExample[] = [];
  const codeBlockRegex = /```([\w+#.-]*)[^\n]*\n([\s\S]*?)```/g;

  for (const match of text.matchAll(codeBlockRegex)) {
    const language = match[1] || "typescript";
    const snippet = match[2].trim();

    const beforeBlock = text.slice(0, match.index ?? 0);
    const contextLines = beforeBlock.split("\n").filter((l) => l.trim());
    const context =
      contextLines.length > 0
        ? contextLines[contextLines.length - 1].trim()
        : "Code example";

    examples.push({ snippet, language, context });
  }

  return examples;
}

/**
 * Pick a scene type for a chunk of report text. Checks run in priority order:
 * code → list (3+ items) → comparison wording → UI wording → narration.
 */
function classifyScene(
  content: string,
): "narration" | "code" | "list" | "comparison" | "mockup" {
  // Code blocks
  if (
    /```[\s\S]*?```/.test(content) ||
    /^\s{2,}(const|let|var|function|import|export|class|def|return)\b/m.test(
      content,
    )
  ) {
    return "code";
  }
  // Numbered or bulleted lists (3+ items). Plain "- item" / "* item" bullets
  // are accepted as well as "1." / "1)" — the old pattern required "." or ")"
  // after every marker, so ordinary bulleted lists were never detected.
  const listMatches = content.match(/^\s*(?:[-•*\d]+[.)]|[-•*])\s/gm);
  if (listMatches && listMatches.length >= 3) {
    return "list";
  }
  // Comparison language
  if (
    /\bvs\.?\b/i.test(content) ||
    /\bcompare[ds]?\b/i.test(content) ||
    /\bdifference[s]?\b/i.test(content) ||
    /\bpros\s+(and|&)\s+cons\b/i.test(content)
  ) {
    return "comparison";
  }
  // UI / mockup language
  if (
    /\b(UI|interface|dashboard|screen|layout|component|widget|button|modal)\b/i.test(
      content,
    )
  ) {
    return "mockup";
  }
  return "narration";
}

/** Human-readable justification for each scene type (built once, not per section). */
const SCENE_REASONS: Record<ReturnType<typeof classifyScene>, string> = {
  code: "Contains code blocks or programming constructs",
  list: "Contains a numbered or bulleted list with 3+ items",
  comparison: "Contains comparison language (vs, compare, differences)",
  mockup: "Describes UI elements or interface components",
  narration: "General explanatory content best suited for narration",
};

/**
 * Turn report sections into scene hints. Blank sections are skipped; each
 * hint carries at most the first 500 characters of its section.
 */
function generateSceneHints(sections: string[]): SceneHint[] {
  const hints: SceneHint[] = [];

  for (const section of sections) {
    if (!section.trim()) continue;

    const sceneType = classifyScene(section);
    hints.push({
      content: section.slice(0, 500),
      suggestedSceneType: sceneType,
      reason: SCENE_REASONS[sceneType],
    });
  }

  return hints;
}

/**
 * Collect unique sources from markdown links of the form `[title](http…)`.
 * The first occurrence of a URL wins; later duplicates are dropped.
 */
function extractSourcesFromReport(report: string): ResearchSource[] {
  const sources: ResearchSource[] = [];
  // Match markdown links: [title](url)
  const linkRegex = /\[([^\]]+)\]\((https?:\/\/[^)]+)\)/g;
  const seenUrls = new Set<string>();

  for (const match of report.matchAll(linkRegex)) {
    const url = match[2];
    if (seenUrls.has(url)) continue;
    seenUrls.add(url);

    sources.push({
      title: match[1],
      url,
      type: classifySourceType(url),
    });
  }

  return sources;
}

/**
 * Build a ResearchPayload without any model call, using only the regex
 * helpers above. The briefing is the first 2,000 characters of the report.
 */
function buildFallbackPayload(
  topic: string,
  report: string,
  createdAt: string,
): ResearchPayload {
  const talkingPoints = extractTalkingPoints(report);
  const codeExamples = extractCodeExamples(report);
  const sources = extractSourcesFromReport(report);

  // Generate scene hints from report sections (split before h1–h3 headings
  // or at blank lines; fragments of 50 characters or fewer are ignored)
  const sections = report
    .split(/\n(?=#{1,3}\s)|\n\n/)
    .filter((s) => s.trim().length > 50);
  const sceneHints = generateSceneHints(sections);

  return {
    topic,
    notebookId: "", // No notebook — using Gemini Deep Research
    createdAt,
    completedAt: new Date().toISOString(),
    sources,
    briefing: report.slice(0, 2000),
    talkingPoints,
    codeExamples,
    sceneHints,
    infographicUrls: undefined,
  };
}

// ---------------------------------------------------------------------------
// Full pipeline: submit → poll → parse
// ---------------------------------------------------------------------------

/**
 * Run the full Gemini Deep Research pipeline for a topic: submit, poll until
 * a terminal state, then parse the report.
 *
 * This is a blocking convenience wrapper. (The cron ingest hunk in this patch
 * calls `submitResearch` directly and leaves polling to a separate cron.)
 *
 * @param topic - Research topic.
 * @param config - Optional timeout (default 20 min), poll interval (default
 *   15 s) and source URLs.
 * @returns The parsed ResearchPayload.
 * @throws If research fails, the interaction disappears, the interaction
 *   completes without any report text, or the timeout elapses.
 */
export async function conductGeminiResearch(
  topic: string,
  config?: GeminiResearchConfig,
): Promise<ResearchPayload> {
  const timeout = config?.researchTimeout ?? 1_200_000; // 20 min
  const pollInterval = config?.pollInterval ?? 15_000; // 15s

  // Submit
  const interactionId = await submitResearch(topic, config);

  // Poll until complete
  const startTime = Date.now();
  while (Date.now() - startTime < timeout) {
    const status = await pollResearch(interactionId);

    if (status.status === "completed") {
      // "completed" is terminal: an empty report will not fill in later, so
      // fail now instead of polling a finished interaction until the timeout.
      if (!status.report) {
        throw new Error(
          `[gemini-research] Research completed without report text: ${interactionId}`,
        );
      }
      console.log(
        `[gemini-research] Research completed (${Math.round((Date.now() - startTime) / 1000)}s)`,
      );
      return parseResearchReport(topic, status.report);
    }

    if (status.status === "failed") {
      throw new Error(`[gemini-research] Research failed: ${status.error}`);
    }

    if (status.status === "not_found") {
      throw new Error(
        `[gemini-research] Interaction not found: ${interactionId}`,
      );
    }

    await new Promise((r) => setTimeout(r, pollInterval));
  }

  throw new Error(`[gemini-research] Research timed out after ${timeout}ms`);
}