// =============================================================================
// HeadlineSift.com — Prisma Schema (SQLite)
// =============================================================================
//
// ## Design Principles
//
// 1. **SQLite-first, PostgreSQL-ready:**
//    - All status/type fields use String (SQLite has no enum support).
//    - Valid values are documented in comments; these become native PostgreSQL
//      enums on migration.
//    - JSON payloads are stored as String; migrate to JSONB on PostgreSQL.
//    - No provider-specific type annotations (@db.Text, @db.JsonB, etc.).
//
// 2. **Index strategy:**
//    - Filtering & listing:   status, type, countryId, categoryId
//    - Dedup detection:        urlHash, titleHash, contentHash
//    - Ranking:                rankScore, impactScore, confidenceScore
//    - Job processing:         status + runAfter (polling pattern)
//    - Fetch logs:             sourceId + startedAt (per-source history)
//    - Foreign keys:           all FK columns indexed for JOIN performance
//
// 3. **ID convention:**
//    - All IDs use cuid() — the safest cross-provider default.
//    - Application code can override with its own ID generation when needed.
//
// 4. **Relations:**
//    - All relations use explicit field references.
//    - No cascade deletes on critical data (articles, stories) — these are
//      handled by application-level soft-deletion via status fields.
//    - Join tables use composite unique constraints.
//
// =============================================================================

generator client {
  provider = "prisma-client-js"
}

datasource db {
  provider = "sqlite"
  url      = env("DATABASE_URL")
}

// =============================================================================
// 1. AdminUser — Administrative dashboard users
// =============================================================================

model AdminUser {
  id           String   @id @default(cuid())
  name         String
  email        String   @unique
  passwordHash String
  role         String   @default("EDITOR") // SUPER_ADMIN | ADMIN | EDITOR | VIEWER
  status       String   @default("ACTIVE") // ACTIVE | INACTIVE | SUSPENDED
  createdAt    DateTime @default(now())
  updatedAt    DateTime @updatedAt

  // Future: sessions, password reset tokens, login audit log

  @@index([role])
  @@index([status])
  @@map("admin_users")
}

// =============================================================================
// 2. Country — Geographic regions for news filtering
// =============================================================================

model Country {
  id             String   @id @default(cuid())
  name           String
  code           String   @unique // ISO 3166-1 alpha-2 (e.g., "US", "GB", "JP")
  region         String?  // "North America" | "Europe" | "Asia Pacific" | etc.
  defaultLanguage String  @default("en")
  isGlobal       Boolean  @default(false) // true = "World" / global bucket
  status         String   @default("ACTIVE") // ACTIVE | INACTIVE
  displayOrder   Int      @default(0)
  createdAt      DateTime @default(now())
  updatedAt      DateTime @updatedAt

  // Relations
  sourceMappings SourceMapping[]
  rawArticles    RawArticle[]
  storyClusters  StoryCluster[]
  rankingRules   RankingRule[]

  @@index([status])
  @@index([displayOrder])
  @@map("countries")
}

// =============================================================================
// 3. Category — Topic categories with tiered visibility
// =============================================================================

model Category {
  id             String   @id @default(cuid())
  name           String
  slug           String   @unique // URL-safe slug (e.g., "technology", "world-politics")
  level          String   @default("BOTH") // GLOBAL | COUNTRY | BOTH
  description    String?
  aiSafetyLevel  String   @default("MEDIUM") // LOW | MEDIUM | HIGH (how safe to auto-generate AI summaries)
  maxPublicStories Int    @default(50) // Max stories shown on public pages
  status         String   @default("ACTIVE") // ACTIVE | INACTIVE
  displayOrder   Int      @default(0)
  createdAt      DateTime @default(now())
  updatedAt      DateTime @updatedAt

  // Relations
  sourceMappings SourceMapping[]
  rawArticles    RawArticle[]
  storyClusters  StoryCluster[]
  rankingRules   RankingRule[]

  @@index([level])
  @@index([status])
  @@index([displayOrder])
  @@map("categories")
}

// =============================================================================
// 4. Source — News sources (RSS feeds, APIs, manual entries)
// =============================================================================

model Source {
  id                  String    @id @default(cuid())
  name                String
  websiteUrl          String
  sourceType          String    @default("RSS") // RSS | API | MANUAL | SCRAPE
  feedUrl             String?   // RSS/Atom feed URL
  apiUrl              String?   // REST API endpoint
  language            String    @default("en")
  trustScore          Float     @default(50.0) // 0-100, higher = more trusted
  reliabilityScore    Float     @default(50.0) // 0-100, historical reliability
  duplicateRate       Float     @default(0.0) // 0-100, % of articles that were dupes
  clickbaitRate       Float     @default(0.0) // 0-100, % flagged as clickbait
  isOfficial          Boolean   @default(false) // Official government/institutional source?
  status              String    @default("ACTIVE") // ACTIVE | INACTIVE | ERROR | RATE_LIMITED
  fetchFrequencyMinutes Int     @default(15)
  lastFetchedAt       DateTime?
  lastFetchStatus     String?   // SUCCESS | PARTIAL | FAILED
  consecutiveFailures Int       @default(0)
  lastErrorMessage    String?
  usageNotes          String?
  createdAt           DateTime  @default(now())
  updatedAt           DateTime  @updatedAt

  // Relations
  sourceMappings SourceMapping[]
  rawArticles    RawArticle[]
  storyArticles  StoryArticle[]
  fetchLogs      FetchLog[]

  @@index([sourceType])
  @@index([status])
  @@index([lastFetchedAt])
  @@index([consecutiveFailures])
  @@index([trustScore])
  @@index([status, lastFetchedAt])   // Polling: find active sources due for fetch
  @@index([status, consecutiveFailures]) // Monitoring: find problematic sources
  @@map("sources")
}

// =============================================================================
// 5. SourceMapping — Many-to-many join: Source <-> Country <-> Category
// =============================================================================
// Each mapping assigns a source to a country+category pair with a priority.
// A single source (e.g., BBC) can map to:
//   (GB, general), (GB, politics), (US, general), (WORLD, general)

model SourceMapping {
  id         String   @id @default(cuid())
  sourceId   String
  countryId  String
  categoryId String
  priority   Int      @default(0) // Higher = preferred source for this country+category
  status     String   @default("ACTIVE") // ACTIVE | INACTIVE
  createdAt  DateTime @default(now())
  updatedAt  DateTime @updatedAt

  // Relations
  source   Source   @relation(fields: [sourceId], references: [id])
  country  Country  @relation(fields: [countryId], references: [id])
  category Category @relation(fields: [categoryId], references: [id])

  @@unique([sourceId, countryId, categoryId])
  @@index([sourceId])
  @@index([countryId])
  @@index([categoryId])
  @@index([priority])
  @@map("source_mappings")
}

// =============================================================================
// 6. RawArticle — Unprocessed articles fetched from sources
// =============================================================================
// This is the "inbox" table. Articles land here raw, then get clustered.

model RawArticle {
  id           String   @id @default(cuid())
  sourceId     String
  countryId    String?
  categoryId   String?
  title        String
  originalUrl  String
  canonicalUrl String?  // Normalized URL (tracking params stripped)
  author       String?
  publishedAt  DateTime
  fetchedAt    DateTime @default(now())
  rawSnippet   String?  // Short excerpt or first paragraph
  rawContent   String?  // Full article text (when available)
  language     String   @default("en")
  titleHash    String   // SHA-256 of normalized title (for dedup)
  urlHash      String   // SHA-256 of normalized URL (for dedup)
  contentHash  String?  // SHA-256 of rawContent (for content-level dedup)
  clustered    Boolean  @default(false) // Has this been assigned to a StoryCluster?
  status       String   @default("PENDING") // PENDING | PROCESSED | SKIPPED | ERROR
  createdAt    DateTime @default(now())
  updatedAt    DateTime @updatedAt

  // Relations
  source        Source         @relation(fields: [sourceId], references: [id])
  country       Country?       @relation(fields: [countryId], references: [id])
  category      Category?      @relation(fields: [categoryId], references: [id])
  storyArticles StoryArticle[]

  @@unique([urlHash])
  @@index([titleHash])
  @@index([contentHash])
  @@index([sourceId])
  @@index([countryId])
  @@index([categoryId])
  @@index([publishedAt])
  @@index([fetchedAt])
  @@index([clustered])
  @@index([status])
  @@index([language])
  @@index([sourceId, fetchedAt])    // Articles from source, newest first
  @@index([status, clustered])      // Unclustered articles pending processing
  @@index([status, publishedAt])    // Filter articles by status ordered by date
  @@map("raw_articles")
}

// =============================================================================
// 7. StoryCluster — Groups of articles covering the same story
// =============================================================================

model StoryCluster {
  id                   String   @id @default(cuid())
  canonicalTitle       String
  categoryId           String?
  countryId            String?   // null = global story
  firstSeenAt          DateTime
  lastSeenAt           DateTime  @default(now())
  sourceCount          Int       @default(1) // How many unique sources covered this
  trustedSourceCount   Int       @default(0) // Subset from high-trust sources
  officialSourcePresent Boolean  @default(false)
  rankScore            Float     @default(0.0) // Computed ranking score (0-100)
  impactScore          Float     @default(0.0) // Importance/impact weight
  confidenceScore      Float     @default(0.0) // How confident the cluster is correct
  status               String    @default("DRAFT") // DRAFT | NEEDS_REVIEW | APPROVED | PUBLISHED | HIDDEN | REJECTED
  createdAt            DateTime  @default(now())
  updatedAt            DateTime  @updatedAt

  // Relations
  category        Category?         @relation(fields: [categoryId], references: [id])
  country         Country?          @relation(fields: [countryId], references: [id])
  storyArticles   StoryArticle[]
  aiAnalysis      AiStoryAnalysis?

  @@index([categoryId])
  @@index([countryId])
  @@index([status])
  @@index([rankScore])
  @@index([impactScore])
  @@index([firstSeenAt])
  @@index([lastSeenAt])
  @@index([sourceCount])
  @@index([status, rankScore])      // Top published stories by rank
  @@index([countryId, status])      // Stories filtered by country + status
  @@index([categoryId, status])     // Stories filtered by category + status
  @@map("story_clusters")
}

// =============================================================================
// 8. StoryArticle — Join table: StoryCluster <-> RawArticle
// =============================================================================

model StoryArticle {
  id              String   @id @default(cuid())
  storyClusterId  String
  rawArticleId    String
  sourceId        String
  similarityScore Float    @default(0.0) // 0-1 similarity to cluster centroid
  relationType    String   @default("SAME_STORY") // EXACT_DUPLICATE | SAME_STORY | RELATED_STORY | DIFFERENT_STORY
  createdAt       DateTime @default(now())

  // Relations
  storyCluster StoryCluster @relation(fields: [storyClusterId], references: [id])
  rawArticle   RawArticle   @relation(fields: [rawArticleId], references: [id])
  source       Source       @relation(fields: [sourceId], references: [id])

  @@unique([storyClusterId, rawArticleId])
  @@index([storyClusterId])
  @@index([rawArticleId])
  @@index([sourceId])
  @@index([relationType])
  @@map("story_articles")
}

// =============================================================================
// 9. AiStoryAnalysis — AI-generated analysis attached to a story cluster
// =============================================================================

model AiStoryAnalysis {
  id                        String   @id @default(cuid())
  storyClusterId            String   @unique // One analysis per cluster
  summary                   String?  // AI-generated summary (2-3 sentences)
  whyItMatters              String?  // Context: why this story is significant
  positiveImpact            String?  // Who benefits / what improves
  negativeImpact            String?  // Who is harmed / what worsens
  affectedGroupsJson        String?  // JSON array: [{group: "farmers", impact: "high", direction: "negative"}, ...]
  impactLevel               String?  // LOW | MEDIUM | HIGH — overall societal impact
  confidenceLevel           String?  // LOW | MEDIUM | HIGH — AI confidence in this analysis
  confidenceReason          String?  // Why the confidence level was chosen
  neutralityCheck           String?  // Bias/neutrality assessment
  riskWarning               String?  // Content warning or sensitivity flag
  suggestedDisplayHeadline  String?  // AI-suggested headline for the cluster
  modelName                 String?  // e.g., "claude-opus-4-8", "gpt-4o"
  inputSourceCount          Int      @default(0) // How many articles were fed in
  generatedAt               DateTime @default(now())
  humanEdited               Boolean  @default(false)
  createdAt                 DateTime @default(now())
  updatedAt                 DateTime @updatedAt

  // Relations
  storyCluster StoryCluster @relation(fields: [storyClusterId], references: [id])

  @@index([impactLevel])
  @@index([confidenceLevel])
  @@index([generatedAt])
  @@map("ai_story_analyses")
}

// =============================================================================
// 10. FetchLog — Audit trail for source fetch operations
// =============================================================================

model FetchLog {
  id              String    @id @default(cuid())
  sourceId        String
  startedAt       DateTime  @default(now())
  endedAt         DateTime?
  status          String    @default("STARTED") // STARTED | SUCCESS | PARTIAL | FAILED
  articlesFound   Int       @default(0)
  articlesSaved   Int       @default(0) // New articles (not duplicates)
  duplicatesFound Int       @default(0) // Articles that matched existing urlHash
  errorMessage    String?
  createdAt       DateTime  @default(now())

  // Relations
  source Source @relation(fields: [sourceId], references: [id])

  @@index([sourceId])
  @@index([startedAt])
  @@index([status])
  @@index([sourceId, startedAt]) // Per-source fetch history, newest first
  @@map("fetch_logs")
}

// =============================================================================
// 11. RankingRule — Per-category (and optionally per-country) ranking config
// =============================================================================

model RankingRule {
  id                   String   @id @default(cuid())
  categoryId           String
  countryId            String?  // null = applies to all countries for this category
  freshnessWeight      Float    @default(1.0) // Multiplier for recency
  sourceTrustWeight    Float    @default(1.0) // Multiplier for trustScore
  sourceCountWeight    Float    @default(1.0) // Multiplier for coverage breadth
  officialSourceBoost  Float    @default(1.5) // Extra boost when officialSourcePresent
  impactWeight         Float    @default(1.0) // Multiplier for impactScore
  duplicatePenalty     Float    @default(0.8) // Penalty for high duplicateRate
  lowConfidencePenalty Float    @default(0.7) // Penalty for low confidenceScore
  clickbaitPenalty     Float    @default(0.5) // Penalty for clickbaitRate
  maxStories           Int      @default(100) // Cap public stories for this rule
  status               String   @default("ACTIVE") // ACTIVE | INACTIVE | DRAFT
  createdAt            DateTime @default(now())
  updatedAt            DateTime @updatedAt

  // Relations
  category Category @relation(fields: [categoryId], references: [id])
  country  Country? @relation(fields: [countryId], references: [id])

  @@unique([categoryId, countryId])
  @@index([status])
  @@map("ranking_rules")
}

// =============================================================================
// 12. AppSetting — Application configuration key-value store
// =============================================================================

model AppSetting {
  id        String   @id @default(cuid())
  key       String   @unique
  value     String   // Store as string; parse JSON for complex values
  createdAt DateTime @default(now())
  updatedAt DateTime @updatedAt

  @@map("app_settings")
}

// =============================================================================
// 13. Job — Background job queue (in-process, no Redis/BullMQ)
// =============================================================================

model Job {
  id           String    @id @default(cuid())
  type         String    // FETCH_SOURCE | FETCH_ALL_SOURCES | CLUSTER_ARTICLES | RANK_STORIES | GENERATE_AI_ANALYSIS | PUBLISH_APPROVED_STORIES | BACKUP_DATABASE
  status       String    @default("PENDING") // PENDING | RUNNING | COMPLETED | FAILED | CANCELLED
  payloadJson  String?   // JSON payload with job-specific parameters
  attempts     Int       @default(0)
  maxAttempts  Int       @default(3)
  runAfter     DateTime  @default(now()) // Earliest time this job should execute
  lockedAt     DateTime? // When a worker claimed this job
  lockedBy     String?   // Worker identifier that claimed this job
  startedAt    DateTime?
  completedAt  DateTime?
  errorMessage String?
  createdAt    DateTime  @default(now())
  updatedAt    DateTime  @updatedAt

  @@index([status])
  @@index([type])
  @@index([runAfter])
  @@index([lockedBy])
  @@index([status, runAfter])       // Primary polling query: pending + due
  @@index([type, status])           // Filter by job type and status
  @@map("jobs")
}

// =============================================================================
// 14. SystemLog — Application logging
// =============================================================================

model SystemLog {
  id           String   @id @default(cuid())
  level        String   @default("INFO") // DEBUG | INFO | WARN | ERROR | FATAL
  source       String   @default("app") // Component name: "fetcher", "clusterer", "ranker", "ai", "api"
  message      String
  metadataJson String?  // Optional JSON with structured context
  createdAt    DateTime @default(now())

  @@index([level])
  @@index([source])
  @@index([createdAt])
  @@map("system_logs")
}

// =============================================================================
// PostgreSQL Migration Notes
// =============================================================================
//
// When migrating to PostgreSQL, make these changes:
//
// 1. CREATE ENUM types for each String status field:
//    CREATE TYPE admin_role AS ENUM ('SUPER_ADMIN', 'ADMIN', 'EDITOR', 'VIEWER');
//    CREATE TYPE entity_status AS ENUM ('ACTIVE', 'INACTIVE', ...);
//    CREATE TYPE source_type AS ENUM ('RSS', 'API', 'MANUAL', 'SCRAPE');
//    CREATE TYPE story_status AS ENUM ('DRAFT','NEEDS_REVIEW','APPROVED','PUBLISHED','HIDDEN','REJECTED');
//    CREATE TYPE job_type AS ENUM ('FETCH_SOURCE', 'FETCH_ALL_SOURCES', ...);
//    ...etc.
//
// 2. Replace String with the enum type on each field.
//
// 3. Change JSON string fields to JSONB:
//    affectedGroupsJson  String?  → affectedGroups  Json?
//    payloadJson         String?  → payload         Json?
//    metadataJson        String?  → metadata        Json?
//
// 4. Add explicit index names (Prisma supports @@index([...], name: "ix_...") in PG).
//
// 5. Consider adding full-text search vectors (tsvector) on title and content fields.
//
// 6. Add connection pooling (PgBouncer or Prisma's built-in pooling).
// =============================================================================
