-- Database Schema for Semi-Automatic Moderation System

-- Table: User moderated subreddits cache
-- Holds one row per (user_id, subreddit_name) pair; the pair is enforced
-- unique at the table level. Every non-key column has a default, so a row
-- can be created from just the user and the subreddit name.
CREATE TABLE user_moderated_subreddits (
    id               SERIAL,
    user_id          VARCHAR(50)  NOT NULL,
    subreddit_name   VARCHAR(100) NOT NULL,
    permissions      TEXT[]       DEFAULT '{}',                -- empty array when not supplied
    subscriber_count INT          DEFAULT 0,
    -- Defaults to insert time. No trigger visible in this file refreshes it
    -- on UPDATE, so writers have to set it themselves.
    last_updated     TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id),
    UNIQUE (user_id, subreddit_name)
);

-- Table: Subreddit rules and AI analysis
-- One row per subreddit. subreddit_name is UNIQUE, and that constraint is
-- what lets modqueue_items reference it by name in its foreign key, so it
-- must not be dropped. rules_json is the only mandatory payload column; the
-- remaining descriptive and analysis columns are nullable.
CREATE TABLE subreddit_rules (
    id               SERIAL,
    subreddit_name   VARCHAR(100) NOT NULL,
    rules_json       JSONB        NOT NULL,
    description      TEXT,
    sidebar_html     TEXT,
    subreddit_type   VARCHAR(20),
    submission_type  VARCHAR(20),
    ai_analysis      JSONB,
    analysis_version VARCHAR(10)  DEFAULT '1.0',
    -- Both timestamps default to insert time. No trigger visible in this
    -- file maintains last_updated on UPDATE.
    last_updated     TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    created_at       TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id),
    UNIQUE (subreddit_name)
);

-- Table: Moderation queue items cache
-- One row per queued item. Each row must belong to a subreddit that already
-- has a subreddit_rules row (foreign key on subreddit_name). The foreign key
-- declares no ON DELETE action, so a referenced subreddit_rules row cannot
-- be deleted while items still point at it.
CREATE TABLE modqueue_items (
    id              SERIAL,
    subreddit_name  VARCHAR(100) NOT NULL,
    reddit_id       VARCHAR(50)  NOT NULL,
    reddit_fullname VARCHAR(50)  NOT NULL,
    item_type       VARCHAR(20)  NOT NULL,
    author          VARCHAR(100),
    title           TEXT,
    content         TEXT,
    url             TEXT,
    permalink       VARCHAR(500),
    score           INT          DEFAULT 0,
    num_reports     INT          DEFAULT 0,
    user_reports    JSONB        DEFAULT '[]',
    mod_reports     JSONB        DEFAULT '[]',
    -- NOTE(review): presumably Unix epoch seconds; a 32-bit INTEGER runs out
    -- in 2038 -- confirm with the writers before widening.
    created_utc     INT          NOT NULL,
    fetched_at      TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    status          VARCHAR(20)  DEFAULT 'pending',
    PRIMARY KEY (id),
    UNIQUE (reddit_id),
    -- Closed value sets for the two classifier columns.
    CHECK (item_type IN ('submission', 'comment')),
    CHECK (status IN ('pending', 'analyzed', 'actioned')),
    FOREIGN KEY (subreddit_name) REFERENCES subreddit_rules (subreddit_name)
);

-- Table: AI analysis results
-- Stores the outcome of an AI analysis of a modqueue item. Nothing here
-- restricts an item to a single row, so one item may accumulate several
-- analyses. Rows are deleted together with their parent item
-- (ON DELETE CASCADE).
CREATE TABLE ai_analysis_results (
  id SERIAL PRIMARY KEY,
  modqueue_item_id INTEGER NOT NULL,
  analysis_type VARCHAR(50) DEFAULT 'rule_violation',
  violations_found JSONB DEFAULT '[]',
  -- Scored on a 0-10 scale (see the CHECK). The column must be DECIMAL(4,2):
  -- DECIMAL(3,2) tops out at 9.99, so a score of exactly 10 -- which the
  -- CHECK explicitly allows -- would fail with a numeric overflow.
  confidence_score DECIMAL(4,2) CHECK (confidence_score >= 0 AND confidence_score <= 10),
  suggested_action VARCHAR(20) CHECK (suggested_action IN ('approve', 'remove', 'spam', 'manual_review')),
  reasoning TEXT,
  severity VARCHAR(10) CHECK (severity IN ('low', 'medium', 'high')),
  removal_reason TEXT,
  requires_human_review BOOLEAN DEFAULT FALSE,
  additional_actions JSONB DEFAULT '[]',
  analysis_model VARCHAR(50) DEFAULT 'claude-sonnet-4-20250514',
  analysis_prompt_hash VARCHAR(64),
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  FOREIGN KEY (modqueue_item_id) REFERENCES modqueue_items(id) ON DELETE CASCADE
);

-- Table: Moderation actions taken
-- Records an action a user took on a modqueue item. The foreign key to
-- modqueue_items declares no ON DELETE action, so an item that has recorded
-- actions cannot be deleted until those rows are removed.
CREATE TABLE moderation_actions (
  id SERIAL PRIMARY KEY,
  modqueue_item_id INTEGER NOT NULL,
  user_id VARCHAR(50) NOT NULL,
  action_type VARCHAR(20) NOT NULL CHECK (action_type IN ('approve', 'remove', 'spam', 'manual_review')),
  action_reason TEXT,
  was_ai_suggested BOOLEAN DEFAULT FALSE,
  -- Confidence of the AI suggestion, on the 0-10 scale that
  -- ai_analysis_results.confidence_score enforces with its CHECK.
  -- DECIMAL(4,2) so that a score of 10.00 is storable; DECIMAL(3,2) tops out
  -- at 9.99 and would overflow on it.
  ai_confidence_score DECIMAL(4,2),
  moderator_note TEXT,
  action_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  reddit_response JSONB,
  FOREIGN KEY (modqueue_item_id) REFERENCES modqueue_items(id)
);

-- Table: AI analysis performance tracking
-- Aggregated metrics keyed loosely by date and (optionally) subreddit. No
-- uniqueness is enforced on (date, subreddit_name), so several rows for the
-- same day and subreddit are allowed.
CREATE TABLE ai_performance_metrics (
  id SERIAL PRIMARY KEY,
  date DATE NOT NULL,
  subreddit_name VARCHAR(100),
  total_analyses INTEGER DEFAULT 0,
  correct_predictions INTEGER DEFAULT 0,
  false_positives INTEGER DEFAULT 0,
  false_negatives INTEGER DEFAULT 0,
  -- Average of confidence scores, which ai_analysis_results bounds to 0-10.
  -- DECIMAL(5,3) so that an average of exactly 10.000 is storable;
  -- DECIMAL(4,3) tops out at 9.999 and would overflow on it.
  avg_confidence_score DECIMAL(5,3),
  human_overrides INTEGER DEFAULT 0,
  processing_time_ms INTEGER,
  api_cost_usd DECIMAL(10,4),
  model_version VARCHAR(50),
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Table: Batch analysis jobs
-- One row per batch job. A new job starts as 'pending' with all counters at
-- zero. started_at and completed_at have no default and stay NULL until a
-- writer sets them.
CREATE TABLE batch_analysis_jobs (
    id                      SERIAL,
    subreddit_name          VARCHAR(100) NOT NULL,
    user_id                 VARCHAR(50)  NOT NULL,
    job_status              VARCHAR(20)  DEFAULT 'pending',
    total_items             INT          DEFAULT 0,
    processed_items         INT          DEFAULT 0,
    high_confidence_actions INT          DEFAULT 0,
    manual_review_items     INT          DEFAULT 0,
    job_config              JSONB,
    error_message           TEXT,
    started_at              TIMESTAMP,
    completed_at            TIMESTAMP,
    created_at              TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id),
    -- Closed set of lifecycle states.
    CHECK (job_status IN ('pending', 'running', 'completed', 'failed'))
);

-- Table: User preferences for moderation
-- One row per user (user_id is UNIQUE). Every setting has a default, so a
-- row can be created from the user_id alone. updated_at is refreshed on
-- every UPDATE by the update_user_prefs_updated_at trigger defined in this
-- file.
CREATE TABLE user_moderation_preferences (
  id SERIAL PRIMARY KEY,
  user_id VARCHAR(50) UNIQUE NOT NULL,
  -- Thresholds are compared against confidence scores, which
  -- ai_analysis_results bounds to 0-10. DECIMAL(4,2) lets a user choose the
  -- scale maximum of 10.00; DECIMAL(3,2) tops out at 9.99 and would reject it.
  auto_approve_threshold DECIMAL(4,2) DEFAULT 8.5,
  auto_remove_threshold DECIMAL(4,2) DEFAULT 8.0,
  require_confirmation BOOLEAN DEFAULT TRUE,
  preferred_batch_size INTEGER DEFAULT 10,
  notification_settings JSONB DEFAULT '{}',
  custom_removal_templates JSONB DEFAULT '[]',
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Indexes for performance
-- Lookup and sort paths on the queue, analysis, action and metrics tables.
CREATE INDEX idx_modqueue_items_subreddit_status ON modqueue_items(subreddit_name, status);
CREATE INDEX idx_modqueue_items_created_utc ON modqueue_items(created_utc);
CREATE INDEX idx_ai_analysis_confidence ON ai_analysis_results(confidence_score);
CREATE INDEX idx_moderation_actions_timestamp ON moderation_actions(action_timestamp);
CREATE INDEX idx_subreddit_rules_updated ON subreddit_rules(last_updated);
CREATE INDEX idx_ai_performance_date_subreddit ON ai_performance_metrics(date, subreddit_name);

-- Foreign-key columns pointing at modqueue_items(id). PostgreSQL does not
-- index the referencing side of a foreign key automatically. Without these,
-- deleting a modqueue item (cascade into ai_analysis_results, FK check
-- against moderation_actions) and the moderation_queue_summary join on
-- modqueue_item_id must scan the child tables.
CREATE INDEX idx_ai_analysis_modqueue_item ON ai_analysis_results(modqueue_item_id);
CREATE INDEX idx_moderation_actions_modqueue_item ON moderation_actions(modqueue_item_id);

-- Triggers for updated timestamps
-- Generic row-level trigger function: overwrites the updated_at column of
-- the row being written with the current transaction timestamp and returns
-- the modified row. Any table it is attached to must have an updated_at
-- column.
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $fn$
BEGIN
    NEW.updated_at := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$fn$;

-- Runs update_updated_at_column() before each row UPDATE on
-- user_moderation_preferences, so updated_at is overwritten on every update
-- of a preferences row.
CREATE TRIGGER update_user_prefs_updated_at
BEFORE UPDATE
ON user_moderation_preferences
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();

-- Views for common queries

-- moderation_queue_summary: one row per subreddit with queue counts.
-- The LEFT JOIN yields one row per (item, analysis) pair, and nothing limits
-- an item to a single ai_analysis_results row. All item counts therefore use
-- COUNT(DISTINCT m.id) so an item with several analyses is counted once.
--   total_items          - items in the queue cache for the subreddit
--   pending_items        - items whose status is 'pending'
--   suggested_removals   - items with at least one analysis suggesting 'remove'
--   manual_review_needed - items with at least one analysis flagged for human review
--   avg_confidence       - mean confidence over all analysis rows (NULL if none)
CREATE VIEW moderation_queue_summary AS
SELECT
    m.subreddit_name,
    COUNT(DISTINCT m.id) AS total_items,
    COUNT(DISTINCT m.id) FILTER (WHERE m.status = 'pending') AS pending_items,
    COUNT(DISTINCT m.id) FILTER (WHERE a.suggested_action = 'remove') AS suggested_removals,
    COUNT(DISTINCT m.id) FILTER (WHERE a.requires_human_review) AS manual_review_needed,
    AVG(a.confidence_score) AS avg_confidence
FROM modqueue_items AS m
LEFT JOIN ai_analysis_results AS a
    ON a.modqueue_item_id = m.id
GROUP BY m.subreddit_name;

-- ai_accuracy_report: moderation actions from the last 30 days, grouped by
-- calendar day and action type, newest day first.
--   total_actions        - all actions in the group
--   ai_suggested_actions - actions where was_ai_suggested is true
--   avg_ai_confidence    - mean ai_confidence_score over the AI-suggested
--                          actions only (NULL when the group has none)
CREATE VIEW ai_accuracy_report AS
SELECT
    CAST(ma.action_timestamp AS DATE) AS action_date,
    ma.action_type,
    COUNT(*) AS total_actions,
    COUNT(*) FILTER (WHERE ma.was_ai_suggested) AS ai_suggested_actions,
    AVG(ma.ai_confidence_score) FILTER (WHERE ma.was_ai_suggested) AS avg_ai_confidence
FROM moderation_actions AS ma
WHERE ma.action_timestamp >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY
    CAST(ma.action_timestamp AS DATE),
    ma.action_type
ORDER BY action_date DESC;

-- Sample data insertion (for testing)
-- Seeds two subreddits, each with a single rule. Columns not listed take
-- their table defaults. subreddit_name is UNIQUE, so running this statement
-- a second time against the same database fails with a unique violation.
INSERT INTO subreddit_rules
    (subreddit_name, rules_json, description)
VALUES
    (
        'test',
        '[{"short_name": "Rule 1", "description": "Be civil and respectful"}]',
        'A test subreddit'
    ),
    (
        'mildlyinteresting',
        '[{"short_name": "Rule 1", "description": "Posts must be original content"}]',
        'For mildly interesting content'
    );

-- Initial user preferences
-- Creates the 'default_user' row; only user_id is supplied, so every other
-- setting takes its column default.
INSERT INTO user_moderation_preferences
    (user_id)
VALUES
    ('default_user');