#!/bin/bash
#
# ConvertBot - Automated document to markdown converter
# Watches convertbot-inbox, converts files, moves to appropriate folders
#

set -e

# --- Folder layout: everything lives under ~/Desktop/convertbot ---
INBOX="$HOME/Desktop/convertbot/convertbot-inbox"
OUTBOX="$HOME/Desktop/convertbot/convertbot-outbox"
DONE="$HOME/Desktop/convertbot/convertbot-done"
LOGFILE="$HOME/Desktop/convertbot/convertbot.log"

# --- Mode switches: a non-empty value from the environment wins ---
: "${LEGAL_MODE:=0}"
: "${CLEAN_MODE:=1}"   # clean mode is on unless set to 0
: "${PDF_MODE:=0}"     # 0 = Markdown output, 1 = PDF output

# --- Platform detection: Windows shells get low-memory docling flags ---
IS_WINDOWS=false
DOCLING_BATCH_SIZE=""
DOCLING_THREADS=""
case "$OSTYPE" in
    msys|cygwin|win32)
        IS_WINDOWS=true
        DOCLING_BATCH_SIZE="--page-batch-size 1"
        DOCLING_THREADS="--num-threads 1"
        ;;
esac

# --- Put the user-local and Homebrew tool directories first on PATH ---
PATH="$HOME/.local/bin:/opt/homebrew/bin:$PATH"
export PATH

# --- ANSI colour escapes (interpreted later by `echo -e`) ---
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
MAGENTA='\033[0;35m'
NC='\033[0m' # reset / no colour

# Logging helper
# Writes "[YYYY-MM-DD HH:MM:SS] <message>" to stdout and appends the same
# line to $LOGFILE.
#   $1 - message text
log() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '%s\n' "[$stamp] $1" | tee -a "$LOGFILE"
}

# Show help
# Prints the complete usage/help text to stdout.
# The heredoc delimiter is quoted ('EOF'), so the text below is emitted
# literally: no variable, command, or backslash expansion takes place.
show_help() {
    cat << 'EOF'
ConvertBot - Automated Document Converter

USAGE:
    convertbot                  # Process all files in inbox
    convertbot --watch          # Watch inbox continuously
    convertbot --status         # Show status and recent activity
    convertbot --legal          # Enable legal mode (explicit strikethrough)
    convertbot --fast           # Skip line break cleanup (fast mode)
    convertbot --pdf            # Output as PDF instead of Markdown
    convertbot --clean-overlap  # Remove page boundary overlaps from existing file
    convertbot --batch <dir>    # Convert all files in folder recursively
    convertbot --help           # Show this help

FOLDERS:
    ~/Desktop/convertbot/convertbot-inbox   - Drop files here to convert
    ~/Desktop/convertbot/convertbot-outbox  - Converted .md files appear here
    ~/Desktop/convertbot/convertbot-done    - Original files moved here after conversion

SUPPORTED FORMATS:
    PDF, DOCX, DOC, PPTX, PPT, XLSX, XLS, CSV, HTML, HTM, TXT, MD

EXAMPLES:
    # Convert all files in inbox
    convertbot

    # Watch inbox continuously (converts new files automatically)
    convertbot --watch

    # Legal mode - makes strikethrough explicit for LLMs
    convertbot --legal
    LEGAL_MODE=1 convertbot --watch

    # Fast mode - use markitdown instead of docling (quicker)
    convertbot --fast
    # Or rename: document_fast.pdf

    # Check recent activity
    convertbot --status

LEGAL MODE (for legal documents):
    Converts strikethrough formatting ~~text~~ to explicit [DELETED: text]
    This makes deletions clearer for LLM interpretation.
    
    Enable by:
    - Using --legal flag: convertbot --legal
    - Renaming file with "_legal" suffix: contract_legal.docx
    - Setting environment variable: LEGAL_MODE=1 convertbot

PDF OUTPUT MODE:
    Convert files TO PDF format instead of Markdown:
    - Markdown → PDF:     Clean PDF document
    - Word → PDF:         Preserves formatting
    - HTML → PDF:         Renders as print-ready PDF
    
    Enable by:
    - Using --pdf flag:   convertbot --pdf
    - Renaming file with "_pdf" suffix: report_pdf.md
    
    Requirements: pandoc (already installed)

CLEAN MODE (DEFAULT for PDFs):
    Automatically fixes unnecessary line breaks in PDF conversions:
    - Joining lines that are part of the same paragraph
    - Preserving paragraph breaks
    - Removing hyphenation at line breaks
    - Removing repeated copyright notices
    - Removing base64 image data
    - Removing standalone page numbers
    
    Clean mode is now ON by default for all PDFs!
    
    To skip cleanup (old behavior):
    - Use --fast flag: convertbot --fast
    - Renaming file with "_fast" suffix: document_fast.pdf
    - Setting environment variable: CLEAN_MODE=0 convertbot

PDF INPUT OPTIONS (PDF → Markdown):
    Default (docling):         BEST structure preservation (tables, headers) + cleanup
    _fast in filename:         Use markitdown - faster, less structure preservation
    _complex in filename:      Use marker AI - best for complex formulas
    _legal in filename:        Legal mode for strikethrough
    Combine:                   document_legal.pdf (docling + legal + cleanup)

PDF OUTPUT OPTIONS (→ PDF):
    Use --pdf flag:            Convert to PDF instead of Markdown
    Rename with _pdf suffix:   document_pdf.md → outputs PDF
    Supported inputs:          .md, .docx, .html, .txt
    Examples:
      convertbot --pdf          # Convert all files to PDF
      report_pdf.docx           # Converts to report.pdf

WINDOWS MEMORY OPTIMIZATION:
    On Windows, docling automatically uses memory-safe settings:
    - --page-batch-size 1 (process 1 page at a time)
    - --num-threads 1 (single-threaded)
    - This reduces RAM usage from 2-4 GB to ~800 MB - 1.2 GB
    
    If you still have memory issues:
    - Use _fast suffix for markitdown (uses ~400 MB)
    - Close other applications while converting
    - Increase Windows pagefile/swap size

BATCH/FOLDER CONVERSION:
    Convert all documents in a folder recursively:
    
    convertbot --batch ~/Documents/my-folder
    
    This will:
    - Recursively find all supported files in the folder
    - Convert each file to Markdown
    - Create output in same folder structure with "_converted" suffix
    - Failed conversions get "_failed" suffix
    - Unsupported file types are skipped
    
    Example output structure:
    ~/Documents/my-folder_converted/
        ├── report.pdf → report.md
        ├── contract.docx → contract.md
        ├── data.xlsx → data.md
        └── subfolder/
            └── notes.pdf → notes.md (or notes_failed.md if conversion failed)

TIPS:
    - Default PDF conversion uses docling (best structure preservation)
    - For complex PDFs with tables, use "_complex" in filename (marker AI)
    - For faster conversion: use "_fast" in filename (markitdown)
    - For legal documents with strikethrough, use "_legal" in filename
    - Use --batch for processing entire folders
    - Check convertbot.log on Desktop for detailed history
EOF
}

# Get file extension
# Prints the lower-cased text after the last "." of the file name in $1,
# e.g. "Report.PDF" -> "pdf", "archive.tar.GZ" -> "gz".
# Any leading directory part is ignored. When the name contains no "." an
# empty line is printed, so an extension-less file such as "pdf" or "README"
# is not mistaken for a known document type.
get_extension() {
    local name="${1##*/}"
    case "$name" in
        *.*)
            printf '%s\n' "${name##*.}" | tr '[:upper:]' '[:lower:]'
            ;;
        *)
            echo ""
            ;;
    esac
}

# Decide whether a file should go through marker (AI conversion).
# Succeeds (0) when $1 contains any of the markers _complex, _paper,
# _table or _academic; fails (1) otherwise.
use_marker_for_file() {
    case "$1" in
        *_complex*|*_paper*|*_table*|*_academic*)
            return 0
            ;;
    esac
    return 1
}

# Decide whether legal mode applies to a file.
# Succeeds (0) when the global LEGAL_MODE is "1" or when $1 contains
# "_legal"; fails (1) otherwise.
use_legal_mode() {
    [ "$LEGAL_MODE" = "1" ] && return 0
    case "$1" in
        *_legal*)
            return 0
            ;;
    esac
    return 1
}

# Check if file should use fast mode (skip the default clean pipeline).
# Clean mode is the default; fast mode is selected when either
#   - the global CLEAN_MODE is "0" (set by `--fast` or the environment), or
#   - the name in $1 contains "_fast".
# An unset/empty CLEAN_MODE counts as "1" (clean mode on).
# Returns 0 for fast mode, 1 for the default clean mode.
use_fast_mode() {
    local filename="$1"
    if [ "${CLEAN_MODE:-1}" = "0" ] || [[ "$filename" =~ _fast ]]; then
        return 0  # Fast mode
    fi
    return 1  # Default - clean mode
}

# Decide whether a file should be written as PDF.
# Succeeds (0) when the global PDF_MODE is "1" or when $1 contains "_pdf";
# fails (1) otherwise.
use_pdf_mode() {
    [ "$PDF_MODE" = "1" ] && return 0
    case "$1" in
        *_pdf*)
            return 0
            ;;
    esac
    return 1
}

# Apply legal mode formatting (make strikethrough explicit)
# Rewrites the file in place, turning every paired Markdown strikethrough
# "~~text~~" on a line into "[DELETED: text]".
#   $1 - path of the Markdown file to rewrite
# A "~~" that has no closing partner on the same line is left untouched, so
# no text is silently removed.
# Perl is tried first; if it is missing or fails, an equivalent sed
# expression is used. The result is written to "<file>.tmp" and only moved
# over the original when the rewrite succeeded.
# Always returns 0 (best effort); on failure the file is left unchanged and a
# warning is printed to stderr.
apply_legal_formatting() {
    local file="$1"
    local temp_file="${file}.tmp"
    local ok=1

    if [ ! -f "$file" ]; then
        return 0
    fi

    # Non-greedy match so that two deletions on one line stay separate
    if ! perl -pe 's/~~(.*?)~~/[DELETED: $1]/g' "$file" > "$temp_file" 2>/dev/null; then
        # sed has no non-greedy operator: the inner group refuses to cross "~~"
        sed -E 's/~~(([^~]|~[^~])*)~~/[DELETED: \1]/g' "$file" > "$temp_file" 2>/dev/null || ok=0
    fi

    if [ "$ok" -eq 1 ] && mv "$temp_file" "$file"; then
        return 0
    fi

    rm -f "$temp_file" 2>/dev/null || true
    echo "apply_legal_formatting: could not rewrite $file; left unchanged" >&2
    return 0
}

# Clean up line breaks in PDF text and remove headers/footers/page numbers
# Post-processes the Markdown file given as $1 in place, using four Perl
# passes that each slurp the whole file (-0777):
#   1. strip embedded base64 images, page-number/footer lines and line-end
#      hyphenation, join wrapped lines, normalise blank lines, spaces and the
#      spacing around headers;
#   2. keep only the first copyright notice and drop related boilerplate;
#   3. drop short lines (11-79 characters) that occur four or more times;
#   4. collapse runs of four or more newlines down to three.
# Returns without doing anything when $1 is not an existing regular file.
cleanup_line_breaks() {
    local file="$1"
    local temp_file="${file}.tmp"
    
    if [ ! -f "$file" ]; then
        return
    fi
    
    echo -e "${MAGENTA}  → Cleaning up formatting${NC}"
    
    # Use Perl for sophisticated cleanup
    # First pass writes to a temp file rather than editing in place, so the
    # original can be kept if the result turns out to be empty (see below).
    perl -0777 -pe '
        # Remove base64 image data (data:image/png;base64,...)
        s/!\[Image\]\(data:image\/[^;]+;base64,[A-Za-z0-9+\/=]+\)//g;
        
        # Remove standalone page numbers (lines with just 1-4 digit numbers)
        s/\n\s*\d{1,4}\s*\n/\n/g;
        
        # Remove common footer patterns (e.g., "Page 5 of 10", "5/10", "- 5 -")
        s/\n\s*Page\s+\d+\s*(of\s+\d+)?\s*\n/\n/gi;
        s/\n\s*-?\s*\d+\s*\/\s*\d+\s*-?\s*\n/\n/g;
        s/\n\s*-\s*\d+\s*-\s*\n/\n/g;
        
        # First, handle hyphenation at line breaks (word- \n word -> wordword)
        s/(\w)-\s*\n\s*(\w)/$1$2/g;
        
        # Join lines within paragraphs (lines that dont end with sentence-ending punctuation)
        # Dont join if next line is a header, list item, or blank
        while (s/([^\n\.\?\!])\n(?!\n)(?!#)(?!\s*[-*])(?!\s*\d+\.)([A-Z][^\n])/$1 $2/g) {}
        
        # Also join lines that are clearly mid-sentence
        while (s/([^\.\?\!\n])\n([a-z][^\n]{0,50})\n/$1 $2\n/g) {}
        
        # Clean up excessive blank lines (more than 2 consecutive newlines)
        s/\n{3,}/\n\n/g;
        
        # Clean up multiple spaces
        s/  +/ /g;
        
        # Ensure blank line before and after headers
        s/([^\n])\n(#)/$1\n\n$2/g;
        s/(#{1,6}[^\n]+)\n([^\n#])/$1\n\n$2/g;
    ' "$file" > "$temp_file"
    
    # Only replace the original when the first pass produced non-empty
    # output; otherwise discard the temp file and keep the file as it was.
    if [ -s "$temp_file" ]; then
        mv "$temp_file" "$file"
    else
        rm -f "$temp_file"
    fi
    
    # Second pass: remove repeated copyright notices and footers
    # Handle multi-line copyright notices specially
    # Edits in place (perl -i); errors are suppressed so a failure here does
    # not abort the conversion.
    perl -i -0777 -pe '
        # Remove ALL copyright notices except the very first one
        my $first = 1;
        s/(?:##?[ \t]*)?Copyright[ \t]+©[ \t]*202[0-9][^\n]*(?:\n+[ \t]*All rights reserved\.)?/
            if ($first) { $first = 0; $&; } else { ""; }
        /ges;
        
        # Also remove standalone "All rights reserved" lines that might be left
        s/\n+[ \t]*##?[ \t]*All rights reserved\.[ \t]*\n+/\n/g;
        
        # Clean up any leftover "National Conference..." lines that are just org names
        s/\n+[ \t]*National Conference of Commissioners on Uniform State Laws and The American Law Institute[ \t]*\n+/\n/g;
    ' "$file" 2>/dev/null || true
    
    # Third pass: remove other repeated short lines (headers/footers)
    # Also in place and best effort, like the second pass.
    perl -i -0777 -pe '
        my %lines;
        my @all_lines = split(/\n/, $_);
        
        # Count occurrences of short lines (but not table rows or headers)
        foreach my $line (@all_lines) {
            # Skip table rows, headers, and empty lines
            next if $line =~ /^\s*\|/;  # Skip table rows
            next if $line =~ /^\s*#/;   # Skip headers
            next if $line =~ /^\s*$/;   # Skip empty lines
            if (length($line) < 80 && length($line) > 10) {
                $lines{$line}++;
            }
        }
        
        # Remove lines that appear 4+ times (likely headers/footers)
        foreach my $line (keys %lines) {
            if ($lines{$line} >= 4) {
                my $escaped = quotemeta($line);
                s/\n$escaped\n/\n/g;
            }
        }
    ' "$file" 2>/dev/null || true
    
    # Final cleanup: remove excessive blank lines
    perl -i -0777 -pe 's/\n{4,}/\n\n\n/g;' "$file" 2>/dev/null || true
}

# Convert a single file
# Converts one file into $OUTBOX and, on success, moves the original into
# $DONE.
#   $1 - path of the file to convert
# Globals read: OUTBOX, DONE, IS_WINDOWS, DOCLING_BATCH_SIZE, DOCLING_THREADS
#               and the colour variables.
# Returns 0 when a non-empty output file was produced, 1 otherwise (any
# partial output is removed and the original stays where it was).
#
# The converter is chosen from the extension and from markers in the name:
#   - PDF output (PDF_MODE=1 or "_pdf" in the name) renders md/markdown/txt/
#     docx/html/htm through pandoc; other inputs fall back to Markdown.
#   - pdf input: marker ("_complex" etc.), markitdown (fast mode) or docling.
#   - docx/doc and html/htm: pandoc (SEC-style HTML prefers clean-sec-html).
#   - pptx/ppt/xlsx/xls/csv and unknown types: markitdown.
#   - txt/md/markdown: plain copy.
# NOTE(review): cleanup_line_breaks only runs in the markitdown fast path,
# exactly as before, although the help text describes cleanup as the default
# for every PDF - confirm which behaviour is intended.
convert_file() {
    local input_file="$1"
    local filename=$(basename "$input_file")
    local extension=$(get_extension "$filename")
    # Name without extension, exactly as it appears on disk
    local source_stem="${filename%.*}"
    # Output name: the same, minus a trailing "_pdf" marker
    local basename="${source_stem%_pdf}"
    local output_file="$OUTBOX/${basename}.md"
    local use_legal=false
    local use_fast=false
    local use_pdf=false
    local pdf_unsupported=false
    
    # Check for modes
    if use_legal_mode "$basename"; then
        use_legal=true
    fi
    # Clean mode is now default for PDFs, _fast disables it
    if use_fast_mode "$basename"; then
        use_fast=true
    fi
    
    log "Converting: $filename"
    
    # Check for PDF output mode. The test uses the un-stripped name, because
    # the "_pdf" suffix has already been removed from $basename.
    if use_pdf_mode "$source_stem"; then
        case "$extension" in
            md|markdown|txt|docx|html|htm)
                use_pdf=true
                output_file="$OUTBOX/${basename}.pdf"
                ;;
            *)
                # pandoc cannot read this input type; keep Markdown output
                pdf_unsupported=true
                ;;
        esac
    fi
    
    # Build status line
    local status_line="${BLUE}Converting:${NC} $filename"
    if $use_legal; then
        status_line="$status_line ${CYAN}[LEGAL]${NC}"
    fi
    if $use_pdf; then
        status_line="$status_line ${RED}[→ PDF]${NC}"
    elif $use_fast; then
        status_line="$status_line ${MAGENTA}[FAST]${NC}"
    else
        status_line="$status_line ${MAGENTA}[CLEAN]${NC}"
    fi
    # Show Windows memory-safe mode
    if $IS_WINDOWS && ! $use_fast && ! $use_pdf; then
        status_line="$status_line ${YELLOW}[WIN-MEM-SAFE]${NC}"
    fi
    echo -e "$status_line"
    if $pdf_unsupported; then
        echo -e "${YELLOW}  PDF output is not supported for .$extension input, writing Markdown${NC}"
    fi
    
    local success=0
    
    if $use_pdf; then
        # pandoc picks the PDF writer from the .pdf output extension
        echo -e "${YELLOW}  → Using pandoc (PDF output)${NC}"
        if pandoc "$input_file" -o "$output_file" 2>/dev/null; then
            success=1
        fi
    else
        case "$extension" in
            pdf)
                if use_marker_for_file "$basename"; then
                    # Complex mode - use marker AI for tables/formulas
                    echo -e "${YELLOW}  → Using AI-powered conversion (marker)${NC}"
                    if marker_single "$input_file" --output_format markdown --disable_image_extraction 2>/dev/null; then
                        local marker_output="${input_file%.*}.md"
                        if [ -f "$marker_output" ]; then
                            mv "$marker_output" "$output_file"
                            success=1
                        fi
                    fi
                elif $use_fast; then
                    # Fast mode - use markitdown (quicker, less structure preservation)
                    echo -e "${YELLOW}  → Using markitdown (fast mode)${NC}"
                    if markitdown "$input_file" > "$output_file" 2>/dev/null; then
                        success=1
                    fi
                    # Apply cleanup for fast mode too (unless explicitly disabled)
                    if [ $success -eq 1 ]; then
                        cleanup_line_breaks "$output_file"
                    fi
                else
                    # DEFAULT: Use docling for best structure preservation
                    echo -e "${YELLOW}  → Using docling (structure preservation)${NC}"
                    if $IS_WINDOWS; then
                        echo -e "${YELLOW}    Windows detected: using memory-safe settings${NC}"
                    fi
                    local temp_dir=$(mktemp -d)
                    # Use memory-safe settings on Windows; the two flag variables
                    # are left unquoted on purpose so each splits into option + value
                    if docling "$input_file" --output "$temp_dir" $DOCLING_BATCH_SIZE $DOCLING_THREADS 2>/dev/null; then
                        # Look for the result under the input's own name, which may
                        # still carry a "_pdf" marker that $basename has lost
                        local docling_output="$temp_dir/${source_stem}.md"
                        if [ -f "$docling_output" ]; then
                            mv "$docling_output" "$output_file"
                            success=1
                        fi
                    fi
                    rm -rf "$temp_dir" 2>/dev/null || true
                fi
                ;;
            docx|doc)
                echo -e "${YELLOW}  → Using pandoc${NC}"
                if pandoc "$input_file" -t markdown --wrap=none -o "$output_file" 2>/dev/null; then
                    success=1
                fi
                ;;
            pptx|ppt)
                echo -e "${YELLOW}  → Using markitdown${NC}"
                if markitdown "$input_file" > "$output_file" 2>/dev/null; then
                    success=1
                fi
                ;;
            xlsx|xls|csv)
                echo -e "${YELLOW}  → Using markitdown${NC}"
                if markitdown "$input_file" > "$output_file" 2>/dev/null; then
                    success=1
                fi
                ;;
            html|htm)
                # Check if it's an SEC filing style HTML
                if head -c 2000 "$input_file" 2>/dev/null | grep -qE '<DOCUMENT>|<TYPE>|Workiva|Wdesk|SEC\.gov'; then
                    echo -e "${YELLOW}  → Detected SEC filing HTML, using specialized cleaner${NC}"
                    if command -v clean-sec-html >/dev/null 2>&1; then
                        if clean-sec-html "$input_file" "$output_file" 2>/dev/null; then
                            success=1
                        fi
                    else
                        echo -e "${RED}  clean-sec-html not found, falling back to pandoc${NC}"
                        if pandoc "$input_file" -f html -t markdown --wrap=none -o "$output_file" 2>/dev/null; then
                            success=1
                        fi
                    fi
                else
                    echo -e "${YELLOW}  → Using pandoc${NC}"
                    if pandoc "$input_file" -f html -t markdown --wrap=none -o "$output_file" 2>/dev/null; then
                        success=1
                    fi
                fi
                ;;
            txt|md|markdown)
                echo -e "${YELLOW}  → Copying text file${NC}"
                cp "$input_file" "$output_file"
                success=1
                ;;
            *)
                # Try markitdown for unknown formats
                echo -e "${YELLOW}  → Trying markitdown for .$extension${NC}"
                if markitdown "$input_file" > "$output_file" 2>/dev/null; then
                    success=1
                fi
                ;;
        esac
    fi
    
    if [ $success -eq 1 ] && [ -f "$output_file" ] && [ -s "$output_file" ]; then
        # Apply legal mode formatting if enabled (text output only - a
        # rendered PDF must not be run through a text filter)
        if $use_legal && ! $use_pdf; then
            echo -e "${CYAN}  → Applying legal mode formatting${NC}"
            apply_legal_formatting "$output_file"
            # grep -c prints "0" AND exits non-zero when nothing matches, so
            # the fallback must replace the value instead of appending to it
            local deletions
            deletions=$(grep -c '\[DELETED:' "$output_file" 2>/dev/null) || deletions=0
            if [ "$deletions" -gt 0 ]; then
                echo -e "${CYAN}  → Found $deletions deletion(s) made explicit${NC}"
                log "  Legal mode: $deletions deletions made explicit"
            fi
        fi
        
        # Apply PDF overlap removal for PDF conversions
        if [ "$extension" = "pdf" ] && command -v remove-pdf-overlap >/dev/null 2>&1; then
            echo -e "${MAGENTA}  → Removing page boundary overlaps${NC}"
            remove-pdf-overlap "$output_file" 2>/dev/null || true
        fi
        
        # Report the file that was really written (.md or .pdf)
        local output_name="${output_file##*/}"
        echo -e "${GREEN}  ✓ Success:${NC} $output_name"
        log "✓ Success: $filename → $output_name"
        
        # Move original to done folder
        mv "$input_file" "$DONE/"
        log "  Moved original to: convertbot/convertbot-done/"
        return 0
    else
        echo -e "${RED}  ✗ Failed:${NC} $filename"
        log "✗ Failed: $filename"
        rm -f "$output_file"
        return 1
    fi
}

# Tell whether an extension belongs to the set of convertible types.
#   $1 - extension to test (compared exactly, so pass it lower-cased)
# Returns 0 for a supported extension, 1 for anything else.
is_supported_type() {
    local candidate
    for candidate in pdf docx doc pptx ppt xlsx xls csv html htm txt md markdown; do
        if [ "$1" = "$candidate" ]; then
            return 0
        fi
    done
    return 1
}

# Batch convert folder recursively
# Converts every supported file below a directory to Markdown and mirrors
# the directory structure into a sibling folder named "<dir>_converted".
#   $1 - source directory
# Source files are never moved or modified; each one is copied into a
# private temp directory before conversion. A file that cannot be converted
# gets a "<name>_failed.md" marker in the output tree, and unsupported types
# are skipped. A summary is printed at the end.
# Exits the script with status 1 when $1 is not a directory.
batch_convert() {
    local source_dir="$1"
    local output_suffix="_converted"
    local failed_suffix="_failed"
    
    if [ ! -d "$source_dir" ]; then
        echo -e "${RED}Error: Not a directory: $source_dir${NC}"
        exit 1
    fi
    
    # Get absolute path and base name
    source_dir=$(cd "$source_dir" && pwd)
    local base_name=$(basename "$source_dir")
    local output_dir="${source_dir}${output_suffix}"
    
    echo ""
    echo "=================================="
    echo "   ConvertBot Batch Mode"
    echo "=================================="
    echo ""
    echo -e "Source: ${BLUE}$source_dir${NC}"
    echo -e "Output: ${GREEN}$output_dir${NC}"
    echo ""
    
    # Create output directory
    mkdir -p "$output_dir"
    
    local total=0
    local converted=0
    local failed=0
    local skipped=0
    
    # Find all files recursively (NUL-delimited so any file name is safe)
    while IFS= read -r -d '' file; do
        total=$((total + 1))
        
        # The prefix is quoted so that glob characters in the directory name
        # (e.g. "docs[1]") are matched literally when it is stripped
        local rel_path="${file#"$source_dir"/}"
        local filename=$(basename "$file")
        local extension=$(get_extension "$filename")
        local basename_noext="${filename%.*}"
        local dir_path=$(dirname "$rel_path")
        
        # Check if supported
        if ! is_supported_type "$extension"; then
            echo -e "${YELLOW}  ⚋ Skipped (unsupported):${NC} $rel_path"
            skipped=$((skipped + 1))
            continue
        fi
        
        # Create output subdirectory
        local out_subdir="$output_dir/$dir_path"
        mkdir -p "$out_subdir"
        
        # Set output filename
        local output_file="$out_subdir/${basename_noext}.md"
        local failed_file="$out_subdir/${basename_noext}${failed_suffix}.md"
        
        echo ""
        echo -e "${BLUE}[$total]${NC} $rel_path"
        
        # Create temp file with proper extension for conversion
        local temp_dir=$(mktemp -d)
        local temp_input="$temp_dir/$filename"
        cp "$file" "$temp_input"
        local batch_success=0
        
        # Check for modes based on filename
        local batch_use_legal=false
        local batch_use_fast=false
        
        if use_legal_mode "$basename_noext"; then
            batch_use_legal=true
        fi
        if use_fast_mode "$basename_noext"; then
            batch_use_fast=true
        fi
        
        # Build status line
        local status_line="  ${CYAN}Converting:${NC} $filename"
        if $batch_use_legal; then
            status_line="$status_line ${CYAN}[LEGAL]${NC}"
        fi
        if $batch_use_fast; then
            status_line="$status_line ${MAGENTA}[FAST]${NC}"
        else
            status_line="$status_line ${MAGENTA}[CLEAN]${NC}"
        fi
        echo -e "$status_line"
        
        # Convert based on file type
        case "$extension" in
            pdf)
                if use_marker_for_file "$basename_noext"; then
                    echo -e "${YELLOW}    → Using AI-powered conversion (marker)${NC}"
                    local marker_output="$temp_dir/${basename_noext}.md"
                    if marker_single "$temp_input" --output_format markdown 2>/dev/null; then
                        if [ -f "$marker_output" ]; then
                            mv "$marker_output" "$output_file"
                            batch_success=1
                        fi
                    fi
                elif $batch_use_fast; then
                    echo -e "${YELLOW}    → Using markitdown (fast mode)${NC}"
                    if markitdown "$temp_input" > "$output_file" 2>/dev/null; then
                        batch_success=1
                    fi
                else
                    echo -e "${YELLOW}    → Using docling (structure preservation)${NC}"
                    local docling_temp=$(mktemp -d)
                    # Same memory-safe flags as single-file conversion; they are
                    # empty off Windows and left unquoted so they word-split
                    if docling "$temp_input" --output "$docling_temp" $DOCLING_BATCH_SIZE $DOCLING_THREADS 2>/dev/null; then
                        local docling_output="$docling_temp/${basename_noext}.md"
                        if [ -f "$docling_output" ]; then
                            mv "$docling_output" "$output_file"
                            batch_success=1
                        fi
                    fi
                    rm -rf "$docling_temp" 2>/dev/null || true
                fi
                
                # Apply PDF overlap removal
                if [ $batch_success -eq 1 ] && command -v remove-pdf-overlap >/dev/null 2>&1; then
                    remove-pdf-overlap "$output_file" 2>/dev/null || true
                fi
                ;;
            docx|doc)
                echo -e "${YELLOW}    → Using pandoc${NC}"
                if pandoc "$temp_input" -t markdown --wrap=none -o "$output_file" 2>/dev/null; then
                    batch_success=1
                fi
                ;;
            pptx|ppt|xlsx|xls|csv)
                echo -e "${YELLOW}    → Using markitdown${NC}"
                if markitdown "$temp_input" > "$output_file" 2>/dev/null; then
                    batch_success=1
                fi
                ;;
            html|htm)
                # Check if SEC filing
                if head -c 2000 "$temp_input" 2>/dev/null | grep -qE '<DOCUMENT>|<TYPE>|Workiva|Wdesk|SEC\.gov'; then
                    echo -e "${YELLOW}    → Detected SEC filing HTML${NC}"
                    if command -v clean-sec-html >/dev/null 2>&1; then
                        if clean-sec-html "$temp_input" "$output_file" 2>/dev/null; then
                            batch_success=1
                        fi
                    else
                        if pandoc "$temp_input" -f html -t markdown --wrap=none -o "$output_file" 2>/dev/null; then
                            batch_success=1
                        fi
                    fi
                else
                    echo -e "${YELLOW}    → Using pandoc${NC}"
                    if pandoc "$temp_input" -f html -t markdown --wrap=none -o "$output_file" 2>/dev/null; then
                        batch_success=1
                    fi
                fi
                ;;
            txt|md|markdown)
                echo -e "${YELLOW}    → Copying text file${NC}"
                cp "$temp_input" "$output_file"
                batch_success=1
                ;;
        esac
        
        # Apply legal mode if enabled
        if [ $batch_success -eq 1 ] && $batch_use_legal; then
            echo -e "${CYAN}    → Applying legal mode formatting${NC}"
            apply_legal_formatting "$output_file"
        fi
        
        # Clean up temp dir
        rm -rf "$temp_dir" 2>/dev/null || true
        
        # Handle result
        if [ $batch_success -eq 1 ] && [ -f "$output_file" ] && [ -s "$output_file" ]; then
            echo -e "${GREEN}    ✓ Success:${NC} $(basename "$output_file")"
            converted=$((converted + 1))
        else
            echo -e "${RED}    ✗ Failed:${NC} $filename"
            rm -f "$output_file" 2>/dev/null || true
            # Create failed marker file
            echo "# Conversion Failed" > "$failed_file"
            echo "" >> "$failed_file"
            echo "Source: $rel_path" >> "$failed_file"
            echo "Date: $(date)" >> "$failed_file"
            failed=$((failed + 1))
        fi
        
    done < <(find "$source_dir" -type f -print0 2>/dev/null)
    
    echo ""
    echo "=================================="
    echo "   Batch Conversion Complete"
    echo "=================================="
    echo ""
    echo -e "  Total files:     ${BLUE}$total${NC}"
    echo -e "  ${GREEN}Converted:${NC}       $converted"
    if [ $failed -gt 0 ]; then
        echo -e "  ${RED}Failed:${NC}          $failed"
    fi
    if [ $skipped -gt 0 ]; then
        echo -e "  ${YELLOW}Skipped:${NC}         $skipped (unsupported types)"
    fi
    echo ""
    echo -e "Output location: ${GREEN}$output_dir${NC}"
    echo ""
}

# Process all files in inbox
# Converts every regular file found directly in $INBOX and prints a summary.
# Helper files are ignored: anything ending in ".command" or starting with
# the "📖" emoji.
# Globals read: INBOX, LEGAL_MODE, IS_WINDOWS and the colour variables.
# Returns 0; prints a hint and returns early when there is nothing to do.
process_inbox() {
    local converted=0
    local failed=0
    local file fname
    local -a pending=()
    
    echo ""
    echo "=================================="
    echo "    ConvertBot Processing"
    # -e is required here: the colour variables hold backslash escapes that
    # plain echo would print literally
    if [ "$LEGAL_MODE" = "1" ]; then
        echo -e "    ${CYAN}LEGAL MODE${NC}"
    fi
    if $IS_WINDOWS; then
        echo -e "    ${YELLOW}WINDOWS MEMORY-SAFE MODE${NC}"
    fi
    echo "=================================="
    echo ""
    
    # Collect the files to convert once (skip helper files)
    for file in "$INBOX"/*; do
        [ -f "$file" ] || continue
        fname=$(basename "$file")
        [[ "$fname" == *.command ]] && continue
        [[ "$fname" == 📖* ]] && continue
        pending+=("$file")
    done
    local files_found=${#pending[@]}
    
    if [ "$files_found" -eq 0 ]; then
        echo -e "${YELLOW}No files in inbox.${NC}"
        echo "Drop files in: ~/Desktop/convertbot/convertbot-inbox"
        echo ""
        return
    fi
    
    echo "Found $files_found file(s) to convert"
    if [ "$LEGAL_MODE" = "1" ]; then
        echo -e "${CYAN}Legal mode: Strikethrough → [DELETED: text]${NC}"
    fi
    if $IS_WINDOWS; then
        echo -e "${YELLOW}Windows: Memory-safe mode enabled (reduced batch/threads)${NC}"
    fi
    echo -e "${MAGENTA}Clean mode: Line break cleanup enabled (default)${NC}"
    echo ""
    
    # Process each collected file
    for file in "${pending[@]}"; do
        if convert_file "$file"; then
            converted=$((converted + 1))
        else
            failed=$((failed + 1))
        fi
    done
    
    echo ""
    echo "=================================="
    echo -e "  ${GREEN}Converted:${NC} $converted"
    if [ $failed -gt 0 ]; then
        echo -e "  ${RED}Failed:${NC} $failed"
    fi
    echo "=================================="
    echo ""
    echo "Output:  ~/Desktop/convertbot/convertbot-outbox"
    echo "Originals: ~/Desktop/convertbot/convertbot-done"
    if [ "$LEGAL_MODE" = "1" ]; then
        echo -e "${CYAN}Legal mode: Deletions formatted as [DELETED: text]${NC}"
    fi
    if $IS_WINDOWS; then
        echo -e "${YELLOW}Windows: Memory usage reduced for stability${NC}"
    fi
    echo -e "${MAGENTA}Clean mode: Line breaks fixed by default${NC}"
    echo ""
}

# Watch mode
# Polls $INBOX every 2 seconds, forever, and calls process_inbox whenever it
# contains at least one convertible file. Stop it with Ctrl+C.
# Helper files (names ending in ".command" or starting with "📖") do not
# count as work, so an inbox that holds only helper files stays quiet.
# NOTE(review): a file whose conversion fails stays in the inbox and is
# therefore picked up again on the next poll - confirm whether that retry
# behaviour is wanted.
watch_mode() {
    local has_files file fname
    
    echo ""
    echo "=================================="
    echo "   ConvertBot Watch Mode"
    # -e is required here: the colour variables hold backslash escapes that
    # plain echo would print literally
    if [ "$LEGAL_MODE" = "1" ]; then
        echo -e "   ${CYAN}LEGAL MODE${NC}"
    fi
    if $IS_WINDOWS; then
        echo -e "   ${YELLOW}WINDOWS MEMORY-SAFE MODE${NC}"
    fi
    echo "=================================="
    echo ""
    echo "Monitoring: ~/Desktop/convertbot/convertbot-inbox"
    echo "Press Ctrl+C to stop"
    echo ""
    
    while true; do
        has_files=false
        for file in "$INBOX"/*; do
            [ -f "$file" ] || continue
            fname=${file##*/}
            [[ "$fname" == *.command ]] && continue
            [[ "$fname" == 📖* ]] && continue
            has_files=true
            break
        done
        
        if $has_files; then
            process_inbox
            echo "Waiting for new files..."
            echo ""
        fi
        
        sleep 2
    done
}

# Show status
# Prints how many files sit in the inbox, outbox and done folders, which
# modes are active, and the last 10 lines of the log file (when it exists).
# Globals read: INBOX, OUTBOX, DONE, LOGFILE, LEGAL_MODE, IS_WINDOWS and the
# colour variables.
show_status() {
    echo ""
    echo "=================================="
    echo "     ConvertBot Status"
    echo "=================================="
    echo ""
    
    # Count files in each folder (exclude helper files).
    # The counts go through $(( )) because some wc implementations (BSD/macOS)
    # pad the number with leading blanks, which would break the alignment.
    local inbox_count outbox_count done_count
    inbox_count=$(( $(find "$INBOX" -type f ! -name "*.command" ! -name "📖*" 2>/dev/null | wc -l) ))
    outbox_count=$(( $(find "$OUTBOX" -type f 2>/dev/null | wc -l) ))
    done_count=$(( $(find "$DONE" -type f 2>/dev/null | wc -l) ))
    
    echo "Files in folders:"
    echo "  Inbox:   $inbox_count (waiting to convert)"
    echo "  Outbox:  $outbox_count (converted)"
    echo "  Done:    $done_count (originals)"
    echo ""
    
    if [ "$LEGAL_MODE" = "1" ]; then
        echo -e "${CYAN}Legal mode: ENABLED${NC}"
        echo "  Strikethrough → [DELETED: text]"
        echo ""
    fi
    
    if $IS_WINDOWS; then
        echo -e "${YELLOW}Windows mode: ENABLED${NC}"
        echo "  Memory-safe settings active"
        echo "  --page-batch-size 1 --num-threads 1"
        echo ""
    fi
    
    echo -e "${MAGENTA}Clean mode: ENABLED by default${NC}"
    echo "  Line breaks in PDFs will be fixed"
    echo "  Use _fast suffix to skip cleanup"
    echo ""
    
    if [ -f "$LOGFILE" ]; then
        echo "Recent activity:"
        tail -10 "$LOGFILE" | sed 's/^/  /'
    fi
    
    echo ""
}

# Make sure the three working folders exist.
# Creates $INBOX, $OUTBOX and $DONE (including missing parent directories);
# folders that are already present are left alone.
ensure_folders() {
    local -a required=("$INBOX" "$OUTBOX" "$DONE")
    mkdir -p "${required[@]}"
}

# Main
# Command-line entry point. Creates the working folders, then dispatches on
# the first argument only:
#   (none)               process the inbox once
#   --help, -h           print the help text
#   --watch, -w          poll the inbox until interrupted
#   --status, -s         print folder counts and recent log lines
#   --legal, -l          process the inbox with LEGAL_MODE=1
#   --fast, -f           process the inbox with CLEAN_MODE=0
#   --pdf, -p            process the inbox with PDF_MODE=1
#   --clean-overlap <f>  run remove-pdf-overlap on an existing file
#   --batch, -b <dir>    convert a whole directory tree
# Exits with status 1 on an unknown option or a missing/invalid operand.
main() {
    ensure_folders
    
    case "${1:-}" in
        --help|-h)
            show_help
            ;;
        --watch|-w)
            watch_mode
            ;;
        --status|-s)
            show_status
            ;;
        --legal|-l)
            LEGAL_MODE=1
            process_inbox
            ;;
        --fast|-f)
            CLEAN_MODE=0
            process_inbox
            ;;
        --pdf|-p)
            # Documented in the help text; same pattern as --legal
            PDF_MODE=1
            process_inbox
            ;;
        --clean-overlap)
            shift
            if [ -f "${1:-}" ]; then
                if command -v remove-pdf-overlap >/dev/null 2>&1; then
                    echo -e "${MAGENTA}Removing page boundary overlaps from: $1${NC}"
                    remove-pdf-overlap "$1"
                else
                    echo -e "${RED}remove-pdf-overlap not found${NC}"
                    exit 1
                fi
            else
                echo -e "${RED}Usage: convertbot --clean-overlap <file.md>${NC}"
                exit 1
            fi
            ;;
        --batch|-b)
            shift
            if [ -d "${1:-}" ]; then
                batch_convert "$1"
            elif [ -n "${1:-}" ]; then
                echo -e "${RED}Error: Not a directory: $1${NC}"
                echo "Usage: convertbot --batch <directory>"
                exit 1
            else
                echo -e "${RED}Usage: convertbot --batch <directory>${NC}"
                exit 1
            fi
            ;;
        "")
            process_inbox
            ;;
        *)
            echo "Unknown option: $1"
            echo "Run 'convertbot --help' for usage"
            exit 1
            ;;
    esac
}

# Script entry point: hand every command-line argument to main unchanged.
main "$@"
