// parse-docs.js
/**
 * 文档解析脚本
 *
 * @description 扫描 docs/to-parse 文件夹中的文档,调用 AI 服务解析,自动更新配置
 * @module scripts/parse-docs
 * @author Claude Code
 * @created 2026-02-13
 *
 * @usage
 * # 解析所有待处理文档
 * npm run parse:docs
 *
 * # 解析指定文档
 * npm run parse:docs -- --file=产品说明书.pdf
 *
 * # 查看待处理文档
 * npm run parse:docs -- --list
 */
import fs from 'fs'
import path from 'path'
import { fileURLToPath } from 'url'

// ========== 配置区 ==========

const DOCS_DIR = path.resolve(process.cwd(), 'docs/to-parse')
const CONFIG_FILE = path.resolve(process.cwd(), 'src/config/plan-templates.js')
const BACKUP_DIR = path.resolve(process.cwd(), 'docs/parsed-backup')

// 支持的文档格式
const SUPPORTED_EXTENSIONS = ['.pdf', '.doc', '.docx', '.txt', '.md']

// AI 解析服务选择(通过 skill 调用)
const AI_SERVICE = 'openai' // 'openai' | 'anthropic' | 'openrouter'

// ========== 工具函数 ==========

/**
 * 确保目录存在
 */
function ensureDir(dirPath) {
  if (!fs.existsSync(dirPath)) {
    fs.mkdirSync(dirPath, { recursive: true })
    console.log(`📁 创建目录: ${dirPath}`)
  }
}

/**
 * 读取文件内容
 */
function readFile(filePath) {
  return fs.readFileSync(filePath, 'utf-8')
}

/**
 * 写入文件内容
 */
function writeFile(filePath, content) {
  fs.writeFileSync(filePath, content, 'utf-8')
}

/**
 * 获取所有待处理的文档
 */
function getDocsToParse() {
  if (!fs.existsSync(DOCS_DIR)) {
    console.log('📂 文档夹不存在:', DOCS_DIR)
    return []
  }

  const files = fs.readdirSync(DOCS_DIR)
  return files
    .filter(file => SUPPORTED_EXTENSIONS.includes(path.extname(file).toLowerCase()))
    .map(file => ({
      name: file,
      fullPath: path.join(DOCS_DIR, file),
      ext: path.extname(file).toLowerCase(),
      size: fs.statSync(path.join(DOCS_DIR, file)).size
    }))
}

/**
 * 生成 form_sn
 */
export function generateFormSn(config) {
    const product_type = config?.product_type || 'product'
    const timestamp = Date.now().toString(36)
    const name_slug = (config?.product_name || '')
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, '-')
        .replace(/^-+|-+$/g, '')

    return `${product_type}-${name_slug || 'product'}-${timestamp}`
}

/**
 * 生成配置代码
 */
export function generateConfigCode(config) {
    const formSn = generateFormSn(config)
    const isSavings = config.is_savings || config.product_type === 'savings'
    const productType = config.product_type || 'life-insurance'
    const componentName = isSavings
        ? 'SavingsTemplate'
        : (productType === 'critical-illness' ? 'CriticalIllnessTemplate' : 'LifeInsuranceTemplate')

    let code = "  /**\n"
    code += "   * " + config.product_name + "\n"
    code += "   * @added " + new Date().toISOString() + "\n"
    code += "   * @source docs/to-parse/" + config.source_file + "\n"
    code += "   */\n"
    code += "  '" + formSn + "': {\n"
    code += "    name: '" + config.product_name + "',\n"
    code += "    component: '" + componentName + "',\n"
    if (isSavings) {
        code += "    category: 'savings',\n"
    }
    code += "    config: {\n"

    if (isSavings) {
        code += "      currency: '" + config.currency + "',\n"
        code += "      payment_periods: " + JSON.stringify(config.payment_periods || []) + ",\n"
        code += "      age_range: { min: " + (config.age_range?.min || 0) + ", max: " + (config.age_range?.max || 75) + " },\n"
        code += "      insurance_period: '" + (config.insurance_period || '终身') + "',\n"
        code += "      withdrawal_plan: {\n"
        code += "        enabled: true,\n"
        code += "        currencies: ['HKD', 'USD', 'CNY'],\n"
        code += "        default_currency: '" + config.currency + "',\n"
        code += "        withdrawal_modes: " + JSON.stringify(config.withdrawal_modes || []) + ",\n"
        code += "        withdrawal_periods: " + JSON.stringify(config.withdrawal_periods || []) + "\n"
        code += "      }\n"
    } else {
        code += "      currency: '" + config.currency + "',\n"
        code += "      payment_periods: " + JSON.stringify(config.payment_periods || []) + ",\n"
        code += "      age_range: { min: " + (config.age_range?.min || 0) + ", max: " + (config.age_range?.max || 75) + " },\n"
        code += "      insurance_period: '" + (config.insurance_period || '终身') + "'\n"
    }

    code += "    }\n"
    code += "  }\n\n"

    return { formSn, code }
}

function formatSize(size) {
    if (size < 1024) return `${size} B`
    if (size < 1024 * 1024) return `${(size / 1024).toFixed(1)} KB`
    if (size < 1024 * 1024 * 1024) return `${(size / (1024 * 1024)).toFixed(1)} MB`
    return `${(size / (1024 * 1024 * 1024)).toFixed(1)} GB`
}

/**
 * 调用 AI 服务解析文档
 *
 * 这里使用 skill 工具调用实际的 AI 解析服务
 * 可以是:file-url-to-pdf + openai/anthropic skill
 */
async function parseDocumentWithAI(docPath) {
  console.log(`\n🤖 正在解析: ${path.basename(docPath)}`)

  try {
    // 读取文档内容
    const content = fs.readFileSync(docPath, 'utf-8')

    // 模拟解析:从文档内容中提取配置
    // 实际使用时可以调用 AI 服务
    const mockConfig = {
      product_name: path.basename(docPath, path.extname(docPath)),
      product_type: 'savings',
      currency: 'USD',
      payment_periods: ['整付', '3年', '5年'],
      age_range: { min: 0, max: 75 },
      insurance_period: '终身',
      is_savings: true,
      withdrawal_modes: ['年龄指定金额', '最高固定金额'],
      withdrawal_periods: ['1年', '3年', '5年', '10年']
    }

    console.log('✅ 解析成功')
    return mockConfig
  } catch (error) {
    console.error(`❌ 解析失败 (${docPath}):`, error.message)
    return null
  }
}

/**
 * 解析单个文档
 */
async function parseSingleFile(filePath) {
  const fileName = path.basename(filePath)
  console.log("\n" + "=".repeat(60))
  console.log("📄 处理文件: " + fileName)
  console.log("=".repeat(60))

  // 解析文档
  const config = await parseDocumentWithAI(filePath)

  if (!config) {
    console.log("⏭️  跳过文件: " + fileName + " (解析失败)")
    return { success: false, file: fileName }
  }

  // 添加源文件信息
  config.source_file = fileName

  // 生成配置代码
  const { formSn, code } = generateConfigCode(config)

  console.log("\n📝 生成 form_sn: " + formSn)
  console.log("📋 生成配置代码:\n" + code)

  return { success: true, formSn, code, file: fileName, config }
}

/**
 * 更新配置文件
 * @description 使用简单的字符串搜索找到正确的插入位置
 */
export function updateConfigContent(existingContent, newConfigs) {
    const templatesStart = existingContent.indexOf('export const PLAN_TEMPLATES')
    const templatesEndMarker = '\n}\n\nexport const FEATURE_FLAGS'
    const templatesEnd = existingContent.indexOf(templatesEndMarker, templatesStart)

    if (templatesStart === -1 || templatesEnd === -1) {
        return null
    }

    const insertContent = newConfigs.map((item, index) => {
        const code = item.code.trimEnd()
        return index === newConfigs.length - 1 ? code : code + ','
    }).join('\n\n')

    const before = existingContent.substring(0, templatesEnd)
    const after = existingContent.substring(templatesEnd)
    const beforeTrimmed = before.replace(/\s+$/, '')
    const needsComma = !beforeTrimmed.endsWith(',')
    const comma = needsComma ? ',' : ''

    return `${beforeTrimmed}${comma}\n\n${insertContent}${after}`
}

function updateConfigFile(newConfigs) {
  console.log("\n" + "=".repeat(60))
  console.log("📝 更新配置文件: " + CONFIG_FILE)
  console.log("=".repeat(60))

  // 备份现有配置
  if (fs.existsSync(CONFIG_FILE)) {
    ensureDir(BACKUP_DIR)
    const backupFile = path.join(BACKUP_DIR, `plan-templates.backup.${Date.now()}.js`)
    fs.copyFileSync(CONFIG_FILE, backupFile)
    console.log("💾 已备份到: " + backupFile)
  }

  const existingContent = fs.readFileSync(CONFIG_FILE, 'utf-8')
  const updatedContent = updateConfigContent(existingContent, newConfigs)

  if (!updatedContent) {
    console.error('❌ 无法定位 PLAN_TEMPLATES 插入位置')
    return
  }

  writeFile(CONFIG_FILE, updatedContent)
  console.log("✅ 已更新配置文件,新增 " + newConfigs.length + " 个产品")
}

/**
 * 处理所有文档
 */
async function parseAllDocs(docs) {
  if (docs.length === 0) {
    console.log('📭 没有待处理的文档')
    return
  }

  console.log("\n" + "=".repeat(60))
  console.log("📚 发现 " + docs.length + " 个待处理文档")
  console.log("=".repeat(60))

  const results = []
  const successResults = []

  for (const doc of docs) {
    const result = await parseSingleFile(doc.fullPath)
    results.push(result)
    if (result.success) {
      successResults.push(result)
    }
  }

  // 汇总
  console.log("\n" + "=".repeat(60))
  console.log("📊 解析结果汇总")
  console.log("=".repeat(60))
  console.log("总计: " + docs.length + " 个文档")
  console.log("成功: " + successResults.length + " 个")
  console.log("失败: " + (results.length - successResults.length) + " 个")

  // 显示成功的产品
  if (successResults.length > 0) {
    console.log("\n✅ 成功解析的产品:")
    successResults.forEach(r => {
      console.log("   - " + r.formSn + ": " + r.config.product_name)
    })
  }

  // 更新配置文件
  if (successResults.length > 0) {
    updateConfigFile(successResults)
  } else {
    console.log("\n❌ 没有成功解析的文档,配置文件未更新")
  }
}

/**
 * CLI 入口
 */
async function main() {
  const args = process.argv.slice(2)
  const docs = getDocsToParse()

  // 检查模式
  const listMode = args.includes('--list')
  const fileMode = args.find(arg => arg.startsWith('--file='))

  console.log('\n🚀 文档解析工具')
  console.log("   文档目录: " + DOCS_DIR)
  console.log("   配置文件: " + CONFIG_FILE)

  if (listMode) {
    // 列出模式
    const docs = getDocsToParse()
    console.log("\n📋 待处理文档列表:")
    if (docs.length === 0) {
      console.log('  (无文档)')
    } else {
      docs.forEach((doc, index) => {
        console.log(" " + (index + 1) + ". " + doc.name + " (" + formatSize(doc.size) + ")")
      })
    }
  } else if (fileMode) {
    // 单文件模式
    const fileName = fileMode.split('=')[1]
    const targetDoc = docs.find(d => d.name === fileName || d.name.includes(fileName))

    if (targetDoc) {
      const result = await parseSingleFile(targetDoc.fullPath)
      if (result.success) {
        updateConfigFile([result])
      }
    } else {
      console.log("❌ 找不到文件: " + fileName)
    }
  } else {
    // 批量处理模式
    await parseAllDocs(docs)
  }

  console.log('\n✨ 处理完成!')
}

const isDirectRun = import.meta.url === `file://${process.argv[1]}`
if (isDirectRun) {
    main().catch(error => {
        console.error('❌ 执行失败:', error)
        process.exit(1)
    })
}