hookehuyr

feat(parse): 增强文档解析工具链和智能字段提取

主要改进:
- 优化 smartExtractList() 智能字段提取器
- 增强产品边界检测逻辑
- 完善 MCP 解析切换功能
- 优化 mockData 产品列表数据结构
- 更新计划书模板配置

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
......@@ -5,3 +5,4 @@
{"action":"update","backup_file":"/Users/huyirui/program/itomix/git/manulife-weapp/docs/parsed-backup/plan-templates.backup.1771078080604.js","target_file":"/Users/huyirui/program/itomix/git/manulife-weapp/src/config/plan-templates.js","form_sn_list":["savings-readme-a4296d1f"],"at":"2026-02-14T14:08:00.605Z"}
{"action":"update","backup_file":"/Users/huyirui/program/itomix/git/manulife-weapp/docs/parsed-backup/plan-templates.backup.1771078351660.js","target_file":"/Users/huyirui/program/itomix/git/manulife-weapp/src/config/plan-templates.js","form_sn_list":["savings-2-148b3acd"],"at":"2026-02-14T14:12:31.660Z"}
{"action":"update","backup_file":"/Users/huyirui/program/itomix/git/manulife-weapp/docs/parsed-backup/plan-templates.backup.1771080130974.js","target_file":"/Users/huyirui/program/itomix/git/manulife-weapp/src/config/plan-templates.js","form_sn_list":["savings-2-55bcffc2"],"at":"2026-02-14T14:42:10.974Z"}
{"action":"update","backup_file":"/Users/huyirui/program/itomix/git/manulife-weapp/docs/parsed-backup/plan-templates.backup.1771137003708.js","target_file":"/Users/huyirui/program/itomix/git/manulife-weapp/src/config/plan-templates.js","form_sn_list":["life-insurance-3-d8fde07d"],"at":"2026-02-15T06:30:03.709Z"}
......
This diff is collapsed. Click to expand it.
......@@ -15,6 +15,12 @@
*
* # 查看待处理文档
* npm run parse:docs -- --list
*
* # 应用审核通过的配置
* npm run parse:docs -- --apply=计划书模版4
*
* # 预览应用配置(不实际修改)
* npm run parse:docs -- --apply=计划书模版4 --dry-run
*/
import crypto from 'crypto'
import fs from 'fs'
......@@ -976,17 +982,27 @@ ${code.trim()}
## 📋 审核后操作
### 确认无误
### 方法 1:自动应用(推荐)
\`\`\`bash
# 预览变更(不实际修改)
pnpm parse:docs -- --apply=${baseFileName} --dry-run
# 确认无误后,正式应用
pnpm parse:docs -- --apply=${baseFileName}
# 说明:
# 1. 自动提取配置代码并插入到 src/config/plan-templates.js
# 2. 自动创建备份文件(docs/parsed-backup/)
# 3. 自动将审核文件移动到 docs/parse-audit/approved/
\`\`\`
### 方法 2:手动操作
\`\`\`bash
# 1. 移动到 approved 目录
mv docs/parse-audit/pending/${baseFileName}/${auditFileName} \\
docs/parse-audit/approved/
# 2. 合并到正式配置
# 手动复制或使用工具合并到 src/config/plan-templates.js
# 3. 删除待审核文件(可选)
rm docs/parse-audit/pending/${baseFileName}/${auditFileName}
# 2. 手动复制"生成配置片段"到 src/config/plan-templates.js
\`\`\`
### 需要修改
......@@ -1402,6 +1418,189 @@ function rollbackConfigFile(backupFile) {
return true
}
/**
 * Apply the generated config snippet of a reviewed audit file to the
 * plan-templates config file (CONFIG_FILE).
 *
 * Steps:
 *   1. Locate the audit markdown file: first in a matching sub-directory of
 *      docs/parse-audit/pending/, then among the dated files
 *      (YYYY-MM-DD-<name>.md) in docs/parse-audit/approved/.
 *   2. Extract the fenced javascript block under the "生成配置片段" heading.
 *   3. Read the form_sn (first quoted key followed by "{") and refuse to
 *      continue if CONFIG_FILE already contains that key.
 *   4. Insert the snippet right before the closing brace of the template object.
 *   5. Unless dry-running: back up CONFIG_FILE, write the new content, record a
 *      backup-log entry and move a pending audit file to the approved directory.
 *
 * @param {string} auditFileName - Audit name without path (e.g. "计划书模版4").
 *   Matching ignores whitespace and case and accepts a substring in either direction.
 * @param {Object} [options] - Options.
 * @param {boolean} [options.dry_run] - Only preview the change; write nothing.
 * @returns {Object} `{ ok: false, reason }` on failure (reason is one of
 *   'file_not_found', 'config_not_found', 'form_sn_not_found', 'duplicate',
 *   'insert_not_found'; 'duplicate' also carries `formSn`),
 *   `{ ok: true, dry_run: true, formSn }` for a dry run, or
 *   `{ ok: true, formSn, backupFile }` after a real update.
 */
function applyAuditFile(auditFileName, options = {}) {
  const PENDING_DIR = path.resolve(process.cwd(), 'docs/parse-audit/pending')
  const APPROVED_DIR = path.resolve(process.cwd(), 'docs/parse-audit/approved')

  // Names are compared with all whitespace removed and case folded.
  const normalize = value =>
    (value == null ? '' : String(value)).replace(/\s+/g, '').toLowerCase()
  const normalizedInput = normalize(auditFileName)
  const isNameMatch = candidate => {
    const normalizedName = normalize(candidate)
    return normalizedName.includes(normalizedInput) || normalizedInput.includes(normalizedName)
  }

  // 1. Locate the audit file.
  let auditFile = null
  let sourceDir = null

  // An empty name is a substring of every candidate, so it would silently pick
  // the first pending audit; treat it as "not found" instead.
  if (normalizedInput !== '') {
    // Pending audits live in one sub-directory per document; the directory
    // name is what gets matched, and its first markdown file is used.
    const pendingDirs = fs.existsSync(PENDING_DIR) ? fs.readdirSync(PENDING_DIR) : []
    for (const dir of pendingDirs) {
      const dirPath = path.join(PENDING_DIR, dir)
      if (!fs.statSync(dirPath).isDirectory() || !isNameMatch(dir)) continue

      const firstMarkdown = fs.readdirSync(dirPath).find(f => f.endsWith('.md'))
      if (firstMarkdown) {
        auditFile = path.join(dirPath, firstMarkdown)
        sourceDir = PENDING_DIR
        break
      }
    }

    // Fall back to already approved files named YYYY-MM-DD-<product name>.md.
    if (!auditFile && fs.existsSync(APPROVED_DIR)) {
      for (const file of fs.readdirSync(APPROVED_DIR)) {
        const dated = file.match(/^\d{4}-\d{2}-\d{2}-(.+)\.md$/)
        if (dated && isNameMatch(dated[1])) {
          auditFile = path.join(APPROVED_DIR, file)
          sourceDir = APPROVED_DIR
          break
        }
      }
    }
  }

  if (!auditFile) {
    console.error(`❌ 找不到审核文件: ${auditFileName}`)
    console.log("   搜索目录:")
    console.log("   - docs/parse-audit/pending/")
    console.log("   - docs/parse-audit/approved/")
    return { ok: false, reason: 'file_not_found' }
  }
  console.log(`\n📄 找到审核文件: ${auditFile}`)

  // 2. Read the audit file.
  const content = fs.readFileSync(auditFile, 'utf-8')

  // 3. Extract the generated config snippet (first fenced javascript block
  //    after the heading).
  const configMatch = content.match(/## 🧩 生成配置片段\s*\n+```javascript\s*\n([\s\S]*?)```/)
  if (!configMatch) {
    console.error("❌ 无法从审核文件中提取配置代码")
    return { ok: false, reason: 'config_not_found' }
  }
  const configCode = configMatch[1].trim()
  const divider = "-".repeat(40)
  console.log("\n📝 提取的配置代码:")
  console.log(divider)
  console.log(configCode)
  console.log(divider)

  // 4. The form_sn is the first single-quoted key that opens an object.
  const formSnMatch = configCode.match(/'([^']+)':\s*\{/)
  const formSn = formSnMatch ? formSnMatch[1] : null
  if (!formSn) {
    console.error("❌ 无法从配置代码中提取 form_sn")
    return { ok: false, reason: 'form_sn_not_found' }
  }
  console.log(`\n🔑 form_sn: ${formSn}`)

  // 5. Read the current config and reject duplicates.
  const existingContent = fs.readFileSync(CONFIG_FILE, 'utf-8')
  if (existingContent.includes(`'${formSn}':`)) {
    console.error(`❌ 配置文件中已存在 form_sn: ${formSn}`)
    console.log("   如需更新,请先手动删除旧配置")
    return { ok: false, reason: 'duplicate', formSn }
  }

  // 6. Find the insertion point: the end of the last entry before the first
  //    column-0 closing brace. Start at the PLAN_TEMPLATES declaration when it
  //    is present so an earlier top-level object with quoted keys cannot be
  //    picked by mistake; otherwise search the whole file as before.
  const anchorIndex = existingContent.search(/\bPLAN_TEMPLATES\s*=\s*\{/)
  const searchStart = anchorIndex === -1 ? 0 : anchorIndex
  const insertPattern = /(\n\s*'\w+[^']+':\s*\{[\s\S]*?\n\s*\}\s*,?\s*)(\n\})/
  const match = existingContent.slice(searchStart).match(insertPattern)
  if (!match) {
    console.error("❌ 无法定位插入位置")
    return { ok: false, reason: 'insert_not_found' }
  }

  // 7. The new entry always ends with a comma.
  let newConfigEntry = configCode
  if (!newConfigEntry.trimEnd().endsWith(',')) {
    newConfigEntry = newConfigEntry.trimEnd() + ','
  }

  // 8. Build the new content. The preceding entry may lack a trailing comma
  //    (the pattern makes it optional); add one so the two entries stay
  //    separated and the file remains valid JavaScript.
  const insertPosition = searchStart + match.index + match[1].length
  const head = existingContent.slice(0, insertPosition)
  const tail = existingContent.slice(insertPosition)
  const trimmedHead = head.trimEnd()
  const separator = trimmedHead.endsWith(',') ? '' : ','
  const updatedContent =
    trimmedHead +
    separator +
    head.slice(trimmedHead.length) +
    '\n\n' +
    newConfigEntry +
    tail

  if (options.dry_run) {
    console.log("\n🧪 dry-run 模式,变更预览:")
    console.log(divider)
    console.log("将插入以下配置:")
    console.log(newConfigEntry)
    console.log(divider)
    return { ok: true, dry_run: true, formSn }
  }

  // 9. Back up the current config, then write the new one and log the action.
  let backupFile = null
  if (fs.existsSync(CONFIG_FILE)) {
    ensureDir(BACKUP_DIR)
    backupFile = path.join(BACKUP_DIR, `plan-templates.backup.${Date.now()}.js`)
    fs.copyFileSync(CONFIG_FILE, backupFile)
    console.log(`\n💾 已备份到: ${backupFile}`)
  }
  writeFile(CONFIG_FILE, updatedContent)
  console.log(`\n✅ 配置已更新: ${CONFIG_FILE}`)
  writeBackupLog({
    action: 'apply_audit',
    backup_file: backupFile,
    target_file: CONFIG_FILE,
    audit_file: auditFile,
    form_sn: formSn,
    at: new Date().toISOString()
  })

  // 10. Move the audit file to the approved directory when it came from pending.
  if (sourceDir === PENDING_DIR) {
    ensureDir(APPROVED_DIR)
    const approvedPath = path.join(APPROVED_DIR, path.basename(auditFile))
    if (fs.existsSync(approvedPath)) {
      // Never overwrite an audit that was approved earlier under the same name.
      console.log("⚠️  approved 目录已存在同名文件,跳过移动")
    } else {
      fs.renameSync(auditFile, approvedPath)
      console.log(`📁 审核文件已移动到: ${approvedPath}`)

      // Remove the pending sub-directory once only dot-files (or nothing) remain.
      const pendingSubDir = path.dirname(auditFile)
      const remainingFiles = fs.readdirSync(pendingSubDir).filter(f => !f.startsWith('.'))
      if (remainingFiles.length === 0) {
        fs.rmdirSync(pendingSubDir)
        console.log(`🗑️  已删除空目录: ${pendingSubDir}`)
      }
    }
  }

  return { ok: true, formSn, backupFile }
}
function updateConfigFile(newConfigs, options = {}) {
console.log("\n" + "=".repeat(60))
console.log("📝 更新配置文件: " + CONFIG_FILE)
......@@ -1563,9 +1762,16 @@ async function main() {
const listMode = args.includes('--list')
const fileMode = args.find(arg => arg.startsWith('--file='))
const writeMode = args.includes('--write-config')
const dryRunMode = args.includes('--dry-run') || !writeMode
const rollbackMode = args.find(arg => arg.startsWith('--rollback='))
const statusMode = args.includes('--status')
const applyMode = args.find(arg => arg.startsWith('--apply='))
// dry-run 逻辑:
// 1. 如果显式指定 --dry-run,则 dry-run
// 2. 如果是 apply 模式,默认不 dry-run(除非显式指定)
// 3. 如果是解析模式,默认 dry-run(除非显式指定 --write-config)
const explicitDryRun = args.includes('--dry-run')
const dryRunMode = applyMode ? explicitDryRun : (!writeMode && !explicitDryRun || explicitDryRun)
// 检查解析器选择
const parserModeArg = args.find(arg => arg.startsWith('--parser='))
......@@ -1586,6 +1792,11 @@ async function main() {
if (rollbackMode) {
const backupFile = rollbackMode.split('=')[1]
rollbackConfigFile(backupFile)
} else if (applyMode) {
// 从审核文件应用配置
const auditFileName = applyMode.split('=')[1]
const applyOptions = { dry_run: dryRunMode }
applyAuditFile(auditFileName, applyOptions)
} else if (listMode) {
// 列出模式
const docs = getDocsToParse()
......
......@@ -16,6 +16,7 @@
* - GC宏摯家傳承保險計劃- 性別, 年齡, 出生年月日
* - FA 宏浚傳承保障計劃
* - LV2 赤霞珠終身壽險計劃2基本人壽保障選項
* - LV3 长宁終身壽險計劃3
*/
const PRODUCT_TITLE_PATTERNS = [
// 产品代码 + 产品名称 + 可选后缀
......@@ -31,14 +32,17 @@ const PRODUCT_TITLE_PATTERNS = [
/^([^\n]{2,30}?(?:計劃|计划|保障|保险|壽險|壽险)[^\n]*)/gm,
// 产品代码开头的行
/^([A-Z]{2,4}\d?)\s*[-:]\s*([^\n]+)/gm
/^([A-Z]{2,4}\d?)\s*[-:]\s*([^\n]+)/gm,
// 新增:产品代码 + 产品名称 + 数字后缀(如 "LV3 长宁終身壽險計劃3")
/^([A-Z]{2,3}\d?)\s+([^\n]{2,25}?(?:計劃|计划|壽險|壽险)\d?)/gm
]
/**
* 产品代码前缀列表(用于优先匹配)
*/
const PRODUCT_CODE_PREFIXES = [
'GS', 'GC', 'FA', 'LV2', 'LV', 'CR', 'HR', 'PR', 'SR',
'GS', 'GC', 'FA', 'LV2', 'LV3', 'LV', 'CR', 'HR', 'PR', 'SR',
'TR', 'UR', 'WR', 'XR', 'YR', 'ZR'
]
......@@ -62,10 +66,11 @@ export function detectProductCount(content) {
export function findProductTitles(content) {
const products = []
const seenCodes = new Set()
const seenNames = new Set()
// 策略1: 优先匹配产品代码前缀
for (const prefix of PRODUCT_CODE_PREFIXES) {
// 匹配 "GS宏摯傳承保障計劃" 或 "GS 宏摯傳承保障計劃"
// 匹配 "GS宏摯傳承保障計劃" 或 "GS 宏摯傳承保障計劃" 或 "LV3 长宁終身壽險計劃3"
const regex = new RegExp(
`^(${prefix}\\d?)\\s*([\\u4e00-\\u9fa5]+(?:計劃|计划|保障|保险|壽險|壽险)[^\\n]*)`,
'gm'
......@@ -76,9 +81,11 @@ export function findProductTitles(content) {
const code = match[1]
const name = match[2].trim()
// 去重
if (seenCodes.has(code)) continue
// 去重(基于代码或名称)
const nameKey = name.replace(/\s+/g, '').toLowerCase()
if (seenCodes.has(code) || seenNames.has(nameKey)) continue
seenCodes.add(code)
seenNames.add(nameKey)
products.push({
index: match.index,
......@@ -99,15 +106,52 @@ export function findProductTitles(content) {
const fullTitle = match[0].trim()
if (fullTitle.length < 5) continue // 过滤太短的匹配
const code = match[1] || null
const name = match[2] || fullTitle
// 去重
const nameKey = name.replace(/\s+/g, '').toLowerCase()
if (seenNames.has(nameKey)) continue
if (code) seenCodes.add(code)
seenNames.add(nameKey)
products.push({
index: match.index,
code: match[1] || null,
name: match[2] || fullTitle,
code,
name,
fullTitle
})
}
}
// 策略3: 新增 - 识别包含"计划"但不包含产品代码的行(纯计划书名称)
// 适用于标题如 "宏挚传承保障计划" 或 "长宁终身寿险计划3"
if (products.length === 0) {
const planNameRegex = /^([^\n]{2,30}?(?:計劃|计划)[^\n]*)/gm
let match
while ((match = planNameRegex.exec(content)) !== null) {
const fullTitle = match[1].trim()
// 排除太短或包含其他关键词的行
if (fullTitle.length < 5 || fullTitle.includes('選項') || fullTitle.includes('选项')) continue
// 检查是否是产品名称(通常包含"保障"、"保险"、"寿险"等关键词)
if (/(?:保障|保险|壽險|壽险|传承|家传)/.test(fullTitle)) {
const nameKey = fullTitle.replace(/\s+/g, '').toLowerCase()
if (!seenNames.has(nameKey)) {
seenNames.add(nameKey)
products.push({
index: match.index,
code: null,
name: fullTitle.split(/[-—::]/)[0].trim(), // 移除后缀说明
fullTitle
})
}
}
}
}
// 按出现位置排序
products.sort((a, b) => a.index - b.index)
......
This diff is collapsed. Click to expand it.
......@@ -368,6 +368,24 @@ export const PLAN_TEMPLATES = {
submit_mapping: savingsSubmitMapping
}
},
/**
* 长宁終身壽險計劃3
* @added 2026-02-15T06:30:03.691Z
* @source docs/to-parse/计划书模版4.docx
*/
'life-insurance-3-d8fde07d': {
name: '长宁終身壽險計劃3',
component: 'LifeInsuranceTemplate',
config: {
currency: 'USD',
payment_periods: ["5年","12年","15年","20年"],
age_range: { min: 0, max: 75 },
insurance_period: '终身',
form_schema: protectionFormSchema,
submit_mapping: baseSubmitMapping
}
}
}
/**
......
......@@ -415,9 +415,11 @@ export async function mockProductListAPI(params) {
const list = []
const startIndex = page * limit
// 🔧 测试商品:第一页第一位固定为储蓄产品(form_sn:savings-product-30b41aae)
// 🔧 测试商品:第一页前两位固定为测试产品
if (page === 0) {
const testCategory = PRODUCT_CATEGORIES.find(c => parseInt(c.id) === 1)
// 测试商品1: 储蓄产品
const testProduct1 = {
id: 'savings-2-148b3acd',
product_name: '测试计划书-智享未来2(form_sn:savings-2-148b3acd)',
......@@ -432,19 +434,38 @@ export async function mockProductListAPI(params) {
_test_note: 'form_sn:savings-2-148b3acd'
}
// 检查分类和关键词过滤
let shouldInclude = true
if (cid && !testProduct1.categories.some(c => parseInt(c.id) === parseInt(cid))) {
shouldInclude = false
}
if (keyword && !testProduct1.product_name.includes(keyword)) {
shouldInclude = false
// 测试商品2: 人寿保险产品
const testProduct2 = {
id: 'life-insurance-3-d8fde07d',
product_name: '测试计划书-人生无忧3(form_sn:life-insurance-3-d8fde07d)',
cover_image: 'https://picsum.photos/seed/life-insurance-3-d8fde07d/400/300',
recommend: 'hot',
form_sn: 'life-insurance-3-d8fde07d', // ✅ 关键字段:对应真实 API 的 form_sn
created_time: new Date().toISOString(),
categories: [testCategory], // ✅ 符合真实 API 结构:categories 是数组
tags: [{ id: '1', name: '热销', bg_color: '#FEE2E2', text_color: '#DC2626' }],
// 测试标识(不影响业务逻辑)
_test: true,
_test_note: 'form_sn:life-insurance-3-d8fde07d'
}
if (shouldInclude) {
list.push(testProduct1)
console.log('[Mock] listAPI - 测试商品已置顶: form_sn=savings-2-148b3acd')
}
// 检查分类和关键词过滤,依次添加测试商品
const testProducts = [testProduct1, testProduct2]
testProducts.forEach((testProduct, index) => {
let shouldInclude = true
if (cid && !testProduct.categories.some(c => parseInt(c.id) === parseInt(cid))) {
shouldInclude = false
}
if (keyword && !testProduct.product_name.includes(keyword)) {
shouldInclude = false
}
if (shouldInclude) {
list.push(testProduct)
console.log(`[Mock] listAPI - 测试商品${index + 1}已置顶: form_sn=${testProduct.form_sn}`)
}
})
}
for (let i = 0; i < limit; i++) {
......