是否可以对识别后的文字进行重新排列

使用问题 · 36 次浏览
涛涛涛 创建于 9天2小时前

使用 umi-ocr 识别后,文字的排列顺序有时会有些错乱。应该可以写一个函数对结果进行重排优化,但我能力有限,无法自己对识别后的 JSON 文本完成这种重排。

以下是我针对识别后的 JSON 文本向 AI 提出的问题:

[{"box":[[12,71],[30,71],[30,91],[12,91]],"score":0.99603396654129028,"text":"6","end":" "},
{"box":[[11,120],[30,120],[30,139],[11,139]],"score":0.99924361705780029,"text":"7","end":" "},
{"box":[[12,170],[30,170],[30,190],[12,190]],"score":0.99798190593719482,"text":"8","end":"\n"},
{"box":[[43,17],[356,17],[356,40],[43,40]],"score":0.99932154161589482,"text":"男生复读3年高考查分喜极而泣","end":"\n"},
{"box":[[346,19],[381,19],[381,40],[346,40]],"score":0.99749338626861572,"text":"热","end":"\n"},
{"box":[[375,67],[413,67],[413,90],[375,90]],"score":0.64695890247821808,"text":"日新","end":"\n"},
{"box":[[46,68],[387,68],[387,91],[46,91]],"score":0.98347348372141519,"text":"中方回应特朗普要中国买美国石油","end":"\n"},
{"box":[[358,117],[397,117],[397,140],[358,140]],"score":0.83765721321105957,"text":"分新","end":"\n"},
{"box":[[47,119],[369,119],[369,139],[47,139]],"score":0.99872339197567528,"text":"湖北高二学生参加高考数学满分","end":"\n"},
{"box":[[41,166],[419,164],[420,190],[41,192]],"score":0.95793653235715981,"text":"个人养老金为何要交3%个税?专..","end":"\n"},
{"box":[[409,168],[435,168],[435,189],[409,189]],"score":0.99883073568344116,"text":"新","end":"\n"},
{"box":[[10,217],[315,217],[315,240],[10,240]],"score":0.99904135098824132,"text":"9A股收盘:沪指创年内新高","end":"\n"},
{"box":[[304,218],[337,218],[337,238],[304,238]],"score":0.9944075345993042,"text":"新","end":"\n"}]

这是图片识别的文字返回值:
请用js代码,按照从左到右,从上到下的顺序将其文字提取出来
首先按y坐标对元素进行排序
然后遍历元素,当y坐标差小于5时视为同一行
对每一行内的元素按x坐标从左到右排序
将同一行的内容合并为一个字符串
最后用换行符连接所有行

 

AI 给出的 JS 代码可以在 WPS 表格的 JS 编辑器中运行,并能得到需要的效果。

        图片                                                                       重排前                                                                      重排后

function 识别文字重排()
{
/**
 * Rebuild reading-order text (top to bottom, left to right) from OCR items.
 *
 * Each item is expected to expose `box` (an array of [x, y] corner points,
 * of which only the first one is used as the top-left corner) and `text`.
 *
 * Items are ordered by the y of their first corner and then walked in that
 * order; an item joins the current row when its y differs from the y of the
 * PREVIOUS item by less than `yTolerance`, otherwise it starts a new row.
 * Inside a row the items are ordered by the x of their first corner and
 * their texts are concatenated without a separator.
 *
 * The input array is not modified.
 *
 * @param {Array<{box: number[][], text: string}>} data - OCR result items.
 * @param {number} [yTolerance=5] - Maximum y distance (exclusive) between
 *     two consecutive items for them to share a row.
 * @returns {string} Rows joined with '\n'; '' for empty or non-array input.
 */
function extractText(data, yTolerance = 5) {
    if (!Array.isArray(data) || data.length === 0) {
        return '';
    }

    const topOf = (item) => item.box[0][1];
    const leftOf = (item) => item.box[0][0];

    // Sort a copy so the caller's array keeps its original order.
    const byTop = data.slice().sort((a, b) => topOf(a) - topOf(b));

    const rows = [];
    let previousTop = null;
    for (const item of byTop) {
        const top = topOf(item);
        if (previousTop !== null && Math.abs(top - previousTop) < yTolerance) {
            rows[rows.length - 1].push(item);
        } else {
            rows.push([item]);
        }
        // Compare against the previous item (not the row's first item), so a
        // row may drift gradually as long as each step stays within tolerance.
        previousTop = top;
    }

    return rows
        .map((row) => row
            .sort((a, b) => leftOf(a) - leftOf(b))
            .map((item) => item.text)
            .join(''))
        .join('\n');
}

// Sample data: the OCR result entries that should be reordered.
const data = [
{"box":[[12,71],[30,71],[30,91],[12,91]],"score":0.99603396654129028,"text":"6","end":" "},
{"box":[[11,120],[30,120],[30,139],[11,139]],"score":0.99924361705780029,"text":"7","end":" "},
{"box":[[12,170],[30,170],[30,190],[12,190]],"score":0.99798190593719482,"text":"8","end":"\n"},
{"box":[[43,17],[356,17],[356,40],[43,40]],"score":0.99932154161589482,"text":"男生复读3年高考查分喜极而泣","end":"\n"},
{"box":[[346,19],[381,19],[381,40],[346,40]],"score":0.99749338626861572,"text":"热","end":"\n"},
{"box":[[375,67],[413,67],[413,90],[375,90]],"score":0.64695890247821808,"text":"日新","end":"\n"},
{"box":[[46,68],[387,68],[387,91],[46,91]],"score":0.98347348372141519,"text":"中方回应特朗普要中国买美国石油","end":"\n"},
{"box":[[358,117],[397,117],[397,140],[358,140]],"score":0.83765721321105957,"text":"分新","end":"\n"},
{"box":[[47,119],[369,119],[369,139],[47,139]],"score":0.99872339197567528,"text":"湖北高二学生参加高考数学满分","end":"\n"},
{"box":[[41,166],[419,164],[420,190],[41,192]],"score":0.95793653235715981,"text":"个人养老金为何要交3%个税?专..","end":"\n"},
{"box":[[409,168],[435,168],[435,189],[409,189]],"score":0.99883073568344116,"text":"新","end":"\n"},
{"box":[[10,217],[315,217],[315,240],[10,240]],"score":0.99904135098824132,"text":"9A股收盘:沪指创年内新高","end":"\n"},
{"box":[[304,218],[337,218],[337,238],[304,238]],"score":0.9944075345993042,"text":"新","end":"\n"}
];

// Reorder the sample entries and print the resulting text.
console.log(extractText(data));
}

是否可以把这个函数改写一下,以便在 qk 中调用?

涛涛涛 最后更新于 2025/6/25

回复内容
ImmortalSty 8天8小时前
#1

首先确保你用的是 PaddleOCR 模型,然后更新动作到最新版,右键设置,先把动作模型里面的 OCR 模型改为“PaddleOCR”,再把图像识别里面的排版解析方案改为“单栏-按自然段换行”。


这个是 Umi-OCR 自带的功能,不需要另外再实现一遍。

回复主贴