From c174c19ddda6d3a82d2c62c1e60b0402b73f9c9b Mon Sep 17 00:00:00 2001 From: Nolan Lawson Date: Wed, 23 Oct 2019 18:07:24 -0700 Subject: [PATCH] fix: fix tesseract.js progress bar (#1599) * fix: fix tesseract.js progress bar * fixup --- src/routes/_utils/runTesseract.js | 68 ++++++++++++++++++------------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/src/routes/_utils/runTesseract.js b/src/routes/_utils/runTesseract.js index 171e569c..1896213d 100644 --- a/src/routes/_utils/runTesseract.js +++ b/src/routes/_utils/runTesseract.js @@ -2,24 +2,49 @@ import { importTesseractWorker } from '../_utils/asyncModules' const DESTROY_WORKER_DELAY = 300000 // 5 minutes -// TODO: it's flaky to try to estimate tesseract's total progress this way -const steps = [ - { status: 'loading tesseract core', proportion: 0.05 }, - { status: 'initializing tesseract', proportion: 0.05 }, - { status: 'loading language traineddata', proportion: 0.1 }, - { status: 'initializing api', proportion: 0.2 }, - { status: 'recognizing text', proportion: 0.6 } -] - let worker let destroyWorkerHandle -async function initWorker () { +// TODO: it seems hacky that we have to spy on the tesseract worker to figure out its progress +const steps = [ + { status: 'loading tesseract core', proportion: 0.1 }, + { status: 'initializing tesseract', proportion: 0.05 }, + { status: 'loading language traineddata', proportion: 0.1 }, + { status: 'initializing api', proportion: 0.2 }, + { status: 'recognizing text', proportion: 0.55 } +] + +if (process.env.NODE_ENV !== 'production') { + if (steps.map(_ => _.proportion).reduce((a, b) => a + b, 0) !== 1) { + console.error('Steps do not add up to 1! You should probably fix this.') + } +} + +async function spyOnWorkerProgress (onProgress, runnable) { + const listener = event => { + const { data } = event + if (onProgress && data.status === 'progress' && steps.find(({ status }) => status === data.data.status)) { + onProgress(getTotalProgress(data.data)) + } + } + + worker.worker.addEventListener('message', listener) + try { + const res = await runnable() + return res + } finally { + worker.worker.removeEventListener('message', listener) + } +} + +async function initWorker (onProgress) { if (!worker) { worker = (await importTesseractWorker())() - await worker.load() - await worker.loadLanguage('eng') - await worker.initialize('eng') + await spyOnWorkerProgress(onProgress, async () => { + await worker.load() + await worker.loadLanguage('eng') + await worker.initialize('eng') + }) } } @@ -55,25 +80,12 @@ function getTotalProgress (progressInfo) { } async function recognize (url, onProgress) { - // TODO: it seems hacky that we have to spy on the tesseract worker to figure out its progress - const listener = event => { - const { data } = event - if (onProgress && data.status === 'progress' && steps.find(({ status }) => status === data.data.status)) { - onProgress(getTotalProgress(data.data)) - } - } - worker.worker.addEventListener('message', listener) - try { - const res = await worker.recognize(url, 'eng') - return res - } finally { - worker.worker.removeEventListener('message', listener) - } + return spyOnWorkerProgress(onProgress, () => worker.recognize(url, 'eng')) } export async function runTesseract (url, onProgress) { cancelDestroyWorker() - await initWorker() + await initWorker(onProgress) try { const res = await recognize(url, onProgress) console.log('result', res)