From 5e61a8582bb1426c4f28d55d1bfd4e576960d9e4 Mon Sep 17 00:00:00 2001 From: Nolan Lawson Date: Sun, 14 Mar 2021 09:24:00 -0700 Subject: [PATCH] perf: slightly more efficient word filter format (#1991) --- .../computations/wordFilterComputations.js | 19 ++++++++++------ .../_store/observers/wordFilterObservers.js | 8 +++---- ...teFilterContextsForStatusOrNotification.js | 14 +++++------- src/routes/_utils/createRegexFromFilter.js | 20 ----------------- src/routes/_utils/createRegexFromFilters.js | 22 +++++++++++++++++++ src/routes/_utils/timelineItemToSummary.js | 6 ++--- 6 files changed, 47 insertions(+), 42 deletions(-) delete mode 100644 src/routes/_utils/createRegexFromFilter.js create mode 100644 src/routes/_utils/createRegexFromFilters.js diff --git a/src/routes/_store/computations/wordFilterComputations.js b/src/routes/_store/computations/wordFilterComputations.js index 46e46b95..a7f5824f 100644 --- a/src/routes/_store/computations/wordFilterComputations.js +++ b/src/routes/_store/computations/wordFilterComputations.js @@ -1,4 +1,5 @@ -import { createRegexFromFilter } from '../../_utils/createRegexFromFilter' +import { createRegexFromFilters } from '../../_utils/createRegexFromFilters' +import { WORD_FILTER_CONTEXTS } from '../../_static/wordFilters' export function wordFilterComputations (store) { // unexpiredInstanceFilters is calculated based on `now` and `instanceFilters`, @@ -9,13 +10,17 @@ export function wordFilterComputations (store) { (unexpiredInstanceFilters, currentInstance) => unexpiredInstanceFilters[currentInstance] || [] ) - store.compute('unexpiredInstanceFiltersWithRegexes', ['unexpiredInstanceFilters'], unexpiredInstanceFilters => { + store.compute('unexpiredInstanceFilterRegexes', ['unexpiredInstanceFilters'], unexpiredInstanceFilters => { return Object.fromEntries(Object.entries(unexpiredInstanceFilters).map(([instanceName, filters]) => { - const filtersWithRegexes = filters.map(filter => ({ - ...filter, - regex: createRegexFromFilter(filter) - })) - return [instanceName, filtersWithRegexes] + const contextsToRegex = Object.fromEntries(WORD_FILTER_CONTEXTS.map(context => { + const filtersForThisContext = filters.filter(_ => _.context.includes(context)) + if (!filtersForThisContext.length) { + return undefined // don't bother even adding it to the map + } + const regex = createRegexFromFilters(filtersForThisContext) + return [context, regex] + }).filter(Boolean)) + return [instanceName, contextsToRegex] })) }) } diff --git a/src/routes/_store/observers/wordFilterObservers.js b/src/routes/_store/observers/wordFilterObservers.js index 44849da6..d6216e91 100644 --- a/src/routes/_store/observers/wordFilterObservers.js +++ b/src/routes/_store/observers/wordFilterObservers.js @@ -40,8 +40,8 @@ export function wordFilterObservers () { updateUnexpiredInstanceFiltersIfUnchanged(now, instanceFilters) }) - store.observe('unexpiredInstanceFiltersWithRegexes', async unexpiredInstanceFiltersWithRegexes => { - console.log('unexpiredInstanceFiltersWithRegexes changed, recomputing filterContexts') + store.observe('unexpiredInstanceFilterRegexes', async unexpiredInstanceFilterRegexes => { + console.log('unexpiredInstanceFilterRegexes changed, recomputing filterContexts') mark('update timeline item summary filter contexts') // Whenever the filters change, we need to re-compute the filterContexts on the TimelineSummaries. // This is a bit of an odd design, but we do it for perf. See timelineItemToSummary.js for details. @@ -55,7 +55,7 @@ export function wordFilterObservers () { let somethingChanged = false - await Promise.all(Object.entries(unexpiredInstanceFiltersWithRegexes).map(async ([instanceName, filtersWithRegexes]) => { + await Promise.all(Object.entries(unexpiredInstanceFilterRegexes).map(async ([instanceName, contextsToRegex]) => { const timelinesToSummaries = timelineItemSummaries[instanceName] || {} const timelinesToSummariesToAdd = timelineItemSummariesToAdd[instanceName] || {} const summariesToUpdate = [ @@ -70,7 +70,7 @@ export function wordFilterObservers () { ? database.getNotification(instanceName, summary.id) : database.getStatus(instanceName, summary.id) ) - const newFilterContexts = computeFilterContextsForStatusOrNotification(item, filtersWithRegexes) + const newFilterContexts = computeFilterContextsForStatusOrNotification(item, contextsToRegex) if (!isEqual(summary.filterContexts, newFilterContexts)) { somethingChanged = true summary.filterContexts = newFilterContexts diff --git a/src/routes/_utils/computeFilterContextsForStatusOrNotification.js b/src/routes/_utils/computeFilterContextsForStatusOrNotification.js index 2ba8641f..b84090ed 100644 --- a/src/routes/_utils/computeFilterContextsForStatusOrNotification.js +++ b/src/routes/_utils/computeFilterContextsForStatusOrNotification.js @@ -1,18 +1,16 @@ import { createSearchIndexFromStatusOrNotification } from './createSearchIndexFromStatusOrNotification' -import { uniq } from 'lodash-es' -export function computeFilterContextsForStatusOrNotification (statusOrNotification, filtersWithRegexes) { - if (!filtersWithRegexes || !filtersWithRegexes.length) { +export function computeFilterContextsForStatusOrNotification (statusOrNotification, contextsToRegex) { + if (!contextsToRegex || !Object.keys(contextsToRegex).length) { // avoid computing the search index, just bail out return undefined } // the searchIndex is really just a string of text const searchIndex = createSearchIndexFromStatusOrNotification(statusOrNotification) - const res = filtersWithRegexes && uniq(filtersWithRegexes - .filter(({ regex }) => regex.test(searchIndex)) - .map(_ => _.context) - .flat()) + const res = Object.entries(contextsToRegex) + .filter(([context, regex]) => regex.test(searchIndex)) + .map(([context]) => context) // return undefined instead of a new array to reduce memory usage of TimelineSummary - return (res && res.length) ? res : undefined + return res.length ? res : undefined } diff --git a/src/routes/_utils/createRegexFromFilter.js b/src/routes/_utils/createRegexFromFilter.js deleted file mode 100644 index adaa3793..00000000 --- a/src/routes/_utils/createRegexFromFilter.js +++ /dev/null @@ -1,20 +0,0 @@ -// copy-pasta'd from mastodon -// https://github.com/tootsuite/mastodon/blob/2ff01f7/app/javascript/mastodon/selectors/index.js#L40-L63 -const escapeRegExp = string => - string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') // $& means the whole matched string - -export function createRegexFromFilter (filter) { - let expr = escapeRegExp(filter.phrase) - - if (filter.whole_word) { - if (/^[\w]/.test(expr)) { - expr = `\\b${expr}` - } - - if (/[\w]$/.test(expr)) { - expr = `${expr}\\b` - } - } - - return new RegExp(expr, 'i') -} diff --git a/src/routes/_utils/createRegexFromFilters.js b/src/routes/_utils/createRegexFromFilters.js new file mode 100644 index 00000000..7f4e3d5c --- /dev/null +++ b/src/routes/_utils/createRegexFromFilters.js @@ -0,0 +1,22 @@ +// copy-pasta'd from mastodon +// https://github.com/tootsuite/mastodon/blob/2ff01f7/app/javascript/mastodon/selectors/index.js#L40-L63 +const escapeRegExp = string => + string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') // $& means the whole matched string + +export const createRegexFromFilters = filters => { + return new RegExp(filters.map(filter => { + let expr = escapeRegExp(filter.phrase) + + if (filter.whole_word) { + if (/^[\w]/.test(expr)) { + expr = `\\b${expr}` + } + + if (/[\w]$/.test(expr)) { + expr = `${expr}\\b` + } + } + + return expr + }).join('|'), 'i') +} diff --git a/src/routes/_utils/timelineItemToSummary.js b/src/routes/_utils/timelineItemToSummary.js index c4b6d0a5..42d84f01 100644 --- a/src/routes/_utils/timelineItemToSummary.js +++ b/src/routes/_utils/timelineItemToSummary.js @@ -13,9 +13,9 @@ class TimelineSummary { // 1. Avoid computing html-to-text (expensive) for users who don't have any filters (probably most users) // 2. Avoiding keeping the entire html-to-text in memory at all times for all summaries // 3. Filters probably change infrequently. When they do, we can just update the summaries - const { unexpiredInstanceFiltersWithRegexes } = store.get() - const filtersWithRegexes = unexpiredInstanceFiltersWithRegexes[instanceName] - this.filterContexts = computeFilterContextsForStatusOrNotification(item, filtersWithRegexes) + const { unexpiredInstanceFilterRegexes } = store.get() + const contextsToRegex = unexpiredInstanceFilterRegexes[instanceName] + this.filterContexts = computeFilterContextsForStatusOrNotification(item, contextsToRegex) } }