Refactor streaming's filtering logic & improve documentation (#26213)
This commit is contained in:
		
							parent
							
								
									1814990a3d
								
							
						
					
					
						commit
						ddaf200c78
					
				| @ -622,29 +622,39 @@ const startServer = async () => { | |||||||
| 
 | 
 | ||||||
|     log.verbose(req.requestId, `Starting stream from ${ids.join(', ')} for ${accountId}`); |     log.verbose(req.requestId, `Starting stream from ${ids.join(', ')} for ${accountId}`); | ||||||
| 
 | 
 | ||||||
|     // Currently message is of type string, soon it'll be Record<string, any>
 |     const transmit = (event, payload) => { | ||||||
|  |       // TODO: Replace "string"-based delete payloads with object payloads:
 | ||||||
|  |       const encodedPayload = typeof payload === 'object' ? JSON.stringify(payload) : payload; | ||||||
|  | 
 | ||||||
|  |       log.silly(req.requestId, `Transmitting for ${accountId}: ${event} ${encodedPayload}`); | ||||||
|  |       output(event, encodedPayload); | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     // The listener used to process each message off the redis subscription,
 | ||||||
|  |     // message here is an object with an `event` and `payload` property. Some
 | ||||||
|  |     // events also include a queued_at value, but this is being removed shortly.
 | ||||||
|     const listener = message => { |     const listener = message => { | ||||||
|       const { event, payload, queued_at } = message; |       const { event, payload } = message; | ||||||
| 
 | 
 | ||||||
|       const transmit = (payload) => { |       // Streaming only needs to apply filtering to some channels and only to
 | ||||||
|         const now = new Date().getTime(); |       // some events. This is because majority of the filtering happens on the
 | ||||||
|         const delta = now - queued_at; |       // Ruby on Rails side when producing the event for streaming.
 | ||||||
|         const encodedPayload = typeof payload === 'object' ? JSON.stringify(payload) : payload; |       //
 | ||||||
| 
 |       // The only events that require filtering from the streaming server are
 | ||||||
|         log.silly(req.requestId, `Transmitting for ${accountId}: ${event} ${encodedPayload} Delay: ${delta}ms`); |       // `update` and `status.update`, all other events are transmitted to the
 | ||||||
|         output(event, encodedPayload); |       // client as soon as they're received (pass-through).
 | ||||||
|       }; |       //
 | ||||||
| 
 |       // The channels that need filtering are determined in the function
 | ||||||
|       // Only messages that may require filtering are statuses, since notifications
 |       // `channelNameToIds` defined below:
 | ||||||
|       // are already personalized and deletes do not matter
 |       if (!needsFiltering || (event !== 'update' && event !== 'status.update')) { | ||||||
|       if (!needsFiltering || event !== 'update') { |         transmit(event, payload); | ||||||
|         transmit(payload); |  | ||||||
|         return; |         return; | ||||||
|       } |       } | ||||||
| 
 | 
 | ||||||
|       const targetAccountIds = [payload.account.id].concat(payload.mentions.map(item => item.id)); |       // The rest of the logic from here on in this function is to handle
 | ||||||
|       const accountDomain = payload.account.acct.split('@')[1]; |       // filtering of statuses:
 | ||||||
| 
 | 
 | ||||||
|  |       // Filter based on language:
 | ||||||
|       if (Array.isArray(req.chosenLanguages) && payload.language !== null && req.chosenLanguages.indexOf(payload.language) === -1) { |       if (Array.isArray(req.chosenLanguages) && payload.language !== null && req.chosenLanguages.indexOf(payload.language) === -1) { | ||||||
|         log.silly(req.requestId, `Message ${payload.id} filtered by language (${payload.language})`); |         log.silly(req.requestId, `Message ${payload.id} filtered by language (${payload.language})`); | ||||||
|         return; |         return; | ||||||
| @ -652,11 +662,16 @@ const startServer = async () => { | |||||||
| 
 | 
 | ||||||
|       // When the account is not logged in, it is not necessary to confirm the block or mute
 |       // When the account is not logged in, it is not necessary to confirm the block or mute
 | ||||||
|       if (!req.accountId) { |       if (!req.accountId) { | ||||||
|         transmit(payload); |         transmit(event, payload); | ||||||
|         return; |         return; | ||||||
|       } |       } | ||||||
| 
 | 
 | ||||||
|       pgPool.connect((err, client, done) => { |       // Filter based on domain blocks, blocks, mutes, or custom filters:
 | ||||||
|  |       const targetAccountIds = [payload.account.id].concat(payload.mentions.map(item => item.id)); | ||||||
|  |       const accountDomain = payload.account.acct.split('@')[1]; | ||||||
|  | 
 | ||||||
|  |       // TODO: Move this logic out of the message handling loop
 | ||||||
|  |       pgPool.connect((err, client, releasePgConnection) => { | ||||||
|         if (err) { |         if (err) { | ||||||
|           log.error(err); |           log.error(err); | ||||||
|           return; |           return; | ||||||
| @ -683,28 +698,45 @@ const startServer = async () => { | |||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         Promise.all(queries).then(values => { |         Promise.all(queries).then(values => { | ||||||
|           done(); |           releasePgConnection(); | ||||||
| 
 | 
 | ||||||
|  |           // Handling blocks & mutes and domain blocks: If one of those applies,
 | ||||||
|  |           // then we don't transmit the payload of the event to the client
 | ||||||
|           if (values[0].rows.length > 0 || (accountDomain && values[1].rows.length > 0)) { |           if (values[0].rows.length > 0 || (accountDomain && values[1].rows.length > 0)) { | ||||||
|             return; |             return; | ||||||
|           } |           } | ||||||
| 
 | 
 | ||||||
|           if (!payload.filtered && !req.cachedFilters) { |           // If the payload already contains the `filtered` property, it means
 | ||||||
|  |           // that filtering has been applied on the ruby on rails side, as 
 | ||||||
|  |           // such, we don't need to construct or apply the filters in streaming:
 | ||||||
|  |           if (Object.prototype.hasOwnProperty.call(payload, "filtered")) { | ||||||
|  |             transmit(event, payload); | ||||||
|  |             return; | ||||||
|  |           } | ||||||
|  | 
 | ||||||
|  |           // Handling for constructing the custom filters and caching them on the request
 | ||||||
|  |           // TODO: Move this logic out of the message handling lifecycle
 | ||||||
|  |           if (!req.cachedFilters) { | ||||||
|             const filterRows = values[accountDomain ? 2 : 1].rows; |             const filterRows = values[accountDomain ? 2 : 1].rows; | ||||||
| 
 | 
 | ||||||
|             req.cachedFilters = filterRows.reduce((cache, row) => { |             req.cachedFilters = filterRows.reduce((cache, filter) => { | ||||||
|               if (cache[row.id]) { |               if (cache[filter.id]) { | ||||||
|                 cache[row.id].keywords.push([row.keyword, row.whole_word]); |                 cache[filter.id].keywords.push([filter.keyword, filter.whole_word]); | ||||||
|               } else { |               } else { | ||||||
|                 cache[row.id] = { |                 cache[filter.id] = { | ||||||
|                   keywords: [[row.keyword, row.whole_word]], |                   keywords: [[filter.keyword, filter.whole_word]], | ||||||
|                   expires_at: row.expires_at, |                   expires_at: filter.expires_at, | ||||||
|                   repr: { |                   filter: { | ||||||
|                     id: row.id, |                     id: filter.id, | ||||||
|                     title: row.title, |                     title: filter.title, | ||||||
|                     context: row.context, |                     context: filter.context, | ||||||
|                     expires_at: row.expires_at, |                     expires_at: filter.expires_at, | ||||||
|                     filter_action: ['warn', 'hide'][row.filter_action], |                     // filter.filter_action is the value from the
 | ||||||
|  |                     // custom_filters.action database column, it is an integer
 | ||||||
|  |                     // representing a value in an enum defined by Ruby on Rails:
 | ||||||
|  |                     //
 | ||||||
|  |                     // enum { warn: 0, hide: 1 }
 | ||||||
|  |                     filter_action: ['warn', 'hide'][filter.filter_action], | ||||||
|                   }, |                   }, | ||||||
|                 }; |                 }; | ||||||
|               } |               } | ||||||
| @ -712,6 +744,10 @@ const startServer = async () => { | |||||||
|               return cache; |               return cache; | ||||||
|             }, {}); |             }, {}); | ||||||
| 
 | 
 | ||||||
|  |             // Construct the regular expressions for the custom filters: This
 | ||||||
|  |             // needs to be done in a separate loop as the database returns one
 | ||||||
|  |             // filterRow per keyword, so we need all the keywords before
 | ||||||
|  |             // constructing the regular expression
 | ||||||
|             Object.keys(req.cachedFilters).forEach((key) => { |             Object.keys(req.cachedFilters).forEach((key) => { | ||||||
|               req.cachedFilters[key].regexp = new RegExp(req.cachedFilters[key].keywords.map(([keyword, whole_word]) => { |               req.cachedFilters[key].regexp = new RegExp(req.cachedFilters[key].keywords.map(([keyword, whole_word]) => { | ||||||
|                 let expr = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); |                 let expr = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); | ||||||
| @ -731,34 +767,56 @@ const startServer = async () => { | |||||||
|             }); |             }); | ||||||
|           } |           } | ||||||
| 
 | 
 | ||||||
|           // Check filters
 |           // Apply cachedFilters against the payload, constructing a
 | ||||||
|           if (req.cachedFilters && !payload.filtered) { |           // `filter_results` array of FilterResult entities
 | ||||||
|             const mutatedPayload = { ...payload }; |           if (req.cachedFilters) { | ||||||
|             const status = payload; |             const status = payload; | ||||||
|             const searchContent = ([status.spoiler_text || '', status.content].concat((status.poll && status.poll.options) ? status.poll.options.map(option => option.title) : [])).concat(status.media_attachments.map(att => att.description)).join('\n\n').replace(/<br\s*\/?>/g, '\n').replace(/<\/p><p>/g, '\n\n'); |             // TODO: Calculate searchableContent in Ruby on Rails:
 | ||||||
|             const searchIndex = JSDOM.fragment(searchContent).textContent; |             const searchableContent = ([status.spoiler_text || '', status.content].concat((status.poll && status.poll.options) ? status.poll.options.map(option => option.title) : [])).concat(status.media_attachments.map(att => att.description)).join('\n\n').replace(/<br\s*\/?>/g, '\n').replace(/<\/p><p>/g, '\n\n'); | ||||||
|  |             const searchableTextContent = JSDOM.fragment(searchableContent).textContent; | ||||||
| 
 | 
 | ||||||
|             const now = new Date(); |             const now = new Date(); | ||||||
|             mutatedPayload.filtered = []; |             const filter_results = Object.values(req.cachedFilters).reduce((results, cachedFilter) => { | ||||||
|             Object.values(req.cachedFilters).forEach((cachedFilter) => { |               // Check the filter hasn't expired before applying:
 | ||||||
|               if ((cachedFilter.expires_at === null || cachedFilter.expires_at > now)) { |               if (cachedFilter.expires_at !== null && cachedFilter.expires_at < now) { | ||||||
|                 const keyword_matches = searchIndex.match(cachedFilter.regexp); |                 return; | ||||||
|                 if (keyword_matches) { |  | ||||||
|                   mutatedPayload.filtered.push({ |  | ||||||
|                     filter: cachedFilter.repr, |  | ||||||
|                     keyword_matches, |  | ||||||
|                   }); |  | ||||||
|                 } |  | ||||||
|               } |               } | ||||||
|             }); |  | ||||||
| 
 | 
 | ||||||
|             transmit(mutatedPayload); |               // Just in-case JSDOM fails to find textContent in searchableContent
 | ||||||
|  |               if (!searchableTextContent) { | ||||||
|  |                 return; | ||||||
|  |               } | ||||||
|  | 
 | ||||||
|  |               const keyword_matches = searchableTextContent.match(cachedFilter.regexp); | ||||||
|  |               if (keyword_matches) { | ||||||
|  |                 // results is an Array of FilterResult; status_matches is always
 | ||||||
|  |                 // null as we only are only applying the keyword-based custom
 | ||||||
|  |                 // filters, not the status-based custom filters.
 | ||||||
|  |                 // https://docs.joinmastodon.org/entities/FilterResult/
 | ||||||
|  |                 results.push({ | ||||||
|  |                   filter: cachedFilter.filter, | ||||||
|  |                   keyword_matches, | ||||||
|  |                   status_matches: null | ||||||
|  |                 }); | ||||||
|  |               } | ||||||
|  |             }, []); | ||||||
|  | 
 | ||||||
|  |             // Send the payload + the FilterResults as the `filtered` property
 | ||||||
|  |             // to the streaming connection. To reach this code, the `event` must
 | ||||||
|  |             // have been either `update` or `status.update`, meaning the
 | ||||||
|  |             // `payload` is a Status entity, which has a `filtered` property:
 | ||||||
|  |             //
 | ||||||
|  |             // filtered: https://docs.joinmastodon.org/entities/Status/#filtered
 | ||||||
|  |             transmit(event, { | ||||||
|  |               ...payload, | ||||||
|  |               filtered: filter_results | ||||||
|  |             }); | ||||||
|           } else { |           } else { | ||||||
|             transmit(payload); |             transmit(event, payload); | ||||||
|           } |           } | ||||||
|         }).catch(err => { |         }).catch(err => { | ||||||
|  |           releasePgConnection(); | ||||||
|           log.error(err); |           log.error(err); | ||||||
|           done(); |  | ||||||
|         }); |         }); | ||||||
|       }); |       }); | ||||||
|     }; |     }; | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user