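// NOTE(review): the text quoted below looks like page-header residue from a web file viewer, not module source — confirm and drop.
// "this repo has no description" / "at main 22 kB view raw"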
import { isNothing } from "@jet/environment";
import * as serverData from "../../foundation/json-parsing/server-data";
import {
    diversifyDataItems,
    getOrderedAppIds,
    getUpdatedScoreAfterBoosting,
    PersonalizedData,
} from "./on-device-recommendations-common";

/**
 * This utility class simplifies processing the raw data, by decorating it with some key properties.
 *
 * Every flag starts as `false`, both ids start as `null` and every score starts at `0`; the
 * functions in this module fill them in while classifying each raw data blob.
 */
class PersonalizedDataDefault extends PersonalizedData {
    /**
     * @param rawData The raw data blob being decorated. It is returned untouched once processing is done.
     */
    constructor(rawData) {
        super();
        this.rawData = rawData;
        this.isExactMatch = false;
        this.isWildcardMatch = false;
        this.isUnpersonalizedMatch = false;
        this.isFallbackMatch = false;
        this.appId = null;
        this.groupId = null;
        this.score = 0;
        this.modifiedScore = 0;
        this.onDeviceScore = 0;
    }
}

// Represents a "match all" wildcard segment. Any data items that have this segment are always considered a match.
const alwaysMatchUserSegment = "-1";

/**
 * Converts a list of raw data blobs into a list that has been personalized for the user, based upon on device personalization data.
 *
 * The strategy is selected by `metricsData["use_signals"]` on the personalization data container.
 *
 * When `use_signals` is absent or falsy, the rules we follow here are:
 * 1. Choose the data items that have personalization segments which match the user
 * 2. Remove some data items so that there is only one per group
 * 3. Bring any data items where the user exactly matches the personalization segment to the front of the list
 *
 * If needed, we may also include fallback results to reach a preferred number of results. For any group where no matches are found, the last
 * item in that group can be used as a fallback. We can only ever have one item per group, so it may not always be possible to reach the
 * preferred number of results.
 *
 * When `use_signals` is truthy, the items are decorated with their on-device scores, the ordering returned by
 * `getUpdatedScoreAfterBoosting` is used, and the list is trimmed to `preferredResultCount` when one is given.
 *
 * @param objectGraph The object graph, forwarded to the helpers (used there to generate random group IDs).
 * @param dataItems The raw data blobs.
 * @param onDevicePersonalizationDataContainer The on device personalization data container for the user, used for matching segments against the dataItems. May be null or undefined.
 * @param includeItemsWithNoPersonalizationData Whether dataItems without any valid personalization data should always be included in the results.
 * @param allowUnmatchedFallbackResults Whether to allow fallback results to be included in the results. This will only be utilised in order to reach a preferredResultCount.
 * @param preferredResultCount The preferred number of items to be included in the results.
 * @param parentAppId An optional appID, which is the parent for all the dataItems. Currently only used for search.
 * @param diversify An optional flag that determines if we should diversify the personalized results on the basis of server side apps ranking.
 * @returns An object with `personalizedData` (a subset, or all, of the original dataItems) and `processingType`
 * (the sum of the "filtered" flag and the "sorted" flag computed below).
 */
export function personalizeDataItems(objectGraph, dataItems, onDevicePersonalizationDataContainer, includeItemsWithNoPersonalizationData, allowUnmatchedFallbackResults, preferredResultCount, parentAppId, diversify) {
    const container = onDevicePersonalizationDataContainer;
    const hasContainer = container != null;
    const personalizationData = hasContainer ? container.personalizationData : undefined;
    // Read the metrics defensively: a container without metricsData must select the segment based path
    // rather than throw while looking up the flag.
    const metricsData = hasContainer ? container.metricsData : undefined;
    const useSignalsFlag = metricsData != null ? metricsData["use_signals"] : undefined;
    const useSignals = useSignalsFlag != null ? useSignalsFlag : false;

    let sortResult;
    if (!useSignals) {
        // First decorate our raw dataItems with segment and group information
        const personalizedDataItems = personalizedDataItemsFromDataItems(objectGraph, dataItems, personalizationData, includeItemsWithNoPersonalizationData, parentAppId);
        // Get server side ordering of app Ids to be used for diversification
        const serverSideAppIdsOrdering = getOrderedAppIds(personalizedDataItems);
        // Now iterate through the list of personalizedDataItems, and choose one per group
        const matchedDataItemsIncludingFallback = filterDataItemsIntoOnePerGroup(objectGraph, personalizedDataItems);
        // Now sort the data items, respecting our preferredResultCount if needed
        sortResult = sortDataItems(objectGraph, matchedDataItemsIncludingFallback, allowUnmatchedFallbackResults, serverSideAppIdsOrdering, preferredResultCount, diversify);
    }
    else {
        // First decorate our raw dataItems with their server score and on-device score
        const personalizedDataItems = personalizedDataItemsFromDataItemsOnDeviceSignals(objectGraph, dataItems, personalizationData, includeItemsWithNoPersonalizationData, parentAppId);
        // Now sort the data items
        const sortedDataItems = getUpdatedScoreAfterBoosting(personalizedDataItems, metricsData);
        const orderWasNotChanged = personalizedDataItems.every((dataItem, index) => dataItem === sortedDataItems[index]);
        sortResult = {
            sortedDataItems: sortedDataItems,
            processingType: orderWasNotChanged
                ? 0 /* onDevicePersonalization.ProcessingType.contentsNotChanged */
                : 2 /* onDevicePersonalization.ProcessingType.contentsSorted */,
        };
        if (serverData.isDefinedNonNull(preferredResultCount) &&
            sortResult.sortedDataItems.length >= preferredResultCount) {
            sortResult.sortedDataItems = sortResult.sortedDataItems.slice(0, preferredResultCount);
        }
    }

    // We only need to return the raw data blobs, so remove the personalization decoration
    const finalDataItems = sortResult.sortedDataItems.map((personalizedDataItem) => personalizedDataItem.rawData);
    // Generate the processing type value
    const filterType = dataItems.length !== finalDataItems.length
        ? 1 /* onDevicePersonalization.ProcessingType.contentsFiltered */
        : 0 /* onDevicePersonalization.ProcessingType.contentsNotChanged */;
    const processingType = filterType + sortResult.processingType;
    return {
        personalizedData: finalDataItems,
        processingType: processingType,
    };
}

/**
 * Creates a list of `PersonalizedData` objects for the on-device signals path, based on the input raw data items.
 *
 * Each item is decorated with its `appId`, its server `score` (read from `meta.personalizationData.score`,
 * defaulting to 0) and, when available for that app, its `onDeviceScore`.
 *
 * @param objectGraph The object graph. Not used by this function.
 * @param dataItems The raw data blobs.
 * @param onDevicePersonalizationData The on device personalization data, keyed by app ID.
 * @param includeItemsWithNoPersonalizationData Whether dataItems without any valid personalization data should be included in the results.
 * @param parentAppId An optional appID, which is the parent for all the dataItems. Currently only used for search.
 * @returns A list of PersonalizedData objects.
 */
function personalizedDataItemsFromDataItemsOnDeviceSignals(objectGraph, dataItems, onDevicePersonalizationData, includeItemsWithNoPersonalizationData, parentAppId) {
    const hasParentAppId = parentAppId != null && parentAppId.length > 0;
    const personalizedDataItems = [];
    for (const data of dataItems) {
        const personalizedData = new PersonalizedDataDefault(data);
        const score = serverData.asNumber(data, "meta.personalizationData.score");
        let appId = serverData.asString(data, "meta.personalizationData.appId");
        if ((isNothing(appId) || appId.length === 0) && hasParentAppId) {
            // If we have a parentAppId this means we are coming from search, where `appId` is not provided.
            appId = parentAppId;
        }
        if (isNothing(appId) || appId.length === 0) {
            // Personalization data is missing or invalid. This may sometimes be valid, eg. evergreen today stories for when reco times out.
            if (includeItemsWithNoPersonalizationData) {
                personalizedData.isUnpersonalizedMatch = true;
                personalizedDataItems.push(personalizedData);
            }
            continue;
        }
        if (serverData.isDefinedNonNull(onDevicePersonalizationData)) {
            const onDevicePersonalizationDataForApp = onDevicePersonalizationData[appId];
            if (serverData.isDefinedNonNull(onDevicePersonalizationDataForApp) &&
                serverData.isDefinedNonNull(onDevicePersonalizationDataForApp.onDeviceSignals)) {
                // Keep the default score of 0 when the signal cannot be read as a number, so that a NaN
                // never reaches the score based ordering.
                const onDeviceScore = Number(onDevicePersonalizationDataForApp.onDeviceSignals);
                if (!Number.isNaN(onDeviceScore)) {
                    personalizedData.onDeviceScore = onDeviceScore;
                }
            }
        }
        personalizedData.appId = appId;
        personalizedData.score = score != null ? score : 0;
        personalizedDataItems.push(personalizedData);
    }
    return personalizedDataItems;
}

/**
 * Creates a list of `PersonalizedData` objects for the segment based path, based on the input raw data items.
 *
 * Each item is decorated with its `appId` and `groupId`, and flagged as a wildcard match and/or an exact match.
 *
 * @param objectGraph The object graph, used to generate a random group ID when `parentAppId` is substituted.
 * @param dataItems The raw data blobs.
 * @param onDevicePersonalizationData The on device personalization data, keyed by app ID, used for matching personalization segments against the dataItems.
 * @param includeItemsWithNoPersonalizationData Whether dataItems without any valid personalization data should be included in the results.
 * @param parentAppId An optional appID, which is the parent for all the dataItems. Currently only used for search.
 * @returns A list of PersonalizedData objects.
 */
function personalizedDataItemsFromDataItems(objectGraph, dataItems, onDevicePersonalizationData, includeItemsWithNoPersonalizationData, parentAppId) {
    const hasParentAppId = parentAppId != null && parentAppId.length > 0;
    const personalizedDataItems = [];
    for (const data of dataItems) {
        const personalizedData = new PersonalizedDataDefault(data);
        const rawDataUserSegments = serverData.asString(data, "meta.personalizationData.segId");
        let appId = serverData.asString(data, "meta.personalizationData.appId");
        let groupId = serverData.asString(data, "meta.personalizationData.grpId");
        if ((isNothing(appId) || appId.length === 0) && hasParentAppId) {
            // If we have a parentAppId this means we are coming from search, where `appId` and `grpId` are not provided.
            // Normally we filter our data items to only allow one item per group, so in this case we allocate a random
            // group ID, so that none of the data items get filtered out for that reason. Later on as part of search
            // results processing we will pick the first (valid) result, but only after ODP has finished.
            appId = parentAppId;
            groupId = objectGraph.random.nextUUID();
        }
        if (serverData.isNullOrEmpty(rawDataUserSegments) ||
            serverData.isNullOrEmpty(appId) ||
            serverData.isNullOrEmpty(groupId)) {
            // Personalization data is missing or invalid. This may sometimes be valid, eg. evergreen today stories for when reco times out.
            if (includeItemsWithNoPersonalizationData) {
                personalizedData.isUnpersonalizedMatch = true;
                personalizedDataItems.push(personalizedData);
            }
            continue;
        }
        // Check if the data has the match all user segment
        const dataUserSegments = rawDataUserSegments.split(",");
        if (dataUserSegments.includes(alwaysMatchUserSegment)) {
            personalizedData.isWildcardMatch = true;
        }
        // Check if any of the data segments match with the on device personalization data
        if (serverData.isDefinedNonNull(onDevicePersonalizationData)) {
            const onDevicePersonalizationDataForApp = onDevicePersonalizationData[appId];
            // An entry for the app may exist without any segments, so guard `userSegments` the same way
            // the signals path guards `onDeviceSignals`.
            if (serverData.isDefinedNonNull(onDevicePersonalizationDataForApp) &&
                serverData.isDefinedNonNull(onDevicePersonalizationDataForApp.userSegments)) {
                const userSegments = onDevicePersonalizationDataForApp.userSegments;
                personalizedData.isExactMatch = dataUserSegments.some((dataUserSegment) => userSegments.includes(dataUserSegment));
            }
        }
        personalizedData.appId = appId;
        personalizedData.groupId = groupId;
        personalizedDataItems.push(personalizedData);
    }
    return personalizedDataItems;
}

/**
 * Iterates through the list of given data items, and ensures we only have one per group.
 *
 * Items that end up as fallbacks have their `isFallbackMatch` flag set to `true` on the item itself.
 *
 * @param objectGraph The object graph. Not used by this function.
 * @param dataItems The data items to be processed.
 * @returns A subset of dataItems in their original relative order, with only one dataItem per group
 * (items without a group are all kept).
 */
function filterDataItemsIntoOnePerGroup(objectGraph, dataItems) {
    const filledGroupIds = new Set();
    const matchedDataItemsIncludingMultipleFallbacksPerGroup = [];
    // Determine which groups have any exact matches
    const groupIdsWithExactMatches = new Set(dataItems
        .filter((dataItem) => dataItem.isExactMatch)
        .map((dataItem) => dataItem.groupId));
    // Now iterate through our data items, and filter out any we don't need
    for (const dataItem of dataItems) {
        // If an item has no group, we always include it. This would only happen for
        // data which is missing valid personalization metadata, and we have specifically
        // opted in to including these items in the results.
        if (serverData.isNullOrEmpty(dataItem.groupId)) {
            matchedDataItemsIncludingMultipleFallbacksPerGroup.push(dataItem);
            continue;
        }
        // We already have a match for this group, so move onto the next item
        if (filledGroupIds.has(dataItem.groupId)) {
            continue;
        }
        // This item is an unpersonalized match, which will only occur if we permit this.
        // These are always added to the result set.
        if (dataItem.isUnpersonalizedMatch) {
            matchedDataItemsIncludingMultipleFallbacksPerGroup.push(dataItem);
            continue;
        }
        // This item is the first exact match for this group, so add it into our result set
        if (dataItem.isExactMatch) {
            filledGroupIds.add(dataItem.groupId);
            matchedDataItemsIncludingMultipleFallbacksPerGroup.push(dataItem);
            continue;
        }
        // If we know we have an exact match somewhere else for this group, we can just
        // continue on to the next item, as the exact match will be picked later.
        if (groupIdsWithExactMatches.has(dataItem.groupId)) {
            continue;
        }
        // We have no exact matches for this group, so we can now take wildcard matches.
        if (dataItem.isWildcardMatch) {
            filledGroupIds.add(dataItem.groupId);
            matchedDataItemsIncludingMultipleFallbacksPerGroup.push(dataItem);
            continue;
        }
        // This item is not a match. As we don't have any matches for this group yet,
        // we can mark it as a fallback. This does not necessarily mean it will be used,
        // but it does mean it becomes available for use. groupIDs are not necessarily in
        // sequential order, so we mark all of these as fallbacks, and filter them further below.
        dataItem.isFallbackMatch = true;
        matchedDataItemsIncludingMultipleFallbacksPerGroup.push(dataItem);
    }
    // We now need to remove all the fallback items except for the last one in each group, so iterate
    // through in reverse order and filter out any duplicates
    const matchedDataItemsWithOneFallbackPerGroup = [];
    const reversedMatchedDataItems = matchedDataItemsIncludingMultipleFallbacksPerGroup.slice().reverse();
    for (const dataItem of reversedMatchedDataItems) {
        if (dataItem.isFallbackMatch && filledGroupIds.has(dataItem.groupId)) {
            continue;
        }
        matchedDataItemsWithOneFallbackPerGroup.push(dataItem);
        if (dataItem.groupId != null && dataItem.groupId.length > 0) {
            filledGroupIds.add(dataItem.groupId);
        }
    }
    // Return to our original order
    matchedDataItemsWithOneFallbackPerGroup.reverse();
    return matchedDataItemsWithOneFallbackPerGroup;
}

/**
 * Sorts the given list of data items, and optionally restricts the list to a specified number of results.
 *
 * @param objectGraph The object graph. Not used by this function.
 * @param dataItems The data items to process.
 * @param allowUnmatchedFallbackResults Whether to allow fallback results to be included in the results. This will only be utilised in order to reach a preferredResultCount.
 * @param serverSideAppIdsOrdering List of ordered app ids from server side.
 * @param preferredResultCount The preferred number of results, if any.
 * @param diversify An optional flag that determines if we should diversify the personalized results on the basis of server side apps ranking.
 * @returns An object with `sortedDataItems` (optionally restricted in length) and the `processingType` reported by `sortAndDiversify`.
 */
function sortDataItems(objectGraph, dataItems, allowUnmatchedFallbackResults, serverSideAppIdsOrdering, preferredResultCount, diversify) {
    let sortResult;
    // Excluding fallback results is the preferred route, but if the number of results is less than our preferredResultCount, we will need to use the fallback results.
    const dataItemsWithoutFallback = dataItems.filter((data) => data.isExactMatch || data.isWildcardMatch || data.isUnpersonalizedMatch || serverData.isNull(data.groupId));
    if (serverData.isNull(preferredResultCount)) {
        // There is no preferred number of results, so simply perform our final sort and then return
        sortResult = sortAndDiversify(dataItemsWithoutFallback, serverSideAppIdsOrdering, diversify);
    }
    else if (dataItemsWithoutFallback.length >= preferredResultCount || !allowUnmatchedFallbackResults) {
        // There is a preferred number of results, but we either have enough items without needing to utilise
        // any fallback matches, or we don't allow fallback results.
        sortResult = sortAndDiversify(dataItemsWithoutFallback, serverSideAppIdsOrdering, diversify);
        sortResult.sortedDataItems = sortResult.sortedDataItems.slice(0, preferredResultCount);
    }
    else {
        // There is a preferred number of results, and we need to use fallback matches in order to
        // meet this number. We may still fall short, but this gets us as close as possible.
        sortResult = sortAndDiversify(dataItems, serverSideAppIdsOrdering, diversify);
        sortResult.sortedDataItems = sortResult.sortedDataItems.slice(0, preferredResultCount);
    }
    return sortResult;
}

/**
 * Rearranges a list of dataItems, so that any where there is an exact segment match are moved to the front of the list.
 * When `diversify` is set, the remaining (non exact match) items are passed through `diversifyDataItems` first.
 *
 * @param dataItems The data items to process.
 * @param serverSideAppIdsOrdering List of ordered app ids from server side.
 * @param diversify An optional flag that determines if we should diversify the personalized results on the basis of server side apps ranking.
 * @returns An object with `sortedDataItems` and a `processingType` of 0 when the order matches the input, or 2 otherwise.
 */
function sortAndDiversify(dataItems, serverSideAppIdsOrdering, diversify) {
    const exactMatchDataItems = dataItems.filter((value) => value.isExactMatch);
    let otherDataItems = dataItems.filter((value) => !value.isExactMatch);
    if (serverData.isDefinedNonNull(diversify) && diversify) {
        otherDataItems = diversifyDataItems(otherDataItems, serverSideAppIdsOrdering);
    }
    const sortedDataItems = exactMatchDataItems.concat(otherDataItems);
    const orderWasNotChanged = dataItems.every((dataItem, index) => dataItem === sortedDataItems[index]);
    return {
        sortedDataItems: sortedDataItems,
        processingType: orderWasNotChanged
            ? 0 /* onDevicePersonalization.ProcessingType.contentsNotChanged */
            : 2 /* onDevicePersonalization.ProcessingType.contentsSorted */,
    };
}

/**
 * Filters a list of raw data blobs into a list which only includes non-personalized data, or data that is set to "match all".
 * At most one "match all" item is kept per group; items with missing personalization data are all kept.
 *
 * @param objectGraph The object graph. Not used by this function.
 * @param dataItems The raw data blobs.
 * @param preferredResultCount The preferred number of items to be included in the results.
 * @returns An object with `personalizedData` (a subset, or all, of the original dataItems) and a `processingType` of `null`.
 */
export function removePersonalizedDataItems(objectGraph, dataItems, preferredResultCount) {
    let filteredDataItems = [];
    const filledGroupIds = new Set();
    for (const data of dataItems) {
        // If the personalization data is invalid or empty, we keep this in our result set.
        const rawDataUserSegments = serverData.asString(data, "meta.personalizationData.segId");
        const appId = serverData.asString(data, "meta.personalizationData.appId");
        const groupId = serverData.asString(data, "meta.personalizationData.grpId");
        if (serverData.isNullOrEmpty(rawDataUserSegments) ||
            serverData.isNullOrEmpty(appId) ||
            serverData.isNullOrEmpty(groupId)) {
            filteredDataItems.push(data);
            continue;
        }
        // We already have a match for this group, so move onto the next item
        if (filledGroupIds.has(groupId)) {
            continue;
        }
        // If the data has a match all user segment, we keep this in our result set.
        const dataUserSegments = rawDataUserSegments.split(",");
        if (dataUserSegments.includes(alwaysMatchUserSegment)) {
            filteredDataItems.push(data);
            filledGroupIds.add(groupId);
        }
    }
    // Finally, if we have a preferredResultCount which is smaller than our result set, trim our results down to this count
    if (serverData.isDefinedNonNull(preferredResultCount) && filteredDataItems.length > preferredResultCount) {
        filteredDataItems = filteredDataItems.slice(0, preferredResultCount);
    }
    return {
        personalizedData: filteredDataItems,
        processingType: null,
    };
}
//# sourceMappingURL=on-device-personalization-processing.js.map