+132
-38
lib/utils/facet_utils.dart
+132
-38
lib/utils/facet_utils.dart
···
50
50
int aLength = a.end - a.start;
51
51
int bLength = b.end - b.start;
52
52
53
-
// For links, use the display text length
53
+
// For links, use the length of the text that will actually be found
54
54
if (a.type?.contains('link') == true && a.data['uri'] != null) {
55
-
String displayText = a.data['uri'] as String;
56
-
displayText = _extractDisplayTextFromUri(displayText);
57
-
aLength = displayText.length;
55
+
final uri = a.data['uri'] as String;
56
+
final possibleTexts = [_extractDisplayTextFromUri(uri), _extractDomainOnly(uri), uri];
57
+
// Use the first candidate (in list order) that occurs in the original text
58
+
for (final testText in possibleTexts) {
59
+
if (text.contains(testText)) {
60
+
aLength = testText.length;
61
+
break;
62
+
}
63
+
}
58
64
}
59
65
60
66
if (b.type?.contains('link') == true && b.data['uri'] != null) {
61
-
String displayText = b.data['uri'] as String;
62
-
displayText = _extractDisplayTextFromUri(displayText);
63
-
bLength = displayText.length;
67
+
final uri = b.data['uri'] as String;
68
+
final possibleTexts = [_extractDisplayTextFromUri(uri), _extractDomainOnly(uri), uri];
69
+
// Use the first candidate (in list order) that occurs in the original text
70
+
for (final testText in possibleTexts) {
71
+
if (text.contains(testText)) {
72
+
bLength = testText.length;
73
+
break;
74
+
}
75
+
}
64
76
}
65
77
66
78
// Sort by length descending, then by start position ascending
···
80
92
81
93
if (range.type?.contains('link') == true && range.data['uri'] != null) {
82
94
final uri = range.data['uri'] as String;
83
-
final displayText = _extractDisplayTextFromUri(uri);
95
+
96
+
// First, try to use the exact facet positions if they seem valid
97
+
if (range.start >= 0 && range.end <= text.length && range.start < range.end) {
98
+
final facetText = text.substring(range.start, range.end);
99
+
100
+
// Check if the facet text matches any of our expected URL formats
101
+
final possibleTexts = [
102
+
_extractDisplayTextFromUri(uri), // Full URL with protocol
103
+
_extractDomainOnly(uri), // Just the domain
104
+
uri, // Original URI as-is
105
+
];
106
+
107
+
bool facetTextMatches = possibleTexts.any(
108
+
(possible) =>
109
+
facetText == possible ||
110
+
facetText.contains(possible) ||
111
+
possible.contains(facetText),
112
+
);
84
113
85
-
// Find all occurrences of this text and pick the one that doesn't overlap with used positions
86
-
int searchIndex = 0;
87
-
bool foundValidMatch = false;
114
+
if (facetTextMatches) {
115
+
// Check if this range overlaps with used positions
116
+
bool overlaps = false;
117
+
for (int i = range.start; i < range.end; i++) {
118
+
if (usedPositions.contains(i)) {
119
+
overlaps = true;
120
+
break;
121
+
}
122
+
}
88
123
89
-
while (!foundValidMatch) {
90
-
final globalIndex = text.indexOf(displayText, searchIndex);
91
-
if (globalIndex == -1) break;
124
+
if (!overlaps) {
125
+
actualStart = range.start;
126
+
actualEnd = range.end;
127
+
actualContent =
128
+
facetText; // Use exactly what's in the original text at facet position
92
129
93
-
// Check if this range overlaps with any used positions
94
-
bool overlaps = false;
95
-
for (int i = globalIndex; i < globalIndex + displayText.length; i++) {
96
-
if (usedPositions.contains(i)) {
97
-
overlaps = true;
98
-
break;
130
+
// Mark these positions as used
131
+
for (int i = actualStart; i < actualEnd; i++) {
132
+
usedPositions.add(i);
133
+
}
99
134
}
100
135
}
136
+
}
101
137
102
-
if (!overlaps) {
103
-
actualStart = globalIndex;
104
-
actualEnd = globalIndex + displayText.length;
105
-
actualContent = displayText;
106
-
foundValidMatch = true;
138
+
// If facet positions didn't work, fall back to searching
139
+
if (actualContent == null) {
140
+
final possibleTexts = [
141
+
_extractDisplayTextFromUri(uri), // Full URL with protocol
142
+
_extractDomainOnly(uri), // Just the domain
143
+
uri, // Original URI as-is
144
+
];
107
145
108
-
// Mark these positions as used
109
-
for (int i = actualStart; i < actualEnd; i++) {
110
-
usedPositions.add(i);
146
+
int searchIndex = 0;
147
+
bool foundValidMatch = false;
148
+
149
+
// Try each possible text representation
150
+
for (final searchText in possibleTexts) {
151
+
searchIndex = 0;
152
+
while (!foundValidMatch) {
153
+
final globalIndex = text.indexOf(searchText, searchIndex);
154
+
if (globalIndex == -1) break;
155
+
156
+
// Check if this range overlaps with any used positions
157
+
bool overlaps = false;
158
+
for (int i = globalIndex; i < globalIndex + searchText.length; i++) {
159
+
if (usedPositions.contains(i)) {
160
+
overlaps = true;
161
+
break;
162
+
}
163
+
}
164
+
165
+
if (!overlaps) {
166
+
actualStart = globalIndex;
167
+
actualEnd = globalIndex + searchText.length;
168
+
actualContent = searchText; // Use exactly what we found in the text
169
+
foundValidMatch = true;
170
+
171
+
// Mark these positions as used
172
+
for (int i = actualStart; i < actualEnd; i++) {
173
+
usedPositions.add(i);
174
+
}
175
+
break;
176
+
} else {
177
+
searchIndex = globalIndex + 1;
178
+
}
111
179
}
112
-
} else {
113
-
searchIndex = globalIndex + 1;
180
+
if (foundValidMatch) break;
114
181
}
115
182
}
116
183
}
···
198
265
return spans;
199
266
}
200
267
201
-
/// Returns the scheme and host portion of [uri], dropping any path.
///
/// For a URI that starts with a `scheme://` separator (`https://`, `http://`,
/// or any other scheme, in any letter case), everything from the first `/`
/// after the separator onward is removed. For a URI without such a separator,
/// everything from the first `/` onward is removed. A URI that contains no
/// path is returned unchanged.
static String _extractDisplayTextFromUri(String uri) {
  final firstSlash = uri.indexOf('/');
  if (firstSlash == -1) return uri;

  // The first slash is part of a scheme separator only when it sits inside
  // "://" and is preceded by a non-empty scheme. Anchoring on the FIRST slash
  // (instead of searching for "://" anywhere) keeps a URL embedded later in a
  // path or query string from being mistaken for the scheme.
  final hasScheme = firstSlash > 1 && uri.startsWith('://', firstSlash - 1);
  if (!hasScheme) return uri.substring(0, firstSlash);

  // Skip both slashes of the separator, then cut at the start of the path.
  final pathIndex = uri.indexOf('/', firstSlash + 2);
  return pathIndex == -1 ? uri : uri.substring(0, pathIndex);
}
292
+
293
+
/// Returns only the host portion of [uri], dropping any scheme and path.
///
/// A leading `scheme://` separator (`https://`, `http://`, or any other
/// scheme, in any letter case) is removed first; then everything from the
/// first remaining `/` onward is removed. A URI with neither a scheme nor a
/// path is returned unchanged.
static String _extractDomainOnly(String uri) {
  var domain = uri;

  // The first slash is part of a scheme separator only when it sits inside
  // "://" and is preceded by a non-empty scheme. Anchoring on the FIRST slash
  // keeps a URL embedded later in a path or query string from being treated
  // as the scheme of a scheme-less URI.
  final firstSlash = uri.indexOf('/');
  if (firstSlash > 1 && uri.startsWith('://', firstSlash - 1)) {
    domain = uri.substring(firstSlash + 2);
  }

  // Remove the path, if any.
  final pathIndex = domain.indexOf('/');
  return pathIndex == -1 ? domain : domain.substring(0, pathIndex);
}
216
310
}