+14
-10
bluesky/common.py
+14
-10
bluesky/common.py
···
68
68
text: str = post.get('text', '')
69
69
if not text:
70
70
return []
71
-
text = text.encode(encoding='utf-8').decode(encoding='utf-8')
71
+
ut8_text = text.encode(encoding='utf-8')
72
+
73
+
def decode(ut8: bytes) -> str:
74
+
return ut8.decode(encoding='utf-8')
72
75
73
76
facets: list[dict] = post.get('facets', [])
74
77
if not facets:
75
-
return [cross.TextToken(text)]
78
+
return [cross.TextToken(decode(ut8_text))]
76
79
77
80
slices: list[tuple[int, int, str, str]] = []
78
81
···
94
97
slices.append((index['byteStart'], index['byteEnd'], 'mention', feature['did']))
95
98
96
99
if not slices:
97
-
return [cross.TextToken(text)]
100
+
return [cross.TextToken(decode(ut8_text))]
98
101
99
102
slices.sort(key=lambda s: s[0])
100
103
unique: list[tuple[int, int, str, str]] = []
···
105
108
current_end = end
106
109
107
110
if not unique:
108
-
return [cross.TextToken(text)]
111
+
return [cross.TextToken(decode(ut8_text))]
109
112
110
113
tokens: list[cross.Token] = []
111
114
prev = 0
···
113
116
for start, end, ttype, val in unique:
114
117
if start > prev:
115
118
# text between facets
116
-
tokens.append(cross.TextToken(text[prev:start]))
119
+
tokens.append(cross.TextToken(decode(ut8_text[prev:start])))
117
120
# facet token
118
121
match ttype:
119
122
case 'link':
120
-
label = text[start:end]
123
+
label = decode(ut8_text[start:end])
124
+
print(label)
121
125
122
126
# try to unflatten links
123
127
split = val.split('://')
···
129
133
else:
130
134
tokens.append(cross.LinkToken(val, label))
131
135
case 'tag':
132
-
tokens.append(cross.TagToken(text[start:end]))
136
+
tokens.append(cross.TagToken(decode(ut8_text[start:end])))
133
137
case 'mention':
134
-
tokens.append(cross.MentionToken(text[start:end], val))
138
+
tokens.append(cross.MentionToken(decode(ut8_text[start:end]), val))
135
139
prev = end
136
140
137
-
if prev < len(text):
138
-
tokens.append(cross.TextToken(text[prev:]))
141
+
if prev < len(ut8_text):
142
+
tokens.append(cross.TextToken(decode(ut8_text[prev:])))
139
143
140
144
return tokens
141
145