That fuck shit the fascists are using
1package org.tm.archive.linkpreview;
2
3import android.annotation.SuppressLint;
4import android.text.SpannableString;
5import android.text.style.URLSpan;
6import android.text.util.Linkify;
7
8import androidx.annotation.NonNull;
9import androidx.annotation.Nullable;
10import androidx.core.text.HtmlCompat;
11import androidx.core.text.util.LinkifyCompat;
12
13import com.annimon.stream.Collectors;
14import com.annimon.stream.Stream;
15
16import org.tm.archive.util.DateUtils;
17import org.tm.archive.util.LinkUtil;
18import org.tm.archive.util.Util;
19import org.whispersystems.signalservice.api.util.OptionalUtil;
20
21import java.util.Collections;
22import java.util.HashMap;
23import java.util.List;
24import java.util.Map;
25import java.util.Optional;
26import java.util.Set;
27import java.util.regex.Matcher;
28import java.util.regex.Pattern;
29
30import okhttp3.HttpUrl;
31
32public final class LinkPreviewUtil {
33
34 private static final Pattern OPEN_GRAPH_TAG_PATTERN = Pattern.compile("<\\s*meta[^>]*property\\s*=\\s*\"\\s*og:([^\"]+)\"[^>]*/?\\s*>");
35 private static final Pattern ARTICLE_TAG_PATTERN = Pattern.compile("<\\s*meta[^>]*property\\s*=\\s*\"\\s*article:([^\"]+)\"[^>]*/?\\s*>");
36 private static final Pattern OPEN_GRAPH_CONTENT_PATTERN = Pattern.compile("content\\s*=\\s*\"([^\"]*)\"");
37 private static final Pattern TITLE_PATTERN = Pattern.compile("<\\s*title[^>]*>(.*)<\\s*/title[^>]*>");
38 private static final Pattern FAVICON_PATTERN = Pattern.compile("<\\s*link[^>]*rel\\s*=\\s*\".*icon.*\"[^>]*>");
39 private static final Pattern FAVICON_HREF_PATTERN = Pattern.compile("href\\s*=\\s*\"([^\"]*)\"");
40
41 public static @Nullable String getTopLevelDomain(@Nullable String urlString) {
42 if (!Util.isEmpty(urlString)) {
43 HttpUrl url = HttpUrl.parse(urlString);
44 if (url != null) {
45 return url.topPrivateDomain();
46 }
47 }
48
49 return null;
50 }
51
52 /**
53 * @return All URLs allowed as previews in the source text.
54 */
55 public static @NonNull Links findValidPreviewUrls(@NonNull String text) {
56 SpannableString spannable = new SpannableString(text);
57 boolean found = LinkifyCompat.addLinks(spannable, Linkify.WEB_URLS);
58
59 if (!found) {
60 return Links.EMPTY;
61 }
62
63 return new Links(Stream.of(spannable.getSpans(0, spannable.length(), URLSpan.class))
64 .map(span -> new Link(span.getURL(), spannable.getSpanStart(span)))
65 .filter(link -> LinkUtil.isValidPreviewUrl(link.getUrl()))
66 .toList());
67 }
68
69 public static @NonNull OpenGraph parseOpenGraphFields(@Nullable String html) {
70 if (html == null) {
71 return new OpenGraph(Collections.emptyMap(), null, null);
72 }
73
74 Map<String, String> openGraphTags = new HashMap<>();
75 Matcher openGraphMatcher = OPEN_GRAPH_TAG_PATTERN.matcher(html);
76
77 while (openGraphMatcher.find()) {
78 String tag = openGraphMatcher.group();
79 String property = openGraphMatcher.groupCount() > 0 ? openGraphMatcher.group(1) : null;
80
81 if (property != null) {
82 Matcher contentMatcher = OPEN_GRAPH_CONTENT_PATTERN.matcher(tag);
83 if (contentMatcher.find() && contentMatcher.groupCount() > 0) {
84 String content = fromDoubleEncoded(contentMatcher.group(1));
85 openGraphTags.put(property.toLowerCase(), content);
86 }
87 }
88 }
89
90 Matcher articleMatcher = ARTICLE_TAG_PATTERN.matcher(html);
91
92 while (articleMatcher.find()) {
93 String tag = articleMatcher.group();
94 String property = articleMatcher.groupCount() > 0 ? articleMatcher.group(1) : null;
95
96 if (property != null) {
97 Matcher contentMatcher = OPEN_GRAPH_CONTENT_PATTERN.matcher(tag);
98 if (contentMatcher.find() && contentMatcher.groupCount() > 0) {
99 String content = fromDoubleEncoded(contentMatcher.group(1));
100 openGraphTags.put(property.toLowerCase(), content);
101 }
102 }
103 }
104
105 String htmlTitle = "";
106 String faviconUrl = "";
107
108 Matcher titleMatcher = TITLE_PATTERN.matcher(html);
109 if (titleMatcher.find() && titleMatcher.groupCount() > 0) {
110 htmlTitle = fromDoubleEncoded(titleMatcher.group(1));
111 }
112
113 Matcher faviconMatcher = FAVICON_PATTERN.matcher(html);
114 if (faviconMatcher.find()) {
115 Matcher faviconHrefMatcher = FAVICON_HREF_PATTERN.matcher(faviconMatcher.group());
116 if (faviconHrefMatcher.find() && faviconHrefMatcher.groupCount() > 0) {
117 faviconUrl = faviconHrefMatcher.group(1);
118 }
119 }
120
121 return new OpenGraph(openGraphTags, htmlTitle, faviconUrl);
122 }
123
124 private static @NonNull String fromDoubleEncoded(@NonNull String html) {
125 return HtmlCompat.fromHtml(HtmlCompat.fromHtml(html, 0).toString(), 0).toString();
126 }
127
128 public static final class OpenGraph {
129
130 private final Map<String, String> values;
131
132 private final @Nullable String htmlTitle;
133 private final @Nullable String faviconUrl;
134
135 private static final String KEY_TITLE = "title";
136 private static final String KEY_DESCRIPTION_URL = "description";
137 private static final String KEY_IMAGE_URL = "image";
138 private static final String KEY_PUBLISHED_TIME_1 = "published_time";
139 private static final String KEY_PUBLISHED_TIME_2 = "article:published_time";
140 private static final String KEY_MODIFIED_TIME_1 = "modified_time";
141 private static final String KEY_MODIFIED_TIME_2 = "article:modified_time";
142
143 public OpenGraph(@NonNull Map<String, String> values, @Nullable String htmlTitle, @Nullable String faviconUrl) {
144 this.values = values;
145 this.htmlTitle = htmlTitle;
146 this.faviconUrl = faviconUrl;
147 }
148
149 public @NonNull Optional<String> getTitle() {
150 return OptionalUtil.absentIfEmpty(Util.getFirstNonEmpty(values.get(KEY_TITLE), htmlTitle));
151 }
152
153 public @NonNull Optional<String> getImageUrl() {
154 return OptionalUtil.absentIfEmpty(Util.getFirstNonEmpty(values.get(KEY_IMAGE_URL), faviconUrl));
155 }
156
157 @SuppressLint("ObsoleteSdkInt")
158 public long getDate() {
159 return Stream.of(values.get(KEY_PUBLISHED_TIME_1),
160 values.get(KEY_PUBLISHED_TIME_2),
161 values.get(KEY_MODIFIED_TIME_1),
162 values.get(KEY_MODIFIED_TIME_2))
163 .map(DateUtils::parseIso8601)
164 .filter(time -> time > 0)
165 .findFirst()
166 .orElse(0L);
167 }
168
169 public @NonNull Optional<String> getDescription() {
170 return OptionalUtil.absentIfEmpty(values.get(KEY_DESCRIPTION_URL));
171 }
172 }
173
174 public static class Links {
175 static final Links EMPTY = new Links(Collections.emptyList());
176
177 private final List<Link> links;
178 private final Set<String> urlSet;
179
180 private Links(@NonNull List<Link> links) {
181 this.links = links;
182 this.urlSet = Stream.of(links)
183 .map(link -> trimTrailingSlash(link.getUrl()))
184 .collect(Collectors.toSet());
185 }
186
187 public Optional<Link> findFirst() {
188 return links.isEmpty() ? Optional.empty()
189 : Optional.of(links.get(0));
190 }
191
192 /**
193 * Slightly forgiving comparison where it will ignore trailing '/' on the supplied url.
194 */
195 public boolean containsUrl(@NonNull String url) {
196 return urlSet.contains(trimTrailingSlash(url));
197 }
198
199 private @NonNull String trimTrailingSlash(@NonNull String url) {
200 return url.endsWith("/") ? url.substring(0, url.length() - 1)
201 : url;
202 }
203
204 public int size() {
205 return links.size();
206 }
207 }
208}