That fuck shit the fascists are using
at master 208 lines 7.6 kB view raw
1package org.tm.archive.linkpreview; 2 3import android.annotation.SuppressLint; 4import android.text.SpannableString; 5import android.text.style.URLSpan; 6import android.text.util.Linkify; 7 8import androidx.annotation.NonNull; 9import androidx.annotation.Nullable; 10import androidx.core.text.HtmlCompat; 11import androidx.core.text.util.LinkifyCompat; 12 13import com.annimon.stream.Collectors; 14import com.annimon.stream.Stream; 15 16import org.tm.archive.util.DateUtils; 17import org.tm.archive.util.LinkUtil; 18import org.tm.archive.util.Util; 19import org.whispersystems.signalservice.api.util.OptionalUtil; 20 21import java.util.Collections; 22import java.util.HashMap; 23import java.util.List; 24import java.util.Map; 25import java.util.Optional; 26import java.util.Set; 27import java.util.regex.Matcher; 28import java.util.regex.Pattern; 29 30import okhttp3.HttpUrl; 31 32public final class LinkPreviewUtil { 33 34 private static final Pattern OPEN_GRAPH_TAG_PATTERN = Pattern.compile("<\\s*meta[^>]*property\\s*=\\s*\"\\s*og:([^\"]+)\"[^>]*/?\\s*>"); 35 private static final Pattern ARTICLE_TAG_PATTERN = Pattern.compile("<\\s*meta[^>]*property\\s*=\\s*\"\\s*article:([^\"]+)\"[^>]*/?\\s*>"); 36 private static final Pattern OPEN_GRAPH_CONTENT_PATTERN = Pattern.compile("content\\s*=\\s*\"([^\"]*)\""); 37 private static final Pattern TITLE_PATTERN = Pattern.compile("<\\s*title[^>]*>(.*)<\\s*/title[^>]*>"); 38 private static final Pattern FAVICON_PATTERN = Pattern.compile("<\\s*link[^>]*rel\\s*=\\s*\".*icon.*\"[^>]*>"); 39 private static final Pattern FAVICON_HREF_PATTERN = Pattern.compile("href\\s*=\\s*\"([^\"]*)\""); 40 41 public static @Nullable String getTopLevelDomain(@Nullable String urlString) { 42 if (!Util.isEmpty(urlString)) { 43 HttpUrl url = HttpUrl.parse(urlString); 44 if (url != null) { 45 return url.topPrivateDomain(); 46 } 47 } 48 49 return null; 50 } 51 52 /** 53 * @return All URLs allowed as previews in the source text. 54 */ 55 public static @NonNull Links findValidPreviewUrls(@NonNull String text) { 56 SpannableString spannable = new SpannableString(text); 57 boolean found = LinkifyCompat.addLinks(spannable, Linkify.WEB_URLS); 58 59 if (!found) { 60 return Links.EMPTY; 61 } 62 63 return new Links(Stream.of(spannable.getSpans(0, spannable.length(), URLSpan.class)) 64 .map(span -> new Link(span.getURL(), spannable.getSpanStart(span))) 65 .filter(link -> LinkUtil.isValidPreviewUrl(link.getUrl())) 66 .toList()); 67 } 68 69 public static @NonNull OpenGraph parseOpenGraphFields(@Nullable String html) { 70 if (html == null) { 71 return new OpenGraph(Collections.emptyMap(), null, null); 72 } 73 74 Map<String, String> openGraphTags = new HashMap<>(); 75 Matcher openGraphMatcher = OPEN_GRAPH_TAG_PATTERN.matcher(html); 76 77 while (openGraphMatcher.find()) { 78 String tag = openGraphMatcher.group(); 79 String property = openGraphMatcher.groupCount() > 0 ? openGraphMatcher.group(1) : null; 80 81 if (property != null) { 82 Matcher contentMatcher = OPEN_GRAPH_CONTENT_PATTERN.matcher(tag); 83 if (contentMatcher.find() && contentMatcher.groupCount() > 0) { 84 String content = fromDoubleEncoded(contentMatcher.group(1)); 85 openGraphTags.put(property.toLowerCase(), content); 86 } 87 } 88 } 89 90 Matcher articleMatcher = ARTICLE_TAG_PATTERN.matcher(html); 91 92 while (articleMatcher.find()) { 93 String tag = articleMatcher.group(); 94 String property = articleMatcher.groupCount() > 0 ? articleMatcher.group(1) : null; 95 96 if (property != null) { 97 Matcher contentMatcher = OPEN_GRAPH_CONTENT_PATTERN.matcher(tag); 98 if (contentMatcher.find() && contentMatcher.groupCount() > 0) { 99 String content = fromDoubleEncoded(contentMatcher.group(1)); 100 openGraphTags.put(property.toLowerCase(), content); 101 } 102 } 103 } 104 105 String htmlTitle = ""; 106 String faviconUrl = ""; 107 108 Matcher titleMatcher = TITLE_PATTERN.matcher(html); 109 if (titleMatcher.find() && titleMatcher.groupCount() > 0) { 110 htmlTitle = fromDoubleEncoded(titleMatcher.group(1)); 111 } 112 113 Matcher faviconMatcher = FAVICON_PATTERN.matcher(html); 114 if (faviconMatcher.find()) { 115 Matcher faviconHrefMatcher = FAVICON_HREF_PATTERN.matcher(faviconMatcher.group()); 116 if (faviconHrefMatcher.find() && faviconHrefMatcher.groupCount() > 0) { 117 faviconUrl = faviconHrefMatcher.group(1); 118 } 119 } 120 121 return new OpenGraph(openGraphTags, htmlTitle, faviconUrl); 122 } 123 124 private static @NonNull String fromDoubleEncoded(@NonNull String html) { 125 return HtmlCompat.fromHtml(HtmlCompat.fromHtml(html, 0).toString(), 0).toString(); 126 } 127 128 public static final class OpenGraph { 129 130 private final Map<String, String> values; 131 132 private final @Nullable String htmlTitle; 133 private final @Nullable String faviconUrl; 134 135 private static final String KEY_TITLE = "title"; 136 private static final String KEY_DESCRIPTION_URL = "description"; 137 private static final String KEY_IMAGE_URL = "image"; 138 private static final String KEY_PUBLISHED_TIME_1 = "published_time"; 139 private static final String KEY_PUBLISHED_TIME_2 = "article:published_time"; 140 private static final String KEY_MODIFIED_TIME_1 = "modified_time"; 141 private static final String KEY_MODIFIED_TIME_2 = "article:modified_time"; 142 143 public OpenGraph(@NonNull Map<String, String> values, @Nullable String htmlTitle, @Nullable String faviconUrl) { 144 this.values = values; 145 this.htmlTitle = htmlTitle; 146 this.faviconUrl = faviconUrl; 147 } 148 149 public @NonNull Optional<String> getTitle() { 150 return OptionalUtil.absentIfEmpty(Util.getFirstNonEmpty(values.get(KEY_TITLE), htmlTitle)); 151 } 152 153 public @NonNull Optional<String> getImageUrl() { 154 return OptionalUtil.absentIfEmpty(Util.getFirstNonEmpty(values.get(KEY_IMAGE_URL), faviconUrl)); 155 } 156 157 @SuppressLint("ObsoleteSdkInt") 158 public long getDate() { 159 return Stream.of(values.get(KEY_PUBLISHED_TIME_1), 160 values.get(KEY_PUBLISHED_TIME_2), 161 values.get(KEY_MODIFIED_TIME_1), 162 values.get(KEY_MODIFIED_TIME_2)) 163 .map(DateUtils::parseIso8601) 164 .filter(time -> time > 0) 165 .findFirst() 166 .orElse(0L); 167 } 168 169 public @NonNull Optional<String> getDescription() { 170 return OptionalUtil.absentIfEmpty(values.get(KEY_DESCRIPTION_URL)); 171 } 172 } 173 174 public static class Links { 175 static final Links EMPTY = new Links(Collections.emptyList()); 176 177 private final List<Link> links; 178 private final Set<String> urlSet; 179 180 private Links(@NonNull List<Link> links) { 181 this.links = links; 182 this.urlSet = Stream.of(links) 183 .map(link -> trimTrailingSlash(link.getUrl())) 184 .collect(Collectors.toSet()); 185 } 186 187 public Optional<Link> findFirst() { 188 return links.isEmpty() ? Optional.empty() 189 : Optional.of(links.get(0)); 190 } 191 192 /** 193 * Slightly forgiving comparison where it will ignore trailing '/' on the supplied url. 194 */ 195 public boolean containsUrl(@NonNull String url) { 196 return urlSet.contains(trimTrailingSlash(url)); 197 } 198 199 private @NonNull String trimTrailingSlash(@NonNull String url) { 200 return url.endsWith("/") ? url.substring(0, url.length() - 1) 201 : url; 202 } 203 204 public int size() { 205 return links.size(); 206 } 207 } 208}