diff --git a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java index 3b6dc18112..7c560602da 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java @@ -4,27 +4,69 @@ import com.linkedin.urls.detection.UrlDetector; import com.linkedin.urls.detection.UrlDetectorOptions; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.concurrent.CompletableFuture; /** - * Utility class to detect links. + * Utility methods for working with links inside arbitrary text. + * + *
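+ * This class can:
+ * <ul>
+ * <li>extract HTTP(S) links from text,</li>
+ * <li>check whether text contains a URL-like pattern,</li>
+ * <li>check asynchronously whether a link is broken,</li>
+ * <li>replace broken links in text with a replacement string.</li>
+ * </ul>
+ *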
+ * It is intentionally stateless and uses asynchronous HTTP requests to avoid blocking calling + * threads. */ + public class LinkDetection { + private static final HttpClient HTTP_CLIENT = HttpClient.newHttpClient(); /** - * Possible ways to filter a link. + * Default filters applied when extracting links from text. + * + *
+ * These filters intentionally ignore: + *
+ * This reduces false positives when scanning chat messages or source-code snippets.
+ */
+
+ private static final Set
+ * Such links are often intentionally suppressed in chat platforms.
*/
SUPPRESSED,
/**
- * Filters links that are not using http scheme.
+ * Ignores URLs that do not use the HTTP or HTTPS scheme.
+ *
+ *
+ * This helps avoid false positives such as {@code ftp://}, {@code file://}, or scheme-less
+ * matches.
*/
NON_HTTP_SCHEME
}
@@ -34,12 +76,26 @@ private LinkDetection() {
}
/**
- * Extracts all links from the given content.
+ * Extracts HTTP(S) links from the given text.
*
- * @param content the content to search through
- * @param filter the filters applied to the urls
- * @return a list of all found links, can be empty
+ *
+ * The text is scanned using a URL detector, then filtered and normalized according to the
+ * provided {@link LinkFilter}s.
+ *
+ *
+ * Example:
+ *
+ *
+ * This method performs a lightweight detection only and does not apply any {@link LinkFilter}s.
+ *
+ * @param content the text to scan
+ * @return {@code true} if at least one URL-like pattern is detected
*/
+
public static boolean containsLink(String content) {
    // Detection only: any URL-like match counts; no LinkFilter is consulted here.
    UrlDetector detector = new UrlDetector(content, UrlDetectorOptions.BRACKET_MATCH);
    boolean noneFound = detector.detect().isEmpty();
    return !noneFound;
}
+ /**
+ * Asynchronously checks whether a URL is considered broken.
+ *
+ *
+ * The check is performed in two steps:
+ *
+ * A link is considered broken if:
+ *
+ * Successful responses (2xx) and redirects (3xx) are considered valid links. The response body
+ * is never inspected.
+ *
+ * @param url the URL to check
+ * @return a {@code CompletableFuture} completing with {@code true} if the link is broken,
+ * {@code false} otherwise
+ */
+
+ public static CompletableFuture
+ * Each detected link is checked asynchronously using {@link #isLinkBroken(String)}. Only links
+ * confirmed as broken are replaced. Duplicate URLs are checked only once and all occurrences
+ * are replaced if found to be broken.
+ *
+ *
+ * This method does not block - all link checks are performed asynchronously and combined into a
+ * single {@code CompletableFuture}.
+ *
+ *
+ * Example:
+ *
+ *
+ * Results in:
+ *
+ *
+ * Applies the provided {@link LinkFilter}s:
+ *
+ * Additionally removes trailing punctuation such as commas or periods from the detected URL.
+ *
+ * @param url the detected URL
+ * @param filter active link filters to apply
+ * @return an {@link Optional} containing the normalized link, or {@code Optional.empty()} if
+ * the link should be filtered out
+ */
+
private static Optional{@code
+ * Set
+ *
+ * @param content the text to scan for links
+ * @param filter a set of filters controlling which detected links are returned
+ * @return a list of extracted links in the order they appear in the text
*/
+
public static List
+ *
+ *
+ *
+ *
+ *
+ * {@code
+ * replaceDeadLinks("""
+ * Test
+ * http://deadlink/1
+ * http://workinglink/1
+ * """, "(broken link)")
+ * }
+ *
+ * {@code
+ * Test
+ * (broken link)
+ * http://workinglink/1
+ * }
+ *
+ * @param text the input text containing URLs
+ * @param replacement the string used to replace broken links
+ * @return a {@code CompletableFuture} that completes with the modified text, or the original
+ * text if no broken links were found
+ */
+
+
+ public static CompletableFuture
+ *
+ *
+ *