From 9004e6baf44200a51c59ea786886bb49c98e0250 Mon Sep 17 00:00:00 2001 From: barsh404error Date: Mon, 29 Dec 2025 13:47:52 +0300 Subject: [PATCH 1/8] Add utilities to detect and replace broken links. --- .../tjbot/features/utils/LinkDetection.java | 55 ++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java index 3b6dc18112..62883044e9 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java @@ -1,5 +1,11 @@ package org.togetherjava.tjbot.features.utils; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.util.concurrent.CompletableFuture; + import com.linkedin.urls.Url; import com.linkedin.urls.detection.UrlDetector; import com.linkedin.urls.detection.UrlDetectorOptions; @@ -57,6 +63,54 @@ public static List extractLinks(String content, Set filter) public static boolean containsLink(String content) { return !(new UrlDetector(content, UrlDetectorOptions.BRACKET_MATCH).detect().isEmpty()); } + public static CompletableFuture isLinkBroken(String url) { + HttpClient client = HttpClient.newHttpClient(); + + HttpRequest request = HttpRequest.newBuilder(URI.create(url)) + .method("HEAD", HttpRequest.BodyPublishers.noBody()) + .build(); + + return client.sendAsync(request, HttpResponse.BodyHandlers.discarding()) + .thenApply(response -> response.statusCode() >= 400) + .exceptionally(ignored -> true); + } + public static CompletableFuture replaceDeadLinks( + String text, + String replacement + ) { + Set filters = Set.of( + LinkFilter.SUPPRESSED, + LinkFilter.NON_HTTP_SCHEME + ); + + List links = extractLinks(text, filters); + + if (links.isEmpty()) { + return 
CompletableFuture.completedFuture(text); + } + + StringBuilder result = new StringBuilder(text); + + List> checks = links.stream() + .map(link -> + isLinkBroken(link).thenAccept(isDead -> { + if (isDead) { + int index = result.indexOf(link); + if (index != -1) { + result.replace( + index, + index + link.length(), + replacement + ); + } + } + }) + ) + .toList(); + + return CompletableFuture.allOf(checks.toArray(new CompletableFuture[0])) + .thenApply(v -> result.toString()); + } private static Optional toLink(Url url, Set filter) { String raw = url.getOriginalUrl(); @@ -76,7 +130,6 @@ private static Optional toLink(Url url, Set filter) { // Remove trailing punctuation link = link.substring(0, link.length() - 1); } - return Optional.of(link); } From 715383387ccc7fb4d0a57a0efcb2220ce802c7d6 Mon Sep 17 00:00:00 2001 From: barsh404error Date: Wed, 31 Dec 2025 11:37:15 +0300 Subject: [PATCH 2/8] style: run spotlessApply --- .../tjbot/features/utils/LinkDetection.java | 58 ++++++++----------- 1 file changed, 23 insertions(+), 35 deletions(-) diff --git a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java index 62883044e9..d36f257b31 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java @@ -1,18 +1,17 @@ package org.togetherjava.tjbot.features.utils; -import java.net.URI; -import java.net.http.HttpClient; -import java.net.http.HttpRequest; -import java.net.http.HttpResponse; -import java.util.concurrent.CompletableFuture; - import com.linkedin.urls.Url; import com.linkedin.urls.detection.UrlDetector; import com.linkedin.urls.detection.UrlDetectorOptions; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; import java.util.List; import java.util.Optional; 
import java.util.Set; +import java.util.concurrent.CompletableFuture; /** * Utility class to detect links. @@ -63,25 +62,21 @@ public static List extractLinks(String content, Set filter) public static boolean containsLink(String content) { return !(new UrlDetector(content, UrlDetectorOptions.BRACKET_MATCH).detect().isEmpty()); } + public static CompletableFuture isLinkBroken(String url) { HttpClient client = HttpClient.newHttpClient(); HttpRequest request = HttpRequest.newBuilder(URI.create(url)) - .method("HEAD", HttpRequest.BodyPublishers.noBody()) - .build(); + .method("HEAD", HttpRequest.BodyPublishers.noBody()) + .build(); return client.sendAsync(request, HttpResponse.BodyHandlers.discarding()) - .thenApply(response -> response.statusCode() >= 400) - .exceptionally(ignored -> true); + .thenApply(response -> response.statusCode() >= 400) + .exceptionally(ignored -> true); } - public static CompletableFuture replaceDeadLinks( - String text, - String replacement - ) { - Set filters = Set.of( - LinkFilter.SUPPRESSED, - LinkFilter.NON_HTTP_SCHEME - ); + + public static CompletableFuture replaceDeadLinks(String text, String replacement) { + Set filters = Set.of(LinkFilter.SUPPRESSED, LinkFilter.NON_HTTP_SCHEME); List links = extractLinks(text, filters); @@ -91,25 +86,18 @@ public static CompletableFuture replaceDeadLinks( StringBuilder result = new StringBuilder(text); - List> checks = links.stream() - .map(link -> - isLinkBroken(link).thenAccept(isDead -> { - if (isDead) { - int index = result.indexOf(link); - if (index != -1) { - result.replace( - index, - index + link.length(), - replacement - ); - } - } - }) - ) - .toList(); + List> checks = + links.stream().map(link -> isLinkBroken(link).thenAccept(isDead -> { + if (isDead) { + int index = result.indexOf(link); + if (index != -1) { + result.replace(index, index + link.length(), replacement); + } + } + })).toList(); return CompletableFuture.allOf(checks.toArray(new CompletableFuture[0])) - .thenApply(v -> 
result.toString()); + .thenApply(v -> result.toString()); } private static Optional toLink(Url url, Set filter) { From d20507439d5075b804a9ba3a2ec3753fd1afa2ec Mon Sep 17 00:00:00 2001 From: barsh404error Date: Fri, 2 Jan 2026 13:18:49 +0300 Subject: [PATCH 3/8] Add utilities to detect and replace broken links V2 --- .../tjbot/features/utils/LinkDetection.java | 65 ++++++++++++------- 1 file changed, 43 insertions(+), 22 deletions(-) diff --git a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java index d36f257b31..5ac1c0f5a7 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java @@ -9,6 +9,7 @@ import java.net.http.HttpRequest; import java.net.http.HttpResponse; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.concurrent.CompletableFuture; @@ -17,6 +18,10 @@ * Utility class to detect links. */ public class LinkDetection { + private static final HttpClient HTTP_CLIENT = HttpClient.newHttpClient(); + + private static final Set DEFAULT_FILTERS = + Set.of(LinkFilter.SUPPRESSED, LinkFilter.NON_HTTP_SCHEME); /** * Possible ways to filter a link. 
@@ -63,41 +68,57 @@ public static boolean containsLink(String content) { return !(new UrlDetector(content, UrlDetectorOptions.BRACKET_MATCH).detect().isEmpty()); } + @SuppressWarnings("java:S2095") public static CompletableFuture isLinkBroken(String url) { - HttpClient client = HttpClient.newHttpClient(); - - HttpRequest request = HttpRequest.newBuilder(URI.create(url)) + HttpRequest headRequest = HttpRequest.newBuilder(URI.create(url)) .method("HEAD", HttpRequest.BodyPublishers.noBody()) .build(); - return client.sendAsync(request, HttpResponse.BodyHandlers.discarding()) - .thenApply(response -> response.statusCode() >= 400) - .exceptionally(ignored -> true); + return HTTP_CLIENT.sendAsync(headRequest, HttpResponse.BodyHandlers.discarding()) + .thenApply(response -> { + int status = response.statusCode(); + if (status >= 200 && status < 400) { + return false; + } + return status >= 400 ? true : null; + }) + .exceptionally(ignored -> null) + .thenCompose(result -> { + if (result != null) { + return CompletableFuture.completedFuture(result); + } + HttpRequest getRequest = HttpRequest.newBuilder(URI.create(url)).GET().build(); + + return HTTP_CLIENT.sendAsync(getRequest, HttpResponse.BodyHandlers.discarding()) + .thenApply(resp -> resp.statusCode() >= 400) + .exceptionally(ignored -> true); + }); } public static CompletableFuture replaceDeadLinks(String text, String replacement) { - Set filters = Set.of(LinkFilter.SUPPRESSED, LinkFilter.NON_HTTP_SCHEME); - - List links = extractLinks(text, filters); + List links = extractLinks(text, DEFAULT_FILTERS); if (links.isEmpty()) { return CompletableFuture.completedFuture(text); } - StringBuilder result = new StringBuilder(text); - - List> checks = - links.stream().map(link -> isLinkBroken(link).thenAccept(isDead -> { - if (isDead) { - int index = result.indexOf(link); - if (index != -1) { - result.replace(index, index + link.length(), replacement); - } - } - })).toList(); + List> deadLinkFutures = links.stream() + 
.distinct() + .map(link -> isLinkBroken(link).thenApply(isBroken -> isBroken ? link : null)) + .toList(); - return CompletableFuture.allOf(checks.toArray(new CompletableFuture[0])) - .thenApply(v -> result.toString()); + return CompletableFuture.allOf(deadLinkFutures.toArray(new CompletableFuture[0])) + .thenApply(ignored -> deadLinkFutures.stream() + .map(CompletableFuture::join) + .filter(Objects::nonNull) + .toList()) + .thenApply(deadLinks -> { + String result = text; + for (String deadLink : deadLinks) { + result = result.replace(deadLink, replacement); + } + return result; + }); } private static Optional toLink(Url url, Set filter) { From c3a64e39a4bfb2e1891896905d63e0b18659d855 Mon Sep 17 00:00:00 2001 From: barsh404error Date: Fri, 2 Jan 2026 22:04:21 +0300 Subject: [PATCH 4/8] Add utilities to detect and replace broken links V2 --- .../tjbot/features/utils/LinkDetection.java | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java index 5ac1c0f5a7..237ff174b7 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java @@ -68,7 +68,6 @@ public static boolean containsLink(String content) { return !(new UrlDetector(content, UrlDetectorOptions.BRACKET_MATCH).detect().isEmpty()); } - @SuppressWarnings("java:S2095") public static CompletableFuture isLinkBroken(String url) { HttpRequest headRequest = HttpRequest.newBuilder(URI.create(url)) .method("HEAD", HttpRequest.BodyPublishers.noBody()) @@ -77,21 +76,17 @@ public static CompletableFuture isLinkBroken(String url) { return HTTP_CLIENT.sendAsync(headRequest, HttpResponse.BodyHandlers.discarding()) .thenApply(response -> { int status = response.statusCode(); - if (status >= 200 && status < 400) { - 
return false; - } - return status >= 400 ? true : null; + return status < 200 || status >= 400; }) - .exceptionally(ignored -> null) + .exceptionally(ignored -> true) .thenCompose(result -> { - if (result != null) { - return CompletableFuture.completedFuture(result); + if (!result) { + return CompletableFuture.completedFuture(false); } HttpRequest getRequest = HttpRequest.newBuilder(URI.create(url)).GET().build(); - return HTTP_CLIENT.sendAsync(getRequest, HttpResponse.BodyHandlers.discarding()) .thenApply(resp -> resp.statusCode() >= 400) - .exceptionally(ignored -> true); + .exceptionally(ignored -> true); // still never null }); } From 48c23abb17254bfdd3d113e9cf635a2e83fbc98e Mon Sep 17 00:00:00 2001 From: barsh404error Date: Sat, 3 Jan 2026 13:48:10 +0300 Subject: [PATCH 5/8] Add utilities to detect and replace broken links V2 --- .../togetherjava/tjbot/features/utils/LinkDetection.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java index 237ff174b7..e3035a667b 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java @@ -80,7 +80,7 @@ public static CompletableFuture isLinkBroken(String url) { }) .exceptionally(ignored -> true) .thenCompose(result -> { - if (!result) { + if (!Boolean.TRUE.equals(result)) { return CompletableFuture.completedFuture(false); } HttpRequest getRequest = HttpRequest.newBuilder(URI.create(url)).GET().build(); @@ -99,7 +99,9 @@ public static CompletableFuture replaceDeadLinks(String text, String rep List> deadLinkFutures = links.stream() .distinct() - .map(link -> isLinkBroken(link).thenApply(isBroken -> isBroken ? link : null)) + .map(link -> isLinkBroken(link) + .thenApply(isBroken -> Boolean.TRUE.equals(isBroken) ? 
link : null)) + .toList(); return CompletableFuture.allOf(deadLinkFutures.toArray(new CompletableFuture[0])) From d556a334606e5dce493d3251a8e034551b20cedf Mon Sep 17 00:00:00 2001 From: barsh404error Date: Tue, 6 Jan 2026 19:25:57 +0300 Subject: [PATCH 6/8] Add utilities to detect and replace broken links V2 --- .../tjbot/features/utils/LinkDetection.java | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java index e3035a667b..8b48c01631 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java @@ -68,6 +68,28 @@ public static boolean containsLink(String content) { return !(new UrlDetector(content, UrlDetectorOptions.BRACKET_MATCH).detect().isEmpty()); } + /** + * Checks whether the given URL is considered broken. + * + *

+ * A link is considered broken if: + *

    + *
  • The URL is invalid or malformed
  • + *
  • An HTTP request fails
  • + *
  • The HTTP response status code is outside the 200–399 range
  • + *
+ * + *

+ * Notes: + *

    + *
  • Status code {@code 200} is considered valid, even if the response body is empty
  • + *
  • The response body content is not inspected
  • + *
+ * + * @param url the URL to check + * @return a future completing with {@code true} if the link is broken + */ + public static CompletableFuture isLinkBroken(String url) { HttpRequest headRequest = HttpRequest.newBuilder(URI.create(url)) .method("HEAD", HttpRequest.BodyPublishers.noBody()) @@ -90,6 +112,34 @@ public static CompletableFuture isLinkBroken(String url) { }); } + /** + * Replaces all broken links in the given text with the provided replacement string. + * + *

+ * Example: + * + *

{@code
+     * replaceDeadLinks("""
+     *         Test
+     *         http://deadlink/1
+     *         http://workinglink/1
+     *         """, "broken")
+     * }
+ * + *

+ * Results in: + * + *

{@code
+     * Test
+     * broken
+     * http://workinglink/1
+     * }
+ * + * @param text the input text containing URLs + * @param replacement the string to replace broken links with + * @return a future containing the modified text + */ + public static CompletableFuture replaceDeadLinks(String text, String replacement) { List links = extractLinks(text, DEFAULT_FILTERS); From 9414ead419326120a2f4b02b5d22386c8b0a6fb0 Mon Sep 17 00:00:00 2001 From: barsherror404 Date: Fri, 30 Jan 2026 20:31:32 +0300 Subject: [PATCH 7/8] Fixed link detection to handle 3xx redirects properly Updated isLinkBroken() to only treat 4xx/5xx status codes as broken. Previously 3xx redirects were incorrectly marked as broken links also improved javadoc clarity throughout LinkDetection class --- .../tjbot/features/utils/LinkDetection.java | 223 ++++++++++++------ 1 file changed, 146 insertions(+), 77 deletions(-) diff --git a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java index 8b48c01631..9d26ab713e 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java @@ -15,26 +15,54 @@ import java.util.concurrent.CompletableFuture; /** - * Utility class to detect links. + * Utility methods for working with links inside arbitrary text. + * + *

This class can: + *

    + *
  • Extract HTTP(S) links from text
  • + *
  • Check whether a link is reachable via HTTP
  • + *
  • Replace broken links asynchronously
  • + *
+ * + *

It is intentionally stateless and uses asynchronous HTTP requests + * to avoid blocking calling threads. */ + public class LinkDetection { private static final HttpClient HTTP_CLIENT = HttpClient.newHttpClient(); + /** + * Default filters applied when extracting links from text. + * + *

These filters intentionally ignore: + *

    + *
  • Suppressed links like {@code }
  • + *
  • Non-HTTP(S) schemes such as {@code ftp://} or {@code file://}
  • + *
+ * + *

This reduces false positives when scanning chat messages + * or source-code snippets. + */ + private static final Set DEFAULT_FILTERS = Set.of(LinkFilter.SUPPRESSED, LinkFilter.NON_HTTP_SCHEME); /** - * Possible ways to filter a link. - * - * @see LinkDetection + * Filters that control which detected URLs are returned by {@link #extractLinks}. */ public enum LinkFilter { /** - * Filters links suppressed with {@literal }. + * Ignores URLs that are wrapped in angle brackets, + * e.g. {@code }. + * + *

Such links are often intentionally suppressed in chat platforms. */ SUPPRESSED, /** - * Filters links that are not using http scheme. + * Ignores URLs that do not use the HTTP or HTTPS scheme. + * + *

This helps avoid false positives such as {@code ftp://}, + * {@code file://}, or scheme-less matches. */ NON_HTTP_SCHEME } @@ -44,102 +72,126 @@ private LinkDetection() { } /** - * Extracts all links from the given content. + * Extracts HTTP(S) links from the given text. + * + *

The text is scanned using a URL detector, then filtered and normalized + * according to the provided {@link LinkFilter}s. + * + *

Example: + *

{@code
+     * Set filters = Set.of(LinkFilter.SUPPRESSED, LinkFilter.NON_HTTP_SCHEME);
+     * extractLinks("Visit https://example.com and ", filters)
+     * // returns ["https://example.com"]
+     * }
* - * @param content the content to search through - * @param filter the filters applied to the urls - * @return a list of all found links, can be empty + * @param content the text to scan for links + * @param filter a set of filters controlling which detected links are returned + * @return a list of extracted links in the order they appear in the text */ + public static List extractLinks(String content, Set filter) { return new UrlDetector(content, UrlDetectorOptions.BRACKET_MATCH).detect() - .stream() - .map(url -> toLink(url, filter)) - .flatMap(Optional::stream) - .toList(); + .stream() + .map(url -> toLink(url, filter)) + .flatMap(Optional::stream) + .toList(); } /** - * Checks whether the given content contains a link. + * Checks whether the given text contains at least one detectable URL. * - * @param content the content to search through - * @return true if the content contains at least one link + *

This method performs a lightweight detection only and does not + * apply any {@link LinkFilter}s. + * + * @param content the text to scan + * @return {@code true} if at least one URL-like pattern is detected */ + public static boolean containsLink(String content) { return !(new UrlDetector(content, UrlDetectorOptions.BRACKET_MATCH).detect().isEmpty()); } /** - * Checks whether the given URL is considered broken. + * Asynchronously checks whether a URL is considered broken. * - *

- * A link is considered broken if: - *

    - *
  • The URL is invalid or malformed
  • - *
  • An HTTP request fails
  • - *
  • The HTTP response status code is outside the 200–399 range
  • - *
+ *

The check is performed in two steps: + *

    + *
  1. A {@code HEAD} request is sent first (cheap and fast)
  2. + *
  3. If that fails or returns an error, a {@code GET} request is used as a fallback
  4. + *
* - *

- * Notes: + *

A link is considered broken if: *

    - *
  • Status code {@code 200} is considered valid, even if the response body is empty
  • - *
  • The response body content is not inspected
  • + *
  • The URL is malformed or unreachable
  • + *
  • The HTTP request fails with an exception
  • + *
  • The response status code is 4xx (client error) or 5xx (server error)
  • *
* + *

Successful responses (2xx) and redirects (3xx) are considered valid links. + * The response body is never inspected. + * * @param url the URL to check - * @return a future completing with {@code true} if the link is broken + * @return a {@code CompletableFuture} completing with {@code true} if the link is broken, + * {@code false} otherwise */ public static CompletableFuture isLinkBroken(String url) { HttpRequest headRequest = HttpRequest.newBuilder(URI.create(url)) - .method("HEAD", HttpRequest.BodyPublishers.noBody()) - .build(); + .method("HEAD", HttpRequest.BodyPublishers.noBody()) + .build(); return HTTP_CLIENT.sendAsync(headRequest, HttpResponse.BodyHandlers.discarding()) - .thenApply(response -> { - int status = response.statusCode(); - return status < 200 || status >= 400; - }) - .exceptionally(ignored -> true) - .thenCompose(result -> { - if (!Boolean.TRUE.equals(result)) { - return CompletableFuture.completedFuture(false); - } - HttpRequest getRequest = HttpRequest.newBuilder(URI.create(url)).GET().build(); - return HTTP_CLIENT.sendAsync(getRequest, HttpResponse.BodyHandlers.discarding()) - .thenApply(resp -> resp.statusCode() >= 400) - .exceptionally(ignored -> true); // still never null - }); + .thenApply(response -> { + int status = response.statusCode(); + // 2xx and 3xx are success, 4xx and 5xx are errors + return status >= 400; + }) + .exceptionally(ignored -> true) + .thenCompose(result -> { + if (!Boolean.TRUE.equals(result)) { + return CompletableFuture.completedFuture(false); + } + HttpRequest fallbackGetRequest = HttpRequest.newBuilder(URI.create(url)).GET().build(); + return HTTP_CLIENT.sendAsync(fallbackGetRequest, HttpResponse.BodyHandlers.discarding()) + .thenApply(resp -> resp.statusCode() >= 400) + .exceptionally(ignored -> true); + }); } /** - * Replaces all broken links in the given text with the provided replacement string. + * Replaces all broken HTTP(S) links in the given text. + * + *

Each detected link is checked asynchronously using + * {@link #isLinkBroken(String)}. Only links confirmed as broken + * are replaced. Duplicate URLs are checked only once and all occurrences + * are replaced if found to be broken. * - *

- * Example: - * + *

This method does not block - all link checks are performed + * asynchronously and combined into a single {@code CompletableFuture}. + * + *

Example: *

{@code
      * replaceDeadLinks("""
-     *         Test
-     *         http://deadlink/1
-     *         http://workinglink/1
-     *         """, "broken")
+     *   Test
+     *   http://deadlink/1
+     *   http://workinglink/1
+     * """, "(broken link)")
      * }
* - *

- * Results in: - * + *

Results in: *

{@code
      * Test
-     * broken
+     * (broken link)
      * http://workinglink/1
      * }
* * @param text the input text containing URLs - * @param replacement the string to replace broken links with - * @return a future containing the modified text + * @param replacement the string used to replace broken links + * @return a {@code CompletableFuture} that completes with the modified text, + * or the original text if no broken links were found */ + public static CompletableFuture replaceDeadLinks(String text, String replacement) { List links = extractLinks(text, DEFAULT_FILTERS); @@ -148,26 +200,44 @@ public static CompletableFuture replaceDeadLinks(String text, String rep } List> deadLinkFutures = links.stream() - .distinct() - .map(link -> isLinkBroken(link) - .thenApply(isBroken -> Boolean.TRUE.equals(isBroken) ? link : null)) + .distinct() + .map(link -> isLinkBroken(link) + .thenApply(isBroken -> Boolean.TRUE.equals(isBroken) ? link : null)) - .toList(); + .toList(); return CompletableFuture.allOf(deadLinkFutures.toArray(new CompletableFuture[0])) - .thenApply(ignored -> deadLinkFutures.stream() - .map(CompletableFuture::join) - .filter(Objects::nonNull) - .toList()) - .thenApply(deadLinks -> { - String result = text; - for (String deadLink : deadLinks) { - result = result.replace(deadLink, replacement); - } - return result; - }); + .thenApply(ignored -> deadLinkFutures.stream() + .map(CompletableFuture::join) + .filter(Objects::nonNull) + .toList()) + .thenApply(deadLinks -> { + String result = text; + for (String deadLink : deadLinks) { + result = result.replace(deadLink, replacement); + } + return result; + }); } + /** + * Converts a detected {@link Url} into a normalized link string. + * + *

Applies the provided {@link LinkFilter}s: + *

    + *
  • {@link LinkFilter#SUPPRESSED} - filters URLs wrapped in angle brackets
  • + *
  • {@link LinkFilter#NON_HTTP_SCHEME} - filters non-HTTP(S) schemes
  • + *
+ * + *

Additionally removes trailing punctuation such as commas or periods + * from the detected URL. + * + * @param url the detected URL + * @param filter active link filters to apply + * @return an {@link Optional} containing the normalized link, + * or {@code Optional.empty()} if the link should be filtered out + */ + private static Optional toLink(Url url, Set filter) { String raw = url.getOriginalUrl(); if (filter.contains(LinkFilter.SUPPRESSED) && raw.contains(">")) { @@ -188,5 +258,4 @@ private static Optional toLink(Url url, Set filter) { } return Optional.of(link); } - -} +} \ No newline at end of file From ef3898694d70672caa5b07e57ea4bf75d9934362 Mon Sep 17 00:00:00 2001 From: barsherror404 Date: Fri, 30 Jan 2026 22:18:13 +0300 Subject: [PATCH 8/8] Apply Spotless formatting and regenerate jOOQ sources --- .../tjbot/features/utils/LinkDetection.java | 192 ++++++++++-------- 1 file changed, 105 insertions(+), 87 deletions(-) diff --git a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java index 9d26ab713e..7c560602da 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java @@ -17,15 +17,17 @@ /** * Utility methods for working with links inside arbitrary text. * - *

This class can: + *

+ * This class can: *

    - *
  • Extract HTTP(S) links from text
  • - *
  • Check whether a link is reachable via HTTP
  • - *
  • Replace broken links asynchronously
  • + *
  • Extract HTTP(S) links from text
  • + *
  • Check whether a link is reachable via HTTP
  • + *
  • Replace broken links asynchronously
  • *
* - *

It is intentionally stateless and uses asynchronous HTTP requests - * to avoid blocking calling threads. + *

+ * It is intentionally stateless and uses asynchronous HTTP requests to avoid blocking calling + * threads. */ public class LinkDetection { @@ -34,14 +36,15 @@ public class LinkDetection { /** * Default filters applied when extracting links from text. * - *

These filters intentionally ignore: + *

+ * These filters intentionally ignore: *

    - *
  • Suppressed links like {@code }
  • - *
  • Non-HTTP(S) schemes such as {@code ftp://} or {@code file://}
  • + *
  • Suppressed links like {@code }
  • + *
  • Non-HTTP(S) schemes such as {@code ftp://} or {@code file://}
  • *
* - *

This reduces false positives when scanning chat messages - * or source-code snippets. + *

+ * This reduces false positives when scanning chat messages or source-code snippets. */ private static final Set DEFAULT_FILTERS = @@ -52,17 +55,18 @@ public class LinkDetection { */ public enum LinkFilter { /** - * Ignores URLs that are wrapped in angle brackets, - * e.g. {@code }. + * Ignores URLs that are wrapped in angle brackets, e.g. {@code }. * - *

Such links are often intentionally suppressed in chat platforms. + *

+ * Such links are often intentionally suppressed in chat platforms. */ SUPPRESSED, /** * Ignores URLs that do not use the HTTP or HTTPS scheme. * - *

This helps avoid false positives such as {@code ftp://}, - * {@code file://}, or scheme-less matches. + *

+ * This helps avoid false positives such as {@code ftp://}, {@code file://}, or scheme-less + * matches. */ NON_HTTP_SCHEME } @@ -74,10 +78,13 @@ private LinkDetection() { /** * Extracts HTTP(S) links from the given text. * - *

The text is scanned using a URL detector, then filtered and normalized - * according to the provided {@link LinkFilter}s. + *

+ * The text is scanned using a URL detector, then filtered and normalized according to the + * provided {@link LinkFilter}s. * - *

Example: + *

+ * Example: + * *

{@code
      * Set filters = Set.of(LinkFilter.SUPPRESSED, LinkFilter.NON_HTTP_SCHEME);
      * extractLinks("Visit https://example.com and ", filters)
@@ -91,17 +98,17 @@ private LinkDetection() {
 
     public static List extractLinks(String content, Set filter) {
         return new UrlDetector(content, UrlDetectorOptions.BRACKET_MATCH).detect()
-                .stream()
-                .map(url -> toLink(url, filter))
-                .flatMap(Optional::stream)
-                .toList();
+            .stream()
+            .map(url -> toLink(url, filter))
+            .flatMap(Optional::stream)
+            .toList();
     }
 
     /**
      * Checks whether the given text contains at least one detectable URL.
      *
-     * 

This method performs a lightweight detection only and does not - * apply any {@link LinkFilter}s. + *

+ * This method performs a lightweight detection only and does not apply any {@link LinkFilter}s. * * @param content the text to scan * @return {@code true} if at least one URL-like pattern is detected @@ -114,21 +121,24 @@ public static boolean containsLink(String content) { /** * Asynchronously checks whether a URL is considered broken. * - *

The check is performed in two steps: + *

+ * The check is performed in two steps: *

    - *
  1. A {@code HEAD} request is sent first (cheap and fast)
  2. - *
  3. If that fails or returns an error, a {@code GET} request is used as a fallback
  4. + *
  5. A {@code HEAD} request is sent first (cheap and fast)
  6. + *
  7. If that fails or returns an error, a {@code GET} request is used as a fallback
  8. *
* - *

A link is considered broken if: + *

+ * A link is considered broken if: *

    - *
  • The URL is malformed or unreachable
  • - *
  • The HTTP request fails with an exception
  • - *
  • The response status code is 4xx (client error) or 5xx (server error)
  • + *
  • The URL is malformed or unreachable
  • + *
  • The HTTP request fails with an exception
  • + *
  • The response status code is 4xx (client error) or 5xx (server error)
  • *
* - *

Successful responses (2xx) and redirects (3xx) are considered valid links. - * The response body is never inspected. + *

+ * Successful responses (2xx) and redirects (3xx) are considered valid links. The response body + * is never inspected. * * @param url the URL to check * @return a {@code CompletableFuture} completing with {@code true} if the link is broken, @@ -137,48 +147,55 @@ public static boolean containsLink(String content) { public static CompletableFuture isLinkBroken(String url) { HttpRequest headRequest = HttpRequest.newBuilder(URI.create(url)) - .method("HEAD", HttpRequest.BodyPublishers.noBody()) - .build(); + .method("HEAD", HttpRequest.BodyPublishers.noBody()) + .build(); return HTTP_CLIENT.sendAsync(headRequest, HttpResponse.BodyHandlers.discarding()) - .thenApply(response -> { - int status = response.statusCode(); - // 2xx and 3xx are success, 4xx and 5xx are errors - return status >= 400; - }) - .exceptionally(ignored -> true) - .thenCompose(result -> { - if (!Boolean.TRUE.equals(result)) { - return CompletableFuture.completedFuture(false); - } - HttpRequest fallbackGetRequest = HttpRequest.newBuilder(URI.create(url)).GET().build(); - return HTTP_CLIENT.sendAsync(fallbackGetRequest, HttpResponse.BodyHandlers.discarding()) - .thenApply(resp -> resp.statusCode() >= 400) - .exceptionally(ignored -> true); - }); + .thenApply(response -> { + int status = response.statusCode(); + // 2xx and 3xx are success, 4xx and 5xx are errors + return status >= 400; + }) + .exceptionally(ignored -> true) + .thenCompose(result -> { + if (!Boolean.TRUE.equals(result)) { + return CompletableFuture.completedFuture(false); + } + HttpRequest fallbackGetRequest = + HttpRequest.newBuilder(URI.create(url)).GET().build(); + return HTTP_CLIENT + .sendAsync(fallbackGetRequest, HttpResponse.BodyHandlers.discarding()) + .thenApply(resp -> resp.statusCode() >= 400) + .exceptionally(ignored -> true); + }); } /** * Replaces all broken HTTP(S) links in the given text. * - *

Each detected link is checked asynchronously using - * {@link #isLinkBroken(String)}. Only links confirmed as broken - * are replaced. Duplicate URLs are checked only once and all occurrences + *

+ * Each detected link is checked asynchronously using {@link #isLinkBroken(String)}. Only links + * confirmed as broken are replaced. Duplicate URLs are checked only once and all occurrences * are replaced if found to be broken. * - *

This method does not block - all link checks are performed - * asynchronously and combined into a single {@code CompletableFuture}. + *

+ * This method does not block - all link checks are performed asynchronously and combined into a + * single {@code CompletableFuture}. * - *

Example: + *

+ * Example: + * *

{@code
      * replaceDeadLinks("""
-     *   Test
-     *   http://deadlink/1
-     *   http://workinglink/1
-     * """, "(broken link)")
+     *           Test
+     *           http://deadlink/1
+     *           http://workinglink/1
+     *         """, "(broken link)")
      * }
* - *

Results in: + *

+ * Results in: + * *

{@code
      * Test
      * (broken link)
@@ -187,8 +204,8 @@ public static CompletableFuture<Boolean> isLinkBroken(String url) {
      *
      * @param text the input text containing URLs
      * @param replacement the string used to replace broken links
-     * @return a {@code CompletableFuture} that completes with the modified text,
-     *         or the original text if no broken links were found
+     * @return a {@code CompletableFuture} that completes with the modified text, or the original
+     *         text if no broken links were found
      */
 
 
@@ -200,42 +217,43 @@ public static CompletableFuture<String> replaceDeadLinks(String text, String rep
         }
 
         List<CompletableFuture<String>> deadLinkFutures = links.stream()
-                .distinct()
-                .map(link -> isLinkBroken(link)
-                        .thenApply(isBroken -> Boolean.TRUE.equals(isBroken) ? link : null))
+            .distinct()
+            .map(link -> isLinkBroken(link)
+                .thenApply(isBroken -> Boolean.TRUE.equals(isBroken) ? link : null))
 
-                .toList();
+            .toList();
 
         return CompletableFuture.allOf(deadLinkFutures.toArray(new CompletableFuture[0]))
-                .thenApply(ignored -> deadLinkFutures.stream()
-                        .map(CompletableFuture::join)
-                        .filter(Objects::nonNull)
-                        .toList())
-                .thenApply(deadLinks -> {
-                    String result = text;
-                    for (String deadLink : deadLinks) {
-                        result = result.replace(deadLink, replacement);
-                    }
-                    return result;
-                });
+            .thenApply(ignored -> deadLinkFutures.stream()
+                .map(CompletableFuture::join)
+                .filter(Objects::nonNull)
+                .toList())
+            .thenApply(deadLinks -> {
+                String result = text;
+                for (String deadLink : deadLinks) {
+                    result = result.replace(deadLink, replacement);
+                }
+                return result;
+            });
     }
 
     /**
      * Converts a detected {@link Url} into a normalized link string.
      *
-     * <p>Applies the provided {@link LinkFilter}s:
+     * <p>
+     * Applies the provided {@link LinkFilter}s:
      * <ul>
-     *   <li>{@link LinkFilter#SUPPRESSED} - filters URLs wrapped in angle brackets</li>
-     *   <li>{@link LinkFilter#NON_HTTP_SCHEME} - filters non-HTTP(S) schemes</li>
+     * <li>{@link LinkFilter#SUPPRESSED} - filters URLs wrapped in angle brackets</li>
+     * <li>{@link LinkFilter#NON_HTTP_SCHEME} - filters non-HTTP(S) schemes</li>
      * </ul>
      *
-     * <p>Additionally removes trailing punctuation such as commas or periods
-     * from the detected URL.
+     * <p>
+     * Additionally removes trailing punctuation such as commas or periods from the detected URL.
      *
      * @param url the detected URL
      * @param filter active link filters to apply
-     * @return an {@link Optional} containing the normalized link,
-     *         or {@code Optional.empty()} if the link should be filtered out
+     * @return an {@link Optional} containing the normalized link, or {@code Optional.empty()} if
+     *         the link should be filtered out
      */
 
     private static Optional<String> toLink(Url url, Set<LinkFilter> filter) {
@@ -258,4 +276,4 @@ private static Optional<String> toLink(Url url, Set<LinkFilter> filter) {
         }
         return Optional.of(link);
     }
-}
\ No newline at end of file
+}