diff --git a/lib/hexdocs/file_rewriter.ex b/lib/hexdocs/file_rewriter.ex
index 9243492..f5b0ac3 100644
--- a/lib/hexdocs/file_rewriter.ex
+++ b/lib/hexdocs/file_rewriter.ex
@@ -9,11 +9,14 @@ defmodule Hexdocs.FileRewriter do
@noindex_hook ~s||
+ @official_domains ~w(hex.pm hexdocs.pm elixir-lang.org erlang.org)
+
+ # Threads `content` through every rewrite step in the order listed and returns
+ # the final result; each step is also given `path`.
def run(path, content) do
content
|> add_elixir_org_link(path)
|> add_analytics(path)
|> remove_noindex(path)
+ # Only rewrites paths ending in ".html"; other content passes through unchanged.
+ |> add_nofollow(path)
end
defp add_elixir_org_link(content, path) do
@@ -42,4 +45,49 @@ defmodule Hexdocs.FileRewriter do
content
end
end
+
+  # Matches the opening tag of an anchor whose double-quoted href is an absolute
+  # http(s) URL. The pattern has to begin at `<a` so the replace callback is
+  # handed the whole opening tag rather than a fragment starting at `href`.
+  @a_tag_re ~r/<a\s[^>]*href="https?:\/\/[^"]*"[^>]*>/
+  # Captures the absolute http(s) URL from a double-quoted href attribute.
+  @href_re ~r/href="(https?:\/\/[^"]*)"/
+
+  # Rewrites every anchor matched by @a_tag_re so that links to non-official
+  # hosts carry rel="nofollow". Content whose path does not end in ".html" is
+  # returned unchanged.
+  defp add_nofollow(content, path) do
+    if String.ends_with?(path, ".html") do
+      Regex.replace(@a_tag_re, content, &nofollow_unless_official/1)
+    else
+      content
+    end
+  end
+
+  # Decides, for one matched opening tag, whether it is returned as-is or with
+  # nofollow added. A tag without an extractable href is returned as-is.
+  defp nofollow_unless_official(anchor) do
+    external? =
+      case Regex.run(@href_re, anchor) do
+        [_, url] -> !official_link?(url)
+        _ -> false
+      end
+
+    if external?, do: add_rel_nofollow(anchor), else: anchor
+  end
+
+  # Returns `tag` with "nofollow" present in its rel attribute.
+  #
+  # A tag that already has a double-quoted rel gets "nofollow" appended to the
+  # value unless it is already listed; a tag without one gets a new rel
+  # attribute inserted directly after `<a`.
+  defp add_rel_nofollow(tag) do
+    if tag =~ ~r/\srel="/ do
+      Regex.replace(~r/\srel="([^"]*)"/, tag, fn _, existing ->
+        # Compare lowercased tokens so a value such as "NoFollow" is not duplicated.
+        tokens = existing |> String.downcase() |> String.split()
+
+        if "nofollow" in tokens do
+          ~s| rel="#{existing}"|
+        else
+          # Trim so an empty rel="" becomes rel="nofollow", not rel=" nofollow".
+          ~s| rel="#{String.trim(existing <> " nofollow")}"|
+        end
+      end)
+    else
+      # Match on the bare "<a" so the insertion also works when the character
+      # after it is a newline or tab; only the first occurrence is replaced.
+      String.replace(tag, "<a", ~s|<a rel="nofollow"|, global: false)
+    end
+  end
+
+  # True when `href` points at one of @official_domains or at a subdomain of one.
+  # A URL without a host is never official.
+  defp official_link?(href) do
+    case URI.parse(href).host do
+      nil ->
+        false
+
+      host ->
+        # The host is lowercased first so the comparison is case-insensitive.
+        host = String.downcase(host)
+
+        Enum.any?(@official_domains, fn domain ->
+          host == domain or String.ends_with?(host, "." <> domain)
+        end)
+    end
+  end
end
diff --git a/test/hexdocs/file_rewriter_test.exs b/test/hexdocs/file_rewriter_test.exs
index 3c4c5a9..174a050 100644
--- a/test/hexdocs/file_rewriter_test.exs
+++ b/test/hexdocs/file_rewriter_test.exs
@@ -12,8 +12,54 @@ defmodule Hexdocs.FileRewriterTest do
"index.html",
~s|Friedel Ziegelmayer|
) ==
- ~s|Friedel Ziegelmayer for the Elixir programming language|
+ ~s|Friedel Ziegelmayer for the Elixir programming language|
assert FileRewriter.run("index.html", ~s||) == ""
end
+
+  describe "add_nofollow" do
+    test "adds rel=nofollow to external links" do
+      result =
+        FileRewriter.run("index.html", ~s|<a href="https://example.com">example</a>|)
+
+      # Assert on the attributes instead of the exact markup so the test does not
+      # depend on where inside the tag the rel attribute is inserted.
+      assert result =~ ~s|rel="nofollow"|
+      assert result =~ ~s|href="https://example.com"|
+      assert result =~ ~s|>example</a>|
+    end
+
+    test "appends nofollow to existing rel attribute" do
+      assert FileRewriter.run(
+               "index.html",
+               ~s|<a href="https://example.com" rel="noopener">example</a>|
+             ) ==
+               ~s|<a href="https://example.com" rel="noopener nofollow">example</a>|
+    end
+
+    test "does not duplicate nofollow" do
+      assert FileRewriter.run(
+               "index.html",
+               ~s|<a href="https://example.com" rel="nofollow noopener">example</a>|
+             ) ==
+               ~s|<a href="https://example.com" rel="nofollow noopener">example</a>|
+    end
+
+    test "does not add nofollow to official ecosystem links" do
+      for url <- [
+            "https://hex.pm/packages/foo",
+            "https://hexdocs.pm/foo",
+            "https://elixir-lang.org",
+            "https://www.erlang.org",
+            "https://preview.hexdocs.pm/foo"
+          ] do
+        input = ~s|<a href="#{url}">link</a>|
+        assert FileRewriter.run("index.html", input) == input
+      end
+    end
+
+    test "does not add nofollow to relative links" do
+      input = ~s|<a href="/foo/bar.html">link</a>|
+      assert FileRewriter.run("index.html", input) == input
+    end
+
+    test "does not modify non-html files" do
+      input = ~s|<a href="https://example.com">example</a>|
+      assert FileRewriter.run("index.js", input) == input
+    end
+  end
end