Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions lib/hexdocs/file_rewriter.ex
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,14 @@ defmodule Hexdocs.FileRewriter do

# Literal robots meta tag asking crawlers not to index a page.
# NOTE(review): presumably the markup that remove_noindex/2 strips — confirm
# against that function's body.
@noindex_hook ~s|<meta name="robots" content="noindex">|

# Hosts treated as part of the official ecosystem. official_link?/1 matches a
# link's host against each entry exactly or as a parent domain (".<domain>"
# suffix), and matching links are left without rel="nofollow".
@official_domains ~w(hex.pm hexdocs.pm elixir-lang.org erlang.org)

# Runs every rewrite step over `content` for the file at `path` and returns
# the rewritten content. Each step receives the output of the previous one
# together with the unchanged `path`; steps are applied in list order.
def run(path, content) do
  steps = [
    &add_elixir_org_link/2,
    &add_analytics/2,
    &remove_noindex/2,
    &add_nofollow/2
  ]

  Enum.reduce(steps, content, fn step, rewritten -> step.(rewritten, path) end)
end

defp add_elixir_org_link(content, path) do
Expand Down Expand Up @@ -42,4 +45,49 @@ defmodule Hexdocs.FileRewriter do
content
end
end

# Matches an opening <a ...> tag that carries a double-quoted href with an
# absolute http:// or https:// URL.
@a_tag_re ~r/<a\s[^>]*href="https?:\/\/[^"]*"[^>]*>/
# Captures the absolute http(s) URL out of a double-quoted href attribute.
@href_re ~r/href="(https?:\/\/[^"]*)"/

# Adds rel="nofollow" to absolute http(s) links in `.html` files, skipping
# links whose host is an official domain. Content of any other file type is
# returned untouched.
defp add_nofollow(content, path) do
  case String.ends_with?(path, ".html") do
    true -> Regex.replace(@a_tag_re, content, &maybe_nofollow/1)
    false -> content
  end
end

# Decides, for a single matched opening <a> tag, whether it needs nofollow.
# Tags from which no URL can be extracted are returned as-is.
defp maybe_nofollow(anchor) do
  case Regex.run(@href_re, anchor) do
    [_, url] ->
      if official_link?(url), do: anchor, else: add_rel_nofollow(anchor)

    _ ->
      anchor
  end
end

# Ensures the opening <a ...> tag in `tag` carries the `nofollow` link type.
#
#   * If the tag already has a double-quoted rel attribute, `nofollow` is
#     appended to its value unless it is already present. rel tokens are
#     compared case-insensitively, and an empty/blank value becomes exactly
#     "nofollow" rather than " nofollow".
#   * Otherwise a rel="nofollow" attribute is inserted right after "<a".
#
# Returns the rewritten tag as a string.
defp add_rel_nofollow(tag) do
  if tag =~ ~r/\srel="/ do
    Regex.replace(~r/\srel="([^"]*)"/, tag, fn _, existing ->
      tokens = String.split(existing)

      cond do
        Enum.any?(tokens, &(String.downcase(&1) == "nofollow")) ->
          ~s| rel="#{existing}"|

        tokens == [] ->
          ~s| rel="nofollow"|

        true ->
          ~s| rel="#{existing} nofollow"|
      end
    end)
  else
    # The tag name may be followed by any whitespace (space, tab, newline), so
    # anchor on "<a" plus a whitespace lookahead instead of the literal "<a ".
    # The lookahead keeps the original separator in place, and `global: false`
    # guarantees at most one insertion.
    Regex.replace(~r/\A<a(?=\s)/, tag, ~s|<a rel="nofollow"|, global: false)
  end
end

# Returns true when `href` points at one of @official_domains or at any
# subdomain of one (e.g. "www.erlang.org"), false otherwise.
#
# Host names are case-insensitive, but URI.parse/1 returns the host as
# written, so it is lowercased before comparison. A URL with no host is
# never official.
defp official_link?(href) do
  case URI.parse(href).host do
    nil ->
      false

    host ->
      host = String.downcase(host)

      Enum.any?(@official_domains, fn domain ->
        # The "." prefix prevents look-alikes such as "evilhex.pm" from matching.
        host == domain or String.ends_with?(host, "." <> domain)
      end)
  end
end
end
48 changes: 47 additions & 1 deletion test/hexdocs/file_rewriter_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,54 @@ defmodule Hexdocs.FileRewriterTest do
"index.html",
~s|<a href="https://twitter.com/dignifiedquire" target="_blank" title="@dignifiedquire">Friedel Ziegelmayer</a>|
) ==
~s|<a href="https://twitter.com/dignifiedquire" target="_blank" title="@dignifiedquire">Friedel Ziegelmayer</a> for the <a href="https://elixir-lang.org" title="Elixir" target="_blank">Elixir programming language</a>|
~s|<a rel="nofollow" href="https://twitter.com/dignifiedquire" target="_blank" title="@dignifiedquire">Friedel Ziegelmayer</a> for the <a href="https://elixir-lang.org" title="Elixir" target="_blank">Elixir programming language</a>|

assert FileRewriter.run("index.html", ~s|<meta name="robots" content="noindex">|) == ""
end

describe "add_nofollow" do
  # An absolute link to a non-official host gains rel="nofollow" right after "<a".
  test "adds rel=nofollow to external links" do
    html = ~s|<a href="https://example.com">example</a>|
    expected = ~s|<a rel="nofollow" href="https://example.com">example</a>|

    assert FileRewriter.run("index.html", html) == expected
  end

  # An existing rel value is kept and "nofollow" is appended to it.
  test "appends nofollow to existing rel attribute" do
    html = ~s|<a href="https://example.com" rel="help">example</a>|
    expected = ~s|<a href="https://example.com" rel="help nofollow">example</a>|

    assert FileRewriter.run("index.html", html) == expected
  end

  # A link that already declares nofollow passes through unchanged.
  test "does not duplicate nofollow" do
    html = ~s|<a href="https://example.com" rel="nofollow">example</a>|

    assert FileRewriter.run("index.html", html) == html
  end

  # Official domains and their subdomains are exempt from nofollow.
  test "does not add nofollow to official ecosystem links" do
    official_urls = [
      "https://hex.pm/packages/foo",
      "https://hexdocs.pm/foo",
      "https://elixir-lang.org",
      "https://www.erlang.org",
      "https://preview.hexdocs.pm/foo"
    ]

    Enum.each(official_urls, fn url ->
      html = ~s|<a href="#{url}">link</a>|
      assert FileRewriter.run("index.html", html) == html
    end)
  end

  # Only absolute http(s) hrefs are rewritten.
  test "does not add nofollow to relative links" do
    html = ~s|<a href="other.html">link</a>|

    assert FileRewriter.run("index.html", html) == html
  end

  # The rewrite is limited to paths ending in ".html".
  test "does not modify non-html files" do
    script = ~s|<a href="https://example.com">example</a>|

    assert FileRewriter.run("index.js", script) == script
  end
end
end