Skip to content

Commit 722e774

Browse files
begedinjoshsmith
authored and committed
Attempt to implement pagination - initial commit
1 parent a2a12db commit 722e774

File tree

5 files changed

+225
-0
lines changed

5 files changed

+225
-0
lines changed

lib/code_corps/github/api/api.ex

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ defmodule CodeCorps.GitHub.API do
1414
|> marshall_response()
1515
end
1616

17+
# Delegate paginated retrieval to the dedicated implementations:
# eager fetches all pages up front, lazy streams one page at a time.
defdelegate eager_get_all(url, headers, opts), to: CodeCorps.GitHub.EagerAPI
defdelegate lazy_get_all(url, headers, opts), to: CodeCorps.GitHub.StreamAPI
19+
1720
@doc """
1821
Get access token headers for a given `CodeCorps.User` and
1922
`CodeCorps.GithubAppInstallation`.
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
defmodule CodeCorps.GitHub.EagerAPI do
  @moduledoc """
  Eagerly retrieves all pages of a paginated GitHub resource.

  The first page is fetched to discover the total page count (taken from
  the `Link` response header), then the remaining pages are fetched and
  the decoded results concatenated into a single list.

  NOTE(review): despite the "eager" name, remaining pages are requested
  sequentially via `Enum.flat_map/2`. Fetching them concurrently (e.g.
  `Task.async_stream/3`) previously timed out -- presumably hackney's
  connection pool needs to be configured first; confirm before
  parallelizing.
  """

  @doc """
  Fetches every page of the resource at `url` and returns the decoded
  items of all pages as one list.

  `req_headers` and `options` are passed through to `:hackney.request/5`
  for every page request.
  """
  def eager_get_all(url, req_headers, options) do
    # Keep request and response headers distinct: the response headers
    # carry the "Link" pagination header, but must NOT be reused as
    # request headers for the follow-up page requests (the original code
    # shadowed `headers` with the response headers and did exactly that).
    {:ok, _status, resp_headers, body} =
      :hackney.request(:get, url, req_headers, "", options)

    first_page = Poison.decode!(body)

    case retrieve_total_pages(resp_headers) do
      1 -> first_page
      total -> first_page ++ get_remaining_pages(total, url, req_headers, options)
    end
  end

  # Builds URLs for pages 2..total (preserving the original query string,
  # defaulting per_page to 100) and fetches each one.
  defp get_remaining_pages(total, url, headers, options) do
    uri_struct = URI.parse(url)

    # NOTE(review): assumes `url` has a query string; `URI.decode_query/1`
    # raises on `nil` -- same as the original. Confirm callers always pass
    # a query.
    base_query =
      uri_struct
      |> Map.get(:query)
      |> URI.decode_query()
      |> Map.put_new("per_page", 100)

    2..total
    |> Enum.map(fn page ->
      query = base_query |> Map.put("page", page) |> URI.encode_query()

      uri_struct
      |> Map.put(:query, query)
      |> URI.to_string()
    end)
    |> Enum.flat_map(&fetch_page(&1, headers, options))
  end

  # Performs a single GET request and decodes the JSON body.
  defp fetch_page(url, headers, options) do
    {:ok, _status, _resp_headers, body} =
      :hackney.request(:get, url, headers, "", options)

    Poison.decode!(body)
  end

  @doc """
  Determines the total number of pages from a response header list.

  Returns `1` when no `Link` header is present (GitHub omits it for
  single-page results).
  """
  def retrieve_total_pages(headers) do
    case List.keyfind(headers, "Link", 0, nil) do
      nil -> 1
      {"Link", value} -> extract_total_pages(value)
    end
  end

  @doc """
  Parses a `Link` header value and returns the page number found in the
  query string of the URL tagged `rel="last"`.
  """
  def extract_total_pages(links_string) do
    links_string
    |> String.split(", ")
    |> Map.new(fn link ->
      [_, name] = Regex.run(~r{rel="([a-z]+)"}, link)
      [_, url] = Regex.run(~r{<([^>]+)>}, link)
      {name, url}
    end)
    |> Map.get("last")
    |> URI.parse()
    |> Map.get(:query)
    |> URI.decode_query()
    |> Map.get("page")
    |> String.to_integer()
  end
end
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
defmodule CodeCorps.GitHub.API.Repository do
  @moduledoc ~S"""
  Functions for working with repository resources (issues) on GitHub.
  """

  alias CodeCorps.{
    GitHub,
    GitHub.API,
    GithubAppInstallation,
    GithubRepo
  }

  @doc """
  Retrieves all issues for the given repository.

  Returns `{:ok, issues}` with the full decoded issue list, or
  `{:error, error}` when an installation access token could not be
  obtained.
  """
  @spec issues(GithubRepo.t) :: {:ok, list(map)} | {:error, GitHub.api_error_struct}
  def issues(%GithubRepo{github_app_installation: %GithubAppInstallation{} = installation} = github_repo) do
    # A failed token fetch falls through `with` and is returned as-is.
    with {:ok, access_token} <- API.Installation.get_access_token(installation) do
      {:ok, fetch_issues(github_repo, access_token)}
    end
  end

  # Lazily pages through the repository's issues and returns them as a
  # flat list. The original body was leftover benchmark code: it fetched
  # the issues twice (lazy and eager, timed with Timex) and ended with
  # `IO.puts/1`, so `issues/1` actually returned `{:ok, :ok}` instead of
  # `{:ok, issues}`.
  defp fetch_issues(%GithubRepo{github_account_login: owner, name: repo}, access_token) do
    # per_page=100 is GitHub's maximum page size; the original used the
    # debugging value 8, which only multiplied the number of requests.
    "repos/#{owner}/#{repo}/issues?per_page=100"
    |> GitHub.lazy_get_all(%{}, [access_token: access_token])
    |> Enum.to_list()
    |> List.flatten()
  end
end
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
defmodule CodeCorps.GitHub.StreamAPI do
  @moduledoc """
  Lazily retrieves all pages of a paginated GitHub resource through
  `Stream.resource/3`.

  One page is fetched at a time; the stream emits each page's decoded
  items and follows the `rel="next"` URL from the `Link` response header
  until no next page remains.
  """

  @doc """
  Returns a lazy stream over all pages of the resource at `url`.

  The first page is fetched when the stream is first evaluated; each
  subsequent page is fetched on demand as the stream is consumed.
  """
  def lazy_get_all(url, headers, options) do
    Stream.resource(
      fn -> fetch_page(url, headers, options) end,
      &process_page(&1, headers, options),
      # Nothing to clean up: hackney responses are fully read per request.
      fn _acc -> nil end
    )
  end

  # Fetches a single page and returns `{items, next_page_url}`, where
  # `next_page_url` is `nil` on the last page (no "next" link).
  defp fetch_page(url, req_headers, options) do
    {:ok, _status, resp_headers, body} =
      :hackney.request(:get, url, req_headers, "", options)

    items = Poison.decode!(body)

    next_url =
      resp_headers
      |> List.keyfind("Link", 0, {nil, nil})
      |> elem(1)
      |> parse_links()
      |> Map.get("next")

    {items, next_url}
  end

  @doc """
  Parses a `Link` header value into a map of `rel` name => URL.

  Returns an empty map when the header was absent (`nil`).
  """
  def parse_links(nil), do: %{}

  def parse_links(links_string) do
    # The original had a stray no-op `links_string` expression here,
    # removed as dead code.
    links_string
    |> String.split(", ")
    |> Map.new(fn link ->
      [_, name] = Regex.run(~r{rel="([a-z]+)"}, link)
      [_, url] = Regex.run(~r{<([^>]+)>}, link)
      {name, url}
    end)
  end

  # End of stream: previous page emitted and no next page to fetch.
  defp process_page({nil, nil}, _headers, _options) do
    {:halt, nil}
  end

  # Previous page's items already emitted; fetch and process the next page.
  defp process_page({nil, next_page_url}, headers, options) do
    next_page_url
    |> fetch_page(headers, options)
    |> process_page(headers, options)
  end

  # Emit this page's items; stash the next page URL in the accumulator.
  defp process_page({items, next_page_url}, _headers, _options) do
    {items, {nil, next_page_url}}
  end
end

lib/code_corps/github/github.ex

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,22 @@ defmodule CodeCorps.GitHub do
124124
end
125125
end
126126

127+
# Streams all pages of `endpoint` lazily through the configured API module.
def lazy_get_all(endpoint, headers, options) do
  url = api_url_for(endpoint)
  request_headers = Headers.user_request(headers, options)
  request_options = add_default_options(options)

  api().lazy_get_all(url, request_headers, request_options)
end
134+
135+
# Fetches all pages of `endpoint` up front through the configured API module.
def eager_get_all(endpoint, headers, options) do
  url = api_url_for(endpoint)
  request_headers = Headers.user_request(headers, options)
  request_options = add_default_options(options)

  api().eager_get_all(url, request_headers, request_options)
end
142+
127143
@token_url "https://github.com/login/oauth/access_token"
128144

129145
@doc """

0 commit comments

Comments
 (0)