-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/google
Copy pathMore file actions
executable file
·63 lines (53 loc) · 2.14 KB
/google
File metadata and controls
executable file
·63 lines (53 loc) · 2.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/python
import requests
import sys
import getopt
def main():
    """Search Google for the command-line arguments and print each result.

    Every argument after the script name is treated as one search word.
    Each word is URL-encoded and the words are joined with ``+`` before
    being handed to ``google_search``. One ``Title/Link/Desc`` stanza is
    printed per result tuple.

    Exits with a usage message on stderr when no search words are given.
    """
    # Imported locally so this function carries its own dependency; the
    # helper lives in different modules on Python 2 and Python 3.
    try:
        from urllib import quote_plus  # Python 2
    except ImportError:
        from urllib.parse import quote_plus  # Python 3

    words = sys.argv[1:]
    if not words:
        # An empty query cannot produce results; tell the user how to call us.
        sys.exit('usage: %s SEARCH_TERM [SEARCH_TERM ...]' % sys.argv[0])

    # Encode each word on its own so characters such as '&', '#', '+' or '%'
    # cannot alter the query string; the '+' separators stand for spaces.
    term = "+".join(quote_plus(word) for word in words)

    for result in google_search(term):
        # Single parenthesised argument: prints identically on Python 2 and 3.
        print('Title: %s\nLink: %s\nDesc: %s\n' % result)
def google_search(term):
    """Fetch the Google results page for ``term`` and return the parsed hits.

    ``term`` is substituted into the query string exactly as given, so it
    must already be in URL form (for example, words joined with ``+``).

    Returns the value produced by ``extract_results`` for the fetched page.
    """
    search_url = "http://www.google.com/search?hl=en&safe=off&q=%s" % term
    return extract_results(get_webpage(search_url))
def get_webpage(url, timeout=10):
    """Download ``url`` and return the raw response body.

    The request is sent with a desktop Firefox User-Agent header.

    ``timeout`` is the number of seconds passed to ``requests.get`` as its
    timeout; without one a stalled connection would block the script
    indefinitely. Pass ``None`` to wait without limit.

    Returns ``response.content`` (the undecoded body).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7',
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.content
def extract_results(results_page):
    """Parse a results page into a list of ``(title, link, description)`` tuples.

    Each result is taken to be the markup between ``<li class="g">`` and the
    next ``</div></li>``. Within that snippet:

    * link  - text after ``<a href="/url?q=`` up to the next ``&``
    * title - text of that anchor, between its ``>`` and ``</a>``
    * desc  - text between ``<span class="st">`` and ``</span>``

    ``<b>``/``</b>`` in the title and description are converted to ANSI
    bold/reset escape sequences for terminal display.

    Returns an empty list when the page contains no result markers.
    """
    start_tag = '<li class="g">'
    end_tag = '</div></li>'
    anchor_tag = '<a href="/url?q='

    results = []
    for start in get_index_list(start_tag, results_page, []):
        # Search in place from `start` rather than slicing a copy of the
        # remaining page for every result.
        end = results_page.find(end_tag, start)
        if end == -1:
            # The closing marker is missing (e.g. a truncated page): keep the
            # rest of the page instead of slicing with -1, which would yield
            # an empty or garbled snippet.
            end = len(results_page)
        snippet = results_page[start:end]

        link = pull_from_snippet(snippet, anchor_tag, '&')
        anchor_rest = pull_from_snippet(snippet, anchor_tag)
        title = pull_from_snippet(anchor_rest, '>', '</a>')
        desc = pull_from_snippet(snippet, '<span class="st">', '</span>')

        results.append((_bold_to_ansi(title), link, _bold_to_ansi(desc)))
    return results


def _bold_to_ansi(text):
    """Replace HTML ``<b>``/``</b>`` with ANSI bold-on / reset escape codes."""
    return text.replace('<b>', '\033[1m').replace('</b>', '\033[0m')
def pull_from_snippet(snippet, start_tag, end_tag=None):
    """Return the part of ``snippet`` that follows ``start_tag``.

    The returned text begins immediately after the first occurrence of
    ``start_tag`` and stops just before the first occurrence of ``end_tag``
    found *after* that point. When ``end_tag`` is omitted, empty, or does
    not occur after ``start_tag``, the text runs to the end of ``snippet``.

    If ``start_tag`` does not occur at all, ``snippet`` is returned unchanged.
    """
    tag_position = snippet.find(start_tag)
    if tag_position == -1:
        return snippet

    start_index = tag_position + len(start_tag)

    # Look for the end tag only after the start tag; an earlier occurrence
    # must not count, otherwise the slice end would land before its start.
    end_index = snippet.find(end_tag, start_index) if end_tag else -1
    if end_index == -1:
        # Slice ends are exclusive, so len(snippet) keeps the final character.
        end_index = len(snippet)

    return snippet[start_index:end_index]
def get_index_list(tag, content, indices):
    """Append the position of every non-overlapping ``tag`` in ``content``.

    ``indices`` is extended in place and also returned. Callers normally
    pass an empty list, in which case the appended values are offsets into
    ``content``. If ``indices`` already holds entries, ``content`` is taken
    to be the text that follows the last recorded match, so new positions
    are shifted by ``indices[-1] + len(tag)``.

    Raises ``ValueError`` for an empty ``tag``, which would otherwise match
    at every position without ever advancing.
    """
    if not tag:
        raise ValueError("tag must be a non-empty string")

    tag_length = len(tag)
    offset = indices[-1] + tag_length if indices else 0

    # Iterate with str.find's start argument instead of recursing on slices:
    # no recursion-depth limit and no copy of the remaining text per match.
    position = content.find(tag)
    while position != -1:
        indices.append(position + offset)
        position = content.find(tag, position + tag_length)
    return indices
# Run the search only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()