Skip to content

Commit 3768d2e

Browse files
committed
Add cocoapods support to package.py #116
Reference: #116 Signed-off-by: John M. Horan <johnmhoran@gmail.com>
1 parent ff0bad3 commit 3768d2e

File tree

5 files changed

+432
-37
lines changed

5 files changed

+432
-37
lines changed

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ text-unidecode==1.3
7373
toml==0.10.2
7474
typecode==30.0.0
7575
typecode-libmagic==5.39.210531
76+
univers==30.11.0
7677
urllib3==1.26.9
7778
urlpy==0.5
7879
wcwidth==0.2.5

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ install_requires =
5959
requests
6060
python-dateutil
6161
python-dotenv
62+
univers == 30.11.0
6263

6364

6465
[options.packages.find]
@@ -80,4 +81,3 @@ docs =
8081
sphinx-rtd-theme>=1.0.0
8182
sphinx-reredirects >= 0.1.2
8283
doc8>=0.11.2
83-

src/fetchcode/package.py

Lines changed: 298 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,20 @@
1515
# specific language governing permissions and limitations under the License.
1616

1717
import dataclasses
18+
import json
19+
import logging
20+
import os
1821
import re
1922
import time
2023
from typing import List
2124
from urllib.parse import urljoin
2225

2326
import htmllistparse
27+
from bs4 import BeautifulSoup
2428
from packageurl import PackageURL
2529
from packageurl.contrib.route import NoRouteAvailable
2630
from packageurl.contrib.route import Router
31+
from univers import versions
2732

2833
from fetchcode.package_util import GITHUB_SOURCE_BY_PACKAGE
2934
from fetchcode.package_util import IPKG_RELEASES
@@ -33,10 +38,19 @@
3338
from fetchcode.package_util import MiniupnpPackagesGitHubSource
3439
from fetchcode.package_util import OpenSSLGitHubSource
3540
from fetchcode.packagedcode_models import Package
41+
from fetchcode.utils import get_complete_response
42+
from fetchcode.utils import get_github_rest
43+
from fetchcode.utils import get_github_rest_no_exception
44+
from fetchcode.utils import get_hashed_path
45+
from fetchcode.utils import get_json_response
3646
from fetchcode.utils import get_response
47+
from fetchcode.utils import make_head_request
3748

3849
router = Router()
3950

51+
# Path of the log file used by the cocoapods handler below: ``purlcli.log``
# in the current user's home directory.
LOG_FILE_LOCATION = os.path.join(os.path.expanduser("~"), "purlcli.log")
52+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
53+
4054

4155
def info(url):
4256
"""
@@ -362,6 +376,290 @@ def get_gnu_data_from_purl(purl):
362376
)
363377

364378

379+
@router.route("pkg:cocoapods/.*")
def get_cocoapods_data_from_purl(purl):
    """
    Generate `Package` object(s) for the cocoapods package URL string `purl`.

    The pod page on cocoapods.org is scraped for its sidebar links (homepage,
    GitHub repo, podspec) and for the canonical pod name found in the
    ``og:title`` meta tag. The list of published versions is then read from
    the cdn.cocoapods.org "all_pods_versions" index and one package is yielded
    per version, or only for ``purl.version`` when the purl has a version.

    Nothing is yielded when the cocoapods.org page returns 404 or a 302
    redirect, when the page cannot be fetched, when the canonical pod name
    cannot be determined, or when no version list is found for the pod.
    Diagnostics are printed and also logged to LOG_FILE_LOCATION.
    """
    # NOTE(review): this configures the root logger from inside a library
    # function and truncates the log file (filemode="w"); kept unchanged
    # because callers may rely on the log file being produced.
    logging.basicConfig(
        filename=LOG_FILE_LOCATION,
        level=logging.WARN,
        format="%(levelname)s - %(message)s",
        filemode="w",
    )
    input_purl = purl
    purl = PackageURL.from_string(purl)
    name = purl.name
    cocoapods_org_url = f"https://cocoapods.org/pods/{name}"
    repository_homepage_url = cocoapods_org_url

    # Diagnostic record of everything learned about the pod. It is printed
    # below and passed on to construct_cocoapods_package(), which reads
    # "cocoapods_org_pod_name" and stores "http_url". Keys are inserted in a
    # fixed order because the summary is printed with sort_keys=False.
    pod_summary = {
        "input_purl": input_purl,
        "input_name": name,
        "cocoapods_org_url": cocoapods_org_url,
        "repository_homepage_url": repository_homepage_url,
        "no_github_repo": None,
        "gh_repo_four_o_four": None,
        "http_url": None,
    }

    head_response = make_head_request(cocoapods_org_url)
    status_code = head_response.status_code
    pod_summary["cocoapods_org_url_status_code"] = status_code
    if status_code == 404:
        logger.error(f"cocoapods_org_url not found for {name}")
        return
    if status_code == 302:
        # .get() so that a redirect without a Location header is reported
        # instead of raising KeyError.
        redirect_url = head_response.headers.get("Location")
        redirect_message = f"The cocoapods.org URL {cocoapods_org_url} redirects to {redirect_url}"
        logger.error(redirect_message)
        print(redirect_message)
        return

    cocoapods_org_response = get_complete_response(cocoapods_org_url)
    # NOTE(review): presumably get_complete_response() returns an error
    # string containing "Failed to fetch" on failure -- confirm in utils.
    if "Failed to fetch" in cocoapods_org_response:
        logger.error(cocoapods_org_response)
        print(cocoapods_org_response)
        return

    soup = BeautifulSoup(cocoapods_org_response.text, "html.parser")

    cocoapods_org_gh_repo_owner = None
    cocoapods_org_gh_repo_name = None
    cocoapods_org_gh_repo_url = None
    cocoapods_org_podspec_url = None
    cocoapods_org_pkg_home_url = None

    for sidebar_links in soup.find_all("ul", class_="links"):
        for nested_link in sidebar_links.find_all("a"):
            link_text = nested_link.text
            link_url = nested_link.get("href")
            if not link_url:
                # An anchor without an href carries no usable information.
                continue
            if link_text == "Homepage":
                cocoapods_org_pkg_home_url = link_url
            elif link_text == "GitHub Repo":
                split_link = link_url.split("/")
                # The owner and repo are the last two path segments.
                if len(split_link) >= 2:
                    cocoapods_org_gh_repo_owner = split_link[-2]
                    cocoapods_org_gh_repo_name = split_link[-1]
            elif link_text == "See Podspec":
                cocoapods_org_podspec_url = link_url

    if cocoapods_org_gh_repo_owner and cocoapods_org_gh_repo_name:
        cocoapods_org_gh_repo_url = (
            f"https://github.com/{cocoapods_org_gh_repo_owner}/{cocoapods_org_gh_repo_name}"
        )
        gh_repo_status_code = make_head_request(cocoapods_org_gh_repo_url).status_code
        pod_summary["cocoapods_org_gh_repo_url_status_code"] = gh_repo_status_code
        if gh_repo_status_code == 404:
            gh_repo_four_o_four = f"The cocoapods.org GitHub repo url for {name} returns 404"
            logger.error(gh_repo_four_o_four)
            print(gh_repo_four_o_four)
            pod_summary["gh_repo_four_o_four"] = gh_repo_four_o_four

        # From here on the GitHub repo name is used as the reported name.
        name = cocoapods_org_gh_repo_name
        api_url = (
            "https://api.github.com/repos"
            f"/{cocoapods_org_gh_repo_owner}/{cocoapods_org_gh_repo_name}"
        )
        # Probe the GitHub API once up front only to report a failure early;
        # the per-version data is fetched in construct_cocoapods_package().
        response = get_github_rest_no_exception(api_url)
        if "Failed to fetch" in response:
            logger.error(f"{response}")
            print(f"{response}")

    pod_summary["cocoapods_org_gh_repo_owner"] = cocoapods_org_gh_repo_owner
    pod_summary["cocoapods_org_gh_repo_name"] = cocoapods_org_gh_repo_name
    pod_summary["cocoapods_org_gh_repo_url"] = cocoapods_org_gh_repo_url
    pod_summary["cocoapods_org_podspec_url"] = cocoapods_org_podspec_url
    pod_summary["cocoapods_org_pkg_home_url"] = cocoapods_org_pkg_home_url

    if cocoapods_org_gh_repo_owner is None or cocoapods_org_gh_repo_name is None:
        no_github_repo = f"No GitHub repo found on cocoapods.org for {name}"
        print(f"{no_github_repo}")
        logger.warning(no_github_repo)
        pod_summary["no_github_repo"] = no_github_repo

    if cocoapods_org_podspec_url is None:
        no_podspec = f"No podspec found on cocoapods.org for {name}"
        print(f"{no_podspec}")
        logger.warning(no_podspec)
        pod_summary["no_podspec"] = no_podspec

    # The canonical pod name (with the official letter case) is taken from
    # the page's og:title meta tag.
    cocoapods_org_pod_name = None
    head = soup.find("head")
    if head:
        og_title_tag = head.find("meta", property="og:title")
        if og_title_tag:
            cocoapods_org_pod_name = og_title_tag.get("content")
        else:
            no_meta_tag = f"'og:title' meta tag not found in cocoapods.org page for {purl}"
            print(no_meta_tag)
            logger.error(no_meta_tag)
    else:
        no_head_section = f"\n<head> section not found in cocoapods.org page for {purl}"
        print(no_head_section)
        logger.error(no_head_section)

    pod_summary["cocoapods_org_pod_name"] = cocoapods_org_pod_name

    if not cocoapods_org_pod_name:
        # Without the canonical name neither the hashed CDN path nor the
        # version index entry can be looked up; the cause was reported above.
        return

    input_name = pod_summary["input_name"]
    if input_name != cocoapods_org_pod_name:
        name_change = (
            f"Input PURL name '{input_name}' analyzed as "
            f"'{cocoapods_org_pod_name}' per {cocoapods_org_url}"
        )
        print(f"{name_change}")
        logger.warning(name_change)

    api = "https://cdn.cocoapods.org"
    hashed_path = get_hashed_path(cocoapods_org_pod_name)
    hashed_path_underscore = hashed_path.replace("/", "_")
    file_prefix = "all_pods_versions_"
    spec = f"{api}/{file_prefix}{hashed_path_underscore}.txt"
    data_list = get_cocoapod_tags(spec, cocoapods_org_pod_name)

    print(f"\npod_summary = {json.dumps(pod_summary, indent=4, sort_keys=False)}\n")

    if not data_list:
        # get_cocoapod_tags() returns None when the index cannot be fetched
        # or has no entry for this pod; iterating over None would raise.
        no_versions = f"No versions found for {cocoapods_org_pod_name} in {spec}"
        print(no_versions)
        logger.error(no_versions)
        return

    for tag in data_list:
        if purl.version and tag != purl.version:
            continue

        yield construct_cocoapods_package(
            purl,
            name,
            hashed_path,
            repository_homepage_url,
            cocoapods_org_gh_repo_owner,
            cocoapods_org_gh_repo_name,
            tag,
            pod_summary,
        )

        if purl.version:
            # Only the requested version is wanted; stop after yielding it.
            break
536+
537+
538+
def get_cocoapod_tags(spec, cocoapods_org_pod_name):
    """
    Return the versions of the pod named `cocoapods_org_pod_name` listed in
    the text index at the `spec` URL, sorted from newest to oldest, or None.

    Each index line has the form ``<pod name>/<version>/<version>/...``. Only
    the line whose first segment is exactly `cocoapods_org_pod_name` is used,
    so a pod whose name merely starts with the requested name (for instance
    ``Foo+Bar`` when looking for ``Foo``) is never picked by mistake.

    None is returned when the index cannot be fetched, when it has no line
    for the pod, or when the versions cannot be sorted. Failures are printed
    and logged with their traceback rather than silently discarded.
    """
    try:
        response = get_complete_response(spec)
        response.raise_for_status()
        for line in response.text.splitlines():
            pod_name, *pod_versions = line.strip().split("/")
            if pod_name != cocoapods_org_pod_name:
                continue
            return sorted(
                pod_versions,
                key=versions.SemverVersion,
                reverse=True,
            )
    except Exception:
        # Exception rather than a bare except so that KeyboardInterrupt and
        # SystemExit still propagate.
        logger.exception(f"Error retrieving data from {spec}")
        print("Error retrieving data from API")
    return None
559+
560+
561+
def construct_cocoapods_package(
    purl,
    name,
    hashed_path,
    repository_homepage_url,
    cocoapods_org_gh_repo_owner,
    cocoapods_org_gh_repo_name,
    tag,
    pod_summary,
):
    """
    Return a `Package` for version `tag` of the pod described by `purl`, or
    None when the podspec for that version cannot be fetched.

    Arguments:
    - `purl`: parsed PackageURL; its fields seed the returned Package.
    - `name`: pod name; kept for interface compatibility. The URLs built
      here use the GitHub repo name and the corrected pod name instead.
    - `hashed_path`: path fragment locating the pod in the CocoaPods/Specs
      repository.
    - `repository_homepage_url`: the cocoapods.org page of the pod.
    - `cocoapods_org_gh_repo_owner`, `cocoapods_org_gh_repo_name`: GitHub
      owner and repo found on cocoapods.org, or None when there is none.
    - `tag`: the pod version to build the package for.
    - `pod_summary`: mapping that must hold "cocoapods_org_pod_name"; its
      "http_url" entry is set when the podspec source has an "http" URL.

    The license and primary language come from the GitHub API when a repo is
    known. The homepage, VCS URL and download URL come from the podspec. The
    podspec license is used as the declared license only when GitHub
    reported none.
    """
    bug_tracking_url = None
    code_view_url = None
    declared_license = None
    primary_language = None

    if cocoapods_org_gh_repo_owner and cocoapods_org_gh_repo_name:
        namespace = cocoapods_org_gh_repo_owner
        repo_name = cocoapods_org_gh_repo_name
        github_api_url = f"https://api.github.com/repos/{namespace}/{repo_name}"

        github_data = get_github_rest_no_exception(github_api_url)

        # NOTE(review): presumably the helper returns an error string
        # containing "Failed to fetch" on failure and a mapping otherwise.
        if "Failed to fetch" not in github_data:
            license_data = github_data.get("license") or {}
            declared_license = license_data.get("spdx_id")
            primary_language = github_data.get("language")

        github_url = "https://github.com"
        bug_tracking_url = f"{github_url}/{namespace}/{repo_name}/issues"
        code_view_url = f"{github_url}/{namespace}/{repo_name}"

    corrected_name = pod_summary["cocoapods_org_pod_name"]
    api_url = f"https://raw.githubusercontent.com/CocoaPods/Specs/master/Specs/{hashed_path}/{corrected_name}/{tag}/{corrected_name}.podspec.json"

    response = get_json_response(api_url)
    if "Failed to fetch" in response:
        logger.error(f"{response}")
        print(f"{response}")
        return

    # The podspec is authoritative for the homepage and the source URLs.
    homepage_url = response.get("homepage")

    # The podspec license may be a plain string or a mapping; it is passed
    # through unchanged and only used when GitHub reported no license.
    extracted_license_statement = response.get("license")
    if not declared_license:
        declared_license = extracted_license_statement

    source = response.get("source")
    vcs_url = None
    download_url = None
    if isinstance(source, dict):
        git_url = source.get("git", "")
        http_url = source.get("http", "")
        if http_url:
            download_url = http_url
            pod_summary["http_url"] = http_url
        if git_url:
            vcs_url = git_url
            if not http_url and git_url.endswith(".git"):
                gh_path = git_url[:-4]
                # Podspecs pinned to a commit or branch have no "tag" entry,
                # so it is checked before use. A "v"-prefixed source tag
                # replaces the bare version in the archive URL.
                source_tag = source.get("tag")
                corrected_tag = tag
                if isinstance(source_tag, str) and source_tag.startswith("v"):
                    corrected_tag = source_tag
                download_url = f"{gh_path}/archive/refs/tags/{corrected_tag}.tar.gz"
    elif isinstance(source, str):
        vcs_url = source

    purl_pkg = Package(
        homepage_url=homepage_url,
        api_url=api_url,
        bug_tracking_url=bug_tracking_url,
        code_view_url=code_view_url,
        download_url=download_url,
        declared_license=declared_license,
        primary_language=primary_language,
        repository_homepage_url=repository_homepage_url,
        vcs_url=vcs_url,
        **purl.to_dict(),
    )
    # The purl may carry no version; always report the version being built.
    purl_pkg.version = tag

    return purl_pkg
661+
662+
365663
@dataclasses.dataclass
366664
class DirectoryListedSource:
367665
source_url: str = dataclasses.field(

0 commit comments

Comments
 (0)