|
15 | 15 | # specific language governing permissions and limitations under the License. |
16 | 16 |
|
17 | 17 | import dataclasses |
| 18 | +import json |
| 19 | +import logging |
| 20 | +import os |
18 | 21 | import re |
19 | 22 | import time |
20 | 23 | from typing import List |
21 | 24 | from urllib.parse import urljoin |
22 | 25 |
|
23 | 26 | import htmllistparse |
| 27 | +from bs4 import BeautifulSoup |
24 | 28 | from packageurl import PackageURL |
25 | 29 | from packageurl.contrib.route import NoRouteAvailable |
26 | 30 | from packageurl.contrib.route import Router |
| 31 | +from univers import versions |
27 | 32 |
|
28 | 33 | from fetchcode.package_util import GITHUB_SOURCE_BY_PACKAGE |
29 | 34 | from fetchcode.package_util import IPKG_RELEASES |
|
33 | 38 | from fetchcode.package_util import MiniupnpPackagesGitHubSource |
34 | 39 | from fetchcode.package_util import OpenSSLGitHubSource |
35 | 40 | from fetchcode.packagedcode_models import Package |
| 41 | +from fetchcode.utils import get_complete_response |
| 42 | +from fetchcode.utils import get_github_rest |
| 43 | +from fetchcode.utils import get_github_rest_no_exception |
| 44 | +from fetchcode.utils import get_hashed_path |
| 45 | +from fetchcode.utils import get_json_response |
36 | 46 | from fetchcode.utils import get_response |
| 47 | +from fetchcode.utils import make_head_request |
37 | 48 |
|
# Router used to register the PURL handlers in this module via @router.route.
router = Router()

# Path of the log file, located in the user's home directory; it is passed to
# logging.basicConfig by the cocoapods handler in this module.
LOG_FILE_LOCATION = os.path.join(os.path.expanduser("~"), "purlcli.log")
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
| 53 | + |
40 | 54 |
|
41 | 55 | def info(url): |
42 | 56 | """ |
@@ -362,6 +376,290 @@ def get_gnu_data_from_purl(purl): |
362 | 376 | ) |
363 | 377 |
|
364 | 378 |
|
@router.route("pkg:cocoapods/.*")
def get_cocoapods_data_from_purl(purl):
    """
    Generate `Package` objects for the CocoaPods PURL string ``purl``.

    The pod page on cocoapods.org is fetched to discover the canonical pod
    name (``og:title`` meta tag) and the sidebar links (homepage, GitHub repo,
    podspec). The list of published versions is then read from the
    cdn.cocoapods.org shard file and one package is built per version with
    `construct_cocoapods_package`. When the PURL carries a version, only that
    version is considered.

    Nothing is yielded when the cocoapods.org page returns 404 or a 302
    redirect, when the page cannot be fetched, when the pod name cannot be
    determined, or when no version list is available. Versions whose podspec
    cannot be fetched are skipped.
    """
    # NOTE(review): this configures the root logger from inside a library
    # function, which is a process-wide side effect; it is kept because the
    # log file is part of the current observable behavior.
    logging.basicConfig(
        filename=LOG_FILE_LOCATION,
        level=logging.WARN,
        format="%(levelname)s - %(message)s",
        filemode="w",
    )
    input_purl = purl
    purl = PackageURL.from_string(purl)
    name = purl.name
    cocoapods_org_url = f"https://cocoapods.org/pods/{name}"
    repository_homepage_url = cocoapods_org_url

    # Diagnostic summary of everything learned about the pod. It is printed
    # as JSON below and handed to `construct_cocoapods_package`, which reads
    # "cocoapods_org_pod_name" and records "http_url". Keys are inserted in a
    # fixed order because the JSON dump preserves insertion order.
    pod_summary = {
        "input_purl": input_purl,
        "input_name": name,
        "cocoapods_org_url": cocoapods_org_url,
        "repository_homepage_url": repository_homepage_url,
        "no_github_repo": None,
        "gh_repo_four_o_four": None,
        "http_url": None,
    }

    cocoapods_org_url_head_request = make_head_request(cocoapods_org_url)
    cocoapods_org_url_status_code = cocoapods_org_url_head_request.status_code
    pod_summary["cocoapods_org_url_status_code"] = cocoapods_org_url_status_code
    if cocoapods_org_url_status_code == 404:
        logger.error(f"cocoapods_org_url not found for {name}")
        return
    if cocoapods_org_url_status_code == 302:
        # .get() avoids a KeyError on a redirect without a Location header.
        redirect_url = cocoapods_org_url_head_request.headers.get("Location")
        redirect_message = f"The cocoapods.org URL {cocoapods_org_url} redirects to {redirect_url}"
        logger.error(redirect_message)
        print(redirect_message)
        return

    cocoapods_org_response = get_complete_response(cocoapods_org_url)
    # A failure is reported as a message string. The isinstance guard avoids
    # running `in` against a response object, which would iterate its body.
    if isinstance(cocoapods_org_response, str) and "Failed to fetch" in cocoapods_org_response:
        logger.error(cocoapods_org_response)
        print(cocoapods_org_response)
        return

    soup = BeautifulSoup(cocoapods_org_response.text, "html.parser")

    cocoapods_org_gh_repo_owner = None
    cocoapods_org_gh_repo_name = None
    cocoapods_org_gh_repo_url = None
    cocoapods_org_podspec_url = None
    cocoapods_org_pkg_home_url = None

    for sidebar_links in soup.find_all("ul", class_="links"):
        for nested_link in sidebar_links.find_all("a"):
            link_text = nested_link.text
            link_url = nested_link.get("href")
            if not link_url:
                # Anchor without a target: nothing to record.
                continue
            if link_text == "Homepage":
                cocoapods_org_pkg_home_url = link_url
            elif link_text == "GitHub Repo":
                # Drop a trailing slash so the last segment is the repo name.
                split_link = link_url.rstrip("/").split("/")
                if len(split_link) >= 2:
                    cocoapods_org_gh_repo_owner = split_link[-2]
                    cocoapods_org_gh_repo_name = split_link[-1]
            elif link_text == "See Podspec":
                cocoapods_org_podspec_url = link_url

    if cocoapods_org_gh_repo_owner and cocoapods_org_gh_repo_name:
        cocoapods_org_gh_repo_url = f"https://github.com/{cocoapods_org_gh_repo_owner}/{cocoapods_org_gh_repo_name}"
        cocoapods_org_gh_repo_url_head_request = make_head_request(cocoapods_org_gh_repo_url)
        cocoapods_org_gh_repo_url_status_code = cocoapods_org_gh_repo_url_head_request.status_code
        pod_summary["cocoapods_org_gh_repo_url_status_code"] = cocoapods_org_gh_repo_url_status_code
        if cocoapods_org_gh_repo_url_status_code == 404:
            gh_repo_four_o_four = f"The cocoapods.org GitHub repo url for {name} returns 404"
            logger.error(gh_repo_four_o_four)
            print(gh_repo_four_o_four)
            pod_summary["gh_repo_four_o_four"] = gh_repo_four_o_four

        # From here on `name` is the GitHub repository name; the messages
        # below and the `name` argument passed on use this value.
        name = cocoapods_org_gh_repo_name
        base_path = "https://api.github.com/repos"
        api_url = f"{base_path}/{cocoapods_org_gh_repo_owner}/{cocoapods_org_gh_repo_name}"
        response = get_github_rest_no_exception(api_url)

        # Only the failure message is of interest here; the repository data
        # itself is fetched again per version when packages are built.
        if isinstance(response, str) and "Failed to fetch" in response:
            logger.error(f"{response}")
            print(f"{response}")

    pod_summary["cocoapods_org_gh_repo_owner"] = cocoapods_org_gh_repo_owner
    pod_summary["cocoapods_org_gh_repo_name"] = cocoapods_org_gh_repo_name
    pod_summary["cocoapods_org_gh_repo_url"] = cocoapods_org_gh_repo_url
    pod_summary["cocoapods_org_podspec_url"] = cocoapods_org_podspec_url
    pod_summary["cocoapods_org_pkg_home_url"] = cocoapods_org_pkg_home_url

    if cocoapods_org_gh_repo_owner is None or cocoapods_org_gh_repo_name is None:
        no_github_repo = f"No GitHub repo found on cocoapods.org for {name}"
        print(f"{no_github_repo}")
        logger.warning(no_github_repo)
        pod_summary["no_github_repo"] = no_github_repo

    if cocoapods_org_podspec_url is None:
        no_podspec = f"No podspec found on cocoapods.org for {name}"
        print(f"{no_podspec}")
        logger.warning(no_podspec)
        pod_summary["no_podspec"] = no_podspec

    # The canonical pod name is taken from the page's og:title meta tag.
    cocoapods_org_pod_name = None
    head = soup.find("head")
    if head:
        og_title_tag = head.find("meta", property="og:title")
        if og_title_tag:
            cocoapods_org_pod_name = og_title_tag.get("content")
        else:
            no_meta_tag = f"'og:title' meta tag not found in cocoapods.org page for {purl}"
            print(no_meta_tag)
            logger.error(no_meta_tag)
    else:
        no_head_section = f"\n<head> section not found in cocoapods.org page for {purl}"
        print(no_head_section)
        logger.error(no_head_section)

    pod_summary["cocoapods_org_pod_name"] = cocoapods_org_pod_name

    if not cocoapods_org_pod_name:
        # The CDN shard path is derived from the pod name, so nothing more
        # can be looked up without it.
        logger.error(f"Pod name could not be determined from {cocoapods_org_url}")
        return

    input_name = pod_summary["input_name"]
    if input_name != cocoapods_org_pod_name:
        name_change = (f"Input PURL name '{input_name}' analyzed as '{cocoapods_org_pod_name}' per {cocoapods_org_url}")
        print(f"{name_change}")
        logger.warning(name_change)

    api = "https://cdn.cocoapods.org"
    hashed_path = get_hashed_path(cocoapods_org_pod_name)
    hashed_path_underscore = hashed_path.replace("/", "_")
    file_prefix = "all_pods_versions_"
    spec = f"{api}/{file_prefix}{hashed_path_underscore}.txt"
    # get_cocoapod_tags returns None when nothing could be retrieved.
    data_list = get_cocoapod_tags(spec, cocoapods_org_pod_name) or []

    print(f"\npod_summary = {json.dumps(pod_summary, indent=4, sort_keys=False)}\n")

    for tag in data_list:
        if purl.version and tag != purl.version:
            continue

        tag_pkg = construct_cocoapods_package(
            purl,
            name,
            hashed_path,
            repository_homepage_url,
            cocoapods_org_gh_repo_owner,
            cocoapods_org_gh_repo_name,
            tag,
            pod_summary,
        )
        # None means the podspec for this version could not be fetched; the
        # failure has already been logged, so do not hand None to consumers.
        if tag_pkg is not None:
            yield tag_pkg

        if purl.version:
            break
| 536 | + |
| 537 | + |
def get_cocoapod_tags(spec, cocoapods_org_pod_name):
    """
    Return the versions listed for the pod ``cocoapods_org_pod_name`` in the
    versions file at the ``spec`` URL, sorted from newest to oldest.

    Each line of the file has the form ``<pod name>/<version>/<version>/...``.
    Only the line whose first field is exactly the pod name is used, so a pod
    whose name merely starts with the requested name is not picked up.

    Return None when the pod is not listed, or when the file cannot be
    retrieved or its versions cannot be sorted as semver.
    """
    try:
        response = get_complete_response(spec)
        response.raise_for_status()
        for line in response.text.splitlines():
            pod_name, *pod_versions = line.strip().split("/")
            if pod_name != cocoapods_org_pod_name:
                continue
            return sorted(
                pod_versions,
                key=versions.SemverVersion,
                reverse=True,
            )
        return None
    except Exception:
        # Best-effort lookup: any failure (failed request, non-response
        # return value, unparsable version) yields None, but the cause is
        # recorded instead of being silently discarded.
        logger.exception(f"Error retrieving data from API: {spec}")
        print("Error retrieving data from API")
        return None
| 559 | + |
| 560 | + |
def construct_cocoapods_package(
    purl,
    name,
    hashed_path,
    repository_homepage_url,
    cocoapods_org_gh_repo_owner,
    cocoapods_org_gh_repo_name,
    tag,
    pod_summary,
):
    """
    Return a `Package` for version ``tag`` of the pod described by ``purl``,
    or None when the podspec for that version cannot be fetched.

    Data comes from two places:

    - the GitHub REST API (only when both ``cocoapods_org_gh_repo_owner`` and
      ``cocoapods_org_gh_repo_name`` are given): SPDX license id and primary
      language, plus the issues and code-view URLs built from the repo path;
    - the podspec JSON in the CocoaPods/Specs repository, located with
      ``hashed_path``, ``pod_summary["cocoapods_org_pod_name"]`` and ``tag``:
      homepage, license (used when GitHub gave none) and source.

    ``pod_summary["http_url"]`` is set as a side effect when the podspec
    source declares an ``http`` URL. ``name`` is accepted for interface
    compatibility and is not used to build the package.
    """
    bug_tracking_url = None
    code_view_url = None
    declared_license = None
    primary_language = None

    if cocoapods_org_gh_repo_owner and cocoapods_org_gh_repo_name:
        namespace = cocoapods_org_gh_repo_owner
        repo_name = cocoapods_org_gh_repo_name
        gh_api_url = f"https://api.github.com/repos/{namespace}/{repo_name}"

        gh_response = get_github_rest_no_exception(gh_api_url)

        # Anything other than a mapping is treated as a failed lookup (a
        # failure is reported as a "Failed to fetch" message string).
        if isinstance(gh_response, dict):
            license_data = gh_response.get("license") or {}
            declared_license = license_data.get("spdx_id")
            primary_language = gh_response.get("language")

        github_url = "https://github.com"
        bug_tracking_url = f"{github_url}/{namespace}/{repo_name}/issues"
        code_view_url = f"{github_url}/{namespace}/{repo_name}"

    corrected_name = pod_summary["cocoapods_org_pod_name"]
    api_url = f"https://raw.githubusercontent.com/CocoaPods/Specs/master/Specs/{hashed_path}/{corrected_name}/{tag}/{corrected_name}.podspec.json"

    response = get_json_response(api_url)
    if not isinstance(response, dict):
        # Failure message string (or any other unusable reply): report it
        # and give up on this version.
        logger.error(f"{response}")
        print(f"{response}")
        return None

    homepage_url = response.get("homepage")

    # The podspec license (a string or a mapping) is used as-is when GitHub
    # did not provide an SPDX id.
    if not declared_license:
        declared_license = response.get("license")

    vcs_url = None
    download_url = None
    source = response.get("source")
    if isinstance(source, dict):
        git_url = source.get("git", "")
        http_url = source.get("http", "")
        if git_url:
            vcs_url = git_url
        if http_url:
            download_url = http_url
            pod_summary["http_url"] = http_url
        elif git_url and git_url.endswith(".git"):
            gh_path = git_url[:-4]
            # The source may pin a branch or commit instead of a tag, in
            # which case "tag" is absent; prefer the podspec tag only when
            # it is a "v"-prefixed string.
            source_tag = source.get("tag")
            corrected_tag = tag
            if isinstance(source_tag, str) and source_tag.startswith("v"):
                corrected_tag = source_tag
            # NOTE(review): this archive path is the GitHub layout and is
            # applied to every ".git" URL, whatever the host.
            download_url = f"{gh_path}/archive/refs/tags/{corrected_tag}.tar.gz"
    elif isinstance(source, str):
        vcs_url = source

    purl_pkg = Package(
        homepage_url=homepage_url,
        api_url=api_url,
        bug_tracking_url=bug_tracking_url,
        code_view_url=code_view_url,
        download_url=download_url,
        declared_license=declared_license,
        primary_language=primary_language,
        repository_homepage_url=repository_homepage_url,
        vcs_url=vcs_url,
        **purl.to_dict(),
    )
    # The package always reports the version being described, even when the
    # input PURL had no version.
    purl_pkg.version = tag

    return purl_pkg
| 661 | + |
| 662 | + |
365 | 663 | @dataclasses.dataclass |
366 | 664 | class DirectoryListedSource: |
367 | 665 | source_url: str = dataclasses.field( |
|
0 commit comments