-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
98 lines (85 loc) · 3.83 KB
/
main.py
File metadata and controls
98 lines (85 loc) · 3.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import argparse
import glob
import json
import os
import sys
from getpass import getpass
import requests
from analyzer.parser import index_python_file, load_meta_from_pyproject, load_setup_cfg, load_requirements, \
extract_group_artifact_version
def parse_cli_meta(meta_kvs):
    """Convert repeated ``--meta key=value`` arguments into a dict.

    Each entry is split on its first ``=`` only, so a value may itself
    contain ``=``. Whitespace around the key is removed; the value is kept
    exactly as given. Entries without ``=`` or with an empty key are
    reported on stderr and skipped. When a key occurs more than once, the
    last value wins.

    Args:
        meta_kvs: Iterable of ``"key=value"`` strings. ``None`` is treated
            as an empty iterable.

    Returns:
        dict mapping each key to its string value.
    """
    meta = {}
    for kv in meta_kvs or ():
        key, sep, value = kv.partition('=')
        key = key.strip()
        # An entry like "=value" would otherwise create an empty field name.
        if not sep or not key:
            print(f"Malformed --meta: {kv}", file=sys.stderr)
            continue
        meta[key] = value
    return meta
def _commit_url(solr_url):
    """Return *solr_url* with a ``commit=true`` query parameter appended."""
    # Use '&' when the URL already carries a query string so the result
    # stays a well-formed URL instead of containing two '?' characters.
    separator = '&' if '?' in solr_url else '?'
    return f"{solr_url}{separator}commit=true"


def send_batch_to_solr_auth(solr_url, docs, auth=None, timeout=60):
    """POST one batch of documents to Solr as JSON, then request a commit.

    This is best-effort: problems are reported on stderr and never raised,
    so one failing batch does not abort the whole indexing run.

    Args:
        solr_url: Solr update URL the documents are posted to.
        docs: JSON-serialisable collection of documents.
        auth: Optional ``(user, password)`` tuple for HTTP basic auth.
        timeout: Seconds passed to ``requests`` for each HTTP call, so an
            unresponsive server cannot hang the run indefinitely.

    Returns:
        None.
    """
    headers = {"Content-type": "application/json"}
    try:
        resp = requests.post(
            solr_url, data=json.dumps(docs), headers=headers, auth=auth, timeout=timeout
        )
    except (requests.RequestException, TypeError, ValueError) as e:
        # TypeError/ValueError cover documents that cannot be serialised.
        print(f"Failed to send batch to Solr: {e}", file=sys.stderr)
        return
    if not resp.ok:
        print(f"Failed to send batch to Solr: {resp.text}", file=sys.stderr)
    else:
        print(f"Batch sent to Solr, response: {resp.text}")

    # The commit is attempted whenever the update request completed, even
    # if Solr answered with an error status. Failures are reported rather
    # than silently ignored.
    try:
        commit_resp = requests.post(_commit_url(solr_url), auth=auth, timeout=timeout)
    except requests.RequestException as e:
        print(f"Failed to commit to Solr: {e}", file=sys.stderr)
        return
    if not commit_resp.ok:
        print(f"Failed to commit to Solr: {commit_resp.text}", file=sys.stderr)


def batch_index_python_files_auth(files, solr_url, meta_fields, group_id, artifact_id, version,
                                  project_root, batch_size=50, auth=None):
    """Index Python files and send the resulting documents to Solr in batches.

    Documents from ``index_python_file`` are accumulated file by file. A
    batch is sent once it holds at least ``batch_size`` documents, so a
    batch can exceed ``batch_size`` when a single file yields many
    documents. Whatever remains after the last file is sent as a final batch.

    Args:
        files: Iterable of Python file paths to index.
        solr_url: Solr update URL.
        meta_fields: Metadata passed through to ``index_python_file``.
        group_id: Passed through to ``index_python_file``.
        artifact_id: Passed through to ``index_python_file``.
        version: Passed through to ``index_python_file``.
        project_root: Passed through to ``index_python_file``.
        batch_size: Minimum number of documents that triggers a send.
        auth: Optional ``(user, password)`` tuple for HTTP basic auth.

    Returns:
        None.
    """
    batch = []
    for f in files:
        batch.extend(index_python_file(f, meta_fields, group_id, artifact_id, version, project_root))
        # "batch and" prevents posting empty batches when batch_size <= 0.
        if batch and len(batch) >= batch_size:
            send_batch_to_solr_auth(solr_url, batch, auth=auth)
            batch = []
    if batch:
        send_batch_to_solr_auth(solr_url, batch, auth=auth)


def main(argv=None):
    """Parse the command line and index a project's Python files into Solr.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description="Python codebase → Solr batch indexer")
    parser.add_argument("--solrurl", required=True,
                        help="Solr update URL, e.g. http://localhost:8983/solr/mycore/update/json/docs")
    parser.add_argument("--meta", action='append', default=[],
                        help="Meta field key=value to add (repeatable)")
    parser.add_argument("--solruser", default=None, help="Solr username for basic auth")
    parser.add_argument("--solrpass", default=None,
                        help="Solr password for basic auth (omit to get prompted if user set)")
    parser.add_argument("--projectroot", default=".", help="Path to code project root (default: .)")
    parser.add_argument("--batchsize", type=int, default=50,
                        help="Batch size for Solr updates (default: 50)")
    args = parser.parse_args(argv)

    pyproject_path = os.path.join(args.projectroot, "pyproject.toml")
    setup_cfg_path = os.path.join(args.projectroot, "setup.cfg")

    # Metadata from pyproject.toml wins; setup.cfg is consulted only when
    # the former yields nothing, and an empty dict is the final fallback.
    meta_fields = (
        load_meta_from_pyproject(pyproject_path)
        or load_setup_cfg(setup_cfg_path)
        or {}
    )

    # Fall back to requirements.txt when no dependencies were found.
    if not meta_fields.get("meta_dependency_ss"):
        deps = load_requirements(os.path.join(args.projectroot, "requirements.txt"))
        if deps:
            meta_fields["meta_dependency_ss"] = deps

    # Values given on the command line override anything read from files.
    meta_fields.update(parse_cli_meta(args.meta))

    group_id, artifact_id, version = extract_group_artifact_version(pyproject_path, setup_cfg_path)

    # Basic auth is used only when a user name is given. The password is
    # prompted for only when --solrpass was omitted, so an explicit empty
    # password is passed through as is.
    solr_auth = None
    if args.solruser:
        password = args.solrpass if args.solrpass is not None else getpass("Solr password: ")
        solr_auth = (args.solruser, password)

    pyfiles = glob.glob(os.path.join(args.projectroot, "**/*.py"), recursive=True)

    batch_index_python_files_auth(
        pyfiles, args.solrurl, meta_fields, group_id, artifact_id, version,
        args.projectroot, args.batchsize, solr_auth
    )


if __name__ == "__main__":
    main()