Skip to content

Commit c75b355

Browse files
Merge remote-tracking branch 'origin/ks-refactor-glossary-generator' into staging-aggregate-20260216-163655
2 parents 8546046 + a0f1e5b commit c75b355

8 files changed

Lines changed: 1314 additions & 472 deletions

File tree

.github/workflows/data-processing.yml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,31 @@ jobs:
174174
echo "📁 Moving network graph to static/partials..."
175175
mv content/contributor-analysis/network-graph.html static/partials/
176176
177+
#========================================
# Setup Node.js for bibliography processing
#========================================
- name: Setup Node.js
  uses: actions/setup-node@v4
  with:
    # Node 18 is end-of-life; 20 is a maintained release line that still
    # provides the global fetch() used by bibtex_to_apa/bibtex_to_apa.js.
    node-version: '20'
    cache: 'npm'
    # Cache key is derived from the lock file of the converter package.
    cache-dependency-path: bibtex_to_apa/package-lock.json
186+
#========================================
# Install Node.js dependencies for bibliography processing
#========================================
- name: Install Node.js dependencies
  working-directory: bibtex_to_apa
  # `npm ci` installs exactly what package-lock.json records (and fails if
  # the lock file is out of sync), so CI runs are reproducible and match
  # the lock file used as the npm cache key.
  run: npm ci
193+
194+
#========================================
# Process contributor data using Tenzing script
#========================================
- name: Run Tenzing script
  continue-on-error: true # Continue even if this step fails
  run: python3 scripts/forrt_contribs/tenzing.py
  env:
    GSHEET_CREDENTIALS: ${{ secrets.GSHEET_CREDENTIALS }}

# NOTE(review): the two shell lines below used to sit directly after the
# `env:` mapping above, where they are not part of any `run:` script. They
# are given their own step here so they execute as shell again — confirm
# this placement against the step they were originally written for.
- name: Clean up contributor-analysis HTML artifacts
  run: |
    # Clean up HTML artifacts from index.md if any
    sed -i.bak -e '/^```{=html}$/d' -e '/^```$/d' content/contributor-analysis/index.md && rm content/contributor-analysis/index.md.bak
179204

@@ -215,6 +240,15 @@ jobs:
215240
fi
216241
done
217242
243+
#========================================
# Build the APA reference lookup from the bibliography
#========================================
- name: Generate APA lookup
  continue-on-error: true # A failure here must not abort the job
  # Run from the converter's directory so its node_modules are resolved.
  working-directory: bibtex_to_apa
  run: node bibtex_to_apa.js -o '../content/glossary/apa_lookup.json'
251+
218252
#========================================
219253
# Process and generate glossary files
220254
#========================================
@@ -225,6 +259,17 @@ jobs:
225259
run: python3 content/glossary/_create_glossaries.py
226260
# Execute the glossary script that generates glossary markdown files
227261

262+
- name: Check for missing references
  if: always()
  run: |
    # Print the report file if it exists; otherwise report success.
    report="content/glossary/missing_references.txt"
    if [ ! -f "$report" ]; then
      echo "All references resolved successfully"
    else
      echo "Missing references found:"
      cat "$report"
      # Optionally fail the workflow or create an issue
    fi
272+
228273
#========================================
229274
# Download Google Analytics data and validate
230275
#========================================

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,3 +268,6 @@ gha-creds-*.json
268268

269269
# Tenzing failure reports (temporary files for CI)
270270
scripts/forrt_contribs/tenzing_failures.json
271+
272+
# Bibtex to APA converter dependencies (installed by npm)
273+
bibtex_to_apa/node_modules/

bibtex_to_apa/bibtex_to_apa.js

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
const { Cite } = require('@citation-js/core');
2+
require('@citation-js/plugin-bibtex');
3+
require('@citation-js/plugin-csl');
4+
const fs = require('fs');
5+
6+
// Default BibTeX source: a Google Doc URL. fetchBibtex extracts the document
// id from it and downloads the document's plain-text export instead.
const DEFAULT_INPUT = 'https://docs.google.com/document/d/1-KKsOYZWJ3LdgdO2b2uJsOG2AmUDaQBNqWVVTY2W4W8/edit?tab=t.0';
7+
// Default path of the generated JSON file; overridden by -o/--output.
const DEFAULT_OUTPUT = 'apa_lookup.json';
8+
9+
/**
 * Load raw BibTeX text from a local file, a Google Doc, or any other URL.
 *
 * - Anything that is not an http(s) URL is read from disk as UTF-8.
 * - docs.google.com URLs are rewritten to the document's plain-text export
 *   and the comment anchors that the export leaves in the text are removed.
 * - Any other http(s) URL is downloaded as-is.
 *
 * @param {string} input - File path or http(s) URL of the BibTeX source.
 * @returns {Promise<string>} The BibTeX text, without a leading BOM.
 * @throws {TypeError} If `input` is not a non-empty string.
 * @throws {Error} If a Google Doc URL has no document id or a download
 *   responds with a non-2xx status.
 */
async function fetchBibtex(input) {
  if (typeof input !== 'string' || input.trim() === '') {
    throw new TypeError('Input must be a non-empty file path or URL');
  }

  // A leading byte-order mark is not part of the BibTeX and would end up in
  // front of the first entry, so drop it regardless of where the text came from.
  const stripBom = (text) => text.replace(/^\uFEFF/, '');

  // Require a real scheme: a local file whose name merely starts with
  // "http" (e.g. "http_refs.bib") must still be read from disk.
  if (!/^https?:\/\//i.test(input)) {
    return stripBom(fs.readFileSync(input, 'utf-8'));
  }

  // Match on the host, not on the substring appearing anywhere in the URL.
  if (/^https?:\/\/docs\.google\.com\//i.test(input)) {
    const match = input.match(/\/d\/([a-zA-Z0-9_-]+)/);
    if (!match) throw new Error('Invalid Google Doc URL');
    const exportUrl = `https://docs.google.com/document/d/${match[1]}/export?format=txt`;
    const response = await fetch(exportUrl);
    if (!response.ok) throw new Error(`Failed to fetch: ${response.status}`);
    const text = stripBom(await response.text());
    // Remove Google Docs comment markers. They are assumed to be short
    // lowercase anchors such as [a] or [ab]; longer or capitalised bracketed
    // words like "[Preprint]" are bibliography content and must be kept.
    return text.replace(/\[[a-z]{1,2}\]/g, '');
  }

  const response = await fetch(input);
  if (!response.ok) throw new Error(`Failed to fetch: ${response.status}`);
  return stripBom(await response.text());
}
28+
29+
/**
 * Pick the URL to cite for a bibliography entry.
 *
 * The entry's `URL` field wins; otherwise the first http(s) URL found in a
 * textual `note` field is used, with trailing sentence punctuation and an
 * unmatched closing parenthesis removed.
 *
 * @param {{URL?: string, note?: unknown}} entry - Entry object with optional
 *   `URL` and `note` fields.
 * @returns {string|null} The URL, or null when the entry has none.
 */
function extractUrl(entry) {
  if (!entry) return null;
  if (entry.URL) return entry.URL;
  // A non-string note cannot be searched with a regex.
  if (typeof entry.note !== 'string') return null;

  const match = entry.note.match(/https?:\/\/[^\s<>"]+/);
  if (!match) return null;

  // "see https://osf.io/abc." — the final dot belongs to the sentence.
  const trailingPunctuation = /[.,;:!?'"]+$/;
  let url = match[0].replace(trailingPunctuation, '');

  // Drop a closing parenthesis only when it has no opening partner inside
  // the URL, so ".../Bias_(statistics)" stays intact but "(see URL)" is cleaned.
  let unmatchedClosers = url.split(')').length - url.split('(').length;
  while (unmatchedClosers > 0 && url.endsWith(')')) {
    url = url.slice(0, -1).replace(trailingPunctuation, '');
    unmatchedClosers -= 1;
  }

  return url;
}
37+
38+
/**
 * Convert BibTeX text into a lookup of citation key -> APA reference string.
 *
 * Each entry is formatted on its own with the `apa` bibliography template
 * (plain text, en-US). When `includeUrl` is true and the entry has a
 * non-DOI URL that the formatted reference does not already contain,
 * " Retrieved from <url>" is appended.
 *
 * @param {string} bibtexContent - BibTeX source handed to `Cite`.
 * @param {boolean} [includeUrl=true] - Whether to append entry URLs.
 * @returns {Object<string, string>} Map of citation key to reference text.
 *   Entries without a key are skipped; for duplicate keys the last one wins.
 */
function bibtexToApaJson(bibtexContent, includeUrl = true) {
  const cite = new Cite(bibtexContent);
  const result = {};

  for (const entry of cite.data) {
    const key = entry.id || entry['citation-key'];
    if (!key) {
      // Without a key the reference cannot be looked up; storing it under
      // the literal key "undefined" would only hide the problem.
      console.warn('Skipping bibliography entry without a citation key');
      continue;
    }

    let ref = new Cite(entry).format('bibliography', {
      format: 'text',
      template: 'apa',
      lang: 'en-US',
    }).trim();

    if (includeUrl) {
      const url = extractUrl(entry);
      if (url && !url.includes('doi.org') && !ref.includes(url)) {
        // Build the result by concatenation: passing the URL as the
        // replacement argument of String.replace would expand "$$", "$&",
        // "$1", ... sequences that can legitimately occur in a URL.
        const endsWithUrl = /https?:\/\/[^\s]+$/.test(ref);
        ref = endsWithUrl
          ? `${ref} Retrieved from ${url}`
          : `${ref.replace(/\.$/, '')}. Retrieved from ${url}`;
      }
    }

    if (Object.hasOwn(result, key)) {
      console.warn(`Duplicate citation key "${key}": keeping the last entry`);
    }
    result[key] = ref;
  }

  return result;
}
64+
65+
/**
 * Command-line entry point: parse the arguments, load the BibTeX source,
 * convert it to APA references and write the lookup as pretty-printed JSON.
 *
 * Recognised arguments: -i/--input, -o/--output, --no-url, -h/--help.
 *
 * @returns {Promise<void>}
 * @throws {Error} If -i/--input or -o/--output is given without a value;
 *   errors from fetching, parsing or writing propagate unchanged.
 */
async function main() {
  const args = process.argv.slice(2);
  let input = DEFAULT_INPUT;
  let output = DEFAULT_OUTPUT;
  let includeUrl = true;

  // Return the value that follows a flag, failing with a clear message
  // instead of letting `undefined` (or the next flag) flow into the I/O code.
  const valueFor = (flag, index) => {
    const value = args[index];
    if (value === undefined || value.startsWith('-')) {
      throw new Error(`Missing value for ${flag}`);
    }
    return value;
  };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    switch (arg) {
      case '-i':
      case '--input':
        input = valueFor(arg, ++i);
        break;
      case '-o':
      case '--output':
        output = valueFor(arg, ++i);
        break;
      case '--no-url':
        includeUrl = false;
        break;
      case '-h':
      case '--help':
        console.log(`Usage: node bibtex_to_apa.js [-i INPUT] [-o OUTPUT] [--no-url]
Options:
  -i, --input   Input BibTeX (URL or file). Default: Google Doc
  -o, --output  Output JSON file. Default: apa_lookup.json
  --no-url      Don't append URLs to references`);
        // Nothing else is pending, so returning ends the process with code 0.
        return;
      default:
        // Stay permissive (the run continues) but make typos visible.
        console.warn(`Ignoring unknown argument: ${arg}`);
    }
  }

  const bibtex = await fetchBibtex(input);
  const apaJson = bibtexToApaJson(bibtex, includeUrl);
  fs.writeFileSync(output, JSON.stringify(apaJson, null, 2));
  console.log(`Wrote ${Object.keys(apaJson).length} references to ${output}`);
}
90+
91+
// Run the CLI. On failure, log the error and mark the process as failed so
// that callers such as CI see a non-zero exit status instead of success.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

bibtex_to_apa/package-lock.json

Lines changed: 224 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)