forrtproject
diff --git a/‎.github/workflows/data-processing.yml‎
Lines changed: 45 additions & 0 deletions b/‎.github/workflows/data-processing.yml‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎bibtex_to_apa/bibtex_to_apa.js‎
Lines changed: 91 additions & 0 deletions b/‎bibtex_to_apa/bibtex_to_apa.js‎
Lines changed: 91 additions & 0 deletions
diff --git a/‎bibtex_to_apa/package-lock.json‎
Lines changed: 224 additions & 0 deletions b/‎bibtex_to_apa/package-lock.json‎
Lines changed: 224 additions & 0 deletions
@@ -174,6 +174,31 @@ jobs:
           echo "📁 Moving network graph to static/partials..."
           mv content/contributor-analysis/network-graph.html static/partials/
 
+      #========================================
+      # Setup Node.js for bibliography processing
+      #========================================
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          # bibtex_to_apa.js calls fetch() without importing it, so it needs a
+          # Node release that ships a global fetch (18 or newer).
+          node-version: '18'
+          cache: 'npm'
+          # Point the npm cache at the converter's lockfile rather than a repo-root one.
+          cache-dependency-path: bibtex_to_apa/package-lock.json
+      #========================================
+      # Install Node.js dependencies for bibliography processing
+      #========================================
+      - name: Install Node.js dependencies
+        # npm ci installs exactly what package-lock.json pins and fails if the
+        # lockfile and package.json disagree, keeping CI installs reproducible.
+        run: |
+          cd bibtex_to_apa
+          npm ci
+
+      #========================================
+      # Process contributor data using Tenzing script
+      #========================================
+      - name: Run Tenzing script
+        continue-on-error: true  # Continue even if this step fails
+        run: python3 scripts/forrt_contribs/tenzing.py
+        env:
+          GSHEET_CREDENTIALS: ${{ secrets.GSHEET_CREDENTIALS }}
           # Clean up HTML artifacts from index.md if any
           sed -i.bak -e '/^```{=html}$/d' -e '/^```$/d' content/contributor-analysis/index.md && rm content/contributor-analysis/index.md.bak
 
@@ -215,6 +240,15 @@ jobs:
             fi
           done
 
+      #========================================
+      # Generate APA lookup from bibliography
+      #========================================
+      - name: Generate APA lookup
+        continue-on-error: true  # Continue even if this step fails
+        # The script runs from bibtex_to_apa/, so the -o path climbs back to the
+        # repository root before descending into content/glossary/.
+        run: |
+          cd bibtex_to_apa
+          node bibtex_to_apa.js -o '../content/glossary/apa_lookup.json'
+
       #========================================
       # Process and generate glossary files
       #========================================
@@ -225,6 +259,17 @@ jobs:
         run: python3 content/glossary/_create_glossaries.py
         # Execute the glossary script that generates glossary markdown files
 
+      # Print unresolved glossary references to the job log; the step only
+      # reports and does not fail the workflow.
+      # NOTE(review): missing_references.txt is presumably written by
+      # _create_glossaries.py in the preceding step — confirm.
+      - name: Check for missing references
+        if: always()
+        run: |
+          if [ -f "content/glossary/missing_references.txt" ]; then
+            echo "Missing references found:"
+            cat content/glossary/missing_references.txt
+            # Optionally fail the workflow or create an issue
+          else
+            echo "All references resolved successfully"
+          fi
+
       #========================================
       # Download Google Analytics data and validate
       #========================================
 
@@ -268,3 +268,6 @@ gha-creds-*.json
 
 # Tenzing failure reports (temporary files for CI)
 scripts/forrt_contribs/tenzing_failures.json
+
+# Bibtex to APA converter dependencies (installed by npm)
+bibtex_to_apa/node_modules/
@@ -0,0 +1,91 @@
+const { Cite } = require('@citation-js/core');
+require('@citation-js/plugin-bibtex');
+require('@citation-js/plugin-csl');
+const fs = require('fs');
+
+// Default BibTeX source: a Google Doc "edit" link. fetchBibtex() extracts the
+// document id from it and downloads the plain-text export instead.
+const DEFAULT_INPUT = 'https://docs.google.com/document/d/1-KKsOYZWJ3LdgdO2b2uJsOG2AmUDaQBNqWVVTY2W4W8/edit?tab=t.0';
+// Default output file name; a relative path is resolved against the current
+// working directory when the file is written.
+const DEFAULT_OUTPUT = 'apa_lookup.json';
+
+/**
+ * Load BibTeX source text from a local file, a Google Doc, or any other
+ * HTTP(S) URL.
+ *
+ * Google Docs links are rewritten to the document's plain-text export
+ * endpoint, and bracketed letter runs (e.g. "[a]", "[ab]") are stripped from
+ * the downloaded text because the export inserts them as comment markers.
+ *
+ * @param {string} input - File path or http(s) URL of the BibTeX source.
+ * @returns {Promise<string>} The BibTeX text.
+ * @throws {Error} "Invalid Google Doc URL" when no document id can be found,
+ *   "Failed to fetch: <status>" on a non-OK HTTP response, or the underlying
+ *   fs error when a local file cannot be read.
+ */
+async function fetchBibtex(input) {
+  // Only a real http(s):// scheme counts as remote; a bare 'http' prefix test
+  // would misroute local files such as 'http_refs.bib' to fetch().
+  if (!/^https?:\/\//i.test(input)) {
+    return fs.readFileSync(input, 'utf-8');
+  }
+
+  const isGoogleDoc = input.includes('docs.google.com');
+  let url = input;
+  if (isGoogleDoc) {
+    const match = input.match(/\/d\/([a-zA-Z0-9_-]+)/);
+    if (!match) throw new Error('Invalid Google Doc URL');
+    url = `https://docs.google.com/document/d/${match[1]}/export?format=txt`;
+  }
+
+  const response = await fetch(url);
+  if (!response.ok) throw new Error(`Failed to fetch: ${response.status}`);
+  const text = await response.text();
+
+  // Remove Google Docs comment markers; other sources are returned untouched
+  return isGoogleDoc ? text.replace(/\[[a-z]+\]/gi, '') : text;
+}
+
+/**
+ * Pick a URL for a parsed bibliography entry.
+ *
+ * The entry's own `URL` field wins. Otherwise the first http(s) link found in
+ * the free-text `note` field is used, with punctuation that belongs to the
+ * surrounding sentence (trailing ".", ",", ")" and similar) trimmed off.
+ *
+ * @param {{URL?: string, note?: string}} entry - Parsed citation entry.
+ * @returns {string|null} The URL, or null when the entry has none.
+ */
+function extractUrl(entry) {
+  if (entry.URL) return entry.URL;
+  if (typeof entry.note !== 'string') return null;
+
+  const match = entry.note.match(/https?:\/\/[^\s]+/);
+  if (!match) return null;
+
+  // The greedy match runs to the next whitespace, so it swallows sentence
+  // punctuation such as the final "." in "See https://example.com/page."
+  let url = match[0].replace(/[.,;:!?'"}>]+$/, '');
+  // A closing paren is only part of the URL when an opening one precedes it
+  // (e.g. ".../wiki/Foo_(bar)"); otherwise it closes the surrounding text.
+  if (!url.includes('(')) {
+    url = url.replace(/[).,;:!?'"}>]+$/, '');
+  }
+  return url;
+}
+
+/**
+ * Convert BibTeX source into a lookup of citation key to APA reference text.
+ *
+ * @param {string} bibtexContent - BibTeX text handed to citation-js.
+ * @param {boolean} [includeUrl=true] - Append "Retrieved from <url>" when an
+ *   entry has a non-DOI URL that the formatted reference does not already
+ *   contain.
+ * @returns {Object<string, string>} Map from each entry's id (falling back to
+ *   its citation-key) to its plain-text APA reference.
+ */
+function bibtexToApaJson(bibtexContent, includeUrl = true) {
+  const cite = new Cite(bibtexContent);
+  const result = {};
+
+  for (const entry of cite.data) {
+    const key = entry.id || entry['citation-key'];
+    // Format one entry at a time so each key maps to exactly one reference
+    let ref = new Cite(entry).format('bibliography', {
+      format: 'text',
+      template: 'apa',
+      lang: 'en-US'
+    }).trim();
+
+    if (includeUrl) {
+      const url = extractUrl(entry);
+      if (url && !url.includes('doi.org') && !ref.includes(url)) {
+        const suffix = `Retrieved from ${url}`;
+        // Build the result by concatenation instead of passing the URL as a
+        // replacement string: URLs may contain "$" sequences ($&, $1, $$)
+        // that String.prototype.replace would expand and corrupt.
+        ref = /https?:\/\/[^\s]+$/.test(ref)
+          ? `${ref} ${suffix}`
+          : `${ref.replace(/\.$/, '')}. ${suffix}`;
+      }
+    }
+
+    result[key] = ref;
+  }
+
+  return result;
+}
+
+/**
+ * CLI entry point: parse command-line flags, load the BibTeX source, convert
+ * it, and write the resulting lookup as pretty-printed JSON.
+ *
+ * @returns {Promise<void>}
+ * @throws {Error} When -i/--input or -o/--output is given without a value, or
+ *   when loading, converting, or writing fails.
+ */
+async function main() {
+  const args = process.argv.slice(2);
+  let input = DEFAULT_INPUT;
+  let output = DEFAULT_OUTPUT;
+  let includeUrl = true;
+
+  // Fail fast on a trailing flag with no value instead of letting `undefined`
+  // reach the file/network code and surface as an unrelated TypeError.
+  const requireValue = (flag, value) => {
+    if (value === undefined) throw new Error(`Missing value for ${flag}`);
+    return value;
+  };
+
+  for (let i = 0; i < args.length; i++) {
+    const arg = args[i];
+    if (arg === '-i' || arg === '--input') input = requireValue(arg, args[++i]);
+    else if (arg === '-o' || arg === '--output') output = requireValue(arg, args[++i]);
+    else if (arg === '--no-url') includeUrl = false;
+    else if (arg === '-h' || arg === '--help') {
+      console.log(`Usage: node bibtex_to_apa.js [-i INPUT] [-o OUTPUT] [--no-url]
+                  Options:
+                    -i, --input   Input BibTeX (URL or file). Default: Google Doc
+                    -o, --output  Output JSON file. Default: apa_lookup.json
+                    --no-url      Don't append URLs to references`);
+      process.exit(0);
+    }
+  }
+
+  const bibtex = await fetchBibtex(input);
+  const apaJson = bibtexToApaJson(bibtex, includeUrl);
+  fs.writeFileSync(output, JSON.stringify(apaJson, null, 2));
+  console.log(`Wrote ${Object.keys(apaJson).length} references to ${output}`);
+}
+
+// Log the failure and exit non-zero so callers (e.g. CI) can tell the
+// conversion did not succeed; logging alone would still exit with code 0.
+main().catch((error) => {
+  console.error(error);
+  process.exitCode = 1;
+});