File tree Expand file tree Collapse file tree 3 files changed +42
-33
lines changed
Expand file tree Collapse file tree 3 files changed +42
-33
lines changed Original file line number Diff line number Diff line change @@ -614,7 +614,7 @@ If you need to change the website URL:
614614
615615#### Step 1: Update Critical Configuration
616616- [ ] Update `_config.yml` → `url:` field
617- - [ ] Update `robots.txt` → `Sitemap:` line
617+ - [ ] Verify `robots.txt` → `Sitemap:` line (generated from `{{ site.url }}{{ site.baseurl }}`)
618618- [ ] Update or remove `CNAME` file if using custom domain
619619
620620#### Step 2: Test Locally
Original file line number Diff line number Diff line change @@ -13,18 +13,39 @@ permalink: /robots.txt
1313# - Malicious crawlers may ignore this file
1414# - For GitHub Pages, this provides basic protection
1515
16- # Allow major search engines with rate limiting
16+ # Allow major search engines.
17+ # Note: Googlebot ignores Crawl-delay directives, so we omit it to avoid Search Console warnings.
1718User-agent: Googlebot
18- Crawl-delay: 10
19- Allow: /
19+ Disallow: /images/
20+ Disallow: /assets/
21+ Disallow: /_site/
22+ Disallow: /bin/
23+ Disallow: /CNAME
24+ Disallow: /README.md
25+ Disallow: /DEVELOPMENT.md
26+ Disallow: /.htaccess
2027
2128User-agent: Bingbot
2229Crawl-delay: 10
23- Allow: /
30+ Disallow: /images/
31+ Disallow: /assets/
32+ Disallow: /_site/
33+ Disallow: /bin/
34+ Disallow: /CNAME
35+ Disallow: /README.md
36+ Disallow: /DEVELOPMENT.md
37+ Disallow: /.htaccess
2438
2539User-agent: Slurp
2640Crawl-delay: 10
27- Allow: /
41+ Disallow: /images/
42+ Disallow: /assets/
43+ Disallow: /_site/
44+ Disallow: /bin/
45+ Disallow: /CNAME
46+ Disallow: /README.md
47+ Disallow: /DEVELOPMENT.md
48+ Disallow: /.htaccess
2849
2950# Block aggressive/problematic crawlers
3051User-agent: MJ12bot
@@ -64,18 +85,14 @@ Crawl-delay: 10
6485Disallow: /images/
6586Disallow: /assets/
6687Disallow: /_site/
88+ Disallow: /bin/
89+ Disallow: /CNAME
90+ Disallow: /README.md
91+ Disallow: /DEVELOPMENT.md
92+ Disallow: /.htaccess
6793
68- # Allow access to main pages
69- Allow: /$
70- Allow: /allnews
71- Allow: /allnews.html
72- Allow: /team
73- Allow: /publications
74- Allow: /contact
75- Allow: /funding
76- Allow: /gallery
77- Allow: /openings
78- Allow: /sitemap.xml
94+ # Allow access to main pages (everything else is allowed by default)
95+ Allow: /
7996
8097# Sitemap location (helps good crawlers index efficiently)
8198Sitemap: {{ site.url }}{{ site.baseurl }}/sitemap.xml
Original file line number Diff line number Diff line change @@ -4,25 +4,17 @@ permalink: /sitemap.xml
44---
55<?xml version="1.0" encoding="UTF-8"?>
66<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
7- {% for page in site.pages %}
8- {% if page.url == nil %}
9- {% continue %}
10- {% endif %}
11-
12- {% if page.exclude_from_sitemap == true %}
13- {% continue %}
14- {% endif %}
15-
16- {% if page.url == "/404.html" or page.url == "/sitemap.xml" or page.url == "/robots.txt" %}
17- {% continue %}
18- {% endif %}
19-
20- {% if page.url contains ".css" or page.url contains ".js" or page.url contains ".xml" or page.url contains ".txt" %}
21- {% continue %}
22- {% endif %}
7+ {% assign pages_list = site.pages | where_exp: "p", "p.url != nil" %}
8+ {% for page in pages_list %}
9+ {% if page.exclude_from_sitemap == true %}{% continue %}{% endif %}
10+ {% if page.url == "/404.html" or page.url == "/sitemap.xml" or page.url == "/robots.txt" %}{% continue %}{% endif %}
11+ {% if page.url contains ".css" or page.url contains ".js" or page.url contains ".xml" or page.url contains ".txt" %}{% continue %}{% endif %}
2312
2413 <url>
2514 <loc>{{ site.url }}{{ site.baseurl }}{{ page.url | replace: "index.html", "" }}</loc>
15+ {% if page.last_modified_at %}
16+ <lastmod>{{ page.last_modified_at | date_to_xmlschema }}</lastmod>
17+ {% endif %}
2618 </url>
2719 {% endfor %}
2820</urlset >
You can’t perform that action at this time.
0 commit comments