Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .cargo/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[alias]
xtask = "run --package xtask --"
12 changes: 5 additions & 7 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
{
"editor.formatOnSave": true,
"editor.defaultFormatter": "esbenp.prettier-vscode",
"rust-analyzer.checkOnSave.command": "clippy",
"rust-analyzer.checkOnSave.extraArgs": [
"--all-features",
"--",
"-D",
"clippy::pedantic"
],
"rust-analyzer.check.command": "build",
"rust-analyzer.showSyntaxTree": true,
"[rust]": {
"editor.defaultFormatter": "rust-lang.rust-analyzer"
},
"[sql]": {
"editor.tabSize": 2
}
}
19 changes: 19 additions & 0 deletions crates/xtask/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
[package]
name = "xtask"
version = "0.1.0"

authors.workspace = true
edition.workspace = true
license.workspace = true
rust-version.workspace = true

[dependencies]
anyhow.workspace = true
clap.workspace = true
enum-iterator.workspace = true
reqwest = { version = "0.12.9", features = ["blocking", "json"] }
serde.workspace = true
convert_case.workspace = true

[lints]
workspace = true
9 changes: 9 additions & 0 deletions crates/xtask/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# xtask

> see <https://github.com/matklad/cargo-xtask>

Run via:

```sh
cargo xtask --help
```
314 changes: 314 additions & 0 deletions crates/xtask/src/generate_keywords.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,314 @@
use crate::path_util::cwd_to_workspace_root;
use anyhow::{Context, Ok, Result};
use enum_iterator::{all, Sequence};
use std::collections::{HashMap, HashSet};

/// Per-keyword metadata parsed from a `PG_KEYWORD(...)` row of `kwlist.h`.
struct KeywordMeta {
    /// Grammar category of the keyword (reserved, unreserved, ...).
    category: KeywordCategory,
    /// Whether the keyword is marked `AS_LABEL` or `BARE_LABEL` in the header.
    label: KeywordLabel,
}

/// Label kind of a keyword, taken from the last column of a `PG_KEYWORD` row.
enum KeywordLabel {
    /// The row is marked `AS_LABEL`.
    As,
    /// The row is marked `BARE_LABEL`.
    Bare,
}

/// Grammar category of a Postgres keyword.
///
/// Related reading:
/// - [postgres/src/backend/utils/adt/misc.c](https://github.com/postgres/postgres/blob/08691ea958c2646b6aadefff878539eb0b860bb0/src/backend/utils/adt/misc.c#L452-L467/)
/// - [postgres docs: sql keywords appendix](https://www.postgresql.org/docs/17/sql-keywords-appendix.html)
///
/// The header file alone is not sufficient, though: `json_scalar` can be used
/// as a function name, but `between` cannot.
///
/// The Postgres parser special-cases certain calls such as `json_scalar`:
/// <https://github.com/postgres/postgres/blob/028b4b21df26fee67b3ce75c6f14fcfd3c7cf2ee/src/backend/parser/gram.y#L15684C8-L16145>
///
/// Where each category may be used as an identifier:
///
/// | Category     | Column | Table | Function | Type |
/// |--------------|--------|-------|----------|------|
/// | Unreserved   | Y      | Y     | Y        | Y    |
/// | Reserved     | N      | N     | N        | N    |
/// | ColName      | Y      | Y     | N        | Y    |
/// | TypeFuncName | N      | N     | Y        | Y    |
///
#[derive(Clone, Copy)]
enum KeywordCategory {
    /// Allowed as a column, table, function and type name.
    Unreserved,
    /// Not allowed in any of the identifier positions in the table above.
    Reserved,
    /// Allowed as a column, table and type name, but not as a function name.
    ColName,
    /// Allowed as a function and type name, but not as a column or table name.
    TypeFuncName,
}

/// Identifier positions for which a keyword `TokenSet` is generated.
///
/// Derives `Sequence` so that every variant can be enumerated with
/// `enum_iterator::all`.
#[derive(PartialEq, Sequence)]
enum KWType {
    /// Keywords usable as a column or table name.
    ColumnTable,
    /// Keywords usable as a type name.
    Type,
}

/// Formats a [`KWType`] as the name of the `TokenSet` constant generated for it.
impl std::fmt::Display for KWType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let const_name = match self {
            KWType::ColumnTable => "COLUMN_OR_TABLE_KEYWORDS",
            KWType::Type => "TYPE_KEYWORDS",
        };
        f.write_str(const_name)
    }
}

/// Reports whether a keyword of category `cat` may be used in the identifier
/// position described by `kw_type`.
///
/// Every (category, position) pair is spelled out so that adding a variant to
/// either enum is a compile error here rather than a silent default.
fn keyword_allowed(cat: KeywordCategory, kw_type: KWType) -> bool {
    match (cat, kw_type) {
        (
            KeywordCategory::Unreserved | KeywordCategory::ColName,
            KWType::ColumnTable | KWType::Type,
        ) => true,
        (KeywordCategory::TypeFuncName, KWType::Type) => true,
        (KeywordCategory::TypeFuncName, KWType::ColumnTable) => false,
        (KeywordCategory::Reserved, KWType::ColumnTable | KWType::Type) => false,
    }
}

/// Entry point of the keyword generator: parses the keyword header and then
/// rewrites the generated sections of the parser's `syntax_kind.rs`.
///
/// # Errors
///
/// Propagates any error from parsing the header or from updating the file.
pub(crate) fn generate_keywords() -> Result<()> {
    parse_header().and_then(|parsed| update_syntax_kind(&parsed))
}

fn update_syntax_kind(keywords: &HashMap<String, KeywordMeta>) -> Result<()> {
let path = "crates/parser/src/syntax_kind.rs";

let data = std::fs::read_to_string(path)?;

let mut keys: Vec<_> = keywords.keys().collect();
keys.sort();

let keywords_start = "// keywords";
let keywords_end = "// literals";
let mut in_keywords = false;

let from_kw_start = "pub(crate) fn from_keyword";
let from_kw_end = "} else {";
let mut in_from_keyword = false;
let mut is_first_from_keyword_case = true;

let token_set_start = "// Generated TokenSet start";
let token_set_end = "// Generated TokenSet end";
let mut in_token_sets = false;

let mut allowed_col_table_tokens = HashSet::new();
let mut allowed_type_tokens = HashSet::new();
let mut bare_label_keywords = keywords
.iter()
.filter(|(_key, value)| match value.label {
KeywordLabel::As => false,
KeywordLabel::Bare => true,
})
.map(|(key, _value)| key)
.collect::<Vec<_>>();
bare_label_keywords.sort();

let mut unreserved_keywords = keywords
.iter()
.filter(|(_key, value)| matches!(value.category, KeywordCategory::Unreserved))
.map(|(key, _value)| key)
.collect::<Vec<_>>();
unreserved_keywords.sort();

let mut reserved_keywords = keywords
.iter()
.filter(|(_key, value)| matches!(value.category, KeywordCategory::Reserved))
.map(|(key, _value)| key)
.collect::<Vec<_>>();
reserved_keywords.sort();

let mut all_keywords = keywords.iter().map(|(key, _value)| key).collect::<Vec<_>>();
all_keywords.sort();

for (key, meta) in keywords {
for variant in all::<KWType>() {
match variant {
KWType::ColumnTable => {
if keyword_allowed(meta.category, variant) {
allowed_col_table_tokens.insert(key);
}
}
KWType::Type => {
if keyword_allowed(meta.category, variant) {
allowed_type_tokens.insert(key);
}
}
}
}
}

let mut out = vec![];

for line in data.lines() {
if line.contains(keywords_end) {
for kw in &keys {
// /// `column`
// COLUMN_KW,
let comment = format!(" /// `{}`\n", kw);
let ident = format!(" {},", kw.to_uppercase() + "_KW");
out.push(comment + &ident);
}
out.push("".to_string());

in_keywords = false;
} else if line.contains(from_kw_end) {
let mut keys: Vec<_> = keywords.keys().collect();
keys.sort();
for kw in keys {
// } else if ident.eq_ignore_ascii_case("when") {
// SyntaxKind::WHEN_KW
let cond_op = if is_first_from_keyword_case {
"let kw = if"
} else {
"} else if"
};

let cond = format!(
r#" {} ident.eq_ignore_ascii_case("{}") {{"#,
cond_op, kw
) + "\n";
let ident = format!(" SyntaxKind::{}", kw.to_uppercase() + "_KW");
out.push(cond + &ident);

is_first_from_keyword_case = false;
}

in_from_keyword = false;
} else if line.contains(token_set_end) {
for variant in all::<KWType>() {
out.push(format!(
"pub(crate) const {}: TokenSet = TokenSet::new(&[",
variant
));
let mut tokens = match variant {
KWType::ColumnTable => &allowed_col_table_tokens,
KWType::Type => &allowed_type_tokens,
}
.iter()
.collect::<Vec<_>>();

tokens.sort();

for tk in tokens {
out.push(format!(" SyntaxKind::{},", tk.to_uppercase() + "_KW"));
}
out.push("]);".to_string());
out.push("".to_string());
}

// all keywords
{
out.push("pub(crate) const ALL_KEYWORDS: TokenSet = TokenSet::new(&[".to_string());
let tokens = &all_keywords;
for tk in tokens {
out.push(format!(" SyntaxKind::{},", tk.to_uppercase() + "_KW"));
}
out.push("]);".to_string());
out.push("".to_string());
}

{
out.push(
"pub(crate) const BARE_LABEL_KEYWORDS: TokenSet = TokenSet::new(&[".to_string(),
);
for tk in &bare_label_keywords {
out.push(format!(" SyntaxKind::{},", tk.to_uppercase() + "_KW"));
}
out.push("]);".to_string());
out.push("".to_string());
}

{
out.push(
"pub(crate) const UNRESERVED_KEYWORDS: TokenSet = TokenSet::new(&[".to_string(),
);
let tokens = &unreserved_keywords;
for tk in tokens {
out.push(format!(" SyntaxKind::{},", tk.to_uppercase() + "_KW"));
}
out.push("]);".to_string());
out.push("".to_string());
}

{
out.push(
"pub(crate) const RESERVED_KEYWORDS: TokenSet = TokenSet::new(&[".to_string(),
);
let tokens = &reserved_keywords;
for tk in tokens {
out.push(format!(" SyntaxKind::{},", tk.to_uppercase() + "_KW"));
}
out.push("]);".to_string());
out.push("".to_string());
}

out.push(line.to_string());
}
if !in_keywords && !in_from_keyword && !in_token_sets {
out.push(line.to_string());
}
if line.contains(keywords_start) {
in_keywords = true;
} else if line.contains(from_kw_start) {
in_from_keyword = true;
} else if line.contains(token_set_start) {
in_token_sets = true;
}
}

std::fs::write(path, out.join("\n") + "\n").context("writing to syntax_kind.rs")
}

fn parse_header() -> Result<HashMap<String, KeywordMeta>> {
cwd_to_workspace_root().context("Failed to cwd to root")?;

let data = std::fs::read_to_string("postgres/kwlist.h").context("Failed to read kwlist.h")?;

let mut keywords = HashMap::new();

for line in data.lines() {
if line.starts_with("PG_KEYWORD") {
let line = line
.split(&['(', ')'])
.nth(1)
.context("Invalid kwlist.h structure")?;

let row_items: Vec<&str> = line.split(',').collect();

match row_items[..] {
[name, _value, category, is_bare_label] => {
let label = match is_bare_label.trim() {
"AS_LABEL" => KeywordLabel::As,
"BARE_LABEL" => KeywordLabel::Bare,
unexpected => anyhow::bail!("Unexpected label: {}", unexpected),
};

let category = match category.trim() {
"UNRESERVED_KEYWORD" => KeywordCategory::Unreserved,
"RESERVED_KEYWORD" => KeywordCategory::Reserved,
"COL_NAME_KEYWORD" => KeywordCategory::ColName,
"TYPE_FUNC_NAME_KEYWORD" => KeywordCategory::TypeFuncName,
unexpected => anyhow::bail!("Unexpected category: {}", unexpected),
};

let meta = KeywordMeta { category, label };
let name = name.trim().replace('\"', "");
keywords.insert(name, meta);
}
_ => anyhow::bail!("Problem reading kwlist.h row"),
}
}
}

Ok(keywords)
}
Loading
Loading