Skip to content

Commit ff9b26b

Browse files
feat: add function that converts chemical formula to hill notation
1 parent a51b553 commit ff9b26b

File tree

3 files changed

+101
-0
lines changed

3 files changed

+101
-0
lines changed

news/hill-notation.rst

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
**Added:**
2+
3+
* Function that converts a chemical formula to Hill notation.
4+
5+
**Changed:**
6+
7+
* <news item>
8+
9+
**Deprecated:**
10+
11+
* <news item>
12+
13+
**Removed:**
14+
15+
* <news item>
16+
17+
**Fixed:**
18+
19+
* <news item>
20+
21+
**Security:**
22+
23+
* <news item>

src/diffpy/utils/tools.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import importlib.metadata
22
import json
3+
import re
4+
from collections import defaultdict
35
from copy import copy
46
from pathlib import Path
57

@@ -214,6 +216,60 @@ def get_package_info(package_names, metadata=None):
214216
return metadata
215217

216218

219+
def _expand_formula(formula):
220+
"""Expands the formula if it contains parentheses with multipliers."""
221+
while "(" in formula and ")" in formula:
222+
formula = re.sub(
223+
r"\(([A-Za-z0-9]+)\)(\d+)",
224+
lambda m: m.group(1) * int(m.group(2)),
225+
formula,
226+
)
227+
return formula
228+
229+
230+
def to_hill_notation(formula):
    """Convert a chemical formula to Hill notation.

    Steps:
      1. Expand parenthesized groups, then tally each element symbol
         and its count from the expanded string,
         e.g. "H2O" -> {"H": 2, "O": 1}.
      2. Order the elements by the Hill system:
         - Carbon (C) is listed first when present.
         - Hydrogen (H) comes right after carbon, but only when
           carbon is present.
         - Every other element follows in alphabetical order.
      3. Render each element with its count, leaving out counts of 1.

    Parameters
    ----------
    formula : str
        The chemical formula of the material.

    Returns
    -------
    str
        The formula in Hill notation, with elements separated by
        spaces (e.g., "C6 H12 O6").
    """
    expanded = _expand_formula(formula)
    totals = defaultdict(int)
    for symbol, digits in re.findall(r"([A-Z][a-z]*)(\d*)", expanded):
        # A symbol with no trailing digits stands for a single atom.
        totals[symbol] += int(digits) if digits else 1

    # Carbon leads, and hydrogen is promoted only alongside carbon;
    # without carbon, hydrogen is sorted with everything else.
    leading = []
    if "C" in totals:
        leading.append("C")
        if "H" in totals:
            leading.append("H")
    ordered = leading + sorted(s for s in totals if s not in leading)

    return " ".join(
        f"{symbol}{totals[symbol] if totals[symbol] > 1 else ''}"
        for symbol in ordered
    )
271+
272+
217273
def get_density_from_cloud(sample_composition, mp_token=""):
218274
"""Function to get material density from the MP or COD database.
219275

tests/test_tools.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
compute_mud,
1414
get_package_info,
1515
get_user_info,
16+
to_hill_notation,
1617
)
1718

1819

@@ -270,6 +271,27 @@ def test_get_package_info(monkeypatch, inputs, expected):
270271
assert actual_metadata == expected
271272

272273

274+
@pytest.mark.parametrize(
    "input_formula, expected",
    [
        # C1: Formulas with C and/or H
        ("C", "C"),  # Only C
        ("H", "H"),  # Only H
        ("CO2", "C O2"),  # With C
        ("C6H12O6", "C6 H12 O6"),  # With C and H
        ("CH3COOH", "C2 H4 O2"),  # With C and H
        ("NH3", "H3 N"),  # With H only
        # H without C is sorted alphabetically, not promoted to the front.
        # This is the only case that fails if H were always listed first.
        ("HCl", "Cl H"),
        # C2: Formulas without C or H
        ("O2", "O2"),  # Single element
        ("FeCl3", "Cl3 Fe"),  # Compound
        # C3: Parentheses Expansion
        ("Mg(OH)2", "H2 Mg O2"),
        ("Ca3(PO4)2", "Ca3 O8 P2"),  # Multi-element group with counts
    ],
)
def test_to_hill_notation(input_formula, expected):
    """Check that each formula is rendered in the expected Hill order."""
    assert to_hill_notation(input_formula) == expected
293+
294+
273295
@pytest.mark.parametrize(
274296
"inputs",
275297
[

0 commit comments

Comments
 (0)