Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions matbench_discovery/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,10 @@ class Model(Files, base_dir=f"{ROOT}/models"):
mace_mp_0 = auto(), "mace/mace-mp-0.yml"
mace_mpa_0 = auto(), "mace/mace-mpa-0.yml" # trained on MPtrj and Alexandria

# MACE-Osaka24-large as published in https://arxiv.org/abs/2412.13088 trained on MPtrj and OFF23 (extended SPICE)
mace_osaka24_large = auto(), "mace/mace-osaka24-large.yml" # MPtrj/OFF23 data is split into train: 2,324,357, val: 129,070, test: 128,168


# MatRIS-v0.5.0-MPtrj
matris_v050_mptrj = auto(), "matris/matris-v050-mptrj.yml"

Expand Down
117 changes: 117 additions & 0 deletions models/mace/convert_json_to_2024_format_mace_osaka24_large.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import json
import gzip
import os

# --- Execution Settings ---
# Input files may be gzip-compressed (.gz) or plain .json.
model_name = 'mace-osaka24-large'
json_name = 'force-sets.json.gz'

# Reference file: its 'mp_id' ordering is the one the output is aligned to.
ref_filename = '../../data/phonons/2024-11-09-kappas-phononDB-PBE-noNAC.json.gz'

# The target (input) and the re-ordered output live in the same run directory.
_run_dir = f'./{model_name}/2025-11-21-kappa-103-FIRE-dist=0.01-fmax=0.0001-symprec=1e-05'
target_filename = f'{_run_dir}/{json_name}'  # Target file
output_filename = f'{_run_dir}/reformated_{json_name}'  # Re-ordered result

def load_json_smart(filepath):
    """Load a JSON document, transparently decompressing gzip input.

    Args:
        filepath: Location of the file to read, given as a ``str`` or any
            ``os.PathLike`` object (e.g. ``pathlib.Path``). Names ending
            in ``.gz`` are opened through ``gzip``; anything else is
            opened as a regular text file.

    Returns:
        The decoded JSON value, or ``None`` if the file does not exist
        (an error message is printed in that case).
    """
    print(f"Loading: {filepath}")
    # Normalise to str so that path-like objects work with endswith().
    path = os.fspath(filepath)
    # Both openers accept the same text-mode arguments, so pick one and
    # share the parsing code.
    opener = gzip.open if path.endswith('.gz') else open
    try:
        with opener(path, 'rt', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: File {filepath} not found.")
        return None

def align_sort_order_gz_input(ref_file, target_file, output_file):
    """Re-index the target file's entries to follow the reference file's order.

    The reference file supplies an ``'mp_id'`` mapping of index -> material id;
    the target file supplies a ``'material_id'`` mapping of index -> material
    id plus per-index data columns. Each material found in both files is
    written under its position in the (numerically sorted) reference order,
    for ``'mp_id'`` and for every kept column the target file contains.
    Materials missing from the target file are counted and skipped, so their
    positions are simply absent from the output.

    Args:
        ref_file: Path of the reference JSON (``.json`` or ``.json.gz``).
        target_file: Path of the JSON to re-order (``.json`` or ``.json.gz``).
        output_file: Destination path; ``.gz`` is appended if not already
            present, and the result is always written gzip-compressed.

    Returns:
        None. Returns early, writing nothing, if either input cannot be loaded.
    """
    print("Starting process...")

    # Load the reference first (it defines the ordering), then the target.
    reference = load_json_smart(ref_file)
    if reference is None:
        return
    target = load_json_smart(target_file)
    if target is None:
        return

    # --- Reference ordering ---
    # Keys are stringified integers, so sort them numerically, not lexically.
    ref_ids = reference.get('mp_id', {})
    ordered_mp_ids = [ref_ids[pos] for pos in sorted(ref_ids, key=int)]
    print(f"Number of materials in reference file: {len(ordered_mp_ids)}")

    # --- Reverse lookup on the target: material id -> its index key ---
    target_key_of = {
        material: key for key, material in target.get('material_id', {}).items()
    }

    # --- Output skeleton ---
    # Columns carried over to the output, restricted to those the target has.
    wanted_columns = (
        'ph_freqs', 'q_points',
        'kappa_tot_rta', 'mode_kappa_tot_rta', 'kappa_p_rta', 'kappa_c', 'mode_weights',
        'max_stress', 'reached_max_steps', 'broken_symmetry', 'has_imag_ph_modes',
    )
    present_columns = [name for name in wanted_columns if name in target]

    aligned = {'mp_id': {}}
    for name in present_columns:
        aligned[name] = {}

    print("Aligning and sorting data...")
    missing_count = 0

    # Walk the reference order; the position in that order is the new index.
    for position, mpid in enumerate(ordered_mp_ids):
        if mpid not in target_key_of:
            # Not present in the target file: count it and leave a gap.
            missing_count += 1
            continue

        new_key = str(position)
        old_key = target_key_of[mpid]

        aligned['mp_id'][new_key] = mpid
        for name in present_columns:
            column = target[name]
            if old_key in column:
                aligned[name][new_key] = column[old_key]

    # --- Report ---
    if missing_count > 0:
        print(f"Warning: {missing_count} materials were not found in the target file.")
    else:
        print("All IDs matched successfully.")

    # --- Save (always gzip) ---
    if not output_file.endswith('.gz'):
        output_file += '.gz'

    print(f"Saving to: {output_file}")
    with gzip.open(output_file, 'wt', encoding='utf-8') as f:
        # Compact separators keep the compressed payload small.
        json.dump(aligned, f, indent=None, separators=(',', ':'))

    print("Completed. The file is now aligned with the 2024 version.")

# Execution block: run the alignment only when this file is executed as a
# script, using the module-level filename settings.
if __name__ == "__main__":
    align_sort_order_gz_input(ref_filename, target_filename, output_filename)
172 changes: 172 additions & 0 deletions models/mace/mace-osaka24-large.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
model_name: mace-osaka24-large
model_key: mace-osaka24-large
model_version: v0.3.6
date_added: '2025-12-23'
date_published: '2024-12-18'
authors:
- name: Tomoya Shiota
affiliation: Center for Quantum Information and Quantum Biology, The University of Osaka
email: shiota.tomoya.ss@gmail.com
orcid: https://orcid.org/0009-0002-7044-4558
- name: Kenji Ishihara
affiliation: Center for Quantum Information and Quantum Biology, The University of Osaka
email: ishiharak25@chem.sci.osaka-u.ac.jp
orcid: https://orcid.org/0009-0004-5404-521X
- name: Tuan Minh Do
affiliation: Center for Quantum Information and Quantum Biology, The University of Osaka
email: do.tuan.minh.qiqb@osaka-u.ac.jp
orcid: https://orcid.org/0009-0006-2454-4899
- name: Toshio Mori
affiliation: Center for Quantum Information and Quantum Biology, The University of Osaka
email: t.mori.qiqb@osaka-u.ac.jp
orcid: https://orcid.org/0009-0004-0149-1224
- name: Wataru Mizukami
affiliation: Center for Quantum Information and Quantum Biology, The University of Osaka
email: mizukami.wataru.qiqb@osaka-u.ac.jp
orcid: https://orcid.org/0000-0003-3538-3542
trained_by:
- name: Tomoya Shiota
affiliation: Center for Quantum Information and Quantum Biology, The University of Osaka
email: shiota.tomoya.ss@gmail.com
orcid: https://orcid.org/0009-0002-7044-4558
- name: Kenji Ishihara
affiliation: Center for Quantum Information and Quantum Biology, The University of Osaka
email: ishiharak25@chem.sci.osaka-u.ac.jp
orcid: https://orcid.org/0009-0004-5404-521X
- name: Toshio Mori
affiliation: Center for Quantum Information and Quantum Biology, The University of Osaka
email: t.mori.qiqb@osaka-u.ac.jp
orcid: https://orcid.org/0009-0004-0149-1224

repo: https://github.com/ACEsuit/mace
doi: https://doi.org/10.48550/arXiv.2412.13088
paper: https://arxiv.org/pdf/2412.13088v2
pypi: https://pypi.org/project/mace-torch
pr_url: # TODO: add the pull request URL once the PR has been opened
checkpoint_url: https://github.com/qiqb-osaka/mace-osaka24/releases/tag/v0.0.1/

license:
code: MIT
code_url: https://github.com/ACEsuit/mace/blob/b0fa4ef7c/LICENSE.md
checkpoint: MIT
checkpoint_url: https://github.com/qiqb-osaka/mace-osaka24/blob/main/LICENSE

requirements:
mace-torch: 0.3.6
torch: 2.2.0
ase: 3.23.0
numpy: 1.26.4

openness: OSOD
trained_for_benchmark: false
train_task: S2EFS
test_task: IS2RE-SR
targets: EFS_G
model_type: UIP
model_params: 5_725_072
n_estimators: 1

training_set: [MPtrj, Extended-SPICE(with Total Energy Alignment)]
training_cost: missing

hyperparams:
max_force: 0.05
max_steps: 500
ase_optimizer: FIRE
cell_filter: FrechetCellFilter
graph_construction_radius: 4.5 # Å
max_neighbors: .inf

notes:
Description: |
MACE is a higher-order equivariant message-passing neural network for fast and accurate force fields.
Training: The MACE-Osaka24 model is the first MLIP model trained on a unified dataset covering molecular and crystalline systems, utilizing the MACE architecture developed by Batatia et al.


metrics:
phonons:
kappa_103:
κ_SRME: 0.7785
pred_file: models/mace/mace-osaka24-large/2025-11-21-kappa-103-FIRE-dist=0.01-fmax=0.0001-symprec=1e-05/reformated_force-sets.json.gz
pred_file_url: https://figshare.com/ndownloader/files/60020837
κ_SRE: 0.6499
geo_opt:
pred_file: models/mace/mace-osaka24-large/2025-11-22-wbm-IS2RE-FIRE.json.gz
pred_file_url: https://figshare.com/ndownloader/files/60021551
struct_col: mace_structure
symprec=1e-2:
rmsd: 0.0889 # unitless
n_sym_ops_mae: 1.8531 # unitless
symmetry_decrease: 0.0628 # fraction
symmetry_match: 0.8105 # fraction
symmetry_increase: 0.12 # fraction
n_structures: 256963 # count
analysis_file: models/mace/mace-osaka24-large/2025-11-22-wbm-IS2RE-FIRE-symprec=1e-2-moyo=0.7.1.csv.gz
analysis_file_url: https://figshare.com/ndownloader/files/60020843
symprec=1e-5:
rmsd: 0.0889 # unitless
n_sym_ops_mae: 1.835 # unitless
symmetry_decrease: 0.0378 # fraction
symmetry_match: 0.7355 # fraction
symmetry_increase: 0.2221 # fraction
n_structures: 256963 # count
analysis_file: models/mace/mace-osaka24-large/2025-11-22-wbm-IS2RE-FIRE-symprec=1e-5-moyo=0.7.1.csv.gz
analysis_file_url: https://figshare.com/ndownloader/files/60020846
discovery:
pred_file: models/mace/mace-osaka24-large/2025-11-22-wbm-IS2RE-FIRE.csv.gz
pred_file_url: https://figshare.com/ndownloader/files/60020840
pred_col: e_form_per_atom_mace
full_test_set:
F1: 0.645 # fraction
DAF: 3.375 # dimensionless
Precision: 0.579 # fraction
Recall: 0.728 # fraction
Accuracy: 0.863 # fraction
TPR: 0.728 # fraction
FPR: 0.11 # fraction
TNR: 0.89 # fraction
FNR: 0.272 # fraction
TP: 32088.0 # count
FP: 23319.0 # count
TN: 189552.0 # count
FN: 12004.0 # count
MAE: 0.058 # eV/atom
RMSE: 0.105 # eV/atom
R2: 0.66 # dimensionless
missing_preds: 16 # count
most_stable_10k:
F1: 0.855 # fraction
DAF: 4.887 # dimensionless
Precision: 0.747 # fraction
Recall: 1.0 # fraction
Accuracy: 0.747 # fraction
TPR: 1.0 # fraction
FPR: 1.0 # fraction
TNR: 0.0 # fraction
FNR: 0.0 # fraction
TP: 7471.0 # count
FP: 2529.0 # count
TN: 0.0 # count
FN: 0.0 # count
MAE: 0.118 # eV/atom
RMSE: 0.209 # eV/atom
R2: 0.321 # dimensionless
missing_preds: 0 # count
unique_prototypes:
F1: 0.65 # fraction
DAF: 3.769 # dimensionless
Precision: 0.576 # fraction
Recall: 0.746 # fraction
Accuracy: 0.876 # fraction
TPR: 0.746 # fraction
FPR: 0.1 # fraction
TNR: 0.9 # fraction
FNR: 0.254 # fraction
TP: 24887.0 # count
FP: 18302.0 # count
TN: 163812.0 # count
FN: 8487.0 # count
MAE: 0.061 # eV/atom
RMSE: 0.108 # eV/atom
R2: 0.657 # dimensionless
missing_preds: 14 # count
Loading