From 9067d825d99146783b933cdc3825b29a18af7f91 Mon Sep 17 00:00:00 2001 From: TShiotaSS <107021547+TShiotaSS@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:37:50 +0900 Subject: [PATCH] Add mace-osaka24-large model --- matbench_discovery/enums.py | 4 + ..._json_to_2024_format_mace_osaka24_large.py | 117 ++++++++++++ models/mace/mace-osaka24-large.yml | 172 ++++++++++++++++++ 3 files changed, 293 insertions(+) create mode 100644 models/mace/convert_json_to_2024_format_mace_osaka24_large.py create mode 100644 models/mace/mace-osaka24-large.yml diff --git a/matbench_discovery/enums.py b/matbench_discovery/enums.py index a3891f88c..2d6cec03a 100644 --- a/matbench_discovery/enums.py +++ b/matbench_discovery/enums.py @@ -335,6 +335,10 @@ class Model(Files, base_dir=f"{ROOT}/models"): mace_mp_0 = auto(), "mace/mace-mp-0.yml" mace_mpa_0 = auto(), "mace/mace-mpa-0.yml" # trained on MPtrj and Alexandria + # MACE-Osaka24-large as published in https://arxiv.org/abs/2412.13088 trained on MPtrj and OFF23 (extended SPICE) + mace_osaka24_large = auto(), "mace/mace-osaka24-large.yml" # MPtrj/OFF23 data is split into train: 2,324,357, val: 129,070, test: 128,168 + + # MatRIS-v0.5.0-MPtrj matris_v050_mptrj = auto(), "matris/matris-v050-mptrj.yml" diff --git a/models/mace/convert_json_to_2024_format_mace_osaka24_large.py b/models/mace/convert_json_to_2024_format_mace_osaka24_large.py new file mode 100644 index 000000000..8a9d90a7c --- /dev/null +++ b/models/mace/convert_json_to_2024_format_mace_osaka24_large.py @@ -0,0 +1,117 @@ +import json +import gzip +import os + +# --- Execution Settings --- +# Specify input filenames here (supports both .gz and .json) +ref_filename = '../../data/phonons/2024-11-09-kappas-phononDB-PBE-noNAC.json.gz' # Reference file +model_name = 'mace-osaka24-large' +json_name = 'force-sets.json.gz' +target_filename = f'./{model_name}/2025-11-21-kappa-103-FIRE-dist=0.01-fmax=0.0001-symprec=1e-05/{json_name}' # Target file +output_filename = 
f'./{model_name}/2025-11-21-kappa-103-FIRE-dist=0.01-fmax=0.0001-symprec=1e-05/reformated_{json_name}' + +def load_json_smart(filepath): + """ + Function to load JSON files. + Automatically handles gzip decompression if the file ends with .gz. + """ + print(f"Loading: {filepath}") + try: + if filepath.endswith('.gz'): + # Open with gzip module in text mode (rt) if extension is .gz + with gzip.open(filepath, 'rt', encoding='utf-8') as f: + return json.load(f) + else: + # Open normally for standard .json files + with open(filepath, 'r', encoding='utf-8') as f: + return json.load(f) + except FileNotFoundError: + print(f"Error: File {filepath} not found.") + return None + +def align_sort_order_gz_input(ref_file, target_file, output_file): + print("Starting process...") + + # 1. Load the reference file (2024 version) + # We read this to establish the "correct sort order" of mp_ids. + ref_data = load_json_smart(ref_file) + if ref_data is None: return + + # 2. Load the target file (2025 version) + target_data = load_json_smart(target_file) + if target_data is None: return + + # --- Establish Sorting Criteria --- + # Retrieve the mp_id dictionary from the reference file. + ref_mp_ids_dict = ref_data.get('mp_id', {}) + + # Create a list of mp_ids sorted by their index (0, 1, 2...) to ensure correct order. 
+ sorted_indices = sorted(ref_mp_ids_dict.keys(), key=lambda x: int(x)) + ordered_mp_ids = [ref_mp_ids_dict[idx] for idx in sorted_indices] + + print(f"Number of materials in reference file: {len(ordered_mp_ids)}") + + # --- Prepare Target Data Lookup --- + # Create a reverse lookup dictionary for the target file: {material_id: index} + target_mp_ids_dict = target_data.get('material_id', {}) + mpid_to_target_idx = {v: k for k, v in target_mp_ids_dict.items()} + + # --- Construct Output Data --- + data_output = {} + + # List of keys to keep in the output file + keys_to_keep = [ + 'ph_freqs', 'q_points', + 'kappa_tot_rta', 'mode_kappa_tot_rta', 'kappa_p_rta', 'kappa_c', 'mode_weights', + 'max_stress', 'reached_max_steps', 'broken_symmetry', 'has_imag_ph_modes' + ] + + # Initialize dictionary for mp_id + data_output['mp_id'] = {} + # Initialize dictionaries for other keys if they exist in the target data + for key in keys_to_keep: + if key in target_data: + data_output[key] = {} + + print("Aligning and sorting data...") + missing_count = 0 + + # Iterate through the reference order (0, 1, 2...) and populate the new data + for new_idx_int, mpid in enumerate(ordered_mp_ids): + new_idx_str = str(new_idx_int) # New index "0", "1"... + + # Check if this mp_id exists in the target file (2025) + if mpid in mpid_to_target_idx: + old_idx_str = mpid_to_target_idx[mpid] # Original index in target file + + # 1. Save mp_id + data_output['mp_id'][new_idx_str] = mpid + + # 2. 
Copy other data fields + for key in keys_to_keep: + if key in target_data and old_idx_str in target_data[key]: + data_output[key][new_idx_str] = target_data[key][old_idx_str] + else: + missing_count += 1 + # Skip if ID is not found (missing data) + + # --- Report Results --- + if missing_count > 0: + print(f"Warning: {missing_count} materials were not found in the target file.") + else: + print("All IDs matched successfully.") + + # --- Save Output (.gz) --- + if not output_file.endswith('.gz'): + output_file += '.gz' + + print(f"Saving to: {output_file}") + with gzip.open(output_file, 'wt', encoding='utf-8') as f: + json.dump(data_output, f, indent=None, separators=(',', ':')) + + print("Completed. The file is now aligned with the 2024 version.") + +# Execution block +if __name__ == "__main__": + align_sort_order_gz_input(ref_filename, target_filename, output_filename) + pass diff --git a/models/mace/mace-osaka24-large.yml b/models/mace/mace-osaka24-large.yml new file mode 100644 index 000000000..a90f613fc --- /dev/null +++ b/models/mace/mace-osaka24-large.yml @@ -0,0 +1,172 @@ +model_name: mace-osaka24-large +model_key: mace-osaka24-large +model_version: v0.3.6 +date_added: '2025-12-23' +date_published: '2024-12-18' +authors: + - name: Tomoya Shiota + affiliation: Center for Quantum Information and Quantum Biology, The University of Osaka + email: shiota.tomoya.ss@gmail.com + orcid: https://orcid.org/0009-0002-7044-4558 + - name: Kenji Ishihara + affiliation: Center for Quantum Information and Quantum Biology, The University of Osaka + email: ishiharak25@chem.sci.osaka-u.ac.jp + orcid: https://orcid.org/0009-0004-5404-521X + - name: Tuan Minh Do + affiliation: Center for Quantum Information and Quantum Biology, The University of Osaka + email: do.tuan.minh.qiqb@osaka-u.ac.jp + orcid: https://orcid.org/0009-0006-2454-4899 + - name: Toshio Mori + affiliation: Center for Quantum Information and Quantum Biology, The University of Osaka + email: 
t.mori.qiqb@osaka-u.ac.jp + orcid: https://orcid.org/0009-0004-0149-1224 + - name: Wataru Mizukami + affiliation: Center for Quantum Information and Quantum Biology, The University of Osaka + email: mizukami.wataru.qiqb@osaka-u.ac.jp + orcid: https://orcid.org/0000-0003-3538-3542 +trained_by: + - name: Tomoya Shiota + affiliation: Center for Quantum Information and Quantum Biology, The University of Osaka + email: shiota.tomoya.ss@gmail.com + orcid: https://orcid.org/0009-0002-7044-4558 + - name: Kenji Ishihara + affiliation: Center for Quantum Information and Quantum Biology, The University of Osaka + email: ishiharak25@chem.sci.osaka-u.ac.jp + orcid: https://orcid.org/0009-0004-5404-521X + - name: Toshio Mori + affiliation: Center for Quantum Information and Quantum Biology, The University of Osaka + email: t.mori.qiqb@osaka-u.ac.jp + orcid: https://orcid.org/0009-0004-0149-1224 + +repo: https://github.com/ACEsuit/mace +doi: https://doi.org/10.48550/arXiv.2412.13088 +paper: https://arxiv.org/pdf/2412.13088v2 +pypi: https://pypi.org/project/mace-torch +pr_url: # TODO: fill in the pull request URL once the PR has been opened. 
+checkpoint_url: https://github.com/qiqb-osaka/mace-osaka24/releases/tag/v0.0.1/ + +license: + code: MIT + code_url: https://github.com/ACEsuit/mace/blob/b0fa4ef7c/LICENSE.md + checkpoint: MIT + checkpoint_url: https://github.com/qiqb-osaka/mace-osaka24/blob/main/LICENSE + +requirements: + mace-torch: 0.3.6 + torch: 2.2.0 + ase: 3.23.0 + numpy: 1.26.4 + +openness: OSOD +trained_for_benchmark: false +train_task: S2EFS +test_task: IS2RE-SR +targets: EFS_G +model_type: UIP +model_params: 5_725_072 +n_estimators: 1 + +training_set: [MPtrj, Extended-SPICE(with Total Energy Alignment)] +training_cost: missing + +hyperparams: + max_force: 0.05 + max_steps: 500 + ase_optimizer: FIRE + cell_filter: FrechetCellFilter + graph_construction_radius: 4.5 # Å + max_neighbors: .inf + +notes: + Description: | + MACE is a higher-order equivariant message-passing neural network for fast and accurate force fields. + Training: MACE-Osaka24 model is the first MLIP model trained on a unified dataset covering molecular and crystalline systems, utilizing the MACE architecture developed by Batatia et al. 
+ + +metrics: + phonons: + kappa_103: + κ_SRME: 0.7785 + pred_file: models/mace/mace-osaka24-large/2025-11-21-kappa-103-FIRE-dist=0.01-fmax=0.0001-symprec=1e-05/reformated_force-sets.json.gz + pred_file_url: https://figshare.com/ndownloader/files/60020837 + κ_SRE: 0.6499 + geo_opt: + pred_file: models/mace/mace-osaka24-large/2025-11-22-wbm-IS2RE-FIRE.json.gz + pred_file_url: https://figshare.com/ndownloader/files/60021551 + struct_col: mace_structure + symprec=1e-2: + rmsd: 0.0889 # unitless + n_sym_ops_mae: 1.8531 # unitless + symmetry_decrease: 0.0628 # fraction + symmetry_match: 0.8105 # fraction + symmetry_increase: 0.12 # fraction + n_structures: 256963 # count + analysis_file: models/mace/mace-osaka24-large/2025-11-22-wbm-IS2RE-FIRE-symprec=1e-2-moyo=0.7.1.csv.gz + analysis_file_url: https://figshare.com/ndownloader/files/60020843 + symprec=1e-5: + rmsd: 0.0889 # unitless + n_sym_ops_mae: 1.835 # unitless + symmetry_decrease: 0.0378 # fraction + symmetry_match: 0.7355 # fraction + symmetry_increase: 0.2221 # fraction + n_structures: 256963 # count + analysis_file: models/mace/mace-osaka24-large/2025-11-22-wbm-IS2RE-FIRE-symprec=1e-5-moyo=0.7.1.csv.gz + analysis_file_url: https://figshare.com/ndownloader/files/60020846 + discovery: + pred_file: models/mace/mace-osaka24-large/2025-11-22-wbm-IS2RE-FIRE.csv.gz + pred_file_url: https://figshare.com/ndownloader/files/60020840 + pred_col: e_form_per_atom_mace + full_test_set: + F1: 0.645 # fraction + DAF: 3.375 # dimensionless + Precision: 0.579 # fraction + Recall: 0.728 # fraction + Accuracy: 0.863 # fraction + TPR: 0.728 # fraction + FPR: 0.11 # fraction + TNR: 0.89 # fraction + FNR: 0.272 # fraction + TP: 32088.0 # count + FP: 23319.0 # count + TN: 189552.0 # count + FN: 12004.0 # count + MAE: 0.058 # eV/atom + RMSE: 0.105 # eV/atom + R2: 0.66 # dimensionless + missing_preds: 16 # count + most_stable_10k: + F1: 0.855 # fraction + DAF: 4.887 # dimensionless + Precision: 0.747 # fraction + Recall: 1.0 # fraction + 
Accuracy: 0.747 # fraction + TPR: 1.0 # fraction + FPR: 1.0 # fraction + TNR: 0.0 # fraction + FNR: 0.0 # fraction + TP: 7471.0 # count + FP: 2529.0 # count + TN: 0.0 # count + FN: 0.0 # count + MAE: 0.118 # eV/atom + RMSE: 0.209 # eV/atom + R2: 0.321 # dimensionless + missing_preds: 0 # count + unique_prototypes: + F1: 0.65 # fraction + DAF: 3.769 # dimensionless + Precision: 0.576 # fraction + Recall: 0.746 # fraction + Accuracy: 0.876 # fraction + TPR: 0.746 # fraction + FPR: 0.1 # fraction + TNR: 0.9 # fraction + FNR: 0.254 # fraction + TP: 24887.0 # count + FP: 18302.0 # count + TN: 163812.0 # count + FN: 8487.0 # count + MAE: 0.061 # eV/atom + RMSE: 0.108 # eV/atom + R2: 0.657 # dimensionless + missing_preds: 14 # count