|
2 | 2 |
|
3 | 3 | import json |
4 | 4 | import unittest |
| 5 | +from functools import lru_cache |
5 | 6 | from io import StringIO |
6 | 7 | from pathlib import Path |
7 | 8 | from typing import Set |
|
10 | 11 | import yaml |
11 | 12 |
|
12 | 13 | from obofoundry.standardize_metadata import ModifiedDumper |
13 | | -from obofoundry.utils import ONTOLOGY_DIRECTORY, get_data |
| 14 | +from obofoundry.utils import ONTOLOGY_DIRECTORY, get_data, get_new_data |
14 | 15 |
|
15 | 16 | HERE = Path(__file__).parent.resolve() |
16 | 17 | ROOT = HERE.parent |
|
23 | 24 | ZENODO_PREFIX = "https://zenodo.org/record/" |
24 | 25 | DOI_PREFIX = "https://doi.org/" |
25 | 26 | CHEMRXIV_DOI_PREFIX = "https://doi.org/10.26434/chemrxiv" |
# Translation from the license labels used in OBO Foundry metadata to the
# corresponding SPDX license identifiers.
OBO_TO_SPDX = {
    "CC0": "CC0-1.0",  # see https://bioregistry.io/spdx:CC0-1.0
    "CC BY 3.0": "CC-BY-3.0",  # see https://bioregistry.io/spdx:CC-BY-3.0
    "CC BY 4.0": "CC-BY-4.0",  # see https://bioregistry.io/spdx:CC-BY-4.0
}
# The SPDX identifiers accepted by the integrity tests; these are exactly the
# identifiers that an OBO Foundry license label can translate to.
ALLOWED_SPDX = set(OBO_TO_SPDX.values())
26 | 37 |
|
27 | 38 |
|
28 | 39 | class TestIntegrity(unittest.TestCase): |
@@ -253,3 +264,73 @@ def _string_norm(s: str) -> str: |
253 | 264 | .replace(".", "") |
254 | 265 | .replace("-", "") |
255 | 266 | ) |
| 267 | + |
| 268 | + |
class TestModernIntegrity(unittest.TestCase):
    """A test case for data integrity exclusively for new ontologies.

    Specifically, tests implemented in this integrity test are only
    "going-forwards" and don't need to be retroactively applied. This works
    since it only looks at ontologies that appear in the /ontologies folder
    with a markdown file but do not already appear in the published registry
    build.
    """

    # The trailing slash matters: without it, hosts such as
    # "https://github.com.example.org/" would be treated as GitHub.
    _GITHUB_PREFIX = "https://github.com/"
    # Seconds to wait for the GitHub API so a stalled connection cannot hang
    # the whole test suite.
    _REQUEST_TIMEOUT = 30

    def setUp(self) -> None:
        """Set up the test case."""
        self.ontologies = get_new_data()

    def test_github_references(self):
        """Test that new ontologies reference the pull request where they were added."""
        for prefix, data in self.ontologies.items():
            with self.subTest(prefix=prefix):
                self.assertIn("pull_request_added", data)
                self.assertIn("issue_requested", data)

    @staticmethod
    @lru_cache(maxsize=None)
    def _fetch_github_repository(slug: str, timeout: float):
        """Fetch and cache the GitHub API record for an ``owner/name`` slug.

        The cache lives on a static function keyed by the slug rather than on
        an instance method: unittest creates a fresh test case instance for
        every test method, so a cache keyed on ``self`` would never be reused
        and would keep every instance alive.

        :param slug: The ``owner/name`` part of the repository URL.
        :param timeout: Seconds to wait for the GitHub API before giving up.
        :returns: The decoded JSON body of the API response.
        :raises requests.HTTPError: If GitHub answers with an error status.
        """
        res = requests.get(f"https://api.github.com/repos/{slug}", timeout=timeout)
        res.raise_for_status()
        return res.json()

    def _get_github_data(self, prefix: str):
        """Get the GitHub API record for the repository of an ontology.

        :param prefix: The key of the ontology in ``self.ontologies``.
        :returns: The decoded JSON record from the GitHub API, or ``None`` if
            the ontology's repository is not hosted on GitHub.
        """
        repository = self.ontologies[prefix]["repository"]
        if not repository.startswith(self._GITHUB_PREFIX):
            return None
        slug = repository.removeprefix(self._GITHUB_PREFIX).rstrip("/")
        return self._fetch_github_repository(slug, self._REQUEST_TIMEOUT)

    def test_repository_license(self):
        """Test that the repository has a license that's correct."""
        for prefix, data in self.ontologies.items():
            if not data["repository"].startswith(self._GITHUB_PREFIX):
                continue
            with self.subTest(prefix=prefix):
                github_data = self._get_github_data(prefix)
                self.assertIn("license", github_data)

                # GitHub reports ``"license": null`` when it detects no
                # license, so check for None before looking inside it.
                github_license = github_data["license"]
                self.assertIsNotNone(
                    github_license, msg="No LICENSE file found in the repository"
                )
                self.assertIn("spdx_id", github_license)
                spdx = github_license["spdx_id"]
                self.assertIsNotNone(
                    spdx, msg="No LICENSE file found in the repository"
                )
                self.assertNotEqual(
                    "NOASSERTION",
                    spdx,
                    msg="Either no LICENSE file was found or the LICENSE file does not have a standard format that "
                    "GitHub can parse. See https://docs.github.com/en/repositories/managing-your-"
                    "repositorys-settings-and-features/customizing-your-repository/licensing-a-"
                    "repository#detecting-a-license for information on how GitHub does this.",
                )
                self.assertIn(
                    spdx,
                    ALLOWED_SPDX,
                    msg=f"LICENSE file does not follow a standard format for"
                    f" one of the allowed license types ({ALLOWED_SPDX})",
                )

                # Check each lookup step explicitly so a missing or unmapped
                # license annotation is a readable failure, not a KeyError.
                self.assertIn(
                    "license", data, msg="OBO Foundry metadata has no license annotation"
                )
                self.assertIn(
                    "label",
                    data["license"],
                    msg="OBO Foundry license annotation has no label",
                )
                obo_license = data["license"]["label"]
                self.assertIn(
                    obo_license,
                    OBO_TO_SPDX,
                    msg=f"OBO Foundry license label {obo_license!r} has no known SPDX equivalent",
                )
                self.assertEqual(
                    spdx,
                    OBO_TO_SPDX[obo_license],
                    msg="OBO Foundry license annotation does not match GitHub license",
                )
0 commit comments