Skip to content

Commit b811768

Browse files
authored
Add automated license correspondence check (#2277)
* Add automated license correspondence check

  Closes #2276

  This PR adds a check for new ontologies that are currently in review to verify that the license in the submitted metadata matches what's shown on GitHub. Implicitly, this now requires that new ontologies have an appropriate LICENSE file.
* Update test_integrity.py
* Update test_integrity.py
* Add test for matching and pass flake8
* Update test_integrity.py
* Add cached lookup of GitHub data
1 parent aaa5e05 commit b811768

3 files changed

Lines changed: 103 additions & 3 deletions

File tree

src/obofoundry/utils.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
import requests
44
import yaml
55

6-
from obofoundry.constants import ONTOLOGY_DIRECTORY
6+
from obofoundry.constants import ONTOLOGY_DIRECTORY, ROOT
77

88
__all__ = [
99
"get_data",
1010
"query_wikidata",
11+
"get_new_data",
1112
]
1213

1314

@@ -41,3 +42,20 @@ def query_wikidata(query: str):
4142
res.raise_for_status()
4243
res_json = res.json()
4344
return res_json["results"]["bindings"]
45+
46+
47+
def get_new_data():
    """Get records for ontologies that have additional checks.

    So far, this applies in the following scenarios:

    1. New ontologies, i.e., there's a markdown file for the ontology in the
       ``/ontologies`` directory, but it has not yet been published and does
       not appear in the ``_config.yml``

    :returns: A dictionary from prefix to record, restricted to the prefixes
        whose ``id`` is not listed under ``ontologies`` in ``_config.yml``.
    """
    data = get_data()
    config_path = ROOT.joinpath("_config.yml")
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # default locale encoding.
    config_data = yaml.safe_load(config_path.read_text(encoding="utf-8"))
    published = {record["id"] for record in config_data["ontologies"]}
    return {
        prefix: record for prefix, record in data.items() if prefix not in published
    }

tests/test_integrity.py

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import json
44
import unittest
5+
from functools import lru_cache
56
from io import StringIO
67
from pathlib import Path
78
from typing import Set
@@ -10,7 +11,7 @@
1011
import yaml
1112

1213
from obofoundry.standardize_metadata import ModifiedDumper
13-
from obofoundry.utils import ONTOLOGY_DIRECTORY, get_data
14+
from obofoundry.utils import ONTOLOGY_DIRECTORY, get_data, get_new_data
1415

1516
HERE = Path(__file__).parent.resolve()
1617
ROOT = HERE.parent
@@ -23,6 +24,16 @@
2324
ZENODO_PREFIX = "https://zenodo.org/record/"
2425
DOI_PREFIX = "https://doi.org/"
2526
CHEMRXIV_DOI_PREFIX = "https://doi.org/10.26434/chemrxiv"
27+
ALLOWED_SPDX = {
28+
"CC0-1.0", # see https://bioregistry.io/spdx:CC0-1.0
29+
"CC-BY-3.0", # see https://bioregistry.io/spdx:CC-BY-3.0
30+
"CC-BY-4.0", # see https://bioregistry.io/spdx:CC-BY-4.0
31+
}
32+
OBO_TO_SPDX = {
33+
"CC BY 4.0": "CC-BY-4.0",
34+
"CC BY 3.0": "CC-BY-3.0",
35+
"CC0": "CC0-1.0",
36+
}
2637

2738

2839
class TestIntegrity(unittest.TestCase):
@@ -253,3 +264,73 @@ def _string_norm(s: str) -> str:
253264
.replace(".", "")
254265
.replace("-", "")
255266
)
267+
268+
269+
class TestModernIntegrity(unittest.TestCase):
    """A test case for data integrity exclusively for new ontologies.

    Specifically, tests implemented in this integrity test are only
    "going-forwards" and don't need to be retroactively applied. This works
    since it only looks at ontologies that appear in the /ontologies folder
    with a markdown file but do not already appear in the published registry
    build.
    """

    # Only repositories whose URL starts with this prefix are looked up on
    # GitHub. The trailing slash keeps the check consistent with the prefix
    # that is stripped to build the API URL.
    _GITHUB_PREFIX = "https://github.com/"

    def setUp(self) -> None:
        """Set up the test case."""
        self.ontologies = get_new_data()

    def test_github_references(self):
        """Test that new ontologies reference the pull request where they were added."""
        for prefix, data in self.ontologies.items():
            with self.subTest(prefix=prefix):
                self.assertIn("pull_request_added", data)
                self.assertIn("issue_requested", data)

    @staticmethod
    @lru_cache
    def _fetch_github_repository(slug: str):
        """Fetch the GitHub API record for a repository.

        The cache lives on a static method keyed only by ``slug`` so it is
        shared between test instances and does not hold references to them.

        :param slug: The ``owner/name`` part of the repository URL
        :returns: The decoded JSON response from the GitHub repository endpoint
        :raises requests.HTTPError: If GitHub responds with an error status
        """
        # A timeout keeps an unresponsive API from hanging the whole test run.
        res = requests.get(f"https://api.github.com/repos/{slug}", timeout=30)
        res.raise_for_status()
        return res.json()

    def _get_github_data(self, prefix: str):
        """Get the GitHub API record for the repository of the given ontology.

        :param prefix: The prefix of an ontology in ``self.ontologies``
        :returns: The GitHub repository record, or ``None`` if the ontology's
            repository is not hosted on GitHub
        """
        repository = self.ontologies[prefix]["repository"]
        if not repository.startswith(self._GITHUB_PREFIX):
            return None
        slug = repository.removeprefix(self._GITHUB_PREFIX).rstrip("/")
        return self._fetch_github_repository(slug)

    def test_repository_license(self):
        """Test that the repository has a license that's correct."""
        for prefix, data in self.ontologies.items():
            repository = data["repository"]
            if not repository.startswith(self._GITHUB_PREFIX):
                continue
            with self.subTest(prefix=prefix):
                github_data = self._get_github_data(prefix)
                self.assertIn("license", github_data)
                license_data = github_data["license"]
                # GitHub reports a null license when it detects no LICENSE
                # file, so check for that before looking inside the record.
                self.assertIsNotNone(
                    license_data, msg="No LICENSE file found in the repository"
                )
                self.assertIn("spdx_id", license_data)
                spdx = license_data["spdx_id"]
                self.assertIsNotNone(
                    spdx, msg="No LICENSE file found in the repository"
                )
                self.assertNotEqual(
                    "NOASSERTION",
                    spdx,
                    msg="Either no LICENSE file was found or the LICENSE file does not have a standard format that "
                    "GitHub can parse. See https://docs.github.com/en/repositories/managing-your-"
                    "repositorys-settings-and-features/customizing-your-repository/licensing-a-"
                    "repository#detecting-a-license for information on how GitHub does this.",
                )
                self.assertIn(
                    spdx,
                    ALLOWED_SPDX,
                    msg="LICENSE file does not follow a standard format for"
                    f" one of the allowed license types ({ALLOWED_SPDX})",
                )

                # Report missing or unmapped license metadata as a test
                # failure with context rather than as a bare KeyError.
                self.assertIn(
                    "license", data, msg="No license annotated in the OBO Foundry metadata"
                )
                obo_license = data["license"]["label"]
                self.assertIn(
                    obo_license,
                    OBO_TO_SPDX,
                    msg=f"OBO Foundry license label has no known SPDX mapping ({sorted(OBO_TO_SPDX)})",
                )
                self.assertEqual(
                    spdx,
                    OBO_TO_SPDX[obo_license],
                    msg="OBO Foundry license annotation does not match GitHub license",
                )

tests/test_memberships.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ def test_data(self):
5656
for person in res.members:
5757
with self.subTest(name=person.name):
5858
self.assertFalse(
59-
person.affiliation.ror is None and person.affiliation.wikidata is None,
59+
person.affiliation.ror is None
60+
and person.affiliation.wikidata is None,
6061
msg=dedent(
6162
f"""\
6263
No ROR nor Wikidata identifier was curated for {person.name}.

0 commit comments

Comments
 (0)