From 1d7c3e6cc69dd05e652309f02f8d439c76319a19 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 23 Jun 2026 15:12:07 -0400 Subject: [PATCH 01/29] initial stub of SystemPaths Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 collectoss/application/paths.py diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py new file mode 100644 index 000000000..b2cdbcf2c --- /dev/null +++ b/collectoss/application/paths.py @@ -0,0 +1,21 @@ +from collectoss.application.environment import SystemEnv + + +class SystemPaths: + """Enable consistent storage and retrieval of filesystem paths needed by the system""" + + def get_facade_directory(self) -> str: + """Get the facade directory""" + return SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") + + def get_config_directory(self) -> str: + """Get the config directory""" + return SystemEnv.get("CONFIG_DATADIR") + + def get_logs_directory(self) -> str: + """Get the logs directory""" + return SystemEnv.get("COLLECTOSS_LOGS_DIRECTORY") + + def get_cache_directory(self) -> str: + """Get the cache directory""" + return SystemEnv.get("CACHE_DATADIR") From 306ba82318fdc98dceb8de27daf741e411c47cf7 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 23 Jun 2026 16:43:03 -0400 Subject: [PATCH 02/29] helpers for processing paths Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index b2cdbcf2c..8aac11e0a 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -1,5 +1,23 @@ from collectoss.application.environment import SystemEnv +from pathlib import Path +def _verify_path(path: Path, create = True) -> Path: + """Verify the path is a valid directory""" + if create: + if not path.exists(): + path.mkdir(parents=True) + if not path.is_dir(): + raise ValueError(f"Path {path} is not a valid directory") + return path.resolve() + + +def _path_from_env(env_value: str) -> Path: + """Get the path from the environment variable""" + if env_value is None: + return None + if env_value == "": + return None + return Path(env_value) class SystemPaths: """Enable consistent storage and retrieval of filesystem paths needed by the system""" From 32c7969bdc39ed314d94408c2c0b0f7b8f908f24 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 23 Jun 2026 16:43:18 -0400 Subject: [PATCH 03/29] docs and setup Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index 8aac11e0a..ce50b5969 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -1,3 +1,4 @@ +from platformdirs import PlatformDirs from collectoss.application.environment import SystemEnv from pathlib import Path @@ -20,7 +21,18 @@ def _path_from_env(env_value: str) -> Path: return Path(env_value) class SystemPaths: - """Enable consistent storage and retrieval of filesystem paths needed by the system""" + """Enable consistent storage and retrieval of filesystem paths needed by the system + + The paths that are used follow the following hierarchy: + - Absolute path specified by an environment variable + - Relative path specified by an environment variable, resolved against the home directory + - Default path for the operating system based on accepted standards + + """ + app_name = "CollectOSS" + app_org = "CHAOSS" + # Automatically targets the proper OS directory and handles creation + dirs = PlatformDirs(app_name, app_org, ensure_exists=True) def get_facade_directory(self) -> str: """Get the facade directory""" From 69fc550f19ed40bc0cbeee204f1eca3ecfda8a39 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 23 Jun 2026 16:43:55 -0400 Subject: [PATCH 04/29] add create args to all path functions Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index ce50b5969..a4bbee867 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -34,18 +34,18 @@ class SystemPaths: # Automatically targets the proper OS directory and handles creation dirs = PlatformDirs(app_name, app_org, ensure_exists=True) - def get_facade_directory(self) -> str: + def get_facade_directory(self, create = True) -> Path: """Get the facade directory""" return SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") - def get_config_directory(self) -> str: + def get_config_directory(self, create = True) -> Path: """Get the config directory""" return SystemEnv.get("CONFIG_DATADIR") - def get_logs_directory(self) -> str: + def get_logs_directory(self, create = True) -> Path: """Get the logs directory""" return SystemEnv.get("COLLECTOSS_LOGS_DIRECTORY") - def get_cache_directory(self) -> str: + def get_cache_directory(self, create = True) -> Path: """Get the cache directory""" return SystemEnv.get("CACHE_DATADIR") From 8ff964235be245e4f1b974453df300a63d5296f5 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 23 Jun 2026 16:45:12 -0400 Subject: [PATCH 05/29] create general function for assembling paths so logic is consistent between all of them Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index a4bbee867..18916e70e 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -34,6 +34,21 @@ class SystemPaths: # Automatically targets the proper OS directory and handles creation dirs = PlatformDirs(app_name, app_org, ensure_exists=True) + def _build_path(self, env_path:str, default_path:Path) -> Path: + """Build a path from the environment variable or the default path. + + If the environment variable is an absolute path, return it. + If the environment variable is a relative path, resolve it against the home directory. + If the environment variable is not set, return the default path. + """ + if env_path is not None: + if env_path.is_absolute(): + return env_path + else: + return _path_from_env(SystemEnv.get("HOME")) / env_path + else: + return default_path + def get_facade_directory(self, create = True) -> Path: """Get the facade directory""" return SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") From cae9a1f92caddef78beed77c15cdd65144ba5a20 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 23 Jun 2026 16:45:32 -0400 Subject: [PATCH 06/29] populate facade repo directory Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index 18916e70e..b2c1a6c33 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -51,7 +51,12 @@ def _build_path(self, env_path:str, default_path:Path) -> Path: def get_facade_directory(self, create = True) -> Path: """Get the facade directory""" - return SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") + env_path = _path_from_env(SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY")) + + return _verify_path( + self._build_path(env_path, self.dirs.user_downloads_path / "collectoss_facade"), + create = create + ) def get_config_directory(self, create = True) -> Path: """Get the config directory""" From 3e5d0d5bdf79a1b62abc9ee7ae0e57cffc945b7e Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 23 Jun 2026 16:45:51 -0400 Subject: [PATCH 07/29] populate config directory Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index b2c1a6c33..10acff0b8 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -60,7 +60,12 @@ def get_facade_directory(self, create = True) -> Path: def get_config_directory(self, create = True) -> Path: """Get the config directory""" - return SystemEnv.get("CONFIG_DATADIR") + env_path = _path_from_env(SystemEnv.get("COLLECTOSS_CONFIG_DIRECTORY") or SystemEnv.get("CONFIG_DATADIR")) + + return _verify_path( + self._build_path(env_path, self.dirs.user_config_path), + create = create + ) def get_logs_directory(self, create = True) -> Path: """Get the logs directory""" From 1f81748c5cfd5b04e1ce4c06e4c9837d787382fb Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 23 Jun 2026 16:48:34 -0400 Subject: [PATCH 08/29] populate logs directory Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index 10acff0b8..e65c7e311 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -69,7 +69,12 @@ def get_config_directory(self, create = True) -> Path: def get_logs_directory(self, create = True) -> Path: """Get the logs directory""" - return SystemEnv.get("COLLECTOSS_LOGS_DIRECTORY") + env_path = _path_from_env(SystemEnv.get("COLLECTOSS_LOGS_DIRECTORY")) + + return _verify_path( + self._build_path(env_path, self.dirs.user_log_path), + create = create + ) def get_cache_directory(self, create = True) -> Path: """Get the cache directory""" From bc473354c5f1b925b06f8a3d9f5dd35b991e5f20 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 23 Jun 2026 16:48:43 -0400 Subject: [PATCH 09/29] populate cache directory Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index e65c7e311..a35209678 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -78,4 +78,9 @@ def get_logs_directory(self, create = True) -> Path: def get_cache_directory(self, create = True) -> Path: """Get the cache directory""" - return SystemEnv.get("CACHE_DATADIR") + env_path = _path_from_env(SystemEnv.get("COLLECTOSS_CACHE_DIRECTORY") or SystemEnv.get("CACHE_DATADIR")) + + return _verify_path( + self._build_path(env_path, self.dirs.user_cache_path), + create = create + ) From 2bc4b047f2aa6e6f733680910efa55ed2ef99994 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 23 Jun 2026 16:51:13 -0400 Subject: [PATCH 10/29] add function to print all paths Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index a35209678..a6966bfdd 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -84,3 +84,9 @@ def get_cache_directory(self, create = True) -> Path: self._build_path(env_path, self.dirs.user_cache_path), create = create ) + + def print_all_paths(self, logger): + logger.info(f"Facade directory: {self.get_facade_directory(create = False)}") + logger.info(f"Config directory: {self.get_config_directory(create = False)}") + logger.info(f"Logs directory: {self.get_logs_directory(create = False)}") + logger.info(f"Cache directory: {self.get_cache_directory(create = False)}") From c95a7409d50c97fc9b17966f8f7fad5684c6f4ca Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 23 Jun 2026 17:04:56 -0400 Subject: [PATCH 11/29] make SystemPaths static Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 66 ++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index a6966bfdd..f81662750 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -20,6 +20,21 @@ def _path_from_env(env_value: str) -> Path: return None return Path(env_value) +def _build_path(env_path:str, default_path:Path) -> Path: + """Build a path from the environment variable or the default path. + + If the environment variable is an absolute path, return it. + If the environment variable is a relative path, resolve it against the home directory. + If the environment variable is not set, return the default path. + """ + if env_path is not None: + if env_path.is_absolute(): + return env_path + else: + return _path_from_env(SystemEnv.get("HOME")) / env_path + else: + return default_path + class SystemPaths: """Enable consistent storage and retrieval of filesystem paths needed by the system @@ -31,62 +46,55 @@ class SystemPaths: """ app_name = "CollectOSS" app_org = "CHAOSS" - # Automatically targets the proper OS directory and handles creation - dirs = PlatformDirs(app_name, app_org, ensure_exists=True) - def _build_path(self, env_path:str, default_path:Path) -> Path: - """Build a path from the environment variable or the default path. - - If the environment variable is an absolute path, return it. - If the environment variable is a relative path, resolve it against the home directory. - If the environment variable is not set, return the default path. - """ - if env_path is not None: - if env_path.is_absolute(): - return env_path - else: - return _path_from_env(SystemEnv.get("HOME")) / env_path - else: - return default_path + @staticmethod + def os_defaults(create = True) -> PlatformDirs: + """Get the set of conventional directories for the operating system""" + return PlatformDirs(SystemPaths.app_name, SystemPaths.app_org, ensure_exists=create) - def get_facade_directory(self, create = True) -> Path: + @staticmethod + def get_facade_directory(create = True) -> Path: """Get the facade directory""" env_path = _path_from_env(SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY")) return _verify_path( - self._build_path(env_path, self.dirs.user_downloads_path / "collectoss_facade"), + _build_path(env_path, SystemPaths.os_defaults(create).user_downloads_path / "collectoss_facade"), create = create ) - def get_config_directory(self, create = True) -> Path: + @staticmethod + def get_config_directory(create = True) -> Path: """Get the config directory""" env_path = _path_from_env(SystemEnv.get("COLLECTOSS_CONFIG_DIRECTORY") or SystemEnv.get("CONFIG_DATADIR")) return _verify_path( - self._build_path(env_path, self.dirs.user_config_path), + _build_path(env_path, SystemPaths.os_defaults(create).user_config_path), create = create ) - def get_logs_directory(self, create = True) -> Path: + @staticmethod + def get_logs_directory(create = True) -> Path: """Get the logs directory""" env_path = _path_from_env(SystemEnv.get("COLLECTOSS_LOGS_DIRECTORY")) return _verify_path( - self._build_path(env_path, self.dirs.user_log_path), + _build_path(env_path, SystemPaths.os_defaults(create).user_log_path), create = create ) - def get_cache_directory(self, create = True) -> Path: + @staticmethod + def get_cache_directory(create = True) -> Path: """Get the cache directory""" env_path = _path_from_env(SystemEnv.get("COLLECTOSS_CACHE_DIRECTORY") or SystemEnv.get("CACHE_DATADIR")) return _verify_path( - self._build_path(env_path, self.dirs.user_cache_path), + _build_path(env_path, SystemPaths.os_defaults(create).user_cache_path), create = create ) - def print_all_paths(self, logger): - logger.info(f"Facade directory: {self.get_facade_directory(create = False)}") - logger.info(f"Config directory: {self.get_config_directory(create = False)}") - logger.info(f"Logs directory: {self.get_logs_directory(create = False)}") - logger.info(f"Cache directory: {self.get_cache_directory(create = False)}") + @staticmethod + def print_all_paths(logger): + logger.info(f"Facade directory: {SystemPaths.get_facade_directory(create = False)}") + logger.info(f"Config directory: {SystemPaths.get_config_directory(create = False)}") + logger.info(f"Logs directory: {SystemPaths.get_logs_directory(create = False)}") + logger.info(f"Cache directory: {SystemPaths.get_cache_directory(create = False)}") From 80fc2fd8f625916f01a717b9a6789213badfcc44 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 23 Jun 2026 17:14:47 -0400 Subject: [PATCH 12/29] simplify inclusion of home directory path Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index f81662750..32e9b2bc9 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -31,7 +31,7 @@ def _build_path(env_path:str, default_path:Path) -> Path: if env_path.is_absolute(): return env_path else: - return _path_from_env(SystemEnv.get("HOME")) / env_path + return Path.home() / env_path else: return default_path From 1abd230454adfb7cfbf4606552e46d2d3348d504 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 23 Jun 2026 17:27:19 -0400 Subject: [PATCH 13/29] unit testing and fixes for core path builder logic Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 15 ++++++++++++--- tests/test_classes/test_paths.py | 8 ++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 tests/test_classes/test_paths.py diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index 32e9b2bc9..1906618f1 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -2,6 +2,14 @@ from collectoss.application.environment import SystemEnv from pathlib import Path + +def _clean_path(path: Path | str) -> Path: + if path is None: + return None + if isinstance(path, str): + path = Path(path) + return path.expanduser().resolve() + def _verify_path(path: Path, create = True) -> Path: """Verify the path is a valid directory""" if create: @@ -9,7 +17,7 @@ def _verify_path(path: Path, create = True) -> Path: path.mkdir(parents=True) if not path.is_dir(): raise ValueError(f"Path {path} is not a valid directory") - return path.resolve() + return _clean_path(path) def _path_from_env(env_value: str) -> Path: @@ -28,10 +36,11 @@ def _build_path(env_path:str, default_path:Path) -> Path: If the environment variable is not set, return the default path. """ if env_path is not None: + env_path = Path(env_path) if env_path.is_absolute(): - return env_path + return _clean_path(env_path) else: - return Path.home() / env_path + return _clean_path(Path.home() / env_path) else: return default_path diff --git a/tests/test_classes/test_paths.py b/tests/test_classes/test_paths.py new file mode 100644 index 000000000..93fa2f233 --- /dev/null +++ b/tests/test_classes/test_paths.py @@ -0,0 +1,8 @@ +from collectoss.application.paths import _build_path, _clean_path +from pathlib import Path + +class TestBuildPath: + def test_build_path(self): + assert _build_path(None, Path("/path")) == Path("/path") + assert _build_path("collectoss", Path("/path")) == Path.home() / "collectoss" + assert _build_path("/collectoss", Path("/path")) == Path("/collectoss") \ No newline at end of file From e20ed8b3973e91ea9e862c0fe14b00c76f74bb5d Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 23 Jun 2026 17:27:32 -0400 Subject: [PATCH 14/29] print system paths on startup Signed-off-by: Adrian Edwards --- docker/backend/preflight.py | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/backend/preflight.py b/docker/backend/preflight.py index 4207db8b7..49d22dc04 100755 --- a/docker/backend/preflight.py +++ b/docker/backend/preflight.py @@ -31,5 +31,6 @@ warn_import_repos(logger) print_platform_information(logger) + SystemPaths().print_all_paths(logger) sys.exit(0) From 0bf3c9ba59d815c75cf370dc5ec6e6c4b0b75c87 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 24 Jun 2026 09:14:27 -0400 Subject: [PATCH 15/29] don't write facade and log dirs to the database if they aren't provided via env Signed-off-by: Adrian Edwards --- collectoss/util/startup.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index 8fe5b2374..2b62a1f55 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -213,9 +213,11 @@ def merge_config( augmented_config["Keys"] = keys - augmented_config["Facade"]["repo_directory"] = facade_repo_directory - - augmented_config["Logging"]["logs_directory"] = logs_directory or (ROOT_PROJECT_REPO_DIRECTORY + "/logs/") + if facade_repo_directory and facade_repo_directory != "": + augmented_config["Facade"]["repo_directory"] = facade_repo_directory + + if logs_directory and logs_directory != "": + augmented_config["Logging"]["logs_directory"] = logs_directory config.load_config_from_dict(augmented_config) From 84980bd4ba85b2345256dafb157fe8909c2593e3 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 24 Jun 2026 09:14:50 -0400 Subject: [PATCH 16/29] type tweaks for accuracy/documentation Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index 1906618f1..288715329 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -3,14 +3,14 @@ from pathlib import Path -def _clean_path(path: Path | str) -> Path: +def _clean_path(path: Path | str) -> Path | None: if path is None: return None if isinstance(path, str): path = Path(path) return path.expanduser().resolve() -def _verify_path(path: Path, create = True) -> Path: +def _verify_path(path: Path, create = True) -> Path | None: """Verify the path is a valid directory""" if create: if not path.exists(): @@ -20,7 +20,7 @@ def _verify_path(path: Path, create = True) -> Path: return _clean_path(path) -def _path_from_env(env_value: str) -> Path: +def _path_from_env(env_value: str) -> Path | None: """Get the path from the environment variable""" if env_value is None: return None From d0e889e096ea53000c1c7a3ba84aaafadc798869 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 24 Jun 2026 09:15:02 -0400 Subject: [PATCH 17/29] use logging for paths class Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index 288715329..472f05d39 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -2,6 +2,9 @@ from collectoss.application.environment import SystemEnv from pathlib import Path +import logging + +logger = logging.getLogger(__name__) def _clean_path(path: Path | str) -> Path | None: if path is None: From 75830bbc335254addc93ed73675c6a9c33cbd98b Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 24 Jun 2026 09:17:05 -0400 Subject: [PATCH 18/29] read from database for facade and logs directories This is done as a special case for historical compatibility for users who are not yet using the config file. No additional paths should be moved to the database. Doing so, especially with the config directory, will potentially create a catch-22. Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index 472f05d39..d28f50e1a 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -66,11 +66,20 @@ def os_defaults(create = True) -> PlatformDirs: @staticmethod def get_facade_directory(create = True) -> Path: - """Get the facade directory""" + """Get the facade directory. Requires database for historical compatibility""" env_path = _path_from_env(SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY")) + database_path = None + + from collectoss.application.config import SystemConfig + from collectoss.application.db.session import DatabaseSession + from collectoss.application.db import get_engine + with DatabaseSession(logger, get_engine()) as session: + config = SystemConfig(logger, session) + database_path = config.get_value("Facade", "repo_directory") + return _verify_path( - _build_path(env_path, SystemPaths.os_defaults(create).user_downloads_path / "collectoss_facade"), + _build_path(env_path or database_path, SystemPaths.os_defaults(create).user_downloads_path / "collectoss_facade"), create = create ) @@ -86,11 +95,19 @@ def get_config_directory(create = True) -> Path: @staticmethod def get_logs_directory(create = True) -> Path: - """Get the logs directory""" + """Get the logs directory. Requires database for historical compatibility""" env_path = _path_from_env(SystemEnv.get("COLLECTOSS_LOGS_DIRECTORY")) + database_path = None + + from collectoss.application.config import SystemConfig + from collectoss.application.db.session import DatabaseSession + from collectoss.application.db import get_engine + with DatabaseSession(logger, get_engine()) as session: + config = SystemConfig(logger, session) + database_path = config.get_value("Logging", "logs_directory") return _verify_path( - _build_path(env_path, SystemPaths.os_defaults(create).user_log_path), + _build_path(env_path or database_path, SystemPaths.os_defaults(create).user_log_path), create = create ) From ac9c87868531c1d8a66229c4afc4fdf013575d68 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 24 Jun 2026 09:24:33 -0400 Subject: [PATCH 19/29] Replace uses of ROOT_PROJECT_REPO_DIRECTORY used for logging Signed-off-by: Adrian Edwards --- collectoss/api/gunicorn_conf.py | 10 ++-------- collectoss/application/logs.py | 16 +++++++--------- collectoss/util/startup.py | 2 -- 3 files changed, 9 insertions(+), 19 deletions(-) diff --git a/collectoss/api/gunicorn_conf.py b/collectoss/api/gunicorn_conf.py index ee7797471..23f24feae 100644 --- a/collectoss/api/gunicorn_conf.py +++ b/collectoss/api/gunicorn_conf.py @@ -1,4 +1,3 @@ -# from collectoss import ROOT_PROJECT_REPO_DIRECTORY import multiprocessing import logging import os @@ -8,16 +7,11 @@ from collectoss.application.db.lib import get_value from collectoss.application.db import dispose_database_engine from collectoss.application.environment import SystemEnv +from collectoss.application.paths import SystemPaths logger = logging.getLogger(__name__) -# ROOT_PROJECT_REPO_DIRECTORY = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) - -# base_log_dir = ROOT_PROJECT_REPO_DIRECTORY + "/logs/" - -# Path(base_log_dir).mkdir(exist_ok=True) - workers = multiprocessing.cpu_count() * 2 + 1 umask = 0o007 reload = True @@ -39,7 +33,7 @@ del is_dev # set the log location for gunicorn -logs_directory = get_value('Logging', 'logs_directory') +logs_directory = SystemPaths.get_logs_directory() # this syntax satisfies the type checker is_docker = SystemEnv.get_bool("AUGUR_DOCKER_DEPLOY", False) diff --git a/collectoss/application/logs.py b/collectoss/application/logs.py index aaf6cb5d8..0c9aa2ef9 100644 --- a/collectoss/application/logs.py +++ b/collectoss/application/logs.py @@ -7,6 +7,7 @@ import os from pathlib import Path import shutil +from collectoss.application.paths import SystemPaths import coloredlogs from sqlalchemy.orm import Session @@ -14,9 +15,6 @@ from collectoss.application.config import convert_type_of_value from collectoss.application.db.util import execute_session_query -ROOT_PROJECT_REPO_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) - - SIMPLE_FORMAT_STRING = "[%(process)d] %(name)s [%(levelname)s] %(message)s" VERBOSE_FORMAT_STRING = "%(asctime)s,%(msecs)dms [PID: %(process)d] %(name)s [%(levelname)s] %(message)s" CLI_FORMAT_STRING = "CLI: [%(module)s.%(funcName)s] [%(levelname)s] %(message)s" @@ -117,12 +115,12 @@ def get_log_config(): #TODO dynamically define loggers for every task names. class TaskLogConfig(): - def __init__(self, all_tasks, disable_log_files=False,reset_logfiles=False,base_log_dir=ROOT_PROJECT_REPO_DIRECTORY + "/logs/"): + def __init__(self, all_tasks, disable_log_files=False, reset_logfiles=False, base_log_dir=None): log_config = get_log_config() - if log_config["logs_directory"] != "": - base_log_dir=log_config["logs_directory"] + if not base_log_dir: + base_log_dir = SystemPaths.get_logs_directory() if reset_logfiles is True: try: @@ -188,12 +186,12 @@ def getLoggerNames(self): class SystemLogger(): - def __init__(self, logger_name, disable_log_files=False,reset_logfiles=False,base_log_dir=ROOT_PROJECT_REPO_DIRECTORY + "/logs/"): + def __init__(self, logger_name, disable_log_files=False, reset_logfiles=False, base_log_dir=None): log_config = get_log_config() - if log_config.get("logs_directory", "") != "": - base_log_dir=log_config.get("logs_directory") + if not base_log_dir: + base_log_dir = SystemPaths.get_logs_directory() if reset_logfiles is True: try: diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index 2b62a1f55..a9bd38487 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -17,8 +17,6 @@ from collectoss.util.inspect_without_import import get_phase_names_without_import -ROOT_PROJECT_REPO_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) - def check_init_schema(): """Initialize the CollectOSS database schema as appropriate """ From 4cc2178b72d6887c8d48da9c3d1c7da18b3619d2 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 24 Jun 2026 09:42:44 -0400 Subject: [PATCH 20/29] add models and discourse analysis paths to the SystemPaths object Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 38 +++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index d28f50e1a..250827b17 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -121,9 +121,47 @@ def get_cache_directory(create = True) -> Path: create = create ) + + @staticmethod + def get_models_directory(create = True) -> Path: + """Get the models directory. Requires database for historical compatibility""" + database_dirname = None + + from collectoss.application.config import SystemConfig + from collectoss.application.db.session import DatabaseSession + from collectoss.application.db import get_engine + with DatabaseSession(logger, get_engine()) as session: + config = SystemConfig(logger, session) + database_dirname = config.get_value("Message_Insights", 'models_dir') or "message_models" + + return _verify_path( + SystemPaths.os_defaults(create).user_data_path / "tasks" / "data_analysis" / "message_insights" / database_dirname, + create = create + ) + + @staticmethod + def get_model_training_data_directory(create = True) -> Path: + """Get the model training data directory""" + return _verify_path( + SystemPaths.os_defaults(create).user_data_path / "tasks" / "data_analysis" / "message_insights" / "train_data", + create = create + ) + + @staticmethod + def get_discourse_analysis_directory(create = True) -> Path: + """Get the discourse analysis directory""" + + return _verify_path( + SystemPaths.os_defaults(create).user_data_path / "tasks" / "data_analysis" / "discourse_analysis", + create = create + ) + @staticmethod def print_all_paths(logger): logger.info(f"Facade directory: {SystemPaths.get_facade_directory(create = False)}") logger.info(f"Config directory: {SystemPaths.get_config_directory(create = False)}") logger.info(f"Logs directory: {SystemPaths.get_logs_directory(create = False)}") logger.info(f"Cache directory: {SystemPaths.get_cache_directory(create = False)}") + logger.info(f"Models directory: {SystemPaths.get_models_directory(create = False)}") + logger.info(f"Model training data directory: {SystemPaths.get_model_training_data_directory(create = False)}") + logger.info(f"Discourse analysis directory: {SystemPaths.get_discourse_analysis_directory(create = False)}") \ No newline at end of file From e942c82c1c23e3528838ca9c734022dab662cba9 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 24 Jun 2026 09:57:21 -0400 Subject: [PATCH 21/29] add install path to SystemPaths This may be useful for other parts of CollectOSS to initialize various directories from a default set of files Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index 250827b17..7b713bc1d 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -155,9 +155,17 @@ def get_discourse_analysis_directory(create = True) -> Path: SystemPaths.os_defaults(create).user_data_path / "tasks" / "data_analysis" / "discourse_analysis", create = create ) + + @staticmethod + def get_install_path() -> Path: + """Get the path that CollectOSS is currently installed to. This should be treated as read- only.""" + # This paths file is only one level below the root of the module. + # accessing above that is not possible as the module could be installed separately + return _verify_path(Path(__file__).parent, create = False) @staticmethod def print_all_paths(logger): + logger.info(f"Install path: {SystemPaths.get_install_path()}") logger.info(f"Facade directory: {SystemPaths.get_facade_directory(create = False)}") logger.info(f"Config directory: {SystemPaths.get_config_directory(create = False)}") logger.info(f"Logs directory: {SystemPaths.get_logs_directory(create = False)}") From 1a7b8af6a4b83a36ebdb5eb567e7b66f4823596c Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 24 Jun 2026 09:58:30 -0400 Subject: [PATCH 22/29] allow the analysis directories to be somewhat controllable with an environment variable Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index 7b713bc1d..e803a8248 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -142,17 +142,18 @@ def get_models_directory(create = True) -> Path: @staticmethod def get_model_training_data_directory(create = True) -> Path: """Get the model training data directory""" + env_path = _path_from_env(SystemEnv.get("COLLECTOSS_ANALYSIS_DIRECTORY")) return _verify_path( - SystemPaths.os_defaults(create).user_data_path / "tasks" / "data_analysis" / "message_insights" / "train_data", + _build_path(env_path / "message_insights" / "train_data", SystemPaths.os_defaults(create).user_data_path / "tasks" / "data_analysis" / "message_insights" / "train_data"), create = create ) @staticmethod def get_discourse_analysis_directory(create = True) -> Path: """Get the discourse analysis directory""" - + env_path = _path_from_env(SystemEnv.get("COLLECTOSS_ANALYSIS_DIRECTORY")) return _verify_path( - SystemPaths.os_defaults(create).user_data_path / "tasks" / "data_analysis" / "discourse_analysis", + _build_path(env_path / "discourse_analysis", SystemPaths.os_defaults(create).user_data_path / "tasks" / "data_analysis" / "discourse_analysis"), create = create ) From ef3de93cedc4bd41ec9fce4d3ac6b11bb2fd2c2b Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 24 Jun 2026 10:01:45 -0400 Subject: [PATCH 23/29] replace all remaining uses of ROOT_PROJECT_REPO_DIRECTORY with a call to SystemPaths Signed-off-by: Adrian Edwards --- collectoss/tasks/data_analysis/discourse_analysis/tasks.py | 4 ++-- .../tasks/data_analysis/message_insights/message_novelty.py | 6 ++---- .../data_analysis/message_insights/message_sentiment.py | 5 ++--- collectoss/tasks/data_analysis/message_insights/tasks.py | 5 ++--- .../data_analysis/pull_request_analysis_worker/tasks.py | 5 ++--- 5 files changed, 10 insertions(+), 15 deletions(-) diff --git a/collectoss/tasks/data_analysis/discourse_analysis/tasks.py b/collectoss/tasks/data_analysis/discourse_analysis/tasks.py index fccf169e8..b026f2774 100644 --- a/collectoss/tasks/data_analysis/discourse_analysis/tasks.py +++ b/collectoss/tasks/data_analysis/discourse_analysis/tasks.py @@ -1,4 +1,5 @@ import logging +from collectoss.application.paths import SystemPaths import sqlalchemy as s import pandas as pd import pickle @@ -29,8 +30,7 @@ # from os import path stemmer = nltk.stem.snowball.SnowballStemmer("english") -ROOT_PROJECT_REPO_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) -DISCOURSE_ANALYSIS_DIR = f"{ROOT_PROJECT_REPO_DIRECTORY}/tasks/data_analysis/discourse_analysis/" +DISCOURSE_ANALYSIS_DIR = SystemPaths.get_discourse_analysis_directory() @celery.task(base=MLRepoCollectionTask, bind=True) def discourse_analysis_task(self, repo_git): diff --git a/collectoss/tasks/data_analysis/message_insights/message_novelty.py b/collectoss/tasks/data_analysis/message_insights/message_novelty.py index bb5531b04..dfed5a3fd 100644 --- a/collectoss/tasks/data_analysis/message_insights/message_novelty.py +++ b/collectoss/tasks/data_analysis/message_insights/message_novelty.py @@ -5,6 +5,7 @@ import os from datetime import datetime, timedelta +from collectoss.application.paths import SystemPaths import numpy as np import pandas as pd from gensim.models.doc2vec import Doc2Vec, TaggedDocument @@ -16,10 +17,7 @@ from collectoss.tasks.data_analysis.message_insights.preprocess_text import \ normalize_corpus as normalize_corpus -ROOT_PROJECT_REPO_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) - - -train_path = os.path.join(ROOT_PROJECT_REPO_DIRECTORY, "tasks", "data_analysis", "message_insights", "train_data") +train_path = SystemPaths.get_model_training_data_directory() # ''' Doc2Vec model training diff --git a/collectoss/tasks/data_analysis/message_insights/message_sentiment.py b/collectoss/tasks/data_analysis/message_insights/message_sentiment.py index 4ce60c7d6..6bc25680e 100644 --- a/collectoss/tasks/data_analysis/message_insights/message_sentiment.py +++ b/collectoss/tasks/data_analysis/message_insights/message_sentiment.py @@ -10,6 +10,7 @@ import warnings from statistics import mean +from collectoss.application.paths import SystemPaths import emoji import joblib import nltk @@ -30,11 +31,9 @@ warnings.filterwarnings('ignore') -ROOT_PROJECT_REPO_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) - CONTRACTION_MAP = contraction_map -train_path = os.path.join(ROOT_PROJECT_REPO_DIRECTORY, "tasks", "data_analysis", "message_insights", "train_data") +train_path = SystemPaths.get_model_training_data_directory() def replace_all(text, dic): if(sys.version_info[0] < 3): diff --git a/collectoss/tasks/data_analysis/message_insights/tasks.py b/collectoss/tasks/data_analysis/message_insights/tasks.py index 751da1ea7..1e3abb21f 100644 --- a/collectoss/tasks/data_analysis/message_insights/tasks.py +++ b/collectoss/tasks/data_analysis/message_insights/tasks.py @@ -1,6 +1,7 @@ import datetime import logging import os +from collectoss.application.paths import SystemPaths import numpy as np import pandas as pd import requests @@ -18,8 +19,6 @@ #SPDX-License-Identifier: MIT -ROOT_PROJECT_REPO_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) - @celery.task(base=MLRepoCollectionTask, bind=True) def message_insight_task(self, repo_git): @@ -45,7 +44,7 @@ def message_insight_model(repo_git: str,logger,engine) -> None: repo = get_repo_by_repo_git(repo_git) repo_id = repo.repo_id - models_dir = os.path.join(ROOT_PROJECT_REPO_DIRECTORY, "tasks", "data_analysis", "message_insights", get_value("Message_Insights", 'models_dir')) + models_dir = SystemPaths.get_models_directory() insight_days = get_value("Message_Insights", 'insight_days') # Any initial database instructions, like finding the last tuple inserted or generate the next ID value diff --git a/collectoss/tasks/data_analysis/pull_request_analysis_worker/tasks.py b/collectoss/tasks/data_analysis/pull_request_analysis_worker/tasks.py index 34512fbc9..b3ce827c8 100644 --- a/collectoss/tasks/data_analysis/pull_request_analysis_worker/tasks.py +++ b/collectoss/tasks/data_analysis/pull_request_analysis_worker/tasks.py @@ -2,6 +2,7 @@ import os import datetime +from collectoss.application.paths import SystemPaths import joblib import pandas as pd import sqlalchemy as s @@ -18,8 +19,6 @@ # from sklearn.preprocessing import LabelEncoder, MinMaxScaler # from xgboost import XGBClassifier -ROOT_PROJECT_REPO_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) - @celery.task(base=MLRepoCollectionTask, bind=True) def pull_request_analysis_task(self, repo_git): @@ -40,7 +39,7 @@ def pull_request_analysis_model(repo_git: str,logger,engine) -> None: repo_id = get_repo_by_repo_git(repo_git).repo_id - senti_models_dir = os.path.join(ROOT_PROJECT_REPO_DIRECTORY, "tasks", "data_analysis", "message_insights", get_value("Message_Insights", 'models_dir')) + senti_models_dir = SystemPaths.get_models_directory() logger.info(f'Sentiment model dir located - {senti_models_dir}') From ced6645d8235268a1b087637701abf9abf10e1b8 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 24 Jun 2026 10:05:41 -0400 Subject: [PATCH 24/29] use SystemPaths for remaining log and facade directory-building too Signed-off-by: Adrian Edwards --- collectoss/application/cli/backend.py | 3 ++- collectoss/tasks/git/dependency_libyear_tasks/core.py | 3 ++- collectoss/tasks/git/dependency_tasks/core.py | 3 ++- collectoss/tasks/git/scc_value_tasks/core.py | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index 3526a3c2c..a621a0675 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -13,6 +13,7 @@ import uuid import traceback import requests +from collectoss.application.paths import SystemPaths from redis.exceptions import ConnectionError as RedisConnectionError from collectoss.application.environment import SystemEnv @@ -101,7 +102,7 @@ def start(ctx, disable_collection, development, pidfile, port): cleanup_collection_status_and_rabbit(logger, ctx.obj.engine) # Retrieve the log directory from the configuration or default to current directory - log_dir = get_value("Logging", "logs_directory") or "." + log_dir = SystemPaths.get_logs_directory() gunicorn_log_file = os.path.join(log_dir, "gunicorn.log") gunicorn_command = f"gunicorn -c {gunicorn_location} -b {host}:{port} collectoss.api.server:app --log-file {gunicorn_log_file}" diff --git a/collectoss/tasks/git/dependency_libyear_tasks/core.py b/collectoss/tasks/git/dependency_libyear_tasks/core.py index 56b8f1a5b..13cea4166 100644 --- a/collectoss/tasks/git/dependency_libyear_tasks/core.py +++ b/collectoss/tasks/git/dependency_libyear_tasks/core.py @@ -1,6 +1,7 @@ from datetime import datetime from collectoss.application.db.models import * from collectoss.application.db.lib import get_value, bulk_insert_dicts, get_repo_by_repo_git +from collectoss.application.paths import SystemPaths from collectoss.tasks.git.dependency_libyear_tasks.libyear_util.util import get_deps_libyear_data from collectoss.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_absolute_repo_path @@ -15,7 +16,7 @@ def deps_libyear_model(logger,repo_git): repo = get_repo_by_repo_git(repo_git) - absolute_repo_path = get_absolute_repo_path(get_value("Facade", "repo_directory"),repo.repo_id,repo.repo_path,repo.repo_name) + absolute_repo_path = get_absolute_repo_path(SystemPaths.get_facade_directory(),repo.repo_id,repo.repo_path,repo.repo_name) #config.get_section("Facade")['repo_directory'] + relative_repo_path#self.config['repo_directory'] + relative_repo_path generate_deps_libyear_data(logger, repo.repo_id, absolute_repo_path) diff --git a/collectoss/tasks/git/dependency_tasks/core.py b/collectoss/tasks/git/dependency_tasks/core.py index 0648231b0..1c4f90ed8 100644 --- a/collectoss/tasks/git/dependency_tasks/core.py +++ b/collectoss/tasks/git/dependency_tasks/core.py @@ -3,6 +3,7 @@ from collectoss.application.db.models import * from collectoss.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_value from collectoss.application.environment import SystemEnv +from collectoss.application.paths import SystemPaths from collectoss.tasks.github.util.github_api_key_handler import GithubApiKeyHandler from collectoss.tasks.git.dependency_tasks.dependency_util import dependency_calculator as dep_calc from collectoss.tasks.util.worker_util import parse_json_from_subprocess_call @@ -22,7 +23,7 @@ def generate_deps_data(logger, repo_git): repo = get_repo_by_repo_git(repo_git) repo_id = repo.repo_id - path = get_absolute_repo_path(get_value("Facade", "repo_directory"),repo.repo_id,repo.repo_path,repo.repo_name) + path = get_absolute_repo_path(SystemPaths.get_facade_directory(),repo.repo_id,repo.repo_path,repo.repo_name) logger.debug(f"This is the deps model repo: {repo_git}.") diff --git a/collectoss/tasks/git/scc_value_tasks/core.py b/collectoss/tasks/git/scc_value_tasks/core.py index 770165522..b665beb91 100644 --- a/collectoss/tasks/git/scc_value_tasks/core.py +++ b/collectoss/tasks/git/scc_value_tasks/core.py @@ -3,6 +3,7 @@ from collectoss.application.db.models import * from collectoss.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_value from collectoss.application.environment import SystemEnv +from collectoss.application.paths import SystemPaths from collectoss.tasks.util.worker_util import parse_json_from_subprocess_call from collectoss.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_absolute_repo_path @@ -15,7 +16,7 @@ def value_model(logger,repo_git): repo = get_repo_by_repo_git(repo_git) repo_id = repo.repo_id - path = get_absolute_repo_path(get_value("Facade", "repo_directory"),repo_id,repo.repo_path,repo.repo_name) + path = get_absolute_repo_path(SystemPaths.get_facade_directory(),repo_id,repo.repo_path,repo.repo_name) logger.info('Generating value data for repo') logger.info(f"Repo ID: {repo_id}, Path: {path}") From aa7699b17d43350b7a4fd2fcf4fb5ece9714e385 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 24 Jun 2026 10:14:37 -0400 Subject: [PATCH 25/29] swap path in facade helper Signed-off-by: Adrian Edwards --- .../tasks/git/util/facade_worker/facade_worker/config.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/collectoss/tasks/git/util/facade_worker/facade_worker/config.py b/collectoss/tasks/git/util/facade_worker/facade_worker/config.py index 2b536a3a4..b546df243 100644 --- a/collectoss/tasks/git/util/facade_worker/facade_worker/config.py +++ b/collectoss/tasks/git/util/facade_worker/facade_worker/config.py @@ -31,6 +31,7 @@ import random import subprocess from urllib.parse import urlparse +from collectoss.application.paths import SystemPaths import sqlalchemy as s from sqlalchemy.exc import OperationalError from psycopg2.errors import DeadlockDetected @@ -140,15 +141,12 @@ def __init__(self,logger: Logger): self.tool_version = "1.4.4" # Get the location of the directory where git repos are stored - if 'repo_directory' in worker_options: - self.repo_base_directory = worker_options['repo_directory'] - else: - self.repo_base_directory = None + self.repo_base_directory = SystemPaths.get_facade_directory() # Determine if it's safe to start the script current_status = self.get_setting('utility_status') - if len(self.repo_base_directory) == 0: + if len(str(self.repo_base_directory)) == 0: self.cfg.log_activity('Error','No base directory. It is unsafe to continue.') raise Exception('Failed: No base directory') From dd0557bd57d60fd6e91277f12caa082fd1afd1f3 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 24 Jun 2026 10:15:20 -0400 Subject: [PATCH 26/29] deprecate get_absolute_repo_path in favor of SystemPaths.facade_repo_path Signed-off-by: Adrian Edwards --- collectoss/application/paths.py | 5 +++++ .../git/util/facade_worker/facade_worker/utilitymethods.py | 1 + 2 files changed, 6 insertions(+) diff --git a/collectoss/application/paths.py b/collectoss/application/paths.py index e803a8248..3d810ff9a 100644 --- a/collectoss/application/paths.py +++ b/collectoss/application/paths.py @@ -82,6 +82,11 @@ def get_facade_directory(create = True) -> Path: _build_path(env_path or database_path, SystemPaths.os_defaults(create).user_downloads_path / "collectoss_facade"), create = create ) + + @staticmethod + def facade_repo_path(repo) -> Path: + """Get the path to a specific facade repository""" + return SystemPaths.get_facade_directory() / f"{repo.repo_id}-{repo.repo_path}/{repo.repo_name}" @staticmethod def get_config_directory(create = True) -> Path: diff --git a/collectoss/tasks/git/util/facade_worker/facade_worker/utilitymethods.py b/collectoss/tasks/git/util/facade_worker/facade_worker/utilitymethods.py index afba70fa2..5d5e3dfcc 100644 --- a/collectoss/tasks/git/util/facade_worker/facade_worker/utilitymethods.py +++ b/collectoss/tasks/git/util/facade_worker/facade_worker/utilitymethods.py @@ -102,6 +102,7 @@ def trim_author(facade_helper, email): facade_helper.log_activity('Debug',f"Trimmed working author: {email}") +@deprecated("This method of getting the absolute repo path is deprecated. Use SystemPaths.facade_repo_path(repo) instead.") def get_absolute_repo_path(repo_base_dir, repo_id, repo_path,repo_name): return f"{repo_base_dir}{repo_id}-{repo_path}/{repo_name}" From 8c5675b240fa8656c0ced8958fad39361a8ddbc8 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 24 Jun 2026 10:17:53 -0400 Subject: [PATCH 27/29] fix syntax for path printing on startup Signed-off-by: Adrian Edwards --- docker/backend/preflight.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/backend/preflight.py b/docker/backend/preflight.py index 49d22dc04..10f7130aa 100755 --- a/docker/backend/preflight.py +++ b/docker/backend/preflight.py @@ -1,3 +1,4 @@ +from collectoss.application.paths import SystemPaths from collectoss.util.startup import collect_env_variables, check_init_schema, check_update_schema, setup_facade_directory, merge_config, warn_import_repos, print_platform_information from collectoss.application.logs import getFormatter from collectoss.application.cli import DatabaseContext @@ -31,6 +32,6 @@ warn_import_repos(logger) print_platform_information(logger) - SystemPaths().print_all_paths(logger) + SystemPaths.print_all_paths(logger) sys.exit(0) From 24be1e94f3e9731a1d87f0b7020638457e8a44c0 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 24 Jun 2026 11:34:40 -0400 Subject: [PATCH 28/29] remove unused imports suggested by reviewdog Signed-off-by: Adrian Edwards --- collectoss/api/gunicorn_conf.py | 1 - .../tasks/data_analysis/pull_request_analysis_worker/tasks.py | 1 - collectoss/tasks/git/dependency_libyear_tasks/core.py | 2 +- collectoss/tasks/git/dependency_tasks/core.py | 2 +- collectoss/tasks/git/scc_value_tasks/core.py | 2 +- tests/test_classes/test_paths.py | 2 +- 6 files changed, 4 insertions(+), 6 deletions(-) diff --git a/collectoss/api/gunicorn_conf.py b/collectoss/api/gunicorn_conf.py index 23f24feae..f4eb6216d 100644 --- a/collectoss/api/gunicorn_conf.py +++ b/collectoss/api/gunicorn_conf.py @@ -1,6 +1,5 @@ import multiprocessing import logging -import os from pathlib import Path from glob import glob diff --git a/collectoss/tasks/data_analysis/pull_request_analysis_worker/tasks.py b/collectoss/tasks/data_analysis/pull_request_analysis_worker/tasks.py index b3ce827c8..bd4c3d1fa 100644 --- a/collectoss/tasks/data_analysis/pull_request_analysis_worker/tasks.py +++ b/collectoss/tasks/data_analysis/pull_request_analysis_worker/tasks.py @@ -1,5 +1,4 @@ import logging -import os import datetime from collectoss.application.paths import SystemPaths diff --git a/collectoss/tasks/git/dependency_libyear_tasks/core.py b/collectoss/tasks/git/dependency_libyear_tasks/core.py index 13cea4166..fb6ec3f7b 100644 --- a/collectoss/tasks/git/dependency_libyear_tasks/core.py +++ b/collectoss/tasks/git/dependency_libyear_tasks/core.py @@ -1,6 +1,6 @@ from datetime import datetime from collectoss.application.db.models import * -from collectoss.application.db.lib import get_value, bulk_insert_dicts, get_repo_by_repo_git +from collectoss.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git from collectoss.application.paths import SystemPaths from collectoss.tasks.git.dependency_libyear_tasks.libyear_util.util import get_deps_libyear_data from collectoss.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_absolute_repo_path diff --git a/collectoss/tasks/git/dependency_tasks/core.py b/collectoss/tasks/git/dependency_tasks/core.py index 1c4f90ed8..78c797b22 100644 --- a/collectoss/tasks/git/dependency_tasks/core.py +++ b/collectoss/tasks/git/dependency_tasks/core.py @@ -1,7 +1,7 @@ from datetime import datetime import os from collectoss.application.db.models import * -from collectoss.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_value +from collectoss.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git from collectoss.application.environment import SystemEnv from collectoss.application.paths import SystemPaths from collectoss.tasks.github.util.github_api_key_handler import GithubApiKeyHandler diff --git a/collectoss/tasks/git/scc_value_tasks/core.py b/collectoss/tasks/git/scc_value_tasks/core.py index b665beb91..63d9f079c 100644 --- a/collectoss/tasks/git/scc_value_tasks/core.py +++ b/collectoss/tasks/git/scc_value_tasks/core.py @@ -1,7 +1,7 @@ from datetime import datetime import os from collectoss.application.db.models import * -from collectoss.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_value +from collectoss.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git from collectoss.application.environment import SystemEnv from collectoss.application.paths import SystemPaths from collectoss.tasks.util.worker_util import parse_json_from_subprocess_call diff --git a/tests/test_classes/test_paths.py b/tests/test_classes/test_paths.py index 93fa2f233..d1ac250dd 100644 --- a/tests/test_classes/test_paths.py +++ b/tests/test_classes/test_paths.py @@ -1,4 +1,4 @@ -from collectoss.application.paths import _build_path, _clean_path +from collectoss.application.paths import _build_path from pathlib import Path class TestBuildPath: From be9e1ec645ed185c77dc25a09aba31cfa17a847a Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 24 Jun 2026 12:51:58 -0400 Subject: [PATCH 29/29] ensure the path is getting resolved the same way in startup.py Signed-off-by: Adrian Edwards --- collectoss/util/startup.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index a9bd38487..4b61888af 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -9,6 +9,7 @@ import platform import sys +from collectoss.application.paths import SystemPaths from sqlalchemy.orm.attributes import get_history from collectoss.application.config import SystemConfig from collectoss.application.db.session import DatabaseSession @@ -101,14 +102,7 @@ def collect_env_variables(logger): def setup_facade_directory(logger): """Perform permission checks and create the facade directory if it doesnt exist """ - - facade_directory_path = SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") or "/collectoss/facade/" - - facade_directory = Path(facade_directory_path) - - if not facade_directory.exists(): - logger.debug(f"Specified facade directory {facade_directory_path} does not exist. Creating...") - facade_directory.mkdir() + facade_directory = SystemPaths.get_facade_directory() git_credentials = facade_directory.joinpath(".git-credentials") git_credentials.touch(exist_ok=True)