Skip to content

Commit 1e330f8

Browse files
committed
Implement resource submission optimization in DatavicODPTheme plugin
- Introduced logging for better traceability. - Added a new method `_submit_new_resources_only` to submit only newly added resources during dataset updates. - Enhanced `_trigger_after_resource_create` to utilize the new resource submission logic. - Implemented `_get_previous_resource_ids` to retrieve previous resource IDs for comparison. - Updated `_infer_format_and_submit` to ensure resource format is inferred correctly before submission.
1 parent e5bacad commit 1e330f8

1 file changed

Lines changed: 94 additions & 15 deletions

File tree

ckanext/datavic_odp_theme/plugin.py

Lines changed: 94 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import logging
34
from typing import Any, Optional
45

56
from flask import Response, session
@@ -15,6 +16,8 @@
1516
from ckanext.datavic_odp_theme.views import get_blueprints
1617
from ckanext.datavic_odp_theme.helpers import get_helpers, group_list
1718

19+
log = logging.getLogger(__name__)
20+
1821

1922
class DatavicODPTheme(p.SingletonPlugin):
2023
p.implements(p.IConfigurer)
@@ -199,33 +202,109 @@ def after_dataset_create(self, context, pkg_dict):
199202
group.add_package_by_name(pkg_dict.get('name', None))
200203

201204
def after_dataset_update(self, context, pkg_dict):
202-
self._trigger_after_resource_create(pkg_dict)
205+
self._submit_new_resources_only(pkg_dict)
206+
203207
group_id = pkg_dict.get('category', None)
204208
if group_id:
205209
group = model.Group.get(group_id)
206210
groups = context.get('package').get_groups('group')
207211
if group not in groups:
208212
group.add_package_by_name(pkg_dict.get('name'))
209213

214+
def _submit_new_resources_only(self, pkg_dict):
215+
"""Submit only newly added resources to xloader during dataset update.
216+
217+
Compares current resource IDs against the previous activity snapshot
218+
to detect new resources. URL changes for existing resources are
219+
handled by the parent xloaderPlugin via ``notify()``.
220+
221+
Falls back to submitting all resources if no activity data is
222+
available (e.g. activity plugin disabled, data migration).
223+
"""
224+
current_resources = pkg_dict.get("resources", [])
225+
current_res_ids = {
226+
r.get("id") for r in current_resources if r.get("id")
227+
}
228+
229+
previous_res_ids = self._get_previous_resource_ids(pkg_dict.get("id"))
230+
231+
if previous_res_ids is None:
232+
log.info(
233+
"No previous activity for package %s — "
234+
"submitting all %d resources",
235+
pkg_dict.get("id"),
236+
len(current_resources),
237+
)
238+
for resource in current_resources:
239+
self._infer_format_and_submit(resource)
240+
return
241+
242+
new_res_ids = current_res_ids - previous_res_ids
243+
244+
if not new_res_ids:
245+
return
246+
247+
log.info(
248+
"Detected %d new resource(s) for package %s: %s",
249+
len(new_res_ids),
250+
pkg_dict.get("id"),
251+
new_res_ids,
252+
)
253+
254+
for resource in current_resources:
255+
if resource.get("id") in new_res_ids:
256+
self._infer_format_and_submit(resource)
257+
258+
def _get_previous_resource_ids(self, pkg_id):
259+
"""Return resource IDs from the most recent activity, or ``None``
260+
if unavailable.
261+
"""
262+
if not pkg_id or not p.plugin_loaded("activity"):
263+
return None
264+
265+
try:
266+
activities = tk.get_action("package_activity_list")(
267+
{"ignore_auth": True},
268+
{
269+
"id": pkg_id,
270+
"limit": 1,
271+
"include_hidden_activity": True,
272+
},
273+
)
274+
except Exception:
275+
return None
276+
277+
if not activities:
278+
return None
279+
280+
prev_pkg = activities[0].get("data", {}).get("package", {})
281+
prev_resources = prev_pkg.get("resources", [])
282+
return {r.get("id") for r in prev_resources if r.get("id")}
283+
284+
def _infer_format_and_submit(self, resource):
285+
"""Infer the resource format from its URL if missing, then submit."""
286+
if resource and not resource.get("format"):
287+
if not resource.get("url_type"):
288+
url_without_params = resource.get("url", "").split("?")[0]
289+
resource["format"] = (
290+
url_without_params.split(".")[-1].lower()
291+
)
292+
self._submit_to_xloader(resource)
293+
210294
def _trigger_after_resource_create(self, pkg_dict):
211-
"""Dataset syndication doesn't trigger the `after_resource_create` method.
212-
So here we want to run submit for each resource after dataset creation.
295+
"""Submit all resources after dataset creation.
296+
297+
Syndication via ``package_create`` does not trigger
298+
``after_resource_create``, so we handle it here.
213299
"""
214300
for resource in pkg_dict.get("resources", []):
215-
if resource and not resource.get("format"):
216-
if not resource["url_type"]:
217-
url_without_params = resource["url"].split('?')[0]
218-
resource["format"] = url_without_params.split('.')[-1].lower()
219-
self._submit_to_xloader(resource)
301+
self._infer_format_and_submit(resource)
220302

221303
def _submit_to_xloader(self, resource_dict):
222-
"""The original method doesn't check if `url_type` is here. Seems like
223-
it's not here if we are calling it from the `after_dataset_create`.
224-
Just set a default url_type and delete after to be sure, that it doesn't break
225-
some core logic.
226-
227-
Do not touch proper values, because it will definitely break something."""
228-
304+
"""Wrapper that ensures ``url_type`` and ``format`` are present
305+
before calling the parent, as they may be missing for resources
306+
created inline via ``package_create``/``package_update``.
307+
"""
229308
resource_dict.setdefault("url_type", "datavic_xloader")
230309
resource_dict.setdefault("format", "")
231310

0 commit comments

Comments
 (0)