Skip to content

Commit 6512415

Browse files
authored
Merge pull request #210 from GSA/publisher-filter
Publisher filter
2 parents 7f1fbbd + 406c871 commit 6512415

14 files changed

Lines changed: 715 additions & 240 deletions

app/api_schemas.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,7 @@ class SearchQuery(Schema):
9797
org_slug = String()
9898
org_type = Enum(ORGANIZATION_TYPE_ENUM)
9999
keyword = List(String())
100+
publisher = String()
100101
after = String()
101102
spatial_filter = Enum(SPATIAL_FILTER_ENUM)
102103
spatial_feature = GeoJson()
@@ -134,6 +135,16 @@ class OrganizationsResults(Schema):
134135
total = Integer()
135136

136137

138+
class PublisherResponse(Schema):
139+
name = String()
140+
count = Integer()
141+
142+
143+
class PublishersResults(Schema):
144+
publishers = List(Nested(PublisherResponse))
145+
total = Integer()
146+
147+
137148
class OpensearchHealth(Schema):
138149
status = String()
139150

app/database/interface.py

Lines changed: 26 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,7 @@ def search_datasets(
7070
per_page=DEFAULT_PER_PAGE,
7171
org_id=None,
7272
org_types=None,
73+
publisher: str | None = None,
7374
spatial_filter=None,
7475
spatial_geometry=None,
7576
spatial_within=True,
@@ -78,6 +79,7 @@ def search_datasets(
7879
include_aggregations: bool = False,
7980
keyword_size: int = 100,
8081
org_size: int = 100,
82+
publisher_size: int = 100,
8183
collection: str = None,
8284
*args,
8385
**kwargs,
@@ -109,6 +111,7 @@ def search_datasets(
109111
per_page=per_page,
110112
org_id=org_id,
111113
org_types=org_types,
114+
publisher=publisher,
112115
search_after=search_after,
113116
spatial_filter=spatial_filter,
114117
spatial_geometry=spatial_geometry,
@@ -117,6 +120,7 @@ def search_datasets(
117120
include_aggregations=include_aggregations,
118121
keyword_size=keyword_size,
119122
org_size=org_size,
123+
publisher_size=publisher_size,
120124
collection=collection,
121125
)
122126

@@ -131,39 +135,6 @@ def get_unique_keywords(self, size=100, min_doc_count=1) -> list[dict]:
131135
size=size, min_doc_count=min_doc_count
132136
)
133137

134-
def get_contextual_aggregations(
135-
self,
136-
query: str = "",
137-
org_id=None,
138-
org_types=None,
139-
keywords: list[str] = None,
140-
spatial_filter=None,
141-
spatial_geometry=None,
142-
spatial_within=True,
143-
keyword_size=100,
144-
org_size=100,
145-
) -> dict:
146-
"""
147-
Get keyword and organization aggregations based on current search context.
148-
149-
Returns aggregations that reflect the current search query and filters,
150-
allowing for contextual filter counts.
151-
"""
152-
result = self.search_datasets(
153-
query,
154-
keywords=keywords or [],
155-
per_page=0,
156-
org_id=org_id,
157-
org_types=org_types,
158-
spatial_filter=spatial_filter,
159-
spatial_geometry=spatial_geometry,
160-
spatial_within=spatial_within,
161-
include_aggregations=True,
162-
keyword_size=keyword_size,
163-
org_size=org_size,
164-
)
165-
return result.aggregations or {"keywords": [], "organizations": []}
166-
167138
def search_locations(self, query, size=100):
168139
"""
169140
Get locations from the database. These are in type_order with first
@@ -298,9 +269,14 @@ def list_datasets_for_organization(
298269
dataset_search_query: str = "",
299270
num_results=DEFAULT_PER_PAGE,
300271
keywords: list[str] | None = None,
272+
publisher: str | None = None,
301273
spatial_filter: str | None = None,
302274
spatial_geometry: dict | None = None,
303275
spatial_within: bool = True,
276+
include_aggregations: bool = False,
277+
keyword_size: int = 100,
278+
org_size: int = 100,
279+
publisher_size: int = 100,
304280
) -> SearchResult:
305281
if not organization_id:
306282
return SearchResult.empty()
@@ -311,9 +287,14 @@ def list_datasets_for_organization(
311287
org_id=organization_id,
312288
sort_by=sort_by,
313289
per_page=num_results,
290+
publisher=publisher,
314291
spatial_filter=spatial_filter,
315292
spatial_geometry=spatial_geometry,
316293
spatial_within=spatial_within,
294+
include_aggregations=include_aggregations,
295+
keyword_size=keyword_size,
296+
org_size=org_size,
297+
publisher_size=publisher_size,
317298
)
318299

319300
def get_opensearch_org_dataset_counts(self, as_dict=False):
@@ -405,6 +386,18 @@ def _get_organizations_from_db(self) -> list[dict]:
405386
for row in rows
406387
]
407388

389+
def get_top_publishers(self) -> list[dict]:
390+
"""Return the top 100 publishers ordered by dataset count."""
391+
publishers = self.opensearch.get_publisher_counts(size=100)
392+
393+
return sorted(
394+
publishers,
395+
key=lambda item: (
396+
-item["count"],
397+
item["name"].lower(),
398+
),
399+
)
400+
408401
@staticmethod
409402
def to_dict(obj: Any) -> dict[str, Any] | None:
410403
if obj is None:

0 commit comments

Comments
 (0)