Skip to content

Commit 2c164c8

Browse files
committed
Merge branch 'main' of github.com:SFDO-Tooling/CumulusCI into feature/add_checks_command
2 parents 4726951 + 4c88b7d commit 2c164c8

22 files changed

Lines changed: 553 additions & 161 deletions

.github/workflows/feature_test.yml

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,30 @@ jobs:
6363
- name: Run Pytest
6464
run: uv run pytest --cov-report= --cov=cumulusci
6565

66+
unit_tests_opt_deps:
67+
name: "Unit tests with optional dependencies: ${{ matrix.os }}-${{ matrix.python-version }}"
68+
runs-on: ${{ matrix.os }}
69+
strategy:
70+
fail-fast: false
71+
matrix:
72+
os: [macos-latest, SFDO-Tooling-Ubuntu, SFDO-Tooling-Windows]
73+
python-version: ["3.11", "3.12", "3.13"]
74+
steps:
75+
- uses: actions/checkout@v4
76+
- name: Set up Python
77+
uses: actions/setup-python@v4
78+
with:
79+
python-version: "${{ matrix.python-version }}"
80+
- name: Set up uv
81+
uses: SFDO-Tooling/setup-uv@main
82+
with:
83+
version: "0.5.0"
84+
enable-cache: true
85+
- name: Install dependencies
86+
run: uv sync --all-extras -p ${{ matrix.python-version }}
87+
- name: Run Pytest
88+
run: uv run pytest --cov-report= --cov=cumulusci
89+
6690
robot_api:
6791
name: "Robot: No browser"
6892
runs-on: SFDO-Tooling-Ubuntu

.readthedocs.yml

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,9 @@ build:
1414
- asdf plugin add uv
1515
- asdf install uv latest
1616
- asdf global uv latest
17-
- uv sync --only-group docs --frozen
17+
- uv sync --group docs --frozen
18+
- uv run cci task doc --write
19+
- uv run cci flow doc > docs/flows.rst
1820
- uv run -m sphinx -T -b html -d docs/_build/doctrees -D language=en docs $READTHEDOCS_OUTPUT/html
1921

2022
# Build documentation in the docs/ directory with Sphinx

AUTHORS.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,3 +38,4 @@ For example:
3838
* Gustavo Tandeciarz (dcinzona)
3939
* Chandler Anderson (zenibako)
4040
* Ben French (BenjaminFrench)
41+
* Rupert Barrow (rupertbarrow)

cumulusci/__about__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = "4.0.1"
1+
__version__ = "4.2.0"

cumulusci/tasks/bulkdata/mapping_parser.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -338,7 +338,10 @@ def _get_required_permission_types(
338338
self, operation: DataOperationType
339339
) -> T.Tuple[str]:
340340
"""Return a tuple of the permission types required to execute an operation"""
341-
if operation is DataOperationType.QUERY:
341+
if (
342+
operation is DataOperationType.QUERY
343+
or self.action is DataOperationType.SELECT
344+
):
342345
return ("queryable",)
343346
if (
344347
operation is DataOperationType.INSERT

cumulusci/tasks/bulkdata/select_utils.py

Lines changed: 30 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,34 @@
1+
import logging
12
import random
23
import re
34
import typing as T
45
from enum import Enum
56

6-
import numpy as np
7-
import pandas as pd
8-
from annoy import AnnoyIndex
97
from pydantic import Field, root_validator, validator
10-
from sklearn.feature_extraction.text import HashingVectorizer
11-
from sklearn.preprocessing import StandardScaler
128

139
from cumulusci.core.enums import StrEnum
14-
from cumulusci.tasks.bulkdata.extract_dataset_utils.hardcoded_default_declarations import (
15-
DEFAULT_DECLARATIONS,
16-
)
1710
from cumulusci.tasks.bulkdata.utils import CaseInsensitiveDict
11+
from cumulusci.utils import get_cci_upgrade_command
1812
from cumulusci.utils.yaml.model_parser import CCIDictModel
1913

14+
logger = logging.getLogger(__name__)
15+
try:
16+
import numpy as np
17+
import pandas as pd
18+
from annoy import AnnoyIndex
19+
from sklearn.feature_extraction.text import HashingVectorizer
20+
from sklearn.preprocessing import StandardScaler
21+
22+
OPTIONAL_DEPENDENCIES_AVAILABLE = True
23+
except ImportError:
24+
logger.warning(
25+
f"Optional dependencies are missing. "
26+
"Handling high volumes of records for the 'select' functionality will be significantly slower, "
27+
"as optimizations for this feature are currently disabled. "
28+
f"To enable optimized performance, install all required dependencies using: {get_cci_upgrade_command()}[select]\n"
29+
)
30+
OPTIONAL_DEPENDENCIES_AVAILABLE = False
31+
2032

2133
class SelectStrategy(StrEnum):
2234
"""Enum defining the different selection strategies requested."""
@@ -173,10 +185,6 @@ def standard_generate_query(
173185
filter_clause=user_filter, limit_clause=limit, offset_clause=offset
174186
)
175187
else:
176-
# Get the WHERE clause from DEFAULT_DECLARATIONS if available
177-
declaration = DEFAULT_DECLARATIONS.get(sobject)
178-
if declaration:
179-
query += f" WHERE {declaration.where}"
180188
query += f" LIMIT {limit}" if limit else ""
181189
query += f" OFFSET {offset}" if offset else ""
182190
return query, ["Id"]
@@ -266,10 +274,6 @@ def similarity_generate_query(
266274
filter_clause=user_filter, limit_clause=limit, offset_clause=offset
267275
)
268276
else:
269-
# Get the WHERE clause from DEFAULT_DECLARATIONS if available
270-
declaration = DEFAULT_DECLARATIONS.get(sobject)
271-
if declaration:
272-
query += f" WHERE {declaration.where}"
273277
query += f" LIMIT {limit}" if limit else ""
274278
query += f" OFFSET {offset}" if offset else ""
275279

@@ -292,7 +296,7 @@ def similarity_post_process(
292296
]:
293297
"""Processes the query results for the similarity selection strategy"""
294298
# Handle case where query returns 0 records
295-
if not query_records and not threshold:
299+
if not query_records and threshold is None:
296300
error_message = f"No records found for {sobject} in the target org."
297301
return [], [], error_message
298302

@@ -308,7 +312,7 @@ def similarity_post_process(
308312
select_records = []
309313
insert_records = []
310314

311-
if complexity_constant < 1000:
315+
if complexity_constant < 1000 or not OPTIONAL_DEPENDENCIES_AVAILABLE:
312316
select_records, insert_records = levenshtein_post_process(
313317
load_records, query_records, fields, weights, threshold
314318
)
@@ -328,6 +332,12 @@ def annoy_post_process(
328332
threshold: T.Union[float, None],
329333
) -> T.Tuple[T.List[dict], list]:
330334
"""Processes the query results for the similarity selection strategy using Annoy algorithm for large number of records"""
335+
# Add warning when threshold is 0
336+
if threshold is not None and threshold == 0:
337+
logger.warning(
338+
"Warning: A threshold of 0 may miss exact matches in high volumes. Use a small value like 0.1 for better accuracy."
339+
)
340+
331341
selected_records = []
332342
insertion_candidates = []
333343

@@ -397,7 +407,7 @@ def annoy_post_process(
397407
# Retrieve the corresponding record from the database
398408
record = query_record_data[neighbor_index]
399409
closest_record_id = record_to_id_map[tuple(record)]
400-
if threshold and (neighbor_distances[idx] >= threshold):
410+
if threshold is not None and (neighbor_distances[idx] >= threshold):
401411
selected_records.append(None)
402412
insertion_candidates.append(load_shaped_records[i])
403413
else:
@@ -445,7 +455,7 @@ def levenshtein_post_process(
445455
select_record, target_records, similarity_weights
446456
)
447457

448-
if distance_threshold and match_distance > distance_threshold:
458+
if distance_threshold is not None and match_distance > distance_threshold:
449459
# Append load record for insertion if distance exceeds threshold
450460
insertion_candidates.append(load_record)
451461
selected_records.append(None)

cumulusci/tasks/bulkdata/snowfakery.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -583,8 +583,10 @@ def _generate_and_load_initial_batch(self, working_directory: Path):
583583
self.sets_finished_while_generating_template = num_records
584584

585585
new_template_dir = data_loader_new_directory_name(template_dir, self.run_until)
586-
shutil.move(template_dir, new_template_dir)
587-
template_dir = new_template_dir
586+
# rename only if new_template_dir does not match template_dir
587+
if template_dir.resolve() != new_template_dir.resolve():
588+
shutil.move(template_dir, new_template_dir)
589+
template_dir = new_template_dir
588590

589591
# don't send data tables to child processes. All they
590592
# care about are ID->OID mappings

cumulusci/tasks/bulkdata/step.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from contextlib import contextmanager
1010
from itertools import tee
1111
from typing import Any, Dict, List, NamedTuple, Optional, Union
12+
from urllib.parse import quote
1213

1314
import requests
1415
import salesforce_bulk
@@ -955,9 +956,7 @@ def _determine_limit_clause(self, total_num_records):
955956
def _execute_soql_query(self, select_query, query_fields):
956957
"""Executes the SOQL query and returns the flattened records."""
957958
query_records = []
958-
response = self.sf.restful(
959-
requests.utils.requote_uri(f"query/?q={select_query}"), method="GET"
960-
)
959+
response = self.sf.restful(f"query/?q={quote(select_query)}", method="GET")
961960
query_records.extend(self._flatten_response_records(response, query_fields))
962961

963962
while not response["done"]:

cumulusci/tasks/bulkdata/tests/cassettes/TestSelect.test_select_random_strategy.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ interactions:
4848

4949
- request:
5050
method: GET
51-
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id,%20Name,%20Description,%20Phone,%20AccountNumber%20FROM%20Account%20WHERE%20Name%20!=%20'Sample%20Account%20for%20Entitlements'
51+
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id,%20Name,%20Description,%20Phone,%20AccountNumber%20FROM%20Account
5252
body: null
5353
headers: *id004
5454
response:
@@ -125,7 +125,7 @@ interactions:
125125

126126
- request:
127127
method: GET
128-
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id%20FROM%20Account%20WHERE%20Name%20!=%20'Sample%20Account%20for%20Entitlements'%20LIMIT%205
128+
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id%20FROM%20Account%20LIMIT%205
129129
body: null
130130
headers: *id004
131131
response:

cumulusci/tasks/bulkdata/tests/cassettes/TestSelect.test_select_similarity_select_and_insert_strategy.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -225,7 +225,7 @@ interactions:
225225

226226
- request:
227227
method: GET
228-
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id,%20TYPEOF%20Who%20WHEN%20Contact%20THEN%20LastName,%20Email%20WHEN%20Lead%20THEN%20LastName,%20Company%20ELSE%20Id%20END,%20TYPEOF%20What%20WHEN%20Account%20THEN%20Name,%20Description,%20Phone,%20AccountNumber%20ELSE%20Id%20END,%20Subject,%20DurationInMinutes,%20ActivityDateTime%20FROM%20Event
228+
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id%2C%20TYPEOF%20Who%20WHEN%20Contact%20THEN%20LastName%2C%20Email%20WHEN%20Lead%20THEN%20LastName%2C%20Company%20ELSE%20Id%20END%2C%20TYPEOF%20What%20WHEN%20Account%20THEN%20Name%2C%20Description%2C%20Phone%2C%20AccountNumber%20ELSE%20Id%20END%2C%20Subject%2C%20DurationInMinutes%2C%20ActivityDateTime%20FROM%20Event
229229
body: null
230230
headers: *id004
231231
response:

0 commit comments

Comments
 (0)