Skip to content

Commit 2a30113

Browse files
authored
Merge pull request #3851 from SFDO-Tooling/feature/select_records
@W-17025471: Add integration tests for all selection strategies
2 parents e261563 + fee284b commit 2a30113

20 files changed

Lines changed: 2172 additions & 69 deletions

cumulusci/tasks/bulkdata/select_utils.py

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -352,9 +352,6 @@ def annoy_post_process(
352352
insertion_candidates = load_shaped_records
353353
return selected_records, insertion_candidates
354354

355-
query_records = replace_empty_strings_with_missing(query_records)
356-
select_shaped_records = replace_empty_strings_with_missing(select_shaped_records)
357-
358355
hash_features = 100
359356
num_trees = 10
360357

@@ -589,7 +586,7 @@ def add_limit_offset_to_user_filter(
589586
return f" {filter_clause}"
590587

591588

592-
def determine_field_types(df, weights):
589+
def determine_field_types(df_db, df_query, weights):
593590
numerical_features = []
594591
boolean_features = []
595592
categorical_features = []
@@ -598,23 +595,35 @@ def determine_field_types(df, weights):
598595
boolean_weights = []
599596
categorical_weights = []
600597

601-
for col, weight in zip(df.columns, weights):
598+
for col, weight in zip(df_db.columns, weights):
602599
# Check if the column can be converted to numeric
603600
try:
604-
# Attempt to convert to numeric
605-
df[col] = pd.to_numeric(df[col], errors="raise")
601+
temp_df_db = pd.to_numeric(df_db[col], errors="raise")
602+
temp_df_query = pd.to_numeric(df_query[col], errors="raise")
603+
# Replace empty values with 0 for numerical features
604+
df_db[col] = temp_df_db.fillna(0).replace("", 0)
605+
df_query[col] = temp_df_query.fillna(0).replace("", 0)
606606
numerical_features.append(col)
607607
numerical_weights.append(weight)
608608
except ValueError:
609609
# Check for boolean values
610-
if df[col].str.lower().isin(["true", "false"]).all():
610+
if (
611+
df_db[col].str.lower().isin(["true", "false"]).all()
612+
and df_query[col].str.lower().isin(["true", "false"]).all()
613+
):
611614
# Map to actual boolean values
612-
df[col] = df[col].str.lower().map({"true": True, "false": False})
615+
df_db[col] = df_db[col].str.lower().map({"true": True, "false": False})
616+
df_query[col] = (
617+
df_query[col].str.lower().map({"true": True, "false": False})
618+
)
613619
boolean_features.append(col)
614620
boolean_weights.append(weight)
615621
else:
616622
categorical_features.append(col)
617623
categorical_weights.append(weight)
624+
# Replace empty values with 'missing' for categorical features
625+
df_db[col] = df_db[col].replace("", "missing")
626+
df_query[col] = df_query[col].replace("", "missing")
618627

619628
return (
620629
numerical_features,
@@ -640,14 +649,7 @@ def vectorize_records(db_records, query_records, hash_features, weights):
640649
numerical_weights,
641650
boolean_weights,
642651
categorical_weights,
643-
) = determine_field_types(df_db, weights)
644-
645-
# Modify query dataframe boolean columns to True or False
646-
for col in df_query.columns:
647-
if df_query[col].str.lower().isin(["true", "false"]).all():
648-
df_query[col] = (
649-
df_query[col].str.lower().map({"true": True, "false": False})
650-
)
652+
) = determine_field_types(df_db, df_query, weights)
651653

652654
# Fit StandardScaler on the numerical features of the database records
653655
scaler = StandardScaler()
@@ -705,13 +707,6 @@ def vectorize_records(db_records, query_records, hash_features, weights):
705707
return final_db_vectors, final_query_vectors
706708

707709

708-
def replace_empty_strings_with_missing(records):
709-
return [
710-
[(field if field != "" else "missing") for field in record]
711-
for record in records
712-
]
713-
714-
715710
def split_and_filter_fields(fields: T.List[str]) -> T.Tuple[T.List[str], T.List[str]]:
716711
# List to store non-lookup fields (load fields)
717712
load_fields = []

cumulusci/tasks/bulkdata/step.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,9 +478,11 @@ def select_records(self, records):
478478
)
479479

480480
# Execute the main select query using Bulk API
481+
self.logger.info("Retrieving records from org...")
481482
select_query_records = self._execute_select_query(
482483
select_query=select_query, query_fields=query_fields
483484
)
485+
self.logger.info(f"Retrieved {len(select_query_records)} from org")
484486

485487
query_records.extend(select_query_records)
486488
# Post-process the query results
@@ -895,7 +897,9 @@ def select_records(self, records):
895897
)
896898

897899
# Execute the query and gather the records
900+
self.logger.info("Retrieving records from org...")
898901
query_records = self._execute_soql_query(select_query, query_fields)
902+
self.logger.info(f"Retrieved {len(query_records)} from org")
899903

900904
# Post-process the query results for this batch
901905
(
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
version: 1
2+
interactions:
3+
- &id001
4+
include_file: GET_sobjects_Global_describe.yaml
5+
- &id002
6+
include_file: GET_sobjects_Account_describe.yaml
7+
- *id001
8+
- *id002
9+
- *id002
10+
11+
- &id003
12+
include_file: GET_sobjects_Contact_describe.yaml
13+
- *id001
14+
- *id003
15+
- *id003
16+
- &id007
17+
include_file: GET_sobjects_Opportunity_describe.yaml
18+
- *id002
19+
- &id008
20+
include_file: GET_sobjects_Lead_describe.yaml # Added interaction for Lead
21+
- *id001
22+
- &id009
23+
include_file: GET_sobjects_Event_describe.yaml # Added interaction for Event
24+
- *id001
25+
- *id008
26+
- *id001
27+
- *id009
28+
- *id001
29+
30+
- request:
31+
method: GET
32+
uri: https://orgname.my.salesforce.com/services/data/v62.0/limits/recordCount?sObjects=Account
33+
body: null
34+
headers: &id004
35+
Request-Headers:
36+
- Elided
37+
response:
38+
status:
39+
code: 200
40+
message: OK
41+
headers: &id006
42+
Content-Type:
43+
- application/json;charset=UTF-8
44+
Others: Elided
45+
body:
46+
string: "{\n \"sObjects\" : [ {\n \"count\" : 3,\n \"name\" : \"Account\"\n
47+
\ } ]\n}"
48+
49+
- request:
50+
method: GET
51+
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id,%20Name,%20Description,%20Phone,%20AccountNumber%20FROM%20Account%20WHERE%20Name%20!=%20'Sample%20Account%20for%20Entitlements'
52+
body: null
53+
headers: *id004
54+
response:
55+
status:
56+
code: 200
57+
message: OK
58+
headers: *id006
59+
body:
60+
string: "{\n \"totalSize\" : 10,\n \"done\" : true,\n \"records\" : [ {\n
61+
\ \"attributes\" : {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMDQA3\"\n
62+
\ },\n \"Id\" : \"0019H00000H1RMDQA3\",\n \"Name\" : \"Tom Cruise\",\n
63+
\ \"Description\" : \"Some Description\",\n \"Phone\" : \"12345632\",\n
64+
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
65+
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMEQA3\"\n
66+
\ },\n \"Id\" : \"0019H00000H1RMEQA3\",\n \"Name\" : \"Bob The Builder\",\n
67+
\ \"Description\" : \"Some Description\",\n \"Phone\" : \"12345632\",\n
68+
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
69+
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMFQA3\"\n
70+
\ },\n \"Id\" : \"0019H00000H1RMFQA3\",\n \"Name\" : \"Shah Rukh Khan\",\n
71+
\ \"Description\" : \"Bollywood actor\",\n \"Phone\" : \"12345612\",\n
72+
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
73+
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMGQA3\"\n
74+
\ },\n \"Id\" : \"0019H00000H1RMGQA3\",\n \"Name\" : \"Aamir Khan\",\n
75+
\ \"Description\" : \"Mr perfectionist, bollywood actor\",\n \"Phone\"
76+
: \"12345623\",\n \"AccountNumber\" : \"123\"\n }, {\n \"attributes\"
77+
: {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMHQA3\"\n
78+
\ },\n \"Id\" : \"0019H00000H1RMHQA3\",\n \"Name\" : \"Salman Khan\",\n
79+
\ \"Description\" : \"Mr perfectionist, bollywood actor\",\n \"Phone\"
80+
: \"12345623\",\n \"AccountNumber\" : \"123\"\n }, {\n \"attributes\"
81+
: {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1UzyQAF\"\n
82+
\ },\n \"Id\" : \"0019H00000H1UzyQAF\",\n \"Name\" : \"Tom Cruise\",\n
83+
\ \"Description\" : \"Some Description\",\n \"Phone\" : \"12345632\",\n
84+
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
85+
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1UzzQAF\"\n
86+
\ },\n \"Id\" : \"0019H00000H1UzzQAF\",\n \"Name\" : \"Bob The Builder\",\n
87+
\ \"Description\" : \"Some Description\",\n \"Phone\" : \"12345632\",\n
88+
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
89+
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1V00QAF\"\n
90+
\ },\n \"Id\" : \"0019H00000H1V00QAF\",\n \"Name\" : \"Shah Rukh Khan\",\n
91+
\ \"Description\" : \"Bollywood actor\",\n \"Phone\" : \"12345612\",\n
92+
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
93+
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1V01QAF\"\n
94+
\ },\n \"Id\" : \"0019H00000H1V01QAF\",\n \"Name\" : \"Aamir Khan\",\n
95+
\ \"Description\" : \"Mr perfectionist, bollywood actor\",\n \"Phone\"
96+
: \"12345623\",\n \"AccountNumber\" : \"123\"\n }, {\n \"attributes\"
97+
: {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1V02QAF\"\n
98+
\ },\n \"Id\" : \"0019H00000H1V02QAF\",\n \"Name\" : \"Salman Khan\",\n
99+
\ \"Description\" : \"Mr perfectionist, bollywood actor\",\n \"Phone\"
100+
: \"12345623\",\n \"AccountNumber\" : \"123\"\n } ]\n}"
101+
102+
103+
104+
105+
- request:
106+
method: POST
107+
uri: https://orgname.my.salesforce.com/services/data/v62.0/composite/sobjects
108+
body: '{"allOrNone": false, "records": [{"LastName": "Contact of Tom Cruise",
109+
"AccountId": "0019H00000H1RMDQA3", "attributes": {"type": "Contact"}}, {"LastName":
110+
"Contact of Bob the Builder", "AccountId": "0019H00000H1RMDQA3", "attributes":
111+
{"type": "Contact"}}, {"LastName": "Contact of SRK", "AccountId": "0019H00000H1RMDQA3",
112+
"attributes": {"type": "Contact"}}]}'
113+
headers: *id004
114+
response:
115+
status:
116+
code: 200
117+
message: OK
118+
headers: *id006
119+
body:
120+
string: "[ {\n \"id\" : \"0039H00000BbbFBQAZ\",\n \"success\" : true,\n \"errors\"
121+
: [ ]\n}, {\n \"id\" : \"0039H00000BbbFCQAZ\",\n \"success\" : true,\n \"errors\"
122+
: [ ]\n}, {\n \"id\" : \"0039H00000BbbFDQAZ\",\n \"success\" : true,\n \"errors\"
123+
: [ ]\n} ]"
124+
125+
126+
- request:
127+
method: GET
128+
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id%20FROM%20Account%20WHERE%20Name%20!=%20'Sample%20Account%20for%20Entitlements'%20LIMIT%205
129+
body: null
130+
headers: *id004
131+
response:
132+
status:
133+
code: 200
134+
message: OK
135+
headers: *id006
136+
body:
137+
string: "{\n \"totalSize\" : 5,\n \"done\" : true,\n \"records\" : [ {\n
138+
\ \"attributes\" : {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMDQA3\"\n
139+
\ },\n \"Id\" : \"0019H00000H1RMDQA3\"\n }, {\n \"attributes\" :
140+
{\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMEQA3\"\n
141+
\ },\n \"Id\" : \"0019H00000H1RMDQA3\"\n }, {\n \"attributes\" :
142+
{\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMFQA3\"\n
143+
\ },\n \"Id\" : \"0019H00000H1RMDQA3\"\n }, {\n \"attributes\" :
144+
{\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMGQA3\"\n
145+
\ },\n \"Id\" : \"0019H00000H1RMDQA3\"\n }, {\n \"attributes\" :
146+
{\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMHQA3\"\n
147+
\ },\n \"Id\" : \"0019H00000H1RMDQA3\"\n } ]\n}"
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
version: 1
2+
interactions:
3+
- &id001
4+
include_file: GET_sobjects_Global_describe.yaml
5+
- &id002
6+
include_file: GET_sobjects_Account_describe.yaml
7+
- *id001
8+
- *id002
9+
- *id002
10+
11+
- &id003
12+
include_file: GET_sobjects_Contact_describe.yaml
13+
- *id001
14+
- *id003
15+
- *id003
16+
- &id007
17+
include_file: GET_sobjects_Opportunity_describe.yaml
18+
- *id002
19+
- &id008
20+
include_file: GET_sobjects_Lead_describe.yaml # Added interaction for Lead
21+
- *id001
22+
- &id009
23+
include_file: GET_sobjects_Event_describe.yaml # Added interaction for Event
24+
- *id001
25+
- *id008
26+
- *id001
27+
- *id009
28+
- *id001
29+
30+
- request:
31+
method: GET
32+
uri: https://orgname.my.salesforce.com/services/data/v62.0/limits/recordCount?sObjects=Account
33+
body: null
34+
headers: &id004
35+
Request-Headers:
36+
- Elided
37+
response:
38+
status:
39+
code: 200
40+
message: OK
41+
headers: &id006
42+
Content-Type:
43+
- application/json;charset=UTF-8
44+
Others: Elided
45+
body:
46+
string: "{\n \"sObjects\" : [ {\n \"count\" : 3,\n \"name\" : \"Account\"\n
47+
\ } ]\n}"
48+
49+
- request:
50+
method: GET
51+
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id,%20Name,%20Description,%20Phone,%20AccountNumber%20FROM%20Account%20WHERE%20Name%20!=%20'Sample%20Account%20for%20Entitlements'
52+
body: null
53+
headers: *id004
54+
response:
55+
status:
56+
code: 200
57+
message: OK
58+
headers: *id006
59+
body:
60+
string: "{\n \"totalSize\" : 10,\n \"done\" : true,\n \"records\" : [ {\n
61+
\ \"attributes\" : {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMDQA3\"\n
62+
\ },\n \"Id\" : \"0019H00000H1RMDQA3\",\n \"Name\" : \"Tom Cruise\",\n
63+
\ \"Description\" : \"Some Description\",\n \"Phone\" : \"12345632\",\n
64+
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
65+
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMEQA3\"\n
66+
\ },\n \"Id\" : \"0019H00000H1RMEQA3\",\n \"Name\" : \"Bob The Builder\",\n
67+
\ \"Description\" : \"Some Description\",\n \"Phone\" : \"12345632\",\n
68+
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
69+
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMFQA3\"\n
70+
\ },\n \"Id\" : \"0019H00000H1RMFQA3\",\n \"Name\" : \"Shah Rukh Khan\",\n
71+
\ \"Description\" : \"Bollywood actor\",\n \"Phone\" : \"12345612\",\n
72+
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
73+
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMGQA3\"\n
74+
\ },\n \"Id\" : \"0019H00000H1RMGQA3\",\n \"Name\" : \"Aamir Khan\",\n
75+
\ \"Description\" : \"Mr perfectionist, bollywood actor\",\n \"Phone\"
76+
: \"12345623\",\n \"AccountNumber\" : \"123\"\n }, {\n \"attributes\"
77+
: {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMHQA3\"\n
78+
\ },\n \"Id\" : \"0019H00000H1RMHQA3\",\n \"Name\" : \"Salman Khan\",\n
79+
\ \"Description\" : \"Mr perfectionist, bollywood actor\",\n \"Phone\"
80+
: \"12345623\",\n \"AccountNumber\" : \"123\"\n }, {\n \"attributes\"
81+
: {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1UzyQAF\"\n
82+
\ },\n \"Id\" : \"0019H00000H1UzyQAF\",\n \"Name\" : \"Tom Cruise\",\n
83+
\ \"Description\" : \"Some Description\",\n \"Phone\" : \"12345632\",\n
84+
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
85+
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1UzzQAF\"\n
86+
\ },\n \"Id\" : \"0019H00000H1UzzQAF\",\n \"Name\" : \"Bob The Builder\",\n
87+
\ \"Description\" : \"Some Description\",\n \"Phone\" : \"12345632\",\n
88+
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
89+
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1V00QAF\"\n
90+
\ },\n \"Id\" : \"0019H00000H1V00QAF\",\n \"Name\" : \"Shah Rukh Khan\",\n
91+
\ \"Description\" : \"Bollywood actor\",\n \"Phone\" : \"12345612\",\n
92+
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
93+
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1V01QAF\"\n
94+
\ },\n \"Id\" : \"0019H00000H1V01QAF\",\n \"Name\" : \"Aamir Khan\",\n
95+
\ \"Description\" : \"Mr perfectionist, bollywood actor\",\n \"Phone\"
96+
: \"12345623\",\n \"AccountNumber\" : \"123\"\n }, {\n \"attributes\"
97+
: {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1V02QAF\"\n
98+
\ },\n \"Id\" : \"0019H00000H1V02QAF\",\n \"Name\" : \"Salman Khan\",\n
99+
\ \"Description\" : \"Mr perfectionist, bollywood actor\",\n \"Phone\"
100+
: \"12345623\",\n \"AccountNumber\" : \"123\"\n } ]\n}"
101+
102+
103+
104+
105+
- request:
106+
method: POST
107+
uri: https://orgname.my.salesforce.com/services/data/v62.0/composite/sobjects
108+
body: '{"allOrNone": false, "records": [{"LastName": "Contact of Tom Cruise",
109+
"AccountId": "0019H00000H1RMDQA3", "attributes": {"type": "Contact"}}, {"LastName":
110+
"Contact of Bob the Builder", "AccountId": "0019H00000H1RMEQA3", "attributes":
111+
{"type": "Contact"}}, {"LastName": "Contact of SRK", "AccountId": "0019H00000H1RMFQA3",
112+
"attributes": {"type": "Contact"}}]}'
113+
headers: *id004
114+
response:
115+
status:
116+
code: 200
117+
message: OK
118+
headers: *id006
119+
body:
120+
string: "[ {\n \"id\" : \"0039H00000BbbFBQAZ\",\n \"success\" : true,\n \"errors\"
121+
: [ ]\n}, {\n \"id\" : \"0039H00000BbbFCQAZ\",\n \"success\" : true,\n \"errors\"
122+
: [ ]\n}, {\n \"id\" : \"0039H00000BbbFDQAZ\",\n \"success\" : true,\n \"errors\"
123+
: [ ]\n} ]"

0 commit comments

Comments
 (0)