Skip to content

Commit 22b63ad

Browse files
karenc-bq, sophia-bq, joyc-bq
authored
fix: check if first connection after failover is stale (#416)
Co-authored-by: Sophia Chu <112967780+sophia-bq@users.noreply.github.com>
Co-authored-by: joyc-bq <95259163+joyc-bq@users.noreply.github.com>
1 parent bfd8669 commit 22b63ad

5 files changed

Lines changed: 19 additions & 12 deletions

File tree

common/lib/host_list_provider/monitoring/cluster_topology_monitor.ts

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -202,7 +202,7 @@ export class ClusterTopologyMonitorImpl implements ClusterTopologyMonitor {
202202
try {
203203
client = await this._pluginService.forceConnect(this.initialHostInfo, this._monitoringProperties);
204204
} catch {
205-
logger.error(Messages.get("ClusterTopologyMonitor.unableToConnect", this.initialHostInfo.hostId));
205+
// Unable to connect to host;
206206
return null;
207207
}
208208

@@ -214,7 +214,7 @@ export class ClusterTopologyMonitorImpl implements ClusterTopologyMonitor {
214214
if (writerId) {
215215
this.isVerifiedWriterConnection = true;
216216
this.writerHostInfo = this.initialHostInfo;
217-
logger.info(Messages.get("ClusterTopologyMonitor.writerMonitoringConnection", this.initialHostInfo.hostId));
217+
logger.info(Messages.get("ClusterTopologyMonitor.writerMonitoringConnection", this.initialHostInfo.host));
218218
writerVerifiedByThisTask = true;
219219
}
220220
} catch (error) {
@@ -449,11 +449,19 @@ export class HostMonitor {
449449
client = null;
450450
}
451451

452+
if (writerId) {
453+
// First connection after failover may be stale.
454+
if ((await this.monitor.pluginService.getHostRole(client)) !== HostRole.WRITER) {
455+
logger.debug(Messages.get("HostMonitor.writerIsStale", writerId));
456+
writerId = null;
457+
}
458+
}
459+
452460
if (writerId) {
453461
if (this.monitor.hostMonitorsWriterClient) {
454462
await this.monitor.closeConnection(client);
455463
} else {
456-
logger.debug(Messages.get("HostMonitor.detectedWriter", writerId));
464+
logger.debug(Messages.get("HostMonitor.detectedWriter", writerId, this.hostInfo.host));
457465
const updatedHosts: HostInfo[] = await this.monitor.fetchTopologyAndUpdateCache(client);
458466
if (updatedHosts && this.monitor.hostMonitorsWriterClient === null) {
459467
this.monitor.hostMonitorsWriterClient = client;

common/lib/plugins/failover2/failover2_plugin.ts

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -403,12 +403,12 @@ export class Failover2Plugin extends AbstractConnectionPlugin implements CanRele
403403
try {
404404
writerCandidateClient = await this.createConnectionForHost(writerCandidateHostInfo);
405405
} catch (err) {
406-
this.logAndThrowError("Failover.unableToConnectToWriter");
406+
this.logAndThrowError(Messages.get("Failover.unableToConnectToWriterDueToError", writerCandidateHostInfo.host, err.message));
407407
}
408408
}
409409

410410
if (!writerCandidateClient) {
411-
this.logAndThrowError("Failover.unableToConnectToWriter");
411+
this.logAndThrowError(Messages.get("Failover.unableToConnectToWriter"));
412412
}
413413

414414
if ((await this.pluginService.getHostRole(writerCandidateClient)) !== HostRole.WRITER) {
@@ -417,7 +417,7 @@ export class Failover2Plugin extends AbstractConnectionPlugin implements CanRele
417417
} catch (error) {
418418
// Do nothing.
419419
}
420-
this.logAndThrowError(Messages.get("Failover2.failoverWriterConnectedToReader"));
420+
this.logAndThrowError(Messages.get("Failover2.failoverWriterConnectedToReader", writerCandidateHostInfo.host));
421421
}
422422

423423
await this.pluginService.abortCurrentClient();

common/lib/utils/locales/en.json

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,7 @@
5555
"Failover.connectionChangedError": "The active SQL connection has changed due to a connection failure. Please re-configure session state if required.",
5656
"Failover.parameterValue": "%s = %s",
5757
"Failover.unableToConnectToWriter": "Unable to establish SQL connection to the writer instance.",
58+
"Failover.unableToConnectToWriterDueToError": "Unable to establish SQL connection to the writer instance: %s due to error: %s.",
5859
"Failover.unableToConnectToReader": "Unable to establish SQL connection to the reader instance.",
5960
"Failover.unableToDetermineWriter": "Unable to determine the current writer instance.",
6061
"Failover.detectedError": "Detected an error while executing a command: %s",
@@ -209,9 +210,8 @@
209210
"ConfigurationProfileBuilder.canNotUpdateKnownPreset": "Can't add or update a built-in preset configuration profile '%s'.",
210211
"AwsClient.configurationProfileNotFound": "Configuration profile '%s' not found.",
211212
"AwsClient.targetClientNotDefined": "AwsClient targetClient not defined.",
212-
"Failover2.unableToConnect": "Unable to establish SQL connection to the instance.",
213213
"Failover2.failoverReaderNotConnectedToReader": "Unable to establish SQL connection to the instance '%s' as a reader.",
214-
"Failover2.failoverWriterConnectedToReader": "Unable to establish SQL connection to a writer instance.",
214+
"Failover2.failoverWriterConnectedToReader": "The new writer was identified to be '%s', but querying the instance for its role returned a reader.",
215215
"Failover2.unableToFetchTopology": "Unable to establish SQL connection and fetch topology.",
216216
"Failover2.errorSelectingReaderHost": "An error occurred while attempting to select a reader host candidate: '%s'.",
217217
"Failover2.readerCandidateNull": "Reader candidate unable to be selected: '%s'.",
@@ -230,9 +230,10 @@
230230
"ClusterTopologyMonitor.errorDuringMonitoring": "Error thrown during cluster topology monitoring: '%s'.",
231231
"ClusterTopologyMonitor.endMonitoring": "Stop cluster topology monitoring.",
232232
"HostMonitor.startMonitoring": "Host monitor '%s' started.",
233-
"HostMonitor.detectedWriter": "Detected writer: '%s'.",
233+
"HostMonitor.detectedWriter": "Detected writer: '%s' - '%s'.",
234234
"HostMonitor.endMonitoring": "Host monitor '%s' completed in '%s'.",
235235
"HostMonitor.writerHostChanged": "Writer host has changed from '%s' to '%s'.",
236+
"HostMonitor.writerIsStale": "Connected writer instance '%s' is stale.",
236237
"SlidingExpirationCacheWithCleanupTask.cleaningUp": "Cleanup interval of '%s' minutes has passed, cleaning up sliding expiration cache '%s'.",
237238
"SlidingExpirationCacheWithCleanupTask.cleanUpTaskInterrupted": "Sliding expiration cache '%s' cleanup task has been interrupted and is exiting.",
238239
"SlidingExpirationCacheWithCleanupTask.cleanUpTaskStopped": "Sliding expiration cache '%s' cleanup task has been stopped and is exiting.",

mysql/lib/dialect/rds_multi_az_mysql_database_dialect.ts

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -146,9 +146,8 @@ export class RdsMultiAZMySQLDatabaseDialect extends MySQLDatabaseDialect impleme
146146
if (!writerHostId) {
147147
const currentConnection = await this.identifyConnection(targetClient);
148148
return currentConnection ?? null;
149-
} else {
150-
return null;
151149
}
150+
return writerHostId;
152151
} catch (error: any) {
153152
throw new AwsWrapperError(Messages.get("RdsMultiAZMySQLDatabaseDialect.invalidQuery", error.message));
154153
}

tests/integration/container/tests/aurora_failover2.test.ts

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,6 @@ async function initDefaultConfig(host: string, port: number, connectToProxy: boo
5353
password: env.databaseInfo.password,
5454
port: port,
5555
plugins: "failover2",
56-
failoverTimeoutMs: 250000,
5756
enableTelemetry: true,
5857
telemetryTracesBackend: "OTLP",
5958
telemetryMetricsBackend: "OTLP"

0 commit comments

Comments
 (0)