Skip to content

Commit 78bcf60

Browse files
authored
feat: multi-az failover2 (#396)
1 parent 4e4760d commit 78bcf60

8 files changed

Lines changed: 192 additions & 174 deletions

File tree

common/lib/host_list_provider/host_list_provider.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,8 @@ export type StaticHostListProvider = HostListProvider;
2525

2626
export interface BlockingHostListProvider extends HostListProvider {
2727
forceMonitoringRefresh(shouldVerifyWriter: boolean, timeoutMs: number): Promise<HostInfo[]>;
28+
29+
clearAll(): Promise<void>;
2830
}
2931

3032
export interface HostListProvider {

common/lib/plugin_service.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -198,7 +198,7 @@ export class PluginService implements ErrorHandler, HostListProviderService {
198198
return false;
199199
}
200200

201-
protected isBlockingHostListProvider(arg: any): arg is BlockingHostListProvider {
201+
isBlockingHostListProvider(arg: any): arg is BlockingHostListProvider {
202202
return arg;
203203
}
204204

common/lib/plugins/failover2/failover2_plugin.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ import { HostAvailability } from "../../host_availability/host_availability";
3939
import { TelemetryTraceLevel } from "../../utils/telemetry/telemetry_trace_level";
4040
import { HostRole } from "../../host_role";
4141
import { CanReleaseResources } from "../../can_release_resources";
42-
import { MonitoringRdsHostListProvider } from "../../host_list_provider/monitoring/monitoring_host_list_provider";
4342
import { ReaderFailoverResult } from "../failover/reader_failover_result";
43+
import { HostListProvider } from "../../host_list_provider/host_list_provider";
4444

4545
export class Failover2Plugin extends AbstractConnectionPlugin implements CanReleaseResources {
4646
private static readonly TELEMETRY_WRITER_FAILOVER = "failover to writer instance";
@@ -401,7 +401,7 @@ export class Failover2Plugin extends AbstractConnectionPlugin implements CanRele
401401

402402
if ((await this.pluginService.getHostRole(writerCandidateClient)) !== HostRole.WRITER) {
403403
try {
404-
await writerCandidateClient.end();
404+
await writerCandidateClient?.end();
405405
} catch (error) {
406406
// Do nothing.
407407
}
@@ -481,6 +481,9 @@ export class Failover2Plugin extends AbstractConnectionPlugin implements CanRele
481481
}
482482

483483
async releaseResources(): Promise<void> {
484-
await (this.pluginService.getHostListProvider() as MonitoringRdsHostListProvider).clearAll();
484+
const hostListProvider: HostListProvider = this.pluginService.getHostListProvider();
485+
if (!!this.pluginService.isBlockingHostListProvider(hostListProvider)) {
486+
await hostListProvider.clearAll();
487+
}
485488
}
486489
}

common/lib/topology_aware_database_dialect.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,6 @@ export interface TopologyAwareDatabaseDialect {
2626

2727
getHostRole(client: ClientWrapper): Promise<HostRole>;
2828

29-
getWriterId(client: ClientWrapper): Promise<string | null>;
29+
// Returns the host id of the targetClient if it is connected to a writer, null otherwise.
30+
getWriterId(targetClient: ClientWrapper): Promise<string | null>;
3031
}

mysql/lib/dialect/rds_multi_az_mysql_database_dialect.ts

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,21 @@ import { AwsWrapperError } from "../../../common/lib/utils/errors";
2626
import { TopologyAwareDatabaseDialect } from "../../../common/lib/topology_aware_database_dialect";
2727
import { RdsHostListProvider } from "../../../common/lib/host_list_provider/rds_host_list_provider";
2828
import { FailoverRestriction } from "../../../common/lib/plugins/failover/failover_restriction";
29+
import { WrapperProperties } from "../../../common/lib/wrapper_property";
30+
import { PluginService } from "../../../common/lib/plugin_service";
31+
import { MonitoringRdsHostListProvider } from "../../../common/lib/host_list_provider/monitoring/monitoring_host_list_provider";
2932

3033
export class RdsMultiAZMySQLDatabaseDialect extends MySQLDatabaseDialect implements TopologyAwareDatabaseDialect {
3134
private static readonly TOPOLOGY_QUERY: string = "SELECT id, endpoint, port FROM mysql.rds_topology";
3235
private static readonly TOPOLOGY_TABLE_EXIST_QUERY: string =
3336
"SELECT 1 AS tmp FROM information_schema.tables WHERE" + " table_schema = 'mysql' AND table_name = 'rds_topology'";
37+
// For reader hosts, the query should return a writer host id. For a writer host, the query should return no data.
3438
private static readonly FETCH_WRITER_HOST_QUERY: string = "SHOW REPLICA STATUS";
3539
private static readonly FETCH_WRITER_HOST_QUERY_COLUMN_NAME: string = "Source_Server_Id";
3640
private static readonly HOST_ID_QUERY: string = "SELECT @@server_id AS host";
3741
private static readonly HOST_ID_QUERY_COLUMN_NAME: string = "host";
38-
private static readonly IS_READER_QUERY: string = "SELECT @@read_only";
42+
private static readonly IS_READER_QUERY: string = "SELECT @@read_only AS is_reader";
43+
private static readonly IS_READER_QUERY_COLUMN_NAME: string = "is_reader";
3944

4045
async isDialect(targetClient: ClientWrapper): Promise<boolean> {
4146
const res = await targetClient.query(RdsMultiAZMySQLDatabaseDialect.TOPOLOGY_TABLE_EXIST_QUERY).catch(() => false);
@@ -48,6 +53,9 @@ export class RdsMultiAZMySQLDatabaseDialect extends MySQLDatabaseDialect impleme
4853
}
4954

5055
getHostListProvider(props: Map<string, any>, originalUrl: string, hostListProviderService: HostListProviderService): HostListProvider {
56+
if (WrapperProperties.PLUGINS.get(props).includes("failover2")) {
57+
return new MonitoringRdsHostListProvider(props, originalUrl, hostListProviderService, <PluginService>hostListProviderService);
58+
}
5159
return new RdsHostListProvider(props, originalUrl, hostListProviderService);
5260
}
5361

@@ -118,11 +126,26 @@ export class RdsMultiAZMySQLDatabaseDialect extends MySQLDatabaseDialect impleme
118126
}
119127

120128
async getHostRole(client: ClientWrapper): Promise<HostRole> {
121-
return (await this.executeTopologyRelatedQuery(client, RdsMultiAZMySQLDatabaseDialect.IS_READER_QUERY)) ? HostRole.WRITER : HostRole.READER;
129+
return (await this.executeTopologyRelatedQuery(client, RdsMultiAZMySQLDatabaseDialect.IS_READER_QUERY, RdsMultiAZMySQLDatabaseDialect.IS_READER_QUERY_COLUMN_NAME)) == "0" ? HostRole.WRITER : HostRole.READER;
122130
}
123131

124-
getWriterId(client: ClientWrapper): Promise<string> {
125-
throw new Error("Method not implemented.");
132+
async getWriterId(targetClient: ClientWrapper): Promise<string> {
133+
try {
134+
const writerHostId: string = await this.executeTopologyRelatedQuery(
135+
targetClient,
136+
RdsMultiAZMySQLDatabaseDialect.FETCH_WRITER_HOST_QUERY,
137+
RdsMultiAZMySQLDatabaseDialect.FETCH_WRITER_HOST_QUERY_COLUMN_NAME
138+
);
139+
// The query above returns the writer's host id when run on a reader, and returns no data when run on the writer.
140+
if (!writerHostId) {
141+
const currentConnection = await this.identifyConnection(targetClient);
142+
return currentConnection ?? null;
143+
} else {
144+
return null;
145+
}
146+
} catch (error: any) {
147+
throw new AwsWrapperError(Messages.get("RdsMultiAZMySQLDatabaseDialect.invalidQuery", error.message));
148+
}
126149
}
127150

128151
async identifyConnection(client: ClientWrapper): Promise<string> {

pg/lib/dialect/rds_multi_az_pg_database_dialect.ts

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ import { RdsHostListProvider } from "../../../common/lib/host_list_provider/rds_
2727
import { PgDatabaseDialect } from "./pg_database_dialect";
2828
import { ErrorHandler } from "../../../common/lib/error_handler";
2929
import { MultiAzPgErrorHandler } from "../multi_az_pg_error_handler";
30-
import { error, info, query } from "winston";
30+
import { WrapperProperties } from "../../../common/lib/wrapper_property";
31+
import { PluginService } from "../../../common/lib/plugin_service";
32+
import { MonitoringRdsHostListProvider } from "../../../common/lib/host_list_provider/monitoring/monitoring_host_list_provider";
3133

3234
export class RdsMultiAZPgDatabaseDialect extends PgDatabaseDialect implements TopologyAwareDatabaseDialect {
3335
constructor() {
@@ -42,7 +44,8 @@ export class RdsMultiAZPgDatabaseDialect extends PgDatabaseDialect implements To
4244
private static readonly FETCH_WRITER_HOST_QUERY_COLUMN_NAME: string = "multi_az_db_cluster_source_dbi_resource_id";
4345
private static readonly HOST_ID_QUERY: string = "SELECT dbi_resource_id FROM rds_tools.dbi_resource_id()";
4446
private static readonly HOST_ID_QUERY_COLUMN_NAME: string = "dbi_resource_id";
45-
private static readonly IS_READER_QUERY: string = "SELECT pg_is_in_recovery()";
47+
private static readonly IS_READER_QUERY: string = "SELECT pg_is_in_recovery() AS is_reader";
48+
private static readonly IS_READER_QUERY_COLUMN_NAME: string = "is_reader";
4649

4750
async isDialect(targetClient: ClientWrapper): Promise<boolean> {
4851
const res = await targetClient.query(RdsMultiAZPgDatabaseDialect.WRITER_HOST_FUNC_EXIST_QUERY).catch(() => false);
@@ -55,6 +58,9 @@ export class RdsMultiAZPgDatabaseDialect extends PgDatabaseDialect implements To
5558
}
5659

5760
getHostListProvider(props: Map<string, any>, originalUrl: string, hostListProviderService: HostListProviderService): HostListProvider {
61+
if (WrapperProperties.PLUGINS.get(props).includes("failover2")) {
62+
return new MonitoringRdsHostListProvider(props, originalUrl, hostListProviderService, <PluginService>hostListProviderService);
63+
}
5864
return new RdsHostListProvider(props, originalUrl, hostListProviderService);
5965
}
6066

@@ -77,7 +83,7 @@ export class RdsMultiAZPgDatabaseDialect extends PgDatabaseDialect implements To
7783
}
7884
}
7985

80-
private async executeTopologyRelatedQuery(targetClient: ClientWrapper, query: string, resultColumnName?: string): Promise<string> {
86+
private async executeTopologyRelatedQuery(targetClient: ClientWrapper, query: string, resultColumnName?: string): Promise<any> {
8187
const res = await targetClient.query(query);
8288
const rows: any[] = res.rows;
8389
if (rows.length > 0) {
@@ -125,11 +131,22 @@ export class RdsMultiAZPgDatabaseDialect extends PgDatabaseDialect implements To
125131
}
126132

127133
async getHostRole(client: ClientWrapper): Promise<HostRole> {
128-
return (await this.executeTopologyRelatedQuery(client, RdsMultiAZPgDatabaseDialect.IS_READER_QUERY)) ? HostRole.WRITER : HostRole.READER;
134+
return (await this.executeTopologyRelatedQuery(client, RdsMultiAZPgDatabaseDialect.IS_READER_QUERY, RdsMultiAZPgDatabaseDialect.IS_READER_QUERY_COLUMN_NAME)) === false ? HostRole.WRITER : HostRole.READER;
129135
}
130136

131-
getWriterId(client: ClientWrapper): Promise<string> {
132-
throw new Error("Method not implemented.");
137+
async getWriterId(targetClient: ClientWrapper): Promise<string> {
138+
try {
139+
const writerHostId: string = await this.executeTopologyRelatedQuery(
140+
targetClient,
141+
RdsMultiAZPgDatabaseDialect.FETCH_WRITER_HOST_QUERY,
142+
RdsMultiAZPgDatabaseDialect.FETCH_WRITER_HOST_QUERY_COLUMN_NAME
143+
);
144+
const currentConnection = await this.identifyConnection(targetClient);
145+
146+
return (currentConnection && currentConnection === writerHostId) ? currentConnection : null;
147+
} catch (error: any) {
148+
throw new AwsWrapperError(Messages.get("RdsMultiAZPgDatabaseDialect.invalidQuery", error.message));
149+
}
133150
}
134151

135152
getErrorHandler(): ErrorHandler {

tests/integration/container/tests/aurora_failover2.test.ts

Lines changed: 4 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@ import { TestEnvironment } from "./utils/test_environment";
1818
import { DriverHelper } from "./utils/driver_helper";
1919
import { AuroraTestUtility } from "./utils/aurora_test_utility";
2020
import { FailoverSuccessError, TransactionResolutionUnknownError } from "../../../../common/lib/utils/errors";
21+
import { DatabaseEngine } from "./utils/database_engine";
22+
import { QueryResult } from "pg";
2123
import { ProxyHelper } from "./utils/proxy_helper";
24+
import { RdsUtils } from "../../../../common/lib/utils/rds_utils";
2225
import { logger } from "../../../../common/logutils";
2326
import { features, instanceCount } from "./config";
2427
import { TestEnvironmentFeatures } from "./utils/test_environment_features";
2528
import { PluginManager } from "../../../../common/lib";
26-
import { DatabaseEngine } from "./utils/database_engine";
2729
import { TransactionIsolationLevel } from "../../../../common/lib/utils/transaction_isolation_level";
28-
import { RdsUtils } from "../../../../common/lib/utils/rds_utils";
29-
import { QueryResult } from "pg";
3030

3131
const itIf =
3232
features.includes(TestEnvironmentFeatures.FAILOVER_SUPPORTED) &&
@@ -36,7 +36,6 @@ const itIf =
3636
? it
3737
: it.skip;
3838
const itIfTwoInstance = instanceCount == 2 ? itIf : it.skip;
39-
const itIfMinThreeInstance = instanceCount >= 3 ? itIf : it.skip;
4039

4140
let env: TestEnvironment;
4241
let driver;
@@ -66,27 +65,6 @@ async function initDefaultConfig(host: string, port: number, connectToProxy: boo
6665
return config;
6766
}
6867

69-
async function initConfigWithRWSplitting(host: string, port: number, connectToProxy: boolean): Promise<any> {
70-
let config: any = {
71-
user: env.databaseInfo.username,
72-
host: host,
73-
database: env.databaseInfo.defaultDbName,
74-
password: env.databaseInfo.password,
75-
port: port,
76-
plugins: "readWriteSplitting,failover2",
77-
failoverTimeoutMs: 400000,
78-
enableTelemetry: true,
79-
telemetryTracesBackend: "OTLP",
80-
telemetryMetricsBackend: "OTLP"
81-
};
82-
83-
if (connectToProxy) {
84-
config["clusterInstanceHostPattern"] = "?." + env.proxyDatabaseInfo.instanceEndpointSuffix;
85-
}
86-
config = DriverHelper.addDriverSpecificConfiguration(config, env.engine);
87-
return config;
88-
}
89-
9068
describe("aurora failover2", () => {
9169
beforeEach(async () => {
9270
logger.info(`Test started: ${expect.getState().currentTestName}`);
@@ -97,8 +75,6 @@ describe("aurora failover2", () => {
9775
initClientFunc = DriverHelper.getClient(driver);
9876
await ProxyHelper.enableAllConnectivity();
9977
await TestEnvironment.verifyClusterStatus();
100-
await TestEnvironment.verifyAllInstancesHasRightState("available");
101-
await TestEnvironment.verifyAllInstancesUp();
10278

10379
client = null;
10480
secondaryClient = null;
@@ -112,6 +88,7 @@ describe("aurora failover2", () => {
11288
// pass
11389
}
11490
}
91+
11592
if (secondaryClient !== null) {
11693
try {
11794
await secondaryClient.end();
@@ -281,118 +258,4 @@ describe("aurora failover2", () => {
281258
},
282259
1320000
283260
);
284-
285-
itIfMinThreeInstance(
286-
"test failover to new writer set read only true false",
287-
async () => {
288-
// Connect to writer instance
289-
const writerConfig = await initConfigWithRWSplitting(
290-
env.proxyDatabaseInfo.writerInstanceEndpoint,
291-
env.proxyDatabaseInfo.instanceEndpointPort,
292-
true
293-
);
294-
client = initClientFunc(writerConfig);
295-
await client.connect();
296-
297-
const initialWriterId = await auroraTestUtility.queryInstanceId(client);
298-
expect(await auroraTestUtility.isDbInstanceWriter(initialWriterId)).toStrictEqual(true);
299-
300-
// Kill all reader instances
301-
for (const host of env.proxyDatabaseInfo.instances) {
302-
if (host.instanceId && host.instanceId !== initialWriterId) {
303-
await ProxyHelper.disableConnectivity(env.engine, host.instanceId);
304-
}
305-
}
306-
307-
// Force internal reader connection to the writer instance
308-
await client.setReadOnly(true);
309-
const currentId0 = await auroraTestUtility.queryInstanceId(client);
310-
311-
expect(currentId0).toStrictEqual(initialWriterId);
312-
await client.setReadOnly(false);
313-
314-
await ProxyHelper.enableAllConnectivity();
315-
// Crash instance 1 and nominate a new writer
316-
await auroraTestUtility.failoverClusterAndWaitUntilWriterChanged();
317-
await TestEnvironment.verifyClusterStatus();
318-
319-
await expect(async () => {
320-
await auroraTestUtility.queryInstanceId(client);
321-
}).rejects.toThrow(FailoverSuccessError);
322-
const newWriterId = await auroraTestUtility.queryInstanceId(client);
323-
expect(await auroraTestUtility.isDbInstanceWriter(newWriterId)).toStrictEqual(true);
324-
expect(newWriterId).not.toBe(initialWriterId);
325-
326-
await client.setReadOnly(true);
327-
const currentReaderId = await auroraTestUtility.queryInstanceId(client);
328-
expect(currentReaderId).not.toBe(newWriterId);
329-
330-
await client.setReadOnly(false);
331-
const currentId = await auroraTestUtility.queryInstanceId(client);
332-
expect(currentId).toStrictEqual(newWriterId);
333-
},
334-
1320000
335-
);
336-
337-
itIfMinThreeInstance(
338-
"test failover to new reader set read only false true",
339-
async () => {
340-
// Connect to writer instance
341-
const writerConfig = await initConfigWithRWSplitting(
342-
env.proxyDatabaseInfo.writerInstanceEndpoint,
343-
env.proxyDatabaseInfo.instanceEndpointPort,
344-
true
345-
);
346-
writerConfig["failoverMode"] = "reader-or-writer";
347-
client = initClientFunc(writerConfig);
348-
349-
await client.connect();
350-
const initialWriterId = await auroraTestUtility.queryInstanceId(client);
351-
expect(await auroraTestUtility.isDbInstanceWriter(initialWriterId)).toStrictEqual(true);
352-
await client.setReadOnly(true);
353-
354-
const readerConnectionId = await auroraTestUtility.queryInstanceId(client);
355-
expect(readerConnectionId).not.toBe(initialWriterId);
356-
357-
// Get a reader instance
358-
let otherReaderId;
359-
for (const host of env.proxyDatabaseInfo.instances) {
360-
if (host.instanceId && host.instanceId !== readerConnectionId && host.instanceId !== initialWriterId) {
361-
otherReaderId = host.instanceId;
362-
break;
363-
}
364-
}
365-
366-
if (!otherReaderId) {
367-
throw new Error("Could not find a reader instance");
368-
}
369-
// Kill all instances except one other reader
370-
for (const host of env.proxyDatabaseInfo.instances) {
371-
if (host.instanceId && host.instanceId !== otherReaderId) {
372-
await ProxyHelper.disableConnectivity(env.engine, host.instanceId);
373-
}
374-
}
375-
376-
await expect(async () => {
377-
await auroraTestUtility.queryInstanceId(client);
378-
}).rejects.toThrow(FailoverSuccessError);
379-
380-
const currentReaderId0 = await auroraTestUtility.queryInstanceId(client);
381-
382-
expect(currentReaderId0).toStrictEqual(otherReaderId);
383-
expect(currentReaderId0).not.toBe(readerConnectionId);
384-
385-
await ProxyHelper.enableAllConnectivity();
386-
await client.setReadOnly(false);
387-
388-
const currentId = await auroraTestUtility.queryInstanceId(client);
389-
expect(currentId).toStrictEqual(initialWriterId);
390-
391-
await client.setReadOnly(true);
392-
393-
const currentReaderId2 = await auroraTestUtility.queryInstanceId(client);
394-
expect(currentReaderId2).toStrictEqual(otherReaderId);
395-
},
396-
1320000
397-
);
398261
});

0 commit comments

Comments
 (0)