@@ -63,6 +63,10 @@ function retry {
6363 return $exit
6464 fi
6565 count=$(( $count + 1 ))
66+ retryfile=" /scripts/retry-stop"
67+ if [ -e " $retryfile " ]; then
68+ return 0
69+ fi
6670 done
6771 return 0
6872}
@@ -78,8 +82,8 @@ IFS=', ' read -r -a peers <<<"$hosts"
7882echo " ${peers[@]} "
7983log " INFO" " hosts are ${peers[@]} "
8084
81- report_host=" $HOSTNAME .$GOV_SVC .$POD_NAMESPACE .svc "
82- echo " report_host = $report_host "
85+ report_host=" $HOSTNAME .$GOV_SVC .$POD_NAMESPACE "
86+ echo " report_host = $report_host "
8387
8488# comma separated host names
8589export hosts=$( echo -n ${peers[*]} | sed -e " s/ /,/g" )
@@ -122,6 +126,8 @@ echo "!includedir /etc/mysql/conf.d/" >>/etc/mysql/my.cnf
122126cat >> /etc/mysql/group-replication.conf.d/group.cnf << EOL
123127[mysqld]
124128default-authentication-plugin=mysql_native_password
129+ log_error_suppression_list = 'MY-013360' #remove this message "Plugin mysql_native_password reported: ''mysql_native_password' is deprecated and will be removed in a future release. Please use caching_sha2_password instead'"
130+ #log_error_suppression_list = 'MY-013360,MY-011873,MY-011879' # NUMA related warnings
125131disabled_storage_engines="MyISAM,BLACKHOLE,FEDERATED,ARCHIVE,MEMORY"
126132
127133# General replication settings
@@ -136,6 +142,7 @@ binlog_format = ROW
136142transaction_write_set_extraction = XXHASH64
137143loose-group_replication_bootstrap_group = OFF
138144loose-group_replication_start_on_boot = OFF
145+ loose_group_replication_unreachable_majority_timeout = 20
139146
140147# recommended config
141148innodb_buffer_pool_size = "$INNODB_BUFFER_POOL_SIZE "
@@ -192,6 +199,12 @@ function wait_for_mysqld_running() {
192199 exit 1
193200 fi
194201 log " INFO" " mysql daemon is ready to use......."
202+
203+ # Set read-only immediately after MySQL starts to prevent any external
204+ # process (e.g. KubeDB health checker) from writing local GTIDs before
205+ # the node joins GR. Cannot be set in my.cnf because it blocks --initialize.
206+ ${mysql} -N -e " SET GLOBAL read_only=ON; SET GLOBAL super_read_only=ON;" 2> /dev/null
207+ log " INFO" " Set super_read_only=ON to prevent errant GTIDs"
195208}
196209
197210function create_replication_user() {
@@ -204,28 +217,40 @@ function create_replication_user() {
204217 local mysql=" $mysql_header --host=$localhost "
205218
206219 # At first, ensure that the command executes without any error. Then, run the command again and extract the output.
207- retry 120 ${mysql} -N -e " select count(host) from mysql.user where mysql.user.user='repl';" | awk ' {print$1}'
220+ retry 60 ${mysql} -N -e " select count(host) from mysql.user where mysql.user.user='repl';" | awk ' {print$1}'
208221 out=$( ${mysql} -N -e " select count(host) from mysql.user where mysql.user.user='repl';" | awk ' {print$1}' )
209- # if the user doesn't exist, crete new one.
222+ # if the user doesn't exist, create new one.
223+ # All operations run in a SINGLE session with SQL_LOG_BIN=0 to prevent
224+ # writing local GTIDs that would create errant transactions on rejoin.
210225 if [[ " $out " -eq " 0" ]]; then
211226 log " INFO" " Replication user not found. Creating new replication user........"
212- retry 120 ${mysql} -N -e " SET SQL_LOG_BIN=0;"
213- retry 120 ${mysql} -N -e " CREATE USER 'repl'@'%' IDENTIFIED BY '$MYSQL_ROOT_PASSWORD ' REQUIRE SSL;"
214- retry 120 ${mysql} -N -e " GRANT REPLICATION SLAVE ON *.* TO 'repl'@'%';"
215- # You must therefore give the `BACKUP_ADMIN` and `CLONE_ADMIN` privilege to this replication user on all group members that support cloning process
216- # https://dev.mysql.com/doc/refman/8.0/en/group-replication-cloning.html
217- # https://dev.mysql.com/doc/refman/8.0/en/clone-plugin-remote.html
218- retry 120 ${mysql} -N -e " GRANT BACKUP_ADMIN ON *.* TO 'repl'@'%';"
219- retry 120 ${mysql} -N -e " GRANT CLONE_ADMIN ON *.* TO 'repl'@'%';"
220- retry 120 ${mysql} -N -e " FLUSH PRIVILEGES;"
221- retry 120 ${mysql} -N -e " SET SQL_LOG_BIN=1;"
222-
223- retry 120 ${mysql} -N -e " CHANGE MASTER TO MASTER_USER='repl', MASTER_PASSWORD='$MYSQL_ROOT_PASSWORD ' FOR CHANNEL 'group_replication_recovery';"
224- retry 120 ${mysql} -N -e " RESET MASTER;"
227+ retry 60 ${mysql} -N -e "
228+ SET SQL_LOG_BIN=0;
229+ SET GLOBAL super_read_only=OFF;
230+ SET GLOBAL read_only=OFF;
231+ CREATE USER 'repl'@'%' IDENTIFIED BY '$MYSQL_ROOT_PASSWORD ' REQUIRE SSL;
232+ GRANT REPLICATION SLAVE ON *.* TO 'repl'@'%';
233+ GRANT BACKUP_ADMIN ON *.* TO 'repl'@'%';
234+ GRANT CLONE_ADMIN ON *.* TO 'repl'@'%';
235+ FLUSH PRIVILEGES;
236+ CHANGE MASTER TO MASTER_USER='repl', MASTER_PASSWORD='$MYSQL_ROOT_PASSWORD ' FOR CHANNEL 'group_replication_recovery';
237+ RESET MASTER;
238+ SET GLOBAL read_only=ON;
239+ SET GLOBAL super_read_only=ON;
240+ SET SQL_LOG_BIN=1;
241+ "
225242 else
226243 log " INFO" " Replication user exists. Skipping creating new one......."
227244 # Update replication channel password if it has been changed via RotateAuth
228- retry 120 ${mysql} -N -e " CHANGE MASTER TO MASTER_USER='repl', MASTER_PASSWORD='$MYSQL_ROOT_PASSWORD ' FOR CHANNEL 'group_replication_recovery';"
245+ retry 60 ${mysql} -N -e "
246+ SET SQL_LOG_BIN=0;
247+ SET GLOBAL super_read_only=OFF;
248+ SET GLOBAL read_only=OFF;
249+ CHANGE MASTER TO MASTER_USER='repl', MASTER_PASSWORD='$MYSQL_ROOT_PASSWORD ' FOR CHANNEL 'group_replication_recovery';
250+ SET GLOBAL read_only=ON;
251+ SET GLOBAL super_read_only=ON;
252+ SET SQL_LOG_BIN=1;
253+ "
229254 fi
230255 touch /scripts/ready.txt
231256}
@@ -235,14 +260,14 @@ function install_group_replication_plugin() {
235260 local mysql=" $mysql_header --host=$localhost "
236261
237262 # At first, ensure that the command executes without any error. Then, run the command again and extract the output.
238- retry 120 ${mysql} -N -e ' SHOW PLUGINS;' | grep group_replication
263+ retry 60 ${mysql} -N -e ' SHOW PLUGINS;' | grep group_replication
239264 out=$( ${mysql} -N -e ' SHOW PLUGINS;' | grep group_replication)
240265 if [[ -z " $out " ]]; then
241266 log " INFO" " Group replication plugin is not installed. Installing the plugin...."
242267 # The replication plugin is installed when the member is bootstrapped or joins the group for the first time.
243268 # That is why `joining_for_first_time` is set to 1 here, so that the later reset step runs.
244269 joining_for_first_time=1
245- retry 120 ${mysql} -e " INSTALL PLUGIN group_replication SONAME 'group_replication.so';"
270+ retry 60 ${mysql} -e " SET SQL_LOG_BIN=0; SET GLOBAL super_read_only=OFF; SET GLOBAL read_only=OFF; INSTALL PLUGIN group_replication SONAME 'group_replication.so'; SET GLOBAL read_only=ON; SET GLOBAL super_read_only=ON; SET SQL_LOG_BIN=1 ;"
246271 log " INFO" " Group replication plugin successfully installed"
247272 else
248273 log " INFO" " Already group replication plugin is installed"
@@ -254,24 +279,30 @@ function install_clone_plugin() {
254279 local mysql=" $mysql_header --host=$localhost "
255280
256281 # At first, ensure that the command executes without any error. Then, run the command again and extract the output.
257- retry 120 ${mysql} -N -e ' SHOW PLUGINS;' | grep clone
282+ retry 60 ${mysql} -N -e ' SHOW PLUGINS;' | grep clone
258283 out=$( ${mysql} -N -e ' SHOW PLUGINS;' | grep clone)
259284 if [[ -z " $out " ]]; then
260285 log " INFO" " Clone plugin is not installed. Installing the plugin..."
261- retry 120 ${mysql} -e " INSTALL PLUGIN clone SONAME 'mysql_clone.so';"
286+ retry 60 ${mysql} -e " SET SQL_LOG_BIN=0; SET GLOBAL super_read_only=OFF; SET GLOBAL read_only=OFF; INSTALL PLUGIN clone SONAME 'mysql_clone.so'; SET GLOBAL read_only=ON; SET GLOBAL super_read_only=ON; SET SQL_LOG_BIN=1 ;"
262287 log " INFO" " Clone plugin successfully installed"
263288 else
264289 log " INFO" " Already clone plugin is installed"
265290 fi
266291}
267292
268293function check_member_list_updated() {
294+
269295 for host in $@ ; do
270296 local mysql=" $mysql_header --host=$host "
271297 if [[ " $report_host " == " $host " ]]; then
272298 continue
273299 fi
274300 for i in {60..0}; do
301+ kill -0 $pid
302+ exit=" $? "
303+ if [[ " $exit " != " 0" ]]; then
304+ break
305+ fi
275306 alive_members_id=($( ${mysql} -N -e " SELECT MEMBER_ID FROM performance_schema.replication_group_members WHERE MEMBER_STATE = 'ONLINE';" ) )
276307 alive_cluster_size=${# alive_members_id[@]}
277308 listed_members_id=($( ${mysql} -N -e " SELECT MEMBER_ID FROM performance_schema.replication_group_members;" ) )
@@ -299,8 +330,13 @@ function wait_for_primary() {
299330 local is_primary_found=0
300331 for member_id in ${members_id[*]} ; do
301332 for i in {60..0}; do
333+ kill -0 $pid
334+ exit=" $? "
335+ if [[ " $exit " != " 0" ]]; then
336+ break
337+ fi
302338 primary_member_id=$( ${mysql} -N -e " SHOW STATUS WHERE Variable_name = 'group_replication_primary_member';" | awk ' {print $2}' )
303- log " INFO" " Attempt $i : Trying to find primary member........................"
339+ log " INFO" " Attempt $i : Trying to find primary member, from ${host} ........................"
304340 if [[ -n " $primary_member_id " ]]; then
305341 is_primary_found=1
306342 primary_host=$( ${mysql} -N -e " SELECT MEMBER_HOST FROM performance_schema.replication_group_members WHERE MEMBER_ID = '${primary_member_id} ';" | awk ' {print $1}' )
@@ -330,14 +366,19 @@ function wait_for_primary() {
330366# declare donors array for further use
331367declare -a donors
332368function set_valid_donors() {
369+ kill -0 $pid
370+ exit=" $? "
371+ if [[ " $exit " != " 0" ]]; then
372+ return
373+ fi
333374 log " INFO" " Checking whether valid donor is found or not. If found, set this to 'clone_valid_donor_list'"
334375 local mysql=" $mysql_header --host=$localhost "
335376 # The clone process requires the donor and recipient to run the same MySQL server version; see:
336377 # https://dev.mysql.com/doc/refman/8.0/en/clone-plugin-remote.html#:~:text=The%20clone%20plugin%20is%20supported,17%20and%20higher.&text=The%20donor%20and%20recipient%20MySQL%20server%20instances%20must%20run,same%20operating%20system%20and%20platform.
337378 report_host_version=$( ${mysql} -N -e " SHOW VARIABLES LIKE 'version';" | awk ' {print $2}' )
338379
339380 # At first, ensure that the command executes without any error. Then, run the command again and extract the output.
340- retry 120 ${mysql_header} --host=$primary_host -N -e " SELECT * FROM performance_schema.replication_group_members;"
381+ retry 60 ${mysql_header} --host=$primary_host -N -e " SELECT * FROM performance_schema.replication_group_members;"
341382
342383 donor_list=$( ${mysql_header} --host=$primary_host -N -e " SELECT MEMBER_HOST FROM performance_schema.replication_group_members WHERE MEMBER_STATE = 'ONLINE';" )
343384
@@ -367,7 +408,7 @@ function set_valid_donors() {
367408 valid_donors=$( echo -n ${donors[*]} | sed -e " s/ /:3306,/g" && echo -n " :3306" )
368409 log " INFO" " Valid donors found. The list of valid donor are: ${valid_donors} "
369410 # https://dev.mysql.com/doc/refman/8.0/en/clone-plugin-options-variables.html#sysvar_clone_valid_donor_list
370- retry 120 ${mysql} -N -e " SET GLOBAL clone_valid_donor_list='${valid_donors} ';"
411+ retry 60 ${mysql} -N -e " SET GLOBAL clone_valid_donor_list='${valid_donors} ';"
371412 fi
372413}
373414
@@ -381,26 +422,33 @@ function bootstrap_cluster() {
381422 # ref: https://dev.mysql.com/doc/refman/8.0/en/group-replication-bootstrap.html
382423 local mysql=" $mysql_header --host=$localhost "
383424 log " INFO" " bootstrapping cluster with host $report_host ..."
425+ # Temporarily disable read-only for bootstrap operations.
426+ # GR will manage read-only after START GROUP_REPLICATION.
427+ retry 60 ${mysql} -N -e " SET GLOBAL super_read_only=OFF; SET GLOBAL read_only=OFF;"
384428 if [[ " $joining_for_first_time " == " 1" ]]; then
385- retry 120 ${mysql} -N -e " RESET MASTER;"
429+ retry 60 ${mysql} -N -e " RESET MASTER;"
386430 fi
387- retry 120 ${mysql} -N -e " SET GLOBAL group_replication_bootstrap_group=ON;"
388- retry 120 ${mysql} -N -e " START GROUP_REPLICATION;"
389- retry 120 ${mysql} -N -e " SET GLOBAL group_replication_bootstrap_group=OFF;"
431+ retry 60 ${mysql} -N -e " SET GLOBAL group_replication_bootstrap_group=ON;"
432+ retry 60 ${mysql} -N -e " START GROUP_REPLICATION;"
433+ retry 60 ${mysql} -N -e " SET GLOBAL group_replication_bootstrap_group=OFF;"
390434}
391435
392436function join_into_cluster() {
393437 # member try to join into the existing group
394438 log " INFO" " The replica, ${report_host} is joining into the existing group..."
395439 local mysql=" $mysql_header --host=$localhost "
396440
441+ # Temporarily disable read-only for join operations.
442+ # GR will manage read-only after START GROUP_REPLICATION.
443+ retry 60 ${mysql} -N -e " SET GLOBAL super_read_only=OFF; SET GLOBAL read_only=OFF;"
444+
397445 # When joining for the first time, `RESET MASTER` must be run to reset the binlog and GTID state to their initial position.
398446 # Then the clone process copies data directly from a valid donor. That is why the pod is restarted the first time it joins the group.
399447 # https://dev.mysql.com/doc/refman/8.0/en/clone-plugin-remote.html
400448 export mysqld_alive=1
401449 if [[ " $joining_for_first_time " == " 1" ]]; then
402450 log " INFO" " Resetting binlog & gtid to initial state as $report_host is joining for first time.."
403- retry 120 ${mysql} -N -e " RESET MASTER;"
451+ retry 60 ${mysql} -N -e " RESET MASTER;"
404452 # The clone process runs when the joiner has a valid donor and the primary member's data size is greater than or equal to 128MB
405453 if [[ $valid_donor_found == 1 ]] && [[ $primary_db_size -ge 128 ]]; then
406454 for donor in ${donors[*]} ; do
@@ -418,7 +466,7 @@ function join_into_cluster() {
418466 fi
419467
420468 # wait until the background `mysqld` process has been killed
421- for i in {120 ..0}; do
469+ for i in {60 ..0}; do
422470 kill -0 $pid
423471 exit=" $? "
424472 log " INFO" " Attempt $i : Checking mysqld(process id=$pid ) is alive or not, exit code: $exit "
@@ -439,13 +487,13 @@ function join_into_cluster() {
439487 fi
440488 # If the host is still alive, it will join the cluster directly.
441489 if [[ $mysqld_alive == 1 ]]; then
442- retry 120 ${mysql} -N -e " START GROUP_REPLICATION;"
490+ retry 60 ${mysql} -N -e " START GROUP_REPLICATION;"
443491 log " INFO" " Host (${report_host} ) has joined to the group......."
444492 else
445493 # run mysqld in background since mysqld can't restart after a clone process
446494 start_mysqld_in_background
447495 wait_for_mysqld_running
448- retry 120 ${mysql} -N -e " START GROUP_REPLICATION;"
496+ retry 60 ${mysql} -N -e " START GROUP_REPLICATION;"
449497 log " INFO" " Host (${report_host} ) has joined to the group......."
450498 #
451499 fi
@@ -458,12 +506,16 @@ function join_by_clone() {
458506 log " INFO" " The replica, ${report_host} is joining into the existing group..."
459507 local mysql=" $mysql_header --host=$localhost "
460508
509+ # Temporarily disable read-only for clone operations.
510+ # GR will manage read-only after START GROUP_REPLICATION.
511+ retry 60 ${mysql} -N -e " SET GLOBAL super_read_only=OFF; SET GLOBAL read_only=OFF;"
512+
461513 # When joining for the first time, `RESET MASTER` must be run to reset the binlog and GTID state to their initial position.
462514 # Then the clone process copies data directly from a valid donor. That is why the pod is restarted the first time it joins the group.
463515 # https://dev.mysql.com/doc/refman/8.0/en/clone-plugin-remote.html
464516 export mysqld_alive=1
465517 log " INFO" " Resetting binlog & gtid to initial state as $report_host is joining for first time.."
466- retry 120 ${mysql} -N -e " RESET MASTER;"
518+ retry 60 ${mysql} -N -e " RESET MASTER;"
467519 if [[ $valid_donor_found == 1 ]]; then
468520 for donor in ${donors[*]} ; do
469521 log " INFO" " Cloning data from $donor to $report_host ....."
@@ -480,7 +532,7 @@ function join_by_clone() {
480532 fi
481533
482534 # wait until the background `mysqld` process has been killed
483- for i in {120 ..0}; do
535+ for i in {60 ..0}; do
484536 kill -0 $pid
485537 exit=" $? "
486538 log " INFO" " Attempt $i : Checking mysqld(process id=$pid ) is alive or not, exit code: $exit "
@@ -500,13 +552,13 @@ function join_by_clone() {
500552 fi
501553 # If the host is still alive, it will join the cluster directly.
502554 if [[ $mysqld_alive == 1 ]]; then
503- retry 120 ${mysql} -N -e " START GROUP_REPLICATION;"
555+ retry 60 ${mysql} -N -e " START GROUP_REPLICATION;"
504556 log " INFO" " Host (${report_host} ) has joined to the group......."
505557 else
506558 # run mysqld in background since mysqld can't restart after a clone process
507559 start_mysqld_in_background
508560 wait_for_mysqld_running
509- retry 120 ${mysql} -N -e " START GROUP_REPLICATION;"
561+ retry 60 ${mysql} -N -e " START GROUP_REPLICATION;"
510562 log " INFO" " Host (${report_host} ) has joined to the group......."
511563 #
512564 fi
0 commit comments