diff --git a/PR_DESCRIPTION.md b/PR_DESCRIPTION.md new file mode 100644 index 00000000000..27619dd8186 --- /dev/null +++ b/PR_DESCRIPTION.md @@ -0,0 +1,97 @@ +**What this PR does / why we need it**: + +Feature: MariaDB Galera Cluster Status Monitoring + +This PR addresses issue #20755 by adding support for monitoring MariaDB Galera cluster status. It registers the standard Galera status variables as metrics in the Cloudpods monitoring system, enabling alerting and dashboards for cluster health. It also updates the Hostman service to support configuring Telegraf to collect these metrics from MySQL/MariaDB instances. + +### Implementation Details +**1. `pkg/monitor/dbinit/measurements/mysql.go`**: +Registered the standard Galera status variables in the `mysql` measurement definition: +```go +{ +"wsrep_cluster_size", "wsrep_cluster_size", monitor.METRIC_UNIT_COUNT, +}, +{ +"wsrep_cluster_status", "wsrep_cluster_status", monitor.METRIC_UNIT_NULL, +}, +{ +"wsrep_ready", "wsrep_ready", monitor.METRIC_UNIT_NULL, +}, +{ +"wsrep_connected", "wsrep_connected", monitor.METRIC_UNIT_NULL, +}, +``` + +**2. `pkg/monitor/dbinit/measurements/metrics.csv`**: +- Updated to include the new wsrep metric keys. + +**3. `pkg/hostman/options/options.go`**: +- Added the `TelegrafMysqlServers` option to `SHostBaseOptions` to allow configuring MySQL connection strings for Telegraf. + +**4. `pkg/hostman/hostinfo/hostinfo.go`**: +- Updated `OnCatalogChanged` to read `TelegrafMysqlServers` and configure the `mysql` input for Telegraf, specifically requesting `wsrep` string fields. + +**5. `pkg/hostman/system_service/telegraf.go`**: +- Implemented generation of the `[[inputs.mysql]]` configuration block in `telegraf.conf`. + + + +**Does this PR need to be backported to the previous release branch?**: + +NONE + + + +### Verification +**1. 
`pkg/monitor/dbinit/measurements/mysql_test.go`**: +Added a unit test to verify metric presence: +```go +package measurements + +import ( +"testing" +) + +func TestMysqlMetrics(t *testing.T) { +metrics := map[string]bool{ +"wsrep_cluster_size": false, +"wsrep_cluster_status": false, +"wsrep_ready": false, +"wsrep_connected": false, +} + +for _, m := range mysql.Metrics { +if _, ok := metrics[m.Name]; ok { +metrics[m.Name] = true +} +} + +for name, found := range metrics { +if !found { +t.Errorf("metric %s not found in mysql definitions", name) +} +} +} +``` + +**Automated Tests**: +Run the verification test: +```bash +go test -v -run TestMysqlMetrics ./pkg/monitor/dbinit/measurements/ +``` + +**Output**: +``` +=== RUN TestMysqlMetrics +--- PASS: TestMysqlMetrics (0.00s) +PASS +ok yunion.io/x/onecloud/pkg/monitor/dbinit/measurements 0.002s +``` + +**2. Hostman Telegraf Configuration Verification**: +Verified that `telegraf.conf` is correctly generated with an `[[inputs.mysql]]` block when `TelegrafMysqlServers` is configured. 
diff --git a/pkg/hostman/hostinfo/hostinfo.go b/pkg/hostman/hostinfo/hostinfo.go index 53392dbec06..3f5d20be1a5 100644 --- a/pkg/hostman/hostinfo/hostinfo.go +++ b/pkg/hostman/hostinfo/hostinfo.go @@ -2571,6 +2571,17 @@ func (h *SHostInfo) OnCatalogChanged(catalog mcclient.KeystoneServiceCatalogV3) conf["kafka"] = kafkaConf } + if len(options.HostOptions.TelegrafMysqlServers) > 0 { + conf["mysql"] = map[string]interface{}{ + "servers": options.HostOptions.TelegrafMysqlServers, + "string_fields": []string{ + "wsrep_cluster_status", + "wsrep_ready", + "wsrep_connected", + }, + } + } + urls, _ = s.GetServiceURLs("opentsdb", defaultEndpointType) if len(urls) > 0 { conf["opentsdb"] = map[string]interface{}{ diff --git a/pkg/hostman/options/options.go b/pkg/hostman/options/options.go index 908fa9a74c8..2903f0a30df 100644 --- a/pkg/hostman/options/options.go +++ b/pkg/hostman/options/options.go @@ -67,6 +67,8 @@ type SHostBaseOptions struct { TelegrafKafkaOutputSaslPassword string `json:"telegraf_kafka_output_sasl_password" help:"telegraf kafka output sasl_password"` TelegrafKafkaOutputSaslMechanism string `json:"telegraf_kafka_output_sasl_mechanism" help:"telegraf kafka output sasl_mechanism"` + TelegrafMysqlServers []string `json:"telegraf_mysql_servers" help:"telegraf mysql input servers"` + BackupTaskWorkerCount int `default:"3" help:"backup task worker count"` } diff --git a/pkg/hostman/system_service/telegraf.go b/pkg/hostman/system_service/telegraf.go index 0e1f99e537d..5c6cb8952c0 100644 --- a/pkg/hostman/system_service/telegraf.go +++ b/pkg/hostman/system_service/telegraf.go @@ -309,6 +309,46 @@ func (s *STelegraf) GetConfig(kwargs map[string]interface{}) string { conf += "\n" } + if mysql, ok := kwargs["mysql"]; ok { + mysqlConf, _ := mysql.(map[string]interface{}) + conf += "[[inputs.mysql]]\n" + if servers, ok := mysqlConf["servers"]; ok { + srvs, _ := servers.([]string) + serverStrs := make([]string, len(srvs)) + for i, srv := range srvs { + serverStrs[i] = 
fmt.Sprintf("\"%s\"", srv) + } + conf += fmt.Sprintf(" servers = [%s]\n", strings.Join(serverStrs, ", ")) + } + keys := []string{} + for k := range mysqlConf { + if k != "servers" { + keys = append(keys, k) + } + } + sort.Strings(keys) + for _, k := range keys { + v := mysqlConf[k] + switch val := v.(type) { + case string: + conf += fmt.Sprintf(" %s = \"%s\"\n", k, val) + case int, int32, int64: + conf += fmt.Sprintf(" %s = %d\n", k, val) + case float32, float64: + conf += fmt.Sprintf(" %s = %f\n", k, val) + case bool: + conf += fmt.Sprintf(" %s = %v\n", k, val) + case []string: + quoted := make([]string, len(val)) + for i, s := range val { + quoted[i] = fmt.Sprintf("\"%s\"", s) + } + conf += fmt.Sprintf(" %s = [%s]\n", k, strings.Join(quoted, ", ")) + } + } + conf += "\n" + } + if radontop, ok := kwargs[TELEGRAF_INPUT_RADEONTOP]; ok { radontopMap, _ := radontop.(map[string]interface{}) devPaths := radontopMap[TELEGRAF_INPUT_RADEONTOP_DEV_PATHS].([]string) diff --git a/pkg/monitor/dbinit/measurements/metrics.csv b/pkg/monitor/dbinit/measurements/metrics.csv index ac50564a639..91ee3a43ec5 100644 --- a/pkg/monitor/dbinit/measurements/metrics.csv +++ b/pkg/monitor/dbinit/measurements/metrics.csv @@ -506,6 +506,10 @@ Measurement,MeasurementNote,ResourceType,Database,Metric,MetricNote,MetricUnit "mysql","mysql","ext_mysql","telegraf","info_schema_table_rows","info_schema_table_rows","count" "mysql","mysql","ext_mysql","telegraf","info_schema_table_size_data_length","info_schema_table_size_data_length","count" "mysql","mysql","ext_mysql","telegraf","info_schema_table_size_index_length","info_schema_table_size_index_length","count" +"mysql","mysql","ext_mysql","telegraf","wsrep_cluster_size","wsrep_cluster_size","count" +"mysql","mysql","ext_mysql","telegraf","wsrep_cluster_status","wsrep_cluster_status","NULL" +"mysql","mysql","ext_mysql","telegraf","wsrep_ready","wsrep_ready","NULL" +"mysql","mysql","ext_mysql","telegraf","wsrep_connected","wsrep_connected","NULL" 
"netstat","netstat","host","telegraf","tcp_established","","count" "netstat","netstat","host","telegraf","tcp_syn_sent","","count" "netstat","netstat","host","telegraf","tcp_syn_recv","","count" diff --git a/pkg/monitor/dbinit/measurements/mysql.go b/pkg/monitor/dbinit/measurements/mysql.go index 2fe94141f55..dc4431ea0c9 100644 --- a/pkg/monitor/dbinit/measurements/mysql.go +++ b/pkg/monitor/dbinit/measurements/mysql.go @@ -54,5 +54,17 @@ var mysql = SMeasurement{ { "info_schema_table_size_index_length", "info_schema_table_size_index_length", monitor.METRIC_UNIT_COUNT, }, + { + "wsrep_cluster_size", "wsrep_cluster_size", monitor.METRIC_UNIT_COUNT, + }, + { + "wsrep_cluster_status", "wsrep_cluster_status", monitor.METRIC_UNIT_NULL, + }, + { + "wsrep_ready", "wsrep_ready", monitor.METRIC_UNIT_NULL, + }, + { + "wsrep_connected", "wsrep_connected", monitor.METRIC_UNIT_NULL, + }, }, } diff --git a/pkg/monitor/dbinit/measurements/mysql_test.go b/pkg/monitor/dbinit/measurements/mysql_test.go new file mode 100644 index 00000000000..52d5f1e21ff --- /dev/null +++ b/pkg/monitor/dbinit/measurements/mysql_test.go @@ -0,0 +1,26 @@ +package measurements + +import ( + "testing" +) + +func TestMysqlMetrics(t *testing.T) { + metrics := map[string]bool{ + "wsrep_cluster_size": false, + "wsrep_cluster_status": false, + "wsrep_ready": false, + "wsrep_connected": false, + } + + for _, m := range mysql.Metrics { + if _, ok := metrics[m.Name]; ok { + metrics[m.Name] = true + } + } + + for name, found := range metrics { + if !found { + t.Errorf("metric %s not found in mysql definitions", name) + } + } +}