Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions PR_DESCRIPTION.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
**What this PR does / why we need it**:

Feature: MariaDB Galera Cluster Status Monitoring

This PR addresses issue #20755 by adding support for monitoring MariaDB Galera cluster status. It registers the standard Galera status variables as metrics in the Cloudpods monitoring system, enabling alerting and dashboards for cluster health. It also updates the Hostman service to support configuring Telegraf to collect these metrics from MySQL/MariaDB instances.

### Implementation Details
**1. `pkg/monitor/dbinit/measurements/mysql.go`**:
Registered the standard Galera status variables in the `mysql` measurement definition:
```go
{
"wsrep_cluster_size", "wsrep_cluster_size", monitor.METRIC_UNIT_COUNT,
},
{
"wsrep_cluster_status", "wsrep_cluster_status", monitor.METRIC_UNIT_NULL,
},
{
"wsrep_ready", "wsrep_ready", monitor.METRIC_UNIT_NULL,
},
{
"wsrep_connected", "wsrep_connected", monitor.METRIC_UNIT_NULL,
},
```

**2. `pkg/monitor/dbinit/measurements/metrics.csv`**:
- Updated to include the new `wsrep` metric keys.

**3. `pkg/hostman/options/options.go`**:
- Added `TelegrafMysqlServers` option to `SHostBaseOptions` to allow configuring MySQL connection strings for Telegraf.

**4. `pkg/hostman/hostinfo/hostinfo.go`**:
- Updated `OnCatalogChanged` to read `TelegrafMysqlServers` and configure the `mysql` input for Telegraf, requesting the string-valued fields `wsrep_cluster_status`, `wsrep_ready`, and `wsrep_connected` via `string_fields`.

**5. `pkg/hostman/system_service/telegraf.go`**:
- Implemented generation of the `[[inputs.mysql]]` configuration block in `telegraf.conf`.

<!--
- [ ] Smoke testing completed
- [x] Unit test written
-->

**Does this PR need to be backported to the previous release branch?**:

NONE

<!--
If no, just write "NONE".
-->

### Verification
**1. `pkg/monitor/dbinit/measurements/mysql_test.go`**:
Added a unit test to verify that the new metrics are present:
```go
package measurements

import (
"testing"
)

func TestMysqlMetrics(t *testing.T) {
metrics := map[string]bool{
"wsrep_cluster_size": false,
"wsrep_cluster_status": false,
"wsrep_ready": false,
"wsrep_connected": false,
}

for _, m := range mysql.Metrics {
if _, ok := metrics[m.Name]; ok {
metrics[m.Name] = true
}
}

for name, found := range metrics {
if !found {
t.Errorf("metric %s not found in mysql definitions", name)
}
}
}
```

**Automated Tests**:
Run the verification test:
```bash
go test -v -run TestMysqlMetrics ./pkg/monitor/dbinit/measurements/
```

**Output**:
```
=== RUN TestMysqlMetrics
--- PASS: TestMysqlMetrics (0.00s)
PASS
ok yunion.io/x/onecloud/pkg/monitor/dbinit/measurements 0.002s
```

**2. Hostman Telegraf Configuration Verification**:
Verified that `telegraf.conf` is correctly generated with an `[[inputs.mysql]]` block when `TelegrafMysqlServers` is configured.
11 changes: 11 additions & 0 deletions pkg/hostman/hostinfo/hostinfo.go
Original file line number Diff line number Diff line change
Expand Up @@ -2571,6 +2571,17 @@ func (h *SHostInfo) OnCatalogChanged(catalog mcclient.KeystoneServiceCatalogV3)
conf["kafka"] = kafkaConf
}

if len(options.HostOptions.TelegrafMysqlServers) > 0 {
conf["mysql"] = map[string]interface{}{
"servers": options.HostOptions.TelegrafMysqlServers,
"string_fields": []string{
"wsrep_cluster_status",
"wsrep_ready",
"wsrep_connected",
},
}
}

urls, _ = s.GetServiceURLs("opentsdb", defaultEndpointType)
if len(urls) > 0 {
conf["opentsdb"] = map[string]interface{}{
Expand Down
2 changes: 2 additions & 0 deletions pkg/hostman/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ type SHostBaseOptions struct {
TelegrafKafkaOutputSaslPassword string `json:"telegraf_kafka_output_sasl_password" help:"telegraf kafka output sasl_password"`
TelegrafKafkaOutputSaslMechanism string `json:"telegraf_kafka_output_sasl_mechanism" help:"telegraf kafka output sasl_mechanism"`

TelegrafMysqlServers []string `json:"telegraf_mysql_servers" help:"telegraf mysql input servers"`

BackupTaskWorkerCount int `default:"3" help:"backup task worker count"`
}

Expand Down
40 changes: 40 additions & 0 deletions pkg/hostman/system_service/telegraf.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,46 @@ func (s *STelegraf) GetConfig(kwargs map[string]interface{}) string {
conf += "\n"
}

if mysql, ok := kwargs["mysql"]; ok {
mysqlConf, _ := mysql.(map[string]interface{})
conf += "[[inputs.mysql]]\n"
if servers, ok := mysqlConf["servers"]; ok {
srvs, _ := servers.([]string)
serverStrs := make([]string, len(srvs))
for i, srv := range srvs {
serverStrs[i] = fmt.Sprintf("\"%s\"", srv)
}
conf += fmt.Sprintf(" servers = [%s]\n", strings.Join(serverStrs, ", "))
}
keys := []string{}
for k := range mysqlConf {
if k != "servers" {
keys = append(keys, k)
}
}
sort.Strings(keys)
for _, k := range keys {
v := mysqlConf[k]
switch val := v.(type) {
case string:
conf += fmt.Sprintf(" %s = \"%s\"\n", k, val)
case int, int32, int64:
conf += fmt.Sprintf(" %s = %d\n", k, val)
case float32, float64:
conf += fmt.Sprintf(" %s = %f\n", k, val)
case bool:
conf += fmt.Sprintf(" %s = %v\n", k, val)
case []string:
quoted := make([]string, len(val))
for i, s := range val {
quoted[i] = fmt.Sprintf("\"%s\"", s)
}
conf += fmt.Sprintf(" %s = [%s]\n", k, strings.Join(quoted, ", "))
}
}
conf += "\n"
}

if radontop, ok := kwargs[TELEGRAF_INPUT_RADEONTOP]; ok {
radontopMap, _ := radontop.(map[string]interface{})
devPaths := radontopMap[TELEGRAF_INPUT_RADEONTOP_DEV_PATHS].([]string)
Expand Down
4 changes: 4 additions & 0 deletions pkg/monitor/dbinit/measurements/metrics.csv
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,10 @@ Measurement,MeasurementNote,ResourceType,Database,Metric,MetricNote,MetricUnit
"mysql","mysql","ext_mysql","telegraf","info_schema_table_rows","info_schema_table_rows","count"
"mysql","mysql","ext_mysql","telegraf","info_schema_table_size_data_length","info_schema_table_size_data_length","count"
"mysql","mysql","ext_mysql","telegraf","info_schema_table_size_index_length","info_schema_table_size_index_length","count"
"mysql","mysql","ext_mysql","telegraf","wsrep_cluster_size","wsrep_cluster_size","count"
"mysql","mysql","ext_mysql","telegraf","wsrep_cluster_status","wsrep_cluster_status","NULL"
"mysql","mysql","ext_mysql","telegraf","wsrep_ready","wsrep_ready","NULL"
"mysql","mysql","ext_mysql","telegraf","wsrep_connected","wsrep_connected","NULL"
"netstat","netstat","host","telegraf","tcp_established","","count"
"netstat","netstat","host","telegraf","tcp_syn_sent","","count"
"netstat","netstat","host","telegraf","tcp_syn_recv","","count"
Expand Down
12 changes: 12 additions & 0 deletions pkg/monitor/dbinit/measurements/mysql.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,17 @@ var mysql = SMeasurement{
{
"info_schema_table_size_index_length", "info_schema_table_size_index_length", monitor.METRIC_UNIT_COUNT,
},
{
"wsrep_cluster_size", "wsrep_cluster_size", monitor.METRIC_UNIT_COUNT,
},
{
"wsrep_cluster_status", "wsrep_cluster_status", monitor.METRIC_UNIT_NULL,
},
{
"wsrep_ready", "wsrep_ready", monitor.METRIC_UNIT_NULL,
},
{
"wsrep_connected", "wsrep_connected", monitor.METRIC_UNIT_NULL,
},
},
}
26 changes: 26 additions & 0 deletions pkg/monitor/dbinit/measurements/mysql_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package measurements

import (
"testing"
)

// TestMysqlMetrics checks that each Galera wsrep metric name listed below
// appears among the metrics of the mysql measurement definition.
func TestMysqlMetrics(t *testing.T) {
	// pending holds the expected metric names that have not been seen yet.
	pending := map[string]struct{}{
		"wsrep_cluster_size":   {},
		"wsrep_cluster_status": {},
		"wsrep_ready":          {},
		"wsrep_connected":      {},
	}

	// Strike off every registered metric; deleting an absent key is a no-op,
	// so unrelated metrics are ignored.
	for _, metric := range mysql.Metrics {
		delete(pending, metric.Name)
	}

	// Anything still pending was never registered.
	for missing := range pending {
		t.Errorf("metric %s not found in mysql definitions", missing)
	}
}