Skip to content

Commit

Permalink
PMM-9757 Fixed panic on lost connection (#465)
Browse files Browse the repository at this point in the history
Changed the way we instantiate the topology info object.
Now we call the newTopologyInfo and it never returns an error and the
object is never nill. If it is not possible to connect, topology labels
remain empty but we return a valid and not nill object.
Also added context termination check to avoid the ugly stack traces
produced by context timeouts.

Co-authored-by: Carlos Salguero <[email protected]>
Co-authored-by: Denys Kondratenko <[email protected]>
  • Loading branch information
3 people authored Mar 31, 2022
1 parent 8a342d4 commit 5ceed65
Show file tree
Hide file tree
Showing 13 changed files with 27 additions and 32 deletions.
9 changes: 8 additions & 1 deletion exporter/base_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package exporter

import (
"context"
"sync"

"github.com/prometheus/client_golang/prometheus"
Expand All @@ -40,7 +41,13 @@ func newBaseCollector(client *mongo.Client, logger *logrus.Logger) *baseCollecto
}
}

func (d *baseCollector) Describe(ch chan<- *prometheus.Desc, collect func(mCh chan<- prometheus.Metric)) {
func (d *baseCollector) Describe(ctx context.Context, ch chan<- *prometheus.Desc, collect func(mCh chan<- prometheus.Metric)) {
select {
case <-ctx.Done():
return
default:
}

d.lock.Lock()
defer d.lock.Unlock()

Expand Down
2 changes: 1 addition & 1 deletion exporter/collstats_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func newCollectionStatsCollector(ctx context.Context, client *mongo.Client, logg
}

func (d *collstatsCollector) Describe(ch chan<- *prometheus.Desc) {
d.base.Describe(ch, d.collect)
d.base.Describe(d.ctx, ch, d.collect)
}

func (d *collstatsCollector) Collect(ch chan<- prometheus.Metric) {
Expand Down
2 changes: 1 addition & 1 deletion exporter/dbstats_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func newDBStatsCollector(ctx context.Context, client *mongo.Client, logger *logr
}

func (d *dbstatsCollector) Describe(ch chan<- *prometheus.Desc) {
d.base.Describe(ch, d.collect)
d.base.Describe(d.ctx, ch, d.collect)
}

func (d *dbstatsCollector) Collect(ch chan<- prometheus.Metric) {
Expand Down
2 changes: 1 addition & 1 deletion exporter/diagnostic_data_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func newDiagnosticDataCollector(ctx context.Context, client *mongo.Client, logge
}

func (d *diagnosticDataCollector) Describe(ch chan<- *prometheus.Desc) {
d.base.Describe(ch, d.collect)
d.base.Describe(d.ctx, ch, d.collect)
}

func (d *diagnosticDataCollector) Collect(ch chan<- prometheus.Metric) {
Expand Down
10 changes: 4 additions & 6 deletions exporter/diagnostic_data_collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,12 @@ func TestAllDiagnosticDataCollectorMetrics(t *testing.T) {

client := tu.DefaultTestClient(ctx, t)

ti, err := newTopologyInfo(ctx, client)
require.NoError(t, err)
ti := newTopologyInfo(ctx, client)

c := newDiagnosticDataCollector(ctx, client, logrus.New(), true, ti)

reg := prometheus.NewRegistry()
err = reg.Register(c)
err := reg.Register(c)
require.NoError(t, err)
metrics := helpers.CollectMetrics(c)
actualMetrics := helpers.ReadMetrics(metrics)
Expand Down Expand Up @@ -121,12 +120,11 @@ func TestContextTimeout(t *testing.T) {

client := tu.DefaultTestClient(ctx, t)

ti, err := newTopologyInfo(ctx, client)
require.NoError(t, err)
ti := newTopologyInfo(ctx, client)

dbCount := 100

err = addTestData(ctx, client, dbCount)
err := addTestData(ctx, client, dbCount)
assert.NoError(t, err)

defer cleanTestData(ctx, client, dbCount) //nolint:errcheck
Expand Down
13 changes: 1 addition & 12 deletions exporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,18 +296,7 @@ func (e *Exporter) Handler() http.Handler {
}

// Topology can change between requests, so we need to get it every time.
var ti *topologyInfo
if client != nil {
ti, err = newTopologyInfo(ctx, client)
if err != nil {
e.logger.Errorf("Cannot get topology info: %v", err)
http.Error(
w,
"An error has occurred while getting topology info:\n\n"+err.Error(),
http.StatusInternalServerError,
)
}
}
ti := newTopologyInfo(ctx, client)

registry := e.makeRegistry(ctx, client, ti, requestOpts)

Expand Down
2 changes: 1 addition & 1 deletion exporter/general_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func newGeneralCollector(ctx context.Context, client *mongo.Client, logger *logr
}

func (d *generalCollector) Describe(ch chan<- *prometheus.Desc) {
d.base.Describe(ch, d.collect)
d.base.Describe(d.ctx, ch, d.collect)
}

func (d *generalCollector) Collect(ch chan<- prometheus.Metric) {
Expand Down
2 changes: 1 addition & 1 deletion exporter/indexstats_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func newIndexStatsCollector(ctx context.Context, client *mongo.Client, logger *l
}

func (d *indexstatsCollector) Describe(ch chan<- *prometheus.Desc) {
d.base.Describe(ch, d.collect)
d.base.Describe(d.ctx, ch, d.collect)
}

func (d *indexstatsCollector) Collect(ch chan<- prometheus.Metric) {
Expand Down
2 changes: 1 addition & 1 deletion exporter/replset_status_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func newReplicationSetStatusCollector(ctx context.Context, client *mongo.Client,
}

func (d *replSetGetStatusCollector) Describe(ch chan<- *prometheus.Desc) {
d.base.Describe(ch, d.collect)
d.base.Describe(d.ctx, ch, d.collect)
}

func (d *replSetGetStatusCollector) Collect(ch chan<- prometheus.Metric) {
Expand Down
2 changes: 1 addition & 1 deletion exporter/serverstatus_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func newServerStatusCollector(ctx context.Context, client *mongo.Client, logger
}

func (d *serverStatusCollector) Describe(ch chan<- *prometheus.Desc) {
d.base.Describe(ch, d.collect)
d.base.Describe(d.ctx, ch, d.collect)
}

func (d *serverStatusCollector) Collect(ch chan<- prometheus.Metric) {
Expand Down
2 changes: 1 addition & 1 deletion exporter/top_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func newTopCollector(ctx context.Context, client *mongo.Client, logger *logrus.L
}

func (d *topCollector) Describe(ch chan<- *prometheus.Desc) {
d.base.Describe(ch, d.collect)
d.base.Describe(d.ctx, ch, d.collect)
}

func (d *topCollector) Collect(ch chan<- prometheus.Metric) {
Expand Down
8 changes: 5 additions & 3 deletions exporter/topology_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/percona/percona-toolkit/src/go/mongolib/proto"
"github.com/percona/percona-toolkit/src/go/mongolib/util"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"go.mongodb.org/mongo-driver/bson/primitive"
"go.mongodb.org/mongo-driver/mongo"
)
Expand Down Expand Up @@ -62,18 +63,19 @@ type topologyInfo struct {
// ErrCannotGetTopologyLabels Cannot read topology labels.
var ErrCannotGetTopologyLabels = fmt.Errorf("cannot get topology labels")

func newTopologyInfo(ctx context.Context, client *mongo.Client) (*topologyInfo, error) {
func newTopologyInfo(ctx context.Context, client *mongo.Client) *topologyInfo {
ti := &topologyInfo{
client: client,
labels: make(map[string]string),
rw: sync.RWMutex{},
}

err := ti.loadLabels(ctx)
if err != nil {
return nil, err
logrus.Warnf("cannot load topology labels: %s", err)
}

return ti, nil
return ti
}

// baseLabels returns a copy of the topology labels because in some collectors like
Expand Down
3 changes: 1 addition & 2 deletions exporter/topology_info_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,7 @@ func TestTopologyLabels(t *testing.T) {
require.NoError(t, err)

client := tu.TestClient(ctx, port, t)
ti, err := newTopologyInfo(ctx, client)
require.NoError(t, err)
ti := newTopologyInfo(ctx, client)
bl := ti.baseLabels()
assert.Equal(t, tc.want[labelReplicasetName], bl[labelReplicasetName], tc.containerName)
assert.Equal(t, tc.want[labelReplicasetState], bl[labelReplicasetState], tc.containerName)
Expand Down

0 comments on commit 5ceed65

Please sign in to comment.