Merge pull request #1391 from atombender/fix_prometheus_scrape_error_gauge

Fix for Prometheus metric "container_scrape_error" getting stuck
This commit is contained in:
Tim St. Clair 2016-07-21 18:21:24 -07:00 committed by GitHub
commit fe765981c2
3 changed files with 57 additions and 1 deletions

View File

@ -493,6 +493,7 @@ func (c *PrometheusCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect fetches the stats from all containers and delivers them as
// Prometheus metrics. It implements prometheus.PrometheusCollector.
func (c *PrometheusCollector) Collect(ch chan<- prometheus.Metric) {
c.errors.Set(0)
c.collectMachineInfo(ch)
c.collectVersionInfo(ch)
c.collectContainersInfo(ch)

View File

@ -15,6 +15,7 @@
package metrics
import (
"errors"
"io/ioutil"
"net/http"
"net/http/httptest"
@ -181,10 +182,13 @@ func TestPrometheusCollector(t *testing.T) {
prometheus.MustRegister(c)
defer prometheus.Unregister(c)
testPrometheusCollector(t, c, "testdata/prometheus_metrics")
}
func testPrometheusCollector(t *testing.T, c *PrometheusCollector, metricsFile string) {
rw := httptest.NewRecorder()
prometheus.Handler().ServeHTTP(rw, &http.Request{})
metricsFile := "testdata/prometheus_metrics"
wantMetrics, err := ioutil.ReadFile(metricsFile)
if err != nil {
t.Fatalf("unable to read input test file %s", metricsFile)
@ -206,3 +210,51 @@ func TestPrometheusCollector(t *testing.T) {
}
}
}
// erroringSubcontainersInfoProvider is a test double that wraps a
// testSubcontainersInfoProvider and can be switched into a failing mode.
type erroringSubcontainersInfoProvider struct {
// successfulProvider handles every call while shouldFail is false.
successfulProvider testSubcontainersInfoProvider
// shouldFail, when true, makes every method return an error instead of
// delegating to successfulProvider.
shouldFail bool
}
// GetVersionInfo forwards to the wrapped successful provider unless
// shouldFail is set, in which case it returns a nil result and the
// error "Oops 1".
func (p *erroringSubcontainersInfoProvider) GetVersionInfo() (*info.VersionInfo, error) {
	if !p.shouldFail {
		return p.successfulProvider.GetVersionInfo()
	}
	return nil, errors.New("Oops 1")
}
// GetMachineInfo forwards to the wrapped successful provider unless
// shouldFail is set, in which case it returns a nil result and the
// error "Oops 2".
func (p *erroringSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo, error) {
	if !p.shouldFail {
		return p.successfulProvider.GetMachineInfo()
	}
	return nil, errors.New("Oops 2")
}
// SubcontainersInfo forwards the request to the wrapped successful provider
// unless shouldFail is set, in which case it returns an empty (non-nil)
// slice together with the error "Oops 3".
func (p *erroringSubcontainersInfoProvider) SubcontainersInfo(
	a string, r *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
	if !p.shouldFail {
		return p.successfulProvider.SubcontainersInfo(a, r)
	}
	return []*info.ContainerInfo{}, errors.New("Oops 3")
}
// TestPrometheusCollector_scrapeFailure registers a collector backed by a
// provider that initially fails every call, checks the scrape output against
// the failure fixture, then lets the provider succeed and checks the output
// against the regular fixture using the same collector instance.
func TestPrometheusCollector_scrapeFailure(t *testing.T) {
	// Label callback handed to the collector; it ignores the name argument.
	labels := func(name string) map[string]string {
		return map[string]string{
			"zone.name": "hello",
		}
	}

	failing := &erroringSubcontainersInfoProvider{
		successfulProvider: testSubcontainersInfoProvider{},
		shouldFail:         true,
	}

	collector := NewPrometheusCollector(failing, labels)
	prometheus.MustRegister(collector)
	defer prometheus.Unregister(collector)

	// First scrape: the provider returns errors from every method.
	testPrometheusCollector(t, collector, "testdata/prometheus_metrics_failure")

	// Second scrape on the same collector: the provider now succeeds.
	failing.shouldFail = false
	testPrometheusCollector(t, collector, "testdata/prometheus_metrics")
}

View File

@ -0,0 +1,3 @@
# HELP container_scrape_error 1 if there was an error while getting container metrics, 0 otherwise
# TYPE container_scrape_error gauge
container_scrape_error 1