Merge pull request #648 from vmarmol/events
Split OOM event into OOM and OOM Kill
This commit is contained in:
commit
25f3124155
@ -497,9 +497,10 @@ type Event struct {
|
|||||||
type EventType int
|
type EventType int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
EventOom EventType = iota
|
EventOom EventType = 0
|
||||||
EventContainerCreation
|
EventOomKill = 1
|
||||||
EventContainerDeletion
|
EventContainerCreation = 2
|
||||||
|
EventContainerDeletion = 3
|
||||||
)
|
)
|
||||||
|
|
||||||
// Extra information about an event. Only one type will be set.
|
// Extra information about an event. Only one type will be set.
|
||||||
@ -507,8 +508,8 @@ type EventData struct {
|
|||||||
// Information about a container creation event.
|
// Information about a container creation event.
|
||||||
Created *CreatedEventData `json:"created,omitempty"`
|
Created *CreatedEventData `json:"created,omitempty"`
|
||||||
|
|
||||||
// Information about an OOM event.
|
// Information about an OOM kill event.
|
||||||
Oom *OomEventData `json:"oom,omitempty"`
|
OomKill *OomKillEventData `json:"oom,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Information related to a container creation event.
|
// Information related to a container creation event.
|
||||||
@ -518,7 +519,7 @@ type CreatedEventData struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Information related to an OOM kill instance
|
// Information related to an OOM kill instance
|
||||||
type OomEventData struct {
|
type OomKillEventData struct {
|
||||||
// process id of the killed process
|
// process id of the killed process
|
||||||
Pid int `json:"pid"`
|
Pid int `json:"pid"`
|
||||||
|
|
||||||
|
@ -876,21 +876,32 @@ func (self *manager) watchForNewOoms() error {
|
|||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
for oomInstance := range outStream {
|
for oomInstance := range outStream {
|
||||||
|
// Surface OOM and OOM kill events.
|
||||||
newEvent := &info.Event{
|
newEvent := &info.Event{
|
||||||
ContainerName: oomInstance.ContainerName,
|
ContainerName: oomInstance.ContainerName,
|
||||||
Timestamp: oomInstance.TimeOfDeath,
|
Timestamp: oomInstance.TimeOfDeath,
|
||||||
EventType: info.EventOom,
|
EventType: info.EventOom,
|
||||||
|
}
|
||||||
|
err := self.eventHandler.AddEvent(newEvent)
|
||||||
|
if err != nil {
|
||||||
|
glog.Errorf("failed to add OOM event for %q: %v", oomInstance.ContainerName, err)
|
||||||
|
}
|
||||||
|
glog.V(3).Infof("Created an OOM event in container %q at %v", oomInstance.ContainerName, oomInstance.TimeOfDeath)
|
||||||
|
|
||||||
|
newEvent = &info.Event{
|
||||||
|
ContainerName: oomInstance.VictimContainerName,
|
||||||
|
Timestamp: oomInstance.TimeOfDeath,
|
||||||
|
EventType: info.EventOomKill,
|
||||||
EventData: info.EventData{
|
EventData: info.EventData{
|
||||||
Oom: &info.OomEventData{
|
OomKill: &info.OomKillEventData{
|
||||||
Pid: oomInstance.Pid,
|
Pid: oomInstance.Pid,
|
||||||
ProcessName: oomInstance.ProcessName,
|
ProcessName: oomInstance.ProcessName,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
glog.V(2).Infof("Created an oom event in container %q at %v", oomInstance.ContainerName, oomInstance.TimeOfDeath)
|
err = self.eventHandler.AddEvent(newEvent)
|
||||||
err := self.eventHandler.AddEvent(newEvent)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("failed to add event %v, got error: %v", newEvent, err)
|
glog.Errorf("failed to add OOM kill event for %q: %v", oomInstance.ContainerName, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
@ -30,7 +30,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var containerRegexp *regexp.Regexp = regexp.MustCompile(
|
var containerRegexp *regexp.Regexp = regexp.MustCompile(
|
||||||
`Task in (.*) killed as a result of limit of `)
|
`Task in (.*) killed as a result of limit of (.*)`)
|
||||||
var lastLineRegexp *regexp.Regexp = regexp.MustCompile(
|
var lastLineRegexp *regexp.Regexp = regexp.MustCompile(
|
||||||
`(^[A-Z]{1}[a-z]{2} .*[0-9]{1,2} [0-9]{1,2}:[0-9]{2}:[0-9]{2}) .* Killed process ([0-9]+) \(([0-9A-Za-z_]+)\)`)
|
`(^[A-Z]{1}[a-z]{2} .*[0-9]{1,2} [0-9]{1,2}:[0-9]{2}:[0-9]{2}) .* Killed process ([0-9]+) \(([0-9A-Za-z_]+)\)`)
|
||||||
var firstLineRegexp *regexp.Regexp = regexp.MustCompile(
|
var firstLineRegexp *regexp.Regexp = regexp.MustCompile(
|
||||||
@ -52,6 +52,9 @@ type OomInstance struct {
|
|||||||
TimeOfDeath time.Time
|
TimeOfDeath time.Time
|
||||||
// the absolute name of the container that OOMed
|
// the absolute name of the container that OOMed
|
||||||
ContainerName string
|
ContainerName string
|
||||||
|
// the absolute name of the container that was killed
|
||||||
|
// due to the OOM.
|
||||||
|
VictimContainerName string
|
||||||
}
|
}
|
||||||
|
|
||||||
// gets the container name from a line and adds it to the oomInstance.
|
// gets the container name from a line and adds it to the oomInstance.
|
||||||
@ -61,6 +64,7 @@ func getContainerName(line string, currentOomInstance *OomInstance) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
currentOomInstance.ContainerName = path.Join("/", parsedLine[1])
|
currentOomInstance.ContainerName = path.Join("/", parsedLine[1])
|
||||||
|
currentOomInstance.VictimContainerName = path.Join("/", parsedLine[2])
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -17,13 +17,14 @@ package oomparser
|
|||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"os"
|
"os"
|
||||||
|
"reflect"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
const startLine = "Jan 21 22:01:49 localhost kernel: [62278.816267] ruby invoked oom-killer: gfp_mask=0x201da, order=0, oom_score_adj=0"
|
const startLine = "Jan 21 22:01:49 localhost kernel: [62278.816267] ruby invoked oom-killer: gfp_mask=0x201da, order=0, oom_score_adj=0"
|
||||||
const endLine = "Jan 21 22:01:49 localhost kernel: [62279.421192] Killed process 19667 (evilprogram2) total-vm:1460016kB, anon-rss:1414008kB, file-rss:4kB"
|
const endLine = "Jan 21 22:01:49 localhost kernel: [62279.421192] Killed process 19667 (evilprogram2) total-vm:1460016kB, anon-rss:1414008kB, file-rss:4kB"
|
||||||
const containerLine = "Jan 26 14:10:07 kateknister0.mtv.corp.google.com kernel: [1814368.465205] Task in /mem2 killed as a result of limit of /mem2"
|
const containerLine = "Jan 26 14:10:07 kateknister0.mtv.corp.google.com kernel: [1814368.465205] Task in /mem2 killed as a result of limit of /mem3"
|
||||||
const containerLogFile = "containerOomExampleLog.txt"
|
const containerLogFile = "containerOomExampleLog.txt"
|
||||||
const systemLogFile = "systemOomExampleLog.txt"
|
const systemLogFile = "systemOomExampleLog.txt"
|
||||||
|
|
||||||
@ -39,6 +40,7 @@ func createExpectedContainerOomInstance(t *testing.T) *OomInstance {
|
|||||||
ProcessName: "memorymonster",
|
ProcessName: "memorymonster",
|
||||||
TimeOfDeath: deathTime,
|
TimeOfDeath: deathTime,
|
||||||
ContainerName: "/mem2",
|
ContainerName: "/mem2",
|
||||||
|
VictimContainerName: "/mem3",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -54,6 +56,7 @@ func createExpectedSystemOomInstance(t *testing.T) *OomInstance {
|
|||||||
ProcessName: "badsysprogram",
|
ProcessName: "badsysprogram",
|
||||||
TimeOfDeath: deathTime,
|
TimeOfDeath: deathTime,
|
||||||
ContainerName: "/",
|
ContainerName: "/",
|
||||||
|
VictimContainerName: "/",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -73,6 +76,9 @@ func TestGetContainerName(t *testing.T) {
|
|||||||
if currentOomInstance.ContainerName != "/mem2" {
|
if currentOomInstance.ContainerName != "/mem2" {
|
||||||
t.Errorf("getContainerName should have set containerName to /mem2, not %s", currentOomInstance.ContainerName)
|
t.Errorf("getContainerName should have set containerName to /mem2, not %s", currentOomInstance.ContainerName)
|
||||||
}
|
}
|
||||||
|
if currentOomInstance.VictimContainerName != "/mem3" {
|
||||||
|
t.Errorf("getContainerName should have set victimContainerName to /mem3, not %s", currentOomInstance.VictimContainerName)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGetProcessNamePid(t *testing.T) {
|
func TestGetProcessNamePid(t *testing.T) {
|
||||||
@ -139,7 +145,7 @@ func helpTestStreamOoms(oomCheckInstance *OomInstance, sysFile string, t *testin
|
|||||||
|
|
||||||
select {
|
select {
|
||||||
case oomInstance := <-outStream:
|
case oomInstance := <-outStream:
|
||||||
if *oomCheckInstance != *oomInstance {
|
if reflect.DeepEqual(*oomCheckInstance, *oomInstance) {
|
||||||
t.Errorf("wrong instance returned. Expected %v and got %v",
|
t.Errorf("wrong instance returned. Expected %v and got %v",
|
||||||
oomCheckInstance, oomInstance)
|
oomCheckInstance, oomInstance)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user