Merge pull request #648 from vmarmol/events

Split OOM event into OOM and OOM Kill
This commit is contained in:
Rohit Jnagal 2015-04-15 13:49:45 -07:00
commit 25f3124155
4 changed files with 43 additions and 21 deletions

View File

@ -497,9 +497,10 @@ type Event struct {
type EventType int type EventType int
const ( const (
EventOom EventType = iota EventOom EventType = 0
EventContainerCreation EventOomKill = 1
EventContainerDeletion EventContainerCreation = 2
EventContainerDeletion = 3
) )
// Extra information about an event. Only one type will be set. // Extra information about an event. Only one type will be set.
@ -507,8 +508,8 @@ type EventData struct {
// Information about a container creation event. // Information about a container creation event.
Created *CreatedEventData `json:"created,omitempty"` Created *CreatedEventData `json:"created,omitempty"`
// Information about an OOM event. // Information about an OOM kill event.
Oom *OomEventData `json:"oom,omitempty"` OomKill *OomKillEventData `json:"oom,omitempty"`
} }
// Information related to a container creation event. // Information related to a container creation event.
@ -518,7 +519,7 @@ type CreatedEventData struct {
} }
// Information related to an OOM kill instance // Information related to an OOM kill instance
type OomEventData struct { type OomKillEventData struct {
// process id of the killed process // process id of the killed process
Pid int `json:"pid"` Pid int `json:"pid"`

View File

@ -876,21 +876,32 @@ func (self *manager) watchForNewOoms() error {
go func() { go func() {
for oomInstance := range outStream { for oomInstance := range outStream {
// Surface OOM and OOM kill events.
newEvent := &info.Event{ newEvent := &info.Event{
ContainerName: oomInstance.ContainerName, ContainerName: oomInstance.ContainerName,
Timestamp: oomInstance.TimeOfDeath, Timestamp: oomInstance.TimeOfDeath,
EventType: info.EventOom, EventType: info.EventOom,
}
err := self.eventHandler.AddEvent(newEvent)
if err != nil {
glog.Errorf("failed to add OOM event for %q: %v", oomInstance.ContainerName, err)
}
glog.V(3).Infof("Created an OOM event in container %q at %v", oomInstance.ContainerName, oomInstance.TimeOfDeath)
newEvent = &info.Event{
ContainerName: oomInstance.VictimContainerName,
Timestamp: oomInstance.TimeOfDeath,
EventType: info.EventOomKill,
EventData: info.EventData{ EventData: info.EventData{
Oom: &info.OomEventData{ OomKill: &info.OomKillEventData{
Pid: oomInstance.Pid, Pid: oomInstance.Pid,
ProcessName: oomInstance.ProcessName, ProcessName: oomInstance.ProcessName,
}, },
}, },
} }
glog.V(2).Infof("Created an oom event in container %q at %v", oomInstance.ContainerName, oomInstance.TimeOfDeath) err = self.eventHandler.AddEvent(newEvent)
err := self.eventHandler.AddEvent(newEvent)
if err != nil { if err != nil {
glog.Errorf("failed to add event %v, got error: %v", newEvent, err) glog.Errorf("failed to add OOM kill event for %q: %v", oomInstance.ContainerName, err)
} }
} }
}() }()

View File

@ -30,7 +30,7 @@ import (
) )
var containerRegexp *regexp.Regexp = regexp.MustCompile( var containerRegexp *regexp.Regexp = regexp.MustCompile(
`Task in (.*) killed as a result of limit of `) `Task in (.*) killed as a result of limit of (.*)`)
var lastLineRegexp *regexp.Regexp = regexp.MustCompile( var lastLineRegexp *regexp.Regexp = regexp.MustCompile(
`(^[A-Z]{1}[a-z]{2} .*[0-9]{1,2} [0-9]{1,2}:[0-9]{2}:[0-9]{2}) .* Killed process ([0-9]+) \(([0-9A-Za-z_]+)\)`) `(^[A-Z]{1}[a-z]{2} .*[0-9]{1,2} [0-9]{1,2}:[0-9]{2}:[0-9]{2}) .* Killed process ([0-9]+) \(([0-9A-Za-z_]+)\)`)
var firstLineRegexp *regexp.Regexp = regexp.MustCompile( var firstLineRegexp *regexp.Regexp = regexp.MustCompile(
@ -52,6 +52,9 @@ type OomInstance struct {
TimeOfDeath time.Time TimeOfDeath time.Time
// the absolute name of the container that OOMed // the absolute name of the container that OOMed
ContainerName string ContainerName string
// the absolute name of the container that was killed
// due to the OOM.
VictimContainerName string
} }
// gets the container name from a line and adds it to the oomInstance. // gets the container name from a line and adds it to the oomInstance.
@ -61,6 +64,7 @@ func getContainerName(line string, currentOomInstance *OomInstance) error {
return nil return nil
} }
currentOomInstance.ContainerName = path.Join("/", parsedLine[1]) currentOomInstance.ContainerName = path.Join("/", parsedLine[1])
currentOomInstance.VictimContainerName = path.Join("/", parsedLine[2])
return nil return nil
} }

View File

@ -17,13 +17,14 @@ package oomparser
import ( import (
"bufio" "bufio"
"os" "os"
"reflect"
"testing" "testing"
"time" "time"
) )
const startLine = "Jan 21 22:01:49 localhost kernel: [62278.816267] ruby invoked oom-killer: gfp_mask=0x201da, order=0, oom_score_adj=0" const startLine = "Jan 21 22:01:49 localhost kernel: [62278.816267] ruby invoked oom-killer: gfp_mask=0x201da, order=0, oom_score_adj=0"
const endLine = "Jan 21 22:01:49 localhost kernel: [62279.421192] Killed process 19667 (evilprogram2) total-vm:1460016kB, anon-rss:1414008kB, file-rss:4kB" const endLine = "Jan 21 22:01:49 localhost kernel: [62279.421192] Killed process 19667 (evilprogram2) total-vm:1460016kB, anon-rss:1414008kB, file-rss:4kB"
const containerLine = "Jan 26 14:10:07 kateknister0.mtv.corp.google.com kernel: [1814368.465205] Task in /mem2 killed as a result of limit of /mem2" const containerLine = "Jan 26 14:10:07 kateknister0.mtv.corp.google.com kernel: [1814368.465205] Task in /mem2 killed as a result of limit of /mem3"
const containerLogFile = "containerOomExampleLog.txt" const containerLogFile = "containerOomExampleLog.txt"
const systemLogFile = "systemOomExampleLog.txt" const systemLogFile = "systemOomExampleLog.txt"
@ -35,10 +36,11 @@ func createExpectedContainerOomInstance(t *testing.T) *OomInstance {
return nil return nil
} }
return &OomInstance{ return &OomInstance{
Pid: 13536, Pid: 13536,
ProcessName: "memorymonster", ProcessName: "memorymonster",
TimeOfDeath: deathTime, TimeOfDeath: deathTime,
ContainerName: "/mem2", ContainerName: "/mem2",
VictimContainerName: "/mem3",
} }
} }
@ -50,10 +52,11 @@ func createExpectedSystemOomInstance(t *testing.T) *OomInstance {
return nil return nil
} }
return &OomInstance{ return &OomInstance{
Pid: 1532, Pid: 1532,
ProcessName: "badsysprogram", ProcessName: "badsysprogram",
TimeOfDeath: deathTime, TimeOfDeath: deathTime,
ContainerName: "/", ContainerName: "/",
VictimContainerName: "/",
} }
} }
@ -73,6 +76,9 @@ func TestGetContainerName(t *testing.T) {
if currentOomInstance.ContainerName != "/mem2" { if currentOomInstance.ContainerName != "/mem2" {
t.Errorf("getContainerName should have set containerName to /mem2, not %s", currentOomInstance.ContainerName) t.Errorf("getContainerName should have set containerName to /mem2, not %s", currentOomInstance.ContainerName)
} }
if currentOomInstance.VictimContainerName != "/mem3" {
t.Errorf("getContainerName should have set victimContainerName to /mem3, not %s", currentOomInstance.VictimContainerName)
}
} }
func TestGetProcessNamePid(t *testing.T) { func TestGetProcessNamePid(t *testing.T) {
@ -139,7 +145,7 @@ func helpTestStreamOoms(oomCheckInstance *OomInstance, sysFile string, t *testin
select { select {
case oomInstance := <-outStream: case oomInstance := <-outStream:
if *oomCheckInstance != *oomInstance { if reflect.DeepEqual(*oomCheckInstance, *oomInstance) {
t.Errorf("wrong instance returned. Expected %v and got %v", t.Errorf("wrong instance returned. Expected %v and got %v",
oomCheckInstance, oomInstance) oomCheckInstance, oomInstance)
} }