fn: introducing docker-syslog driver as default logger (#1189)

* fn: introducing docker-syslog driver as default logger

With this change, fn-agent prefers RFC2454 docker-syslog driver
for logging stdout/stderr from containers. The advantage
of this is to offload it to docker itself instead of
streaming stderr along with stdout, which gets multiplexed
through single connection via docker-API.

The change will need support from FDKs in order to log
correct call-id and supress '\n' that splits syslog lines.
This commit is contained in:
Tolga Ceylan
2018-08-29 13:08:02 -07:00
committed by GitHub
parent 59f401d525
commit ad011fde7f
10 changed files with 134 additions and 295 deletions

View File

@@ -5,7 +5,6 @@ import (
"context"
"errors"
"io"
"log/syslog"
"strings"
"sync"
"sync/atomic"
@@ -696,7 +695,7 @@ type hotSlot struct {
done chan struct{} // signal we are done with slot
errC <-chan error // container error
container *container // TODO mask this
maxRespSize uint64 // TODO boo.
cfg *Config
fatalErr error
containerSpan trace.SpanContext
}
@@ -741,23 +740,10 @@ func (s *hotSlot) exec(ctx context.Context, call *call) error {
// NOTE: stderr is limited separately (though line writer is vulnerable to attack?)
// limit the bytes allowed to be written to the stdout pipe, which handles any
// buffering overflows (json to a string, http to a buffer, etc)
stdoutWrite := common.NewClampWriter(stdoutWritePipe, s.maxRespSize, models.ErrFunctionResponseTooBig)
// get our own syslogger with THIS call id (cheap), using the container's already open syslog conns (expensive)
// TODO? we can basically just do this whether there are conns or not, this is relatively cheap (despite appearances)
buf1 := bufPool.Get().(*bytes.Buffer)
buf2 := bufPool.Get().(*bytes.Buffer)
defer bufPool.Put(buf1)
defer bufPool.Put(buf2)
sw := newSyslogWriter(call.ID, call.Path, call.AppName, syslog.LOG_ERR, s.container.syslogConns, buf1)
var syslog io.WriteCloser = &nopCloser{sw}
syslog = newLineWriterWithBuffer(buf2, syslog)
defer syslog.Close() // close syslogger from here, but NOT the call log stderr OR conns
stderr := multiWriteCloser{call.stderr, syslog} // use multiWriteCloser for its error ignoring properties
stdoutWrite := common.NewClampWriter(stdoutWritePipe, s.cfg.MaxResponseSize, models.ErrFunctionResponseTooBig)
proto := protocol.New(protocol.Protocol(call.Format), stdinWrite, stdoutRead)
swapBack := s.container.swap(stdinRead, stdoutWrite, stderr, &call.Stats)
swapBack := s.container.swap(stdinRead, stdoutWrite, call.stderr, &call.Stats)
defer swapBack() // NOTE: it's important this runs before the pipes are closed.
errApp := make(chan error, 1)
@@ -815,6 +801,13 @@ func (a *agent) prepCold(ctx context.Context, call *call, tok ResourceToken, ch
cpus: uint64(call.CPUs),
fsSize: a.cfg.MaxFsSize,
timeout: time.Duration(call.Timeout) * time.Second, // this is unnecessary, but in case removal fails...
logCfg: drivers.LoggerConfig{
URL: strings.TrimSpace(call.SyslogURL),
Tags: []drivers.LoggerTag{
{Name: "app_name", Value: call.AppName},
{Name: "func_name", Value: call.Path},
},
},
stdin: call.req.Body,
stdout: common.NewClampWriter(call.w, a.cfg.MaxResponseSize, models.ErrFunctionResponseTooBig),
stderr: call.stderr,
@@ -899,7 +892,7 @@ func (a *agent) runHot(ctx context.Context, call *call, tok ResourceToken, state
done: make(chan struct{}),
errC: errC,
container: container,
maxRespSize: a.cfg.MaxResponseSize,
cfg: &a.cfg,
containerSpan: trace.FromContext(ctx).SpanContext(),
}
if !a.runHotReq(ctx, call, state, logger, cookie, slot) {
@@ -1028,11 +1021,11 @@ type container struct {
fsSize uint64
tmpFsSize uint64
timeout time.Duration // cold only (superfluous, but in case)
logCfg drivers.LoggerConfig
stdin io.Reader
stdout io.Writer
stderr io.Writer
syslogConns io.WriteCloser
// swapMu protects the stats swapping
swapMu sync.Mutex
@@ -1051,13 +1044,6 @@ func newHotContainer(ctx context.Context, call *call, cfg *Config) (*container,
stderr := common.NewGhostWriter()
stdout := common.NewGhostWriter()
// these are only the conns, this doesn't write the syslog format (since it will change between calls)
syslogConns, err := syslogConns(ctx, call.SyslogURL)
if err != nil {
// TODO we could write this to between stderr but between stderr doesn't go to user either. kill me.
common.Logger(ctx).WithError(err).WithFields(logrus.Fields{"app_id": call.AppID, "path": call.Path, "image": call.Image, "container_id": id}).Error("error dialing syslog urls")
}
// for use if no freezer (or we ever make up our minds)
var bufs []*bytes.Buffer
@@ -1070,7 +1056,7 @@ func newHotContainer(ctx context.Context, call *call, cfg *Config) (*container,
// wrap the syslog and debug loggers in the same (respective) line writer
// syslog complete chain for this (from top):
// stderr -> line writer -> syslog -> []conns
// stderr -> line writer
// TODO(reed): I guess this is worth it
// TODO(reed): there's a bug here where the between writers could have
@@ -1079,24 +1065,17 @@ func newHotContainer(ctx context.Context, call *call, cfg *Config) (*container,
// and mostly came to the conclusion that life is meaningless.
buf1 := bufPool.Get().(*bytes.Buffer)
buf2 := bufPool.Get().(*bytes.Buffer)
buf3 := bufPool.Get().(*bytes.Buffer)
buf4 := bufPool.Get().(*bytes.Buffer)
bufs = []*bytes.Buffer{buf1, buf2, buf3, buf4}
bufs = []*bytes.Buffer{buf1, buf2}
// stdout = LOG_INFO, stderr = LOG_ERR -- ONLY for the between writers, normal stdout is a response
so := newSyslogWriter(call.ID, call.Path, call.AppName, syslog.LOG_INFO, syslogConns, buf1)
se := newSyslogWriter(call.ID, call.Path, call.AppName, syslog.LOG_ERR, syslogConns, buf2)
// use multiWriteCloser since it ignores errors (io.MultiWriter does not)
soc := multiWriteCloser{&nopCloser{so}, &nopCloser{&logWriter{
soc := &nopCloser{&logWriter{
logrus.WithFields(logrus.Fields{"tag": "stdout", "app_id": call.AppID, "path": call.Path, "image": call.Image, "container_id": id}),
}}}
sec := multiWriteCloser{&nopCloser{se}, &nopCloser{&logWriter{
}}
sec := &nopCloser{&logWriter{
logrus.WithFields(logrus.Fields{"tag": "stderr", "app_id": call.AppID, "path": call.Path, "image": call.Image, "container_id": id}),
}}}
}}
stdout.Swap(newLineWriterWithBuffer(buf4, soc))
stderr.Swap(newLineWriterWithBuffer(buf3, sec))
stdout.Swap(newLineWriterWithBuffer(buf1, soc))
stderr.Swap(newLineWriterWithBuffer(buf2, sec))
}
return &container{
@@ -1108,10 +1087,16 @@ func newHotContainer(ctx context.Context, call *call, cfg *Config) (*container,
cpus: uint64(call.CPUs),
fsSize: cfg.MaxFsSize,
tmpFsSize: uint64(call.TmpFsSize),
logCfg: drivers.LoggerConfig{
URL: strings.TrimSpace(call.SyslogURL),
Tags: []drivers.LoggerTag{
{Name: "app_name", Value: call.AppName},
{Name: "func_name", Value: call.Path},
},
},
stdin: stdin,
stdout: stdout,
stderr: stderr,
syslogConns: syslogConns,
}, func() {
stdin.Close()
stderr.Close()
@@ -1119,7 +1104,6 @@ func newHotContainer(ctx context.Context, call *call, cfg *Config) (*container,
for _, b := range bufs {
bufPool.Put(b)
}
syslogConns.Close()
}
}
@@ -1159,6 +1143,7 @@ func (c *container) CPUs() uint64 { return c.cpus }
func (c *container) FsSize() uint64 { return c.fsSize }
func (c *container) TmpFsSize() uint64 { return c.tmpFsSize }
func (c *container) Extensions() map[string]string { return c.extensions }
func (c *container) LoggerConfig() drivers.LoggerConfig { return c.logCfg }
// WriteStat publishes each metric in the specified Stats structure as a histogram metric
func (c *container) WriteStat(ctx context.Context, stat drivers.Stat) {

View File

@@ -25,6 +25,34 @@ type cookie struct {
drv *DockerDriver
}
func (c *cookie) configureLogger(log logrus.FieldLogger) {
conf := c.task.LoggerConfig()
if conf.URL == "" {
c.opts.HostConfig.LogConfig = docker.LogConfig{
Type: "none",
}
return
}
c.opts.HostConfig.LogConfig = docker.LogConfig{
Type: "syslog",
Config: map[string]string{
"syslog-address": conf.URL,
"syslog-facility": "user",
"syslog-format": "rfc5424",
},
}
tags := make([]string, 0, len(conf.Tags))
for _, pair := range conf.Tags {
tags = append(tags, fmt.Sprintf("%s=%s", pair.Name, pair.Value))
}
if len(tags) > 0 {
c.opts.HostConfig.LogConfig.Config["tag"] = strings.Join(tags, ",")
}
}
func (c *cookie) configureMem(log logrus.FieldLogger) {
if c.task.Memory() == 0 {
return

View File

@@ -217,15 +217,11 @@ func (drv *DockerDriver) CreateCookie(ctx context.Context, task drivers.Containe
Image: task.Image(),
OpenStdin: true,
AttachStdout: true,
AttachStderr: true,
AttachStdin: true,
AttachStderr: true,
StdinOnce: true,
},
// turn off logs since we're collecting them from attach
HostConfig: &docker.HostConfig{
LogConfig: docker.LogConfig{
Type: "none",
},
ReadonlyRootfs: drv.conf.EnableReadOnlyRootFs,
},
Context: ctx,
@@ -237,6 +233,7 @@ func (drv *DockerDriver) CreateCookie(ctx context.Context, task drivers.Containe
drv: drv,
}
cookie.configureLogger(log)
cookie.configureMem(log)
cookie.configureCmd(log)
cookie.configureEnv(log)

View File

@@ -71,6 +71,7 @@ func (c *poolTask) CPUs() uint64 { return 0
func (c *poolTask) FsSize() uint64 { return 0 }
func (c *poolTask) TmpFsSize() uint64 { return 0 }
func (c *poolTask) Extensions() map[string]string { return nil }
func (c *poolTask) LoggerConfig() drivers.LoggerConfig { return drivers.LoggerConfig{} }
func (c *poolTask) WriteStat(ctx context.Context, stat drivers.Stat) {}
type dockerPoolItem struct {

View File

@@ -37,6 +37,7 @@ func (f *taskDockerTest) WorkDir() string { return "" }
func (f *taskDockerTest) Close() {}
func (f *taskDockerTest) Input() io.Reader { return f.input }
func (f *taskDockerTest) Extensions() map[string]string { return nil }
func (f *taskDockerTest) LoggerConfig() drivers.LoggerConfig { return drivers.LoggerConfig{} }
func TestRunnerDocker(t *testing.T) {
dkr := NewDocker(drivers.Config{})

View File

@@ -86,6 +86,21 @@ type RunResult interface {
Status() string
}
// Logger Tags for container
type LoggerTag struct {
Name string
Value string
}
// Logger Configuration for container
type LoggerConfig struct {
// Log Sink URL
URL string
// Log Tag Pairs
Tags []LoggerTag
}
// The ContainerTask interface guides container execution across a wide variety of
// container oriented runtimes.
type ContainerTask interface {
@@ -136,6 +151,9 @@ type ContainerTask interface {
// leaves it unset.
WorkDir() string
// Logger Config to use in driver
LoggerConfig() LoggerConfig
// Close is used to perform cleanup after task execution.
// Close should be safe to call multiple times.
Close()

View File

@@ -1,145 +0,0 @@
package agent
import (
"bytes"
"context"
"crypto/tls"
"fmt"
"io"
"log/syslog"
"net"
"net/url"
"strings"
"time"
"github.com/fnproject/fn/api/common"
"go.opencensus.io/trace"
)
// syslogConns may return a non-nil io.WriteCloser and an error simultaneously,
// the error containing any errors from connecting to any of the syslog URLs, and the
// io.WriteCloser writing to any syslogURLs that were successfully connected to.
// the returned io.WriteCloser is a Writer to each conn, it should be wrapped in another
// writer that writes syslog formatted messages (by line).
func syslogConns(ctx context.Context, syslogURLs string) (io.WriteCloser, error) {
// TODO(reed): we should likely add a trace per conn, need to plumb tagging better
ctx, span := trace.StartSpan(ctx, "syslog_conns")
defer span.End()
if len(syslogURLs) == 0 {
return nullReadWriter{}, nil
}
// gather all the conns, re-use the line we make in the syslogWriter
// to write the same bytes to each of the conns.
var conns []io.WriteCloser
var errs []error
sinks := strings.Split(syslogURLs, ",")
for _, s := range sinks {
conn, err := dialSyslog(ctx, strings.TrimSpace(s))
if err != nil {
errs = append(errs, fmt.Errorf("failed to setup remote syslog connection to %v: %v", s, err))
continue
}
conns = append(conns, conn)
}
// do this before checking length of conns
var err error
if len(errs) > 0 {
for _, e := range errs {
err = fmt.Errorf("%v%v, ", err, e)
}
}
if len(conns) == 0 {
return nullReadWriter{}, err
}
return multiWriteCloser(conns), err
}
func dialSyslog(ctx context.Context, syslogURL string) (io.WriteCloser, error) {
url, err := url.Parse(syslogURL)
if err != nil {
return nil, err
}
common.Logger(ctx).WithField("syslog_url", url).Debug("dialing syslog url")
var dialer net.Dialer
deadline, ok := ctx.Deadline()
if ok {
dialer.Deadline = deadline
}
// slice off 'xxx://' and dial it
switch url.Scheme {
case "udp", "tcp":
return dialer.Dial(url.Scheme, syslogURL[6:])
case "tls":
return tls.DialWithDialer(&dialer, "tcp", syslogURL[6:], nil)
default:
return nil, fmt.Errorf("Unsupported scheme, please use {tcp|udp|tls}: %s: ", url.Scheme)
}
}
// syslogWriter prepends a syslog format with call-specific details
// for each data segment provided in Write(). This doesn't use
// log/syslog pkg because we do not need pid for every line (expensive),
// and we have a format that is easier to read than hiding in preamble.
// this writes logfmt formatted syslog with values for call, function, and
// app, it is up to the user to use logfmt from their functions to get a
// fully formatted line out.
// TODO not pressing, but we could support json & other formats, too, upon request.
type syslogWriter struct {
pres []byte
post []byte
b *bytes.Buffer
clock func() time.Time
// the syslog conns (presumably)
io.Writer
}
const severityMask = 0x07
const facilityMask = 0xf8
func newSyslogWriter(call, function, appName string, severity syslog.Priority, wc io.Writer, buf *bytes.Buffer) *syslogWriter {
// Facility = LOG_USER
pr := (syslog.LOG_USER & facilityMask) | (severity & severityMask)
// <priority>VERSION ISOTIMESTAMP HOSTNAME APPLICATION PID MESSAGEID STRUCTURED-DATA MSG
//
// and for us:
// <22>2 ISOTIMESTAMP fn appName funcName callID - MSG
// ex:
//<11>2 2018-02-31T07:42:21Z Fn - - - - call_id=123 func_name=rdallman/yodawg app_id=123 loggo hereo
// TODO we could use json for structured data and do that whole thing. up to whoever.
return &syslogWriter{
pres: []byte(fmt.Sprintf(`<%d>2`, pr)),
post: []byte(fmt.Sprintf(`fn - - - - call_id=%s func_name=%s app_name=%s `, call, function, appName)),
b: buf,
Writer: wc,
clock: time.Now,
}
}
func (sw *syslogWriter) Write(p []byte) (int, error) {
// re-use buffer to write in timestamp hodge podge and reduce writes to
// the conn by buffering a whole line here before writing to conn.
buf := sw.b
buf.Reset()
buf.Write(sw.pres)
buf.WriteString(" ")
buf.WriteString(sw.clock().UTC().Format(time.RFC3339))
buf.WriteString(" ")
buf.Write(sw.post)
buf.Write(p)
n, err := io.Copy(sw.Writer, buf)
return int(n), err
}

View File

@@ -1,29 +0,0 @@
package agent
import (
"bytes"
"log/syslog"
"testing"
"time"
)
func TestSyslogFormat(t *testing.T) {
var b1 bytes.Buffer
var b2 bytes.Buffer
call := "12345"
fn := "yo/dawg"
appName := "myapp"
now := time.Date(1982, 6, 25, 12, 0, 0, 0, time.UTC)
clock := func() time.Time { return now }
writer := newSyslogWriter(call, fn, appName, syslog.LOG_ERR, &nopCloser{&b1}, &b2)
writer.clock = clock
writer.Write([]byte("yo"))
gold := `<11>2 1982-06-25T12:00:00Z fn - - - - call_id=12345 func_name=yo/dawg app_name=myapp yo`
if b1.String() != gold {
t.Fatal("syslog was not what we expected: ", b1.String())
}
}

View File

@@ -85,20 +85,18 @@ func (a *App) Validate() error {
}
if a.SyslogURL != nil && *a.SyslogURL != "" {
sinks := strings.Split(*a.SyslogURL, ",")
for _, s := range sinks {
url, err := url.Parse(strings.TrimSpace(s))
url, err := url.Parse(strings.TrimSpace(*a.SyslogURL))
fail := err != nil
if !fail {
// See: https://docs.docker.com/config/containers/logging/syslog/#options
switch url.Scheme {
case "udp", "tcp", "tls":
case "udp", "tcp", "unix", "unixgram", "tcp+tls":
default:
fail = true
}
}
if fail {
return ErrInvalidSyslog(fmt.Sprintf(`invalid syslog url: "%v"`, s))
}
return ErrInvalidSyslog(fmt.Sprintf(`invalid syslog url: "%v"`, *a.SyslogURL))
}
}
return nil

View File

@@ -13,41 +13,26 @@ All logs are emitted in [logfmt](https://godoc.org/github.com/kr/logfmt) format
## Call ID
Every function call/request is assigned a `call_id`. If you search your logs, you can track all the activity
for each function call and find errors on a call by call basis. For example, these are the log lines for an aynschronous
function call:
![async logs](/docs/assets/async-log-full.png)
Note the easily searchable `call_id=x` format.
```sh
call_id=477949e2-922c-5da9-8633-0b2887b79f6e
```
for each function call and find errors on a call by call basis.
## Remote syslog for functions
You may add a syslog url to any function application and all functions that
exist under that application will ship all of their logs to it. You may
provide a comma separated list, if desired. Currently, we support `tcp`,
`udp`, and `tls`, and this will not work if behind a proxy [yet?] (this is my
life now). This feature only works for 'hot' functions.
An example syslog url is:
Fn uses [docker syslog driver](https://docs.docker.com/config/containers/logging/syslog/) for remote logging if syslog URL is defined for the application. An example syslog URL is:
```
tls://logs.papertrailapp.com:1
tcp+tls://logs.papertrailapp.com:1
```
We log in a syslog format, with some variables added in logfmt format. If you
find logfmt format offensive, please open an issue and we will consider adding
more formats (or open a PR that does it, with tests, and you will receive 1
free cookie along with the feature you want). The logs from the functions
themselves are not formatted, only our pre-amble, thus, if you'd like a fully
logfmt line, you must use a logfmt logger to log from your function.
See docker syslog driver [syslog-address](https://docs.docker.com/config/containers/logging/syslog/#options) for supported formats.
* All log lines are sent as level error w/ the current time and `fn` as hostname.
* call_id, func_name, and app_id will prefix every log line.
Fn adds `func_name` and `app_name` tags to the container logs using [docker syslog driver tags](https://docs.docker.com/config/containers/logging/syslog/#options):
```
<11>2 1982-06-25T12:00:00Z fn - - - - call_id=12345 func_name=yo/yo app_id=54321 this is your log line
Aug 29 19:41:31 fnserver 178 <11>1 2018-08-29T19:41:31Z 59ff8628a941 app_name=fn-http-func,func_name=/fn-http-func 17 app_name=fn-http-func,func_name=/fn-http-func - 2018/08/29 19:41:31 Example log output
Aug 29 19:41:31 fnserver 154 <14>1 2018-08-29T19:41:31Z 59ff8628a941 app_name=fn-http-func,func_name=/fn-http-func 17 app_name=fn-http-func,func_name=/fn-http-func - HTTP/1.1 200 OK
Aug 29 19:41:31 fnserver 158 <14>1 2018-08-29T19:41:31Z 59ff8628a941 app_name=fn-http-func,func_name=/fn-http-func 17 app_name=fn-http-func,func_name=/fn-http-func - Content-Length: 11
Aug 29 19:41:31 fnserver 139 <14>1 2018-08-29T19:41:31Z 59ff8628a941 app_name=fn-http-func,func_name=/fn-http-func 17 app_name=fn-http-func,func_name=/fn-http-func -
Aug 29 19:41:31 fnserver 761 <14>1 2018-08-29T19:41:31Z 59ff8628a941 app_name=fn-http-func,func_name=/fn-http-func 17 app_name=fn-http-func,func_name=/fn-http-func - Hello World
```
Currently, Fn does not add `call_id` to the log tags and it's user function responsibility to include this. FDKs will add support for this.