diff --git a/cache/cache.go b/cache/cache.go index cf8a5ba..b94d6a3 100644 --- a/cache/cache.go +++ b/cache/cache.go @@ -30,6 +30,49 @@ type ActionID [HashSize]byte // An OutputID is a cache output key, the hash of an output of a computation. type OutputID [HashSize]byte +// Cacher is the interface as used by the cmd/go. +// NOTE: name is changed from upstream's Cache to avoid breaking changes. +type Cacher interface { + // Get returns the cache entry for the provided ActionID. + // On miss, the error type should be of type *entryNotFoundError. + // + // After a successful call to Get, OutputFile(Entry.OutputID) must + // exist on disk until Close is called (at the end of the process). + Get(ActionID) (Entry, error) + + // Put adds an item to the cache. + // + // The seeker is only used to seek to the beginning. After a call to Put, + // the seek position is not guaranteed to be in any particular state. + // + // As a special case, if the ReadSeeker is of type noVerifyReadSeeker, + // the verification from GODEBUG=goverifycache=1 is skipped. + // + // After a successful call to Put, OutputFile(OutputID) must + // exist on disk until Close is called (at the end of the process). + Put(ActionID, io.ReadSeeker) (_ OutputID, size int64, _ error) + + // Close is called at the end of the go process. Implementations can do + // cache cleanup work at this phase, or wait for and report any errors from + // background cleanup work started earlier. Any cache trimming in one + // process should not cause the invariants of this interface to be + // violated in another process. Namely, a cache trim from one process should + // not delete an OutputID from disk that was recently Get or Put from + // another process. As a rule of thumb, don't trim things used in the last + // day. + Close() error + + // OutputFile returns the path on disk where OutputID is stored. 
+ // + // It's only called after a successful get or put call so it doesn't need + // to return an error; it's assumed that if the previous get or put succeeded, + // it's already on disk. + OutputFile(OutputID) string + + // FuzzDir returns where fuzz files are stored. + FuzzDir() string +} + // A Cache is a package cache, backed by a file system directory tree. type Cache struct { dir string @@ -266,6 +309,39 @@ func (c *Cache) GetMmap(id ActionID) ([]byte, Entry, error) { } */ +// GetFile looks up the action ID in the cache and returns +// the name of the corresponding data file. +func GetFile(c Cacher, id ActionID) (file string, entry Entry, err error) { + entry, err = c.Get(id) + if err != nil { + return "", Entry{}, err + } + file = c.OutputFile(entry.OutputID) + info, err := os.Stat(file) + if err != nil { + return "", Entry{}, &entryNotFoundError{Err: err} + } + if info.Size() != entry.Size { + return "", Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")} + } + return file, entry, nil +} + +// GetBytes looks up the action ID in the cache and returns +// the corresponding output bytes. +// GetBytes should only be used for data that can be expected to fit in memory. +func GetBytes(c Cacher, id ActionID) ([]byte, Entry, error) { + entry, err := c.Get(id) + if err != nil { + return nil, entry, err + } + data, _ := os.ReadFile(c.OutputFile(entry.OutputID)) + if sha256.Sum256(data) != entry.OutputID { + return nil, entry, &entryNotFoundError{Err: errors.New("bad checksum")} + } + return data, entry, nil +} + // OutputFile returns the name of the cache file storing output with the given OutputID. func (c *Cache) OutputFile(out OutputID) string { file := c.fileName(out, "d") @@ -308,6 +384,8 @@ func (c *Cache) used(file string) { os.Chtimes(file, c.now(), c.now()) } +func (c *Cache) Close() error { return c.Trim() } + // Trim removes old cache entries that are likely not to be reused. 
func (c *Cache) Trim() error { now := c.now() @@ -431,10 +509,21 @@ func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify return nil } +// noVerifyReadSeeker is an io.ReadSeeker wrapper sentinel type +// that says that Cache.Put should skip the verify check +// (from GODEBUG=goverifycache=1). +type noVerifyReadSeeker struct { + io.ReadSeeker +} + // Put stores the given output in the cache as the output for the action ID. // It may read file twice. The content of file must not change between the two passes. func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { - return c.put(id, file, true) + wrapper, isNoVerify := file.(noVerifyReadSeeker) + if isNoVerify { + file = wrapper.ReadSeeker + } + return c.put(id, file, !isNoVerify) } // PutNoVerify is like Put but disables the verify check @@ -445,6 +534,14 @@ func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, e return c.put(id, file, false) } +// PutNoVerify is like Put but disables the verify check +// when GODEBUG=goverifycache=1 is set. +// It is meant for data that is OK to cache but that we expect to vary slightly from run to run, +// like test output containing times and the like. +func PutNoVerify(c Cacher, id ActionID, file io.ReadSeeker) (OutputID, int64, error) { + return c.Put(id, noVerifyReadSeeker{file}) +} + func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) { // Compute output ID. h := sha256.New() @@ -473,6 +570,12 @@ func (c *Cache) PutBytes(id ActionID, data []byte) error { return err } +// PutBytes stores the given bytes in the cache as the output for the action ID. +func PutBytes(c Cacher, id ActionID, data []byte) error { + _, _, err := c.Put(id, bytes.NewReader(data)) + return err +} + // copyFile copies file into the cache, expecting it to have the given // output ID and size, if that file is not present already. 
func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error { diff --git a/cache/cacheprog/cacheprog.go b/cache/cacheprog/cacheprog.go new file mode 100644 index 0000000..9379636 --- /dev/null +++ b/cache/cacheprog/cacheprog.go @@ -0,0 +1,126 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package cacheprog defines the protocol for a GOCACHEPROG program. +// +// By default, the go command manages a build cache stored in the file system +// itself. GOCACHEPROG can be set to the name of a command (with optional +// space-separated flags) that implements the go command build cache externally. +// This permits defining a different cache policy. +// +// The go command will start the GOCACHEPROG as a subprocess and communicate +// with it via JSON messages over stdin/stdout. The subprocess's stderr will be +// connected to the go command's stderr. +// +// The subprocess should immediately send a [Response] with its capabilities. +// After that, the go command will send a stream of [Request] messages and the +// subprocess should reply to each [Request] with a [Response] message. +package cacheprog + +import ( + "io" + "time" +) + +// Cmd is a command that can be issued to a child process. +// +// If the interface needs to grow, the go command can add new commands or new +// versioned commands like "get2" in the future. The initial [Response] from +// the child process indicates which commands it supports. +type Cmd string + +const ( + // CmdPut tells the cache program to store an object in the cache. + // + // [Request.ActionID] is the cache key of this object. The cache should + // store [Request.OutputID] and [Request.Body] under this key for a + // later "get" request. 
It must also store the Body in a file in the local + // file system and return the path to that file in [Response.DiskPath], + // which must exist at least until a "close" request. + CmdPut = Cmd("put") + + // CmdGet tells the cache program to retrieve an object from the cache. + // + // [Request.ActionID] specifies the key of the object to get. If the + // cache does not contain this object, it should set [Response.Miss] to + // true. Otherwise, it should populate the fields of [Response], + // including setting [Response.OutputID] to the OutputID of the original + // "put" request and [Response.DiskPath] to the path of a local file + // containing the Body of the original "put" request. That file must + // continue to exist at least until a "close" request. + CmdGet = Cmd("get") + + // CmdClose requests that the cache program exit gracefully. + // + // The cache program should reply to this request and then exit + // (thus closing its stdout). + CmdClose = Cmd("close") +) + +// Request is the JSON-encoded message that's sent from the go command to +// the GOCACHEPROG child process over stdin. Each JSON object is on its own +// line. A Request with Command "put" and BodySize > 0 will be followed by a +// line containing a base64-encoded JSON string literal of the body. +type Request struct { + // ID is a unique number per process across all requests. + // It must be echoed in the Response from the child. + ID int64 + + // Command is the type of request. + // The go command will only send commands that were declared + // as supported by the child. + Command Cmd + + // ActionID is the cache key for "put" and "get" requests. + ActionID []byte `json:",omitempty"` // or nil if not used + + // OutputID is stored with the body for "put" requests. + OutputID []byte `json:",omitempty"` // or nil if not used + + // Body is the body for "put" requests. It's sent after the JSON object + // as a base64-encoded JSON string when BodySize is non-zero. 
+ // It's sent as a separate JSON value instead of being a struct field + // sent in this JSON object so large values can be streamed in both directions. + // The base64 string body of a Request will always be written + // immediately after the JSON object and a newline. + Body io.Reader `json:"-"` + + // BodySize is the number of bytes of Body. If zero, the body isn't written. + BodySize int64 `json:",omitempty"` +} + +// Response is the JSON response from the child process to the go command. +// +// With the exception of the first protocol message that the child writes to its +// stdout with ID==0 and KnownCommands populated, these are only sent in +// response to a Request from the go command. +// +// Responses can be sent in any order. The ID must match the request they're +// replying to. +type Response struct { + ID int64 // that corresponds to Request; they can be answered out of order + Err string `json:",omitempty"` // if non-empty, the error + + // KnownCommands is included in the first message that the cache helper program + // writes to stdout on startup (with ID==0). It includes the + // Request.Command types that are supported by the program. + // + // This lets the go command extend the protocol gracefully over time (adding + // "get2", etc), or fail gracefully when needed. It also lets the go command + // verify the program wants to be a cache helper. + KnownCommands []Cmd `json:",omitempty"` + + // For "get" requests. + + Miss bool `json:",omitempty"` // cache miss + OutputID []byte `json:",omitempty"` // the OutputID stored with the body + Size int64 `json:",omitempty"` // body size in bytes + Time *time.Time `json:",omitempty"` // when the object was put in the cache (optional; used for cache expiration) + + // For "get" and "put" requests. + + // DiskPath is the absolute path on disk of the body corresponding to a + // "get" (on cache hit) or "put" request's ActionID. 
+ DiskPath string `json:",omitempty"` +} diff --git a/cache/default.go b/cache/default.go index a20b33c..fc8f843 100644 --- a/cache/default.go +++ b/cache/default.go @@ -14,15 +14,11 @@ import ( // Default returns the default cache to use. // It never returns nil. -func Default() *Cache { - defaultOnce.Do(initDefaultCache) - return defaultCache +func Default() Cacher { + return initDefaultCacheOnce() } -var ( - defaultOnce sync.Once - defaultCache *Cache -) +var initDefaultCacheOnce = sync.OnceValue(initDefaultCache) // cacheREADME is a message stored in a README in the cache directory. // Because the cache lives outside the normal Go trees, we leave the @@ -35,7 +31,7 @@ See golang.org to learn more about Go. // initDefaultCache does the work of finding the default cache // the first time Default is called. -func initDefaultCache() { +func initDefaultCache() Cacher { dir := DefaultDir() if dir == "off" { if defaultDirErr != nil { @@ -51,11 +47,18 @@ func initDefaultCache() { os.WriteFile(filepath.Join(dir, "README"), []byte(cacheREADME), 0666) } - c, err := Open(dir) + diskCache, err := Open(dir) if err != nil { log.Fatalf("failed to initialize build cache at %s: %s\n", dir, err) } - defaultCache = c + + // NOTE: changed from upstream's cfg.Getenv, so it will ignore "go env -w". + // Consider calling "go env" or copying the cfg package instead. + if cacheprog := os.Getenv("GOCACHEPROG"); cacheprog != "" { + return startCacheProg(cacheprog, diskCache) + } + + return diskCache } var ( diff --git a/cache/prog.go b/cache/prog.go new file mode 100644 index 0000000..268ba84 --- /dev/null +++ b/cache/prog.go @@ -0,0 +1,374 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package cache + +import ( + "bufio" + "context" + "crypto/sha256" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "io" + "log" + "os" + "os/exec" + "sync" + "sync/atomic" + "time" + + "github.com/rogpeppe/go-internal/cache/cacheprog" + "github.com/rogpeppe/go-internal/internal/quoted" +) + +// ProgCache implements Cacher via JSON messages over stdin/stdout to a child +// helper process which can then implement whatever caching policy/mechanism it +// wants. +// +// See https://github.com/golang/go/issues/59719 +type ProgCache struct { + cmd *exec.Cmd + stdout io.ReadCloser // from the child process + stdin io.WriteCloser // to the child process + bw *bufio.Writer // to stdin + jenc *json.Encoder // to bw + + // can are the commands that the child process declared that it supports. + // This is effectively the versioning mechanism. + can map[cacheprog.Cmd]bool + + // fuzzDirCache is another Cacher implementation to use for the FuzzDir + // method. In practice this is the default GOCACHE disk-based + // implementation. + // + // TODO(bradfitz): maybe this isn't ideal. But we'd need to extend the Cacher + // interface and the fuzzing callers to be less disk-y to do more here. + fuzzDirCache Cacher + + closing atomic.Bool + ctx context.Context // valid until Close via ctxCancel + ctxCancel context.CancelFunc // called on Close + readLoopDone chan struct{} // closed when readLoop returns + + mu sync.Mutex // guards following fields + nextID int64 + inFlight map[int64]chan<- *cacheprog.Response + outputFile map[OutputID]string // object => abs path on disk + + // writeMu serializes writing to the child process. + // It must never be held at the same time as mu. + writeMu sync.Mutex +} + +// startCacheProg starts the prog binary (with optional space-separated flags) +// and returns a Cacher implementation that talks to it. 
+// +// It blocks until the child process advertises its capabilities, logging a +// reminder every five seconds while it waits. 
+func startCacheProg(progAndArgs string, fuzzDirCache Cacher) Cacher { + if fuzzDirCache == nil { + panic("missing fuzzDirCache") + } + args, err := quoted.Split(progAndArgs) + if err != nil { + log.Fatalf("GOCACHEPROG args: %v", err) + } + var prog string + if len(args) > 0 { + prog = args[0] + args = args[1:] + } + + ctx, ctxCancel := context.WithCancel(context.Background()) + + cmd := exec.CommandContext(ctx, prog, args...) + out, err := cmd.StdoutPipe() + if err != nil { + log.Fatalf("StdoutPipe to GOCACHEPROG: %v", err) + } + in, err := cmd.StdinPipe() + if err != nil { + log.Fatalf("StdinPipe to GOCACHEPROG: %v", err) + } + cmd.Stderr = os.Stderr + // On close, we cancel the context. Rather than killing the helper, + // close its stdin. + cmd.Cancel = in.Close + + if err := cmd.Start(); err != nil { + log.Fatalf("error starting GOCACHEPROG program %q: %v", prog, err) + } + + pc := &ProgCache{ + ctx: ctx, + ctxCancel: ctxCancel, + fuzzDirCache: fuzzDirCache, + cmd: cmd, + stdout: out, + stdin: in, + bw: bufio.NewWriter(in), + inFlight: make(map[int64]chan<- *cacheprog.Response), + outputFile: make(map[OutputID]string), + readLoopDone: make(chan struct{}), + } + + // Register our interest in the initial protocol message from the child to + // us, saying what it can do. + capResc := make(chan *cacheprog.Response, 1) + pc.inFlight[0] = capResc + + pc.jenc = json.NewEncoder(pc.bw) + go pc.readLoop(pc.readLoopDone) + + // Give the child process a few seconds to report its capabilities. This + // should be instant and not require any slow work by the program. 
+	timer := time.NewTicker(5 * time.Second)
+	defer timer.Stop()
+	for {
+		select {
+		case <-timer.C:
+			log.Printf("# still waiting for GOCACHEPROG %v ...", prog)
+		case capRes := <-capResc:
+			can := map[cacheprog.Cmd]bool{}
+			for _, cmd := range capRes.KnownCommands {
+				can[cmd] = true
+			}
+			if len(can) == 0 {
+				log.Fatalf("GOCACHEPROG %v declared no supported commands", prog)
+			}
+			pc.can = can
+			return pc
+		}
+	}
+}
+
+// readLoop decodes Responses from the child's stdout and delivers each one to
+// the channel registered in inFlight under its ID. It closes readLoopDone when
+// it returns. A decode error is fatal unless Close has begun, in which case
+// every pending channel is closed and inFlight is set to nil.
+func (c *ProgCache) readLoop(readLoopDone chan<- struct{}) {
+	defer close(readLoopDone)
+	jd := json.NewDecoder(c.stdout)
+	for {
+		res := new(cacheprog.Response)
+		if err := jd.Decode(res); err != nil {
+			if c.closing.Load() {
+				c.mu.Lock()
+				for _, ch := range c.inFlight {
+					close(ch)
+				}
+				c.inFlight = nil
+				c.mu.Unlock()
+				return // quietly
+			}
+			if err == io.EOF {
+				c.mu.Lock()
+				inFlight := len(c.inFlight)
+				c.mu.Unlock()
+				log.Fatalf("GOCACHEPROG exited pre-Close with %v pending requests", inFlight)
+			}
+			log.Fatalf("error reading JSON from GOCACHEPROG: %v", err)
+		}
+		c.mu.Lock()
+		ch, ok := c.inFlight[res.ID]
+		delete(c.inFlight, res.ID)
+		c.mu.Unlock()
+		if ok {
+			ch <- res
+		} else {
+			log.Fatalf("GOCACHEPROG sent response for unknown request ID %v", res.ID)
+		}
+	}
+}
+
+var errCacheprogClosed = errors.New("GOCACHEPROG program closed unexpectedly")
+
+// send writes req to the child and waits for the matching response or for ctx
+// to be done. A nil response (the channel was closed by readLoop) yields
+// errCacheprogClosed; a response with Err set is returned as an error.
+func (c *ProgCache) send(ctx context.Context, req *cacheprog.Request) (*cacheprog.Response, error) {
+	resc := make(chan *cacheprog.Response, 1)
+	if err := c.writeToChild(req, resc); err != nil {
+		return nil, err
+	}
+	select {
+	case res := <-resc:
+		if res == nil {
+			return nil, errCacheprogClosed
+		}
+		if res.Err != "" {
+			return nil, errors.New(res.Err)
+		}
+		return res, nil
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	}
+}
+
+// writeToChild assigns req the next request ID, registers resc in inFlight to
+// receive the matching response, and writes req to the child's stdin, followed
+// by its body as a base64-encoded JSON string when Body is set and BodySize is
+// positive. If writing fails, the inFlight registration is removed again.
+// It returns errCacheprogClosed if inFlight has already been torn down.
+func (c *ProgCache) writeToChild(req *cacheprog.Request, resc chan<- *cacheprog.Response) (err error) {
+	c.mu.Lock()
+	if c.inFlight == nil {
+		// readLoop has torn down inFlight. Release mu before returning so
+		// that later users of mu do not deadlock.
+		c.mu.Unlock()
+		return errCacheprogClosed
+	}
+	c.nextID++
+	req.ID = c.nextID
+	c.inFlight[req.ID] = resc
+	c.mu.Unlock()
+
+	// On any write failure, drop the registration made above.
+	defer func() {
+		if err != nil {
+			c.mu.Lock()
+			if c.inFlight != nil {
+				delete(c.inFlight, req.ID)
+			}
+			c.mu.Unlock()
+		}
+	}()
+
+	c.writeMu.Lock()
+	defer c.writeMu.Unlock()
+
+	if err := c.jenc.Encode(req); err != nil {
+		return err
+	}
+	if err := c.bw.WriteByte('\n'); err != nil {
+		return err
+	}
+	if req.Body != nil && req.BodySize > 0 {
+		if err := c.bw.WriteByte('"'); err != nil {
+			return err
+		}
+		e := base64.NewEncoder(base64.StdEncoding, c.bw)
+		wrote, err := io.Copy(e, req.Body)
+		if err != nil {
+			return err
+		}
+		if err := e.Close(); err != nil {
+			// Close flushes the final partial base64 block; if it fails the
+			// body was not fully written, so report it instead of success.
+			return err
+		}
+		if wrote != req.BodySize {
+			return fmt.Errorf("short write writing body to GOCACHEPROG for action %x, output %x: wrote %v; expected %v",
+				req.ActionID, req.OutputID, wrote, req.BodySize)
+		}
+		if _, err := c.bw.WriteString("\"\n"); err != nil {
+			return err
+		}
+	}
+	if err := c.bw.Flush(); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (c *ProgCache) Get(a ActionID) (Entry, error) {
+	if !c.can[cacheprog.CmdGet] {
+		// They can't do a "get". Maybe they're a write-only cache.
+		//
+		// TODO(bradfitz,bcmills): figure out the proper error type here. Maybe
+		// errors.ErrUnsupported? Is entryNotFoundError even appropriate? There
+		// might be places where we rely on the fact that a recent Put can be
+		// read through a corresponding Get. Audit callers and check, and document
+		// error types on the Cache interface.
+		return Entry{}, &entryNotFoundError{}
+	}
+	res, err := c.send(c.ctx, &cacheprog.Request{
+		Command:  cacheprog.CmdGet,
+		ActionID: a[:],
+	})
+	if err != nil {
+		return Entry{}, err // TODO(bradfitz): or entryNotFoundError? Audit callers.
+ } + if res.Miss { + return Entry{}, &entryNotFoundError{} + } + e := Entry{ + Size: res.Size, + } + if res.Time != nil { + e.Time = *res.Time + } else { + e.Time = time.Now() + } + if res.DiskPath == "" { + return Entry{}, &entryNotFoundError{errors.New("GOCACHEPROG didn't populate DiskPath on get hit")} + } + if copy(e.OutputID[:], res.OutputID) != len(res.OutputID) { + return Entry{}, &entryNotFoundError{errors.New("incomplete ProgResponse OutputID")} + } + c.noteOutputFile(e.OutputID, res.DiskPath) + return e, nil +} + +func (c *ProgCache) noteOutputFile(o OutputID, diskPath string) { + c.mu.Lock() + defer c.mu.Unlock() + c.outputFile[o] = diskPath +} + +func (c *ProgCache) OutputFile(o OutputID) string { + c.mu.Lock() + defer c.mu.Unlock() + return c.outputFile[o] +} + +func (c *ProgCache) Put(a ActionID, file io.ReadSeeker) (_ OutputID, size int64, _ error) { + // Compute output ID. + h := sha256.New() + if _, err := file.Seek(0, 0); err != nil { + return OutputID{}, 0, err + } + size, err := io.Copy(h, file) + if err != nil { + return OutputID{}, 0, err + } + var out OutputID + h.Sum(out[:0]) + + if _, err := file.Seek(0, 0); err != nil { + return OutputID{}, 0, err + } + + if !c.can[cacheprog.CmdPut] { + // Child is a read-only cache. Do nothing. + return out, size, nil + } + + res, err := c.send(c.ctx, &cacheprog.Request{ + Command: cacheprog.CmdPut, + ActionID: a[:], + OutputID: out[:], + Body: file, + BodySize: size, + }) + if err != nil { + return OutputID{}, 0, err + } + if res.DiskPath == "" { + return OutputID{}, 0, errors.New("GOCACHEPROG didn't return DiskPath in put response") + } + c.noteOutputFile(out, res.DiskPath) + return out, size, err +} + +func (c *ProgCache) Close() error { + c.closing.Store(true) + var err error + + // First write a "close" message to the child so it can exit nicely + // and clean up if it wants. Only after that exchange do we cancel + // the context that kills the process. 
+ if c.can[cacheprog.CmdClose] { + _, err = c.send(c.ctx, &cacheprog.Request{Command: cacheprog.CmdClose}) + if errors.Is(err, errCacheprogClosed) { + // Allow the child to quit without responding to close. + err = nil + } + } + // Cancel the context, which will close the helper's stdin. + c.ctxCancel() + // Wait until the helper closes its stdout. + <-c.readLoopDone + return err +} + +func (c *ProgCache) FuzzDir() string { + // TODO(bradfitz): figure out what to do here. For now just use the + // disk-based default. + return c.fuzzDirCache.FuzzDir() +} diff --git a/internal/quoted/quoted.go b/internal/quoted/quoted.go new file mode 100644 index 0000000..a812275 --- /dev/null +++ b/internal/quoted/quoted.go @@ -0,0 +1,129 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package quoted provides string manipulation utilities. +package quoted + +import ( + "flag" + "fmt" + "strings" + "unicode" +) + +func isSpaceByte(c byte) bool { + return c == ' ' || c == '\t' || c == '\n' || c == '\r' +} + +// Split splits s into a list of fields, +// allowing single or double quotes around elements. +// There is no unescaping or other processing within +// quoted fields. +// +// Keep in sync with cmd/dist/quoted.go +func Split(s string) ([]string, error) { + // Split fields allowing '' or "" around elements. + // Quotes further inside the string do not count. + var f []string + for len(s) > 0 { + for len(s) > 0 && isSpaceByte(s[0]) { + s = s[1:] + } + if len(s) == 0 { + break + } + // Accepted quoted string. No unescaping inside. 
+ if s[0] == '"' || s[0] == '\'' { + quote := s[0] + s = s[1:] + i := 0 + for i < len(s) && s[i] != quote { + i++ + } + if i >= len(s) { + return nil, fmt.Errorf("unterminated %c string", quote) + } + f = append(f, s[:i]) + s = s[i+1:] + continue + } + i := 0 + for i < len(s) && !isSpaceByte(s[i]) { + i++ + } + f = append(f, s[:i]) + s = s[i:] + } + return f, nil +} + +// Join joins a list of arguments into a string that can be parsed +// with Split. Arguments are quoted only if necessary; arguments +// without spaces or quotes are kept as-is. No argument may contain both +// single and double quotes. +func Join(args []string) (string, error) { + var buf []byte + for i, arg := range args { + if i > 0 { + buf = append(buf, ' ') + } + var sawSpace, sawSingleQuote, sawDoubleQuote bool + for _, c := range arg { + switch { + case c > unicode.MaxASCII: + continue + case isSpaceByte(byte(c)): + sawSpace = true + case c == '\'': + sawSingleQuote = true + case c == '"': + sawDoubleQuote = true + } + } + switch { + case !sawSpace && !sawSingleQuote && !sawDoubleQuote: + buf = append(buf, arg...) + + case !sawSingleQuote: + buf = append(buf, '\'') + buf = append(buf, arg...) + buf = append(buf, '\'') + + case !sawDoubleQuote: + buf = append(buf, '"') + buf = append(buf, arg...) + buf = append(buf, '"') + + default: + return "", fmt.Errorf("argument %q contains both single and double quotes and cannot be quoted", arg) + } + } + return string(buf), nil +} + +// A Flag parses a list of string arguments encoded with Join. +// It is useful for flags like cmd/link's -extldflags. 
+type Flag []string + +var _ flag.Value = (*Flag)(nil) + +func (f *Flag) Set(v string) error { + fs, err := Split(v) + if err != nil { + return err + } + *f = fs[:len(fs):len(fs)] + return nil +} + +func (f *Flag) String() string { + if f == nil { + return "" + } + s, err := Join(*f) + if err != nil { + return strings.Join(*f, " ") + } + return s +} diff --git a/internal/quoted/quoted_test.go b/internal/quoted/quoted_test.go new file mode 100644 index 0000000..d76270c --- /dev/null +++ b/internal/quoted/quoted_test.go @@ -0,0 +1,88 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package quoted + +import ( + "reflect" + "strings" + "testing" +) + +func TestSplit(t *testing.T) { + for _, test := range []struct { + name string + value string + want []string + wantErr string + }{ + {name: "empty", value: "", want: nil}, + {name: "space", value: " ", want: nil}, + {name: "one", value: "a", want: []string{"a"}}, + {name: "leading_space", value: " a", want: []string{"a"}}, + {name: "trailing_space", value: "a ", want: []string{"a"}}, + {name: "two", value: "a b", want: []string{"a", "b"}}, + {name: "two_multi_space", value: "a b", want: []string{"a", "b"}}, + {name: "two_tab", value: "a\tb", want: []string{"a", "b"}}, + {name: "two_newline", value: "a\nb", want: []string{"a", "b"}}, + {name: "quote_single", value: `'a b'`, want: []string{"a b"}}, + {name: "quote_double", value: `"a b"`, want: []string{"a b"}}, + {name: "quote_both", value: `'a '"b "`, want: []string{"a ", "b "}}, + {name: "quote_contains", value: `'a "'"'b"`, want: []string{`a "`, `'b`}}, + {name: "escape", value: `\'`, want: []string{`\'`}}, + {name: "quote_unclosed", value: `'a`, wantErr: "unterminated ' string"}, + } { + t.Run(test.name, func(t *testing.T) { + got, err := Split(test.value) + if err != nil { + if test.wantErr == "" { + t.Fatalf("unexpected error: %v", err) + } else if 
errMsg := err.Error(); !strings.Contains(errMsg, test.wantErr) { + t.Fatalf("error %q does not contain %q", errMsg, test.wantErr) + } + return + } + if test.wantErr != "" { + t.Fatalf("unexpected success; wanted error containing %q", test.wantErr) + } + if !reflect.DeepEqual(got, test.want) { + t.Errorf("got %q; want %q", got, test.want) + } + }) + } +} + +func TestJoin(t *testing.T) { + for _, test := range []struct { + name string + args []string + want, wantErr string + }{ + {name: "empty", args: nil, want: ""}, + {name: "one", args: []string{"a"}, want: "a"}, + {name: "two", args: []string{"a", "b"}, want: "a b"}, + {name: "space", args: []string{"a ", "b"}, want: "'a ' b"}, + {name: "newline", args: []string{"a\n", "b"}, want: "'a\n' b"}, + {name: "quote", args: []string{`'a `, "b"}, want: `"'a " b`}, + {name: "unquoteable", args: []string{`'"`}, wantErr: "contains both single and double quotes and cannot be quoted"}, + } { + t.Run(test.name, func(t *testing.T) { + got, err := Join(test.args) + if err != nil { + if test.wantErr == "" { + t.Fatalf("unexpected error: %v", err) + } else if errMsg := err.Error(); !strings.Contains(errMsg, test.wantErr) { + t.Fatalf("error %q does not contain %q", errMsg, test.wantErr) + } + return + } + if test.wantErr != "" { + t.Fatalf("unexpected success; wanted error containing %q", test.wantErr) + } + if got != test.want { + t.Errorf("got %s; want %s", got, test.want) + } + }) + } +}