151 lines
4.7 KiB
Go
151 lines
4.7 KiB
Go
package command
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"strings"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
var (
|
|
ErrCommandEmpty = errors.New("command is empty")
|
|
ErrSingleQuoteUnterminated = errors.New("unterminated single-quoted string")
|
|
ErrDoubleQuoteUnterminated = errors.New("unterminated double-quoted string")
|
|
ErrEscapeUnterminated = errors.New("unterminated backslash-escape")
|
|
)
|
|
|
|
// Split splits a command string into a slice of arguments.
|
|
// This is especially important for commands such as:
|
|
// /bin/sh -c "echo hello"
|
|
// which should be split into: ["/bin/sh", "-c", "echo hello"]
|
|
// It supports backslash-escapes, single-quotes and double-quotes.
|
|
// It does not support:
|
|
// - the $" quoting style.
|
|
// - expansion (brace, shell or pathname).
|
|
func Split(command string) (words []string, err error) {
|
|
if command == "" {
|
|
return nil, fmt.Errorf("%w", ErrCommandEmpty)
|
|
}
|
|
|
|
const bufferSize = 1024
|
|
buffer := bytes.NewBuffer(make([]byte, bufferSize))
|
|
|
|
startIndex := 0
|
|
|
|
for startIndex < len(command) {
|
|
// skip any split characters at the start
|
|
character, runeSize := utf8.DecodeRuneInString(command[startIndex:])
|
|
switch {
|
|
case strings.ContainsRune(" \n\t", character):
|
|
startIndex += runeSize
|
|
case character == '\\':
|
|
// Look ahead to eventually skip an escaped newline
|
|
if command[startIndex+runeSize:] == "" {
|
|
return nil, fmt.Errorf("%w: %q", ErrEscapeUnterminated, command)
|
|
}
|
|
character, runeSize := utf8.DecodeRuneInString(command[startIndex+runeSize:])
|
|
if character == '\n' {
|
|
startIndex += runeSize + runeSize // backslash and newline
|
|
}
|
|
default:
|
|
var word string
|
|
buffer.Reset()
|
|
word, startIndex, err = splitWord(command, startIndex, buffer)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("splitting word in %q: %w", command, err)
|
|
}
|
|
words = append(words, word)
|
|
}
|
|
}
|
|
return words, nil
|
|
}
|
|
|
|
// WARNING: buffer must be cleared before calling this function.
|
|
func splitWord(input string, startIndex int, buffer *bytes.Buffer) (
|
|
word string, newStartIndex int, err error,
|
|
) {
|
|
cursor := startIndex
|
|
for cursor < len(input) {
|
|
character, runeLength := utf8.DecodeRuneInString(input[cursor:])
|
|
cursor += runeLength
|
|
if character == '"' ||
|
|
character == '\'' ||
|
|
character == '\\' ||
|
|
character == ' ' ||
|
|
character == '\n' ||
|
|
character == '\t' {
|
|
buffer.WriteString(input[startIndex : cursor-runeLength])
|
|
}
|
|
|
|
switch {
|
|
case strings.ContainsRune(" \n\t", character): // spacing character
|
|
return buffer.String(), cursor, nil
|
|
case character == '"':
|
|
return handleDoubleQuoted(input, cursor, buffer)
|
|
case character == '\'':
|
|
return handleSingleQuoted(input, cursor, buffer)
|
|
case character == '\\':
|
|
return handleEscaped(input, cursor, buffer)
|
|
}
|
|
}
|
|
|
|
buffer.WriteString(input[startIndex:])
|
|
return buffer.String(), len(input), nil
|
|
}
|
|
|
|
func handleDoubleQuoted(input string, startIndex int, buffer *bytes.Buffer) (
|
|
word string, newStartIndex int, err error,
|
|
) {
|
|
cursor := startIndex
|
|
for cursor < len(input) {
|
|
nextCharacter, nextRuneLength := utf8.DecodeRuneInString(input[cursor:])
|
|
cursor += nextRuneLength
|
|
switch nextCharacter {
|
|
case '"': // end of the double quoted string
|
|
buffer.WriteString(input[startIndex : cursor-nextRuneLength])
|
|
return splitWord(input, cursor, buffer)
|
|
case '\\': // escaped character
|
|
escapedCharacter, escapedRuneLength := utf8.DecodeRuneInString(input[cursor:])
|
|
cursor += escapedRuneLength
|
|
if !strings.ContainsRune("$`\"\n\\", escapedCharacter) {
|
|
break
|
|
}
|
|
buffer.WriteString(input[startIndex : cursor-nextRuneLength-escapedRuneLength])
|
|
if escapedCharacter != '\n' {
|
|
// skip backslash entirely for the newline character
|
|
buffer.WriteRune(escapedCharacter)
|
|
}
|
|
startIndex = cursor
|
|
}
|
|
}
|
|
return "", 0, fmt.Errorf("%w", ErrDoubleQuoteUnterminated)
|
|
}
|
|
|
|
func handleSingleQuoted(input string, startIndex int, buffer *bytes.Buffer) (
|
|
word string, newStartIndex int, err error,
|
|
) {
|
|
closingQuoteIndex := strings.IndexRune(input[startIndex:], '\'')
|
|
if closingQuoteIndex == -1 {
|
|
return "", 0, fmt.Errorf("%w", ErrSingleQuoteUnterminated)
|
|
}
|
|
buffer.WriteString(input[startIndex : startIndex+closingQuoteIndex])
|
|
const singleQuoteRuneLength = 1
|
|
startIndex += closingQuoteIndex + singleQuoteRuneLength
|
|
return splitWord(input, startIndex, buffer)
|
|
}
|
|
|
|
func handleEscaped(input string, startIndex int, buffer *bytes.Buffer) (
|
|
word string, newStartIndex int, err error,
|
|
) {
|
|
if input[startIndex:] == "" {
|
|
return "", 0, fmt.Errorf("%w", ErrEscapeUnterminated)
|
|
}
|
|
character, runeLength := utf8.DecodeRuneInString(input[startIndex:])
|
|
if character != '\n' { // backslash-escaped newline is ignored
|
|
buffer.WriteString(input[startIndex : startIndex+runeLength])
|
|
}
|
|
startIndex += runeLength
|
|
return splitWord(input, startIndex, buffer)
|
|
}
|