workgroups/vendor/gocloud.dev/internal/escape/escape.go

215 lines
6.1 KiB
Go
Raw Normal View History

2021-09-24 17:34:17 +02:00
// Copyright 2019 The Go Cloud Development Kit Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package escape includes helpers for escaping and unescaping strings.
package escape
import (
"fmt"
"net/url"
"strconv"
"strings"
)
// NonUTF8String is a string for which utf8.ValidString returns false.
// It consists of the two bytes 0xBD 0xB2, which are UTF-8 continuation
// bytes that are not preceded by a lead byte.
const NonUTF8String = "\xbd\xb2"
// IsASCIIAlphanumeric reports whether r is one of the ASCII letters
// a-z or A-Z, or one of the ASCII digits 0-9. Every other rune,
// including non-ASCII letters and digits, yields false.
func IsASCIIAlphanumeric(r rune) bool {
	isUpper := r >= 'A' && r <= 'Z'
	isLower := r >= 'a' && r <= 'z'
	isDigit := r >= '0' && r <= '9'
	return isUpper || isLower || isDigit
}
// HexEscape returns a copy of s in which every rune selected by
// shouldEscape is replaced by "__0xXXX__", XXX being the lower-case
// hexadecimal value of the rune. For example, " " becomes "__0x20__".
// If no rune is selected, s itself is returned unchanged.
//
// s is processed as a slice of runes, so bytes that are not valid UTF-8
// are seen (and, when anything is escaped, emitted) as
// unicode.ReplacementChar; their original value is lost. Please file an
// issue if you need non-UTF8 support.
//
// shouldEscape receives the complete rune slice together with the index
// under consideration rather than a single byte or rune, because some
// decisions need context: a caller may want to escape the second "/" in
// "//" but keep the first. Runes are used instead of bytes because other
// decisions are naturally per-rune (e.g., escape all non-ASCII runes).
func HexEscape(s string, shouldEscape func(s []rune, i int) bool) string {
	runes := []rune(s)

	// First pass: record the positions that have to be escaped.
	var marked []int
	for idx := range runes {
		if shouldEscape(runes, idx) {
			marked = append(marked, idx)
		}
	}
	if len(marked) == 0 {
		return s
	}

	// Second pass: build the output. The longest possible escape is
	// "__0x7fffffff__" (14 runes), i.e. 13 more than the rune it replaces,
	// so this capacity is always sufficient.
	out := make([]rune, 0, len(runes)+13*len(marked))
	next := 0 // position in marked of the next rune to escape
	for idx, r := range runes {
		if next == len(marked) || marked[next] != idx {
			out = append(out, r)
			continue
		}
		out = append(out, []rune(fmt.Sprintf("__%#x__", r))...)
		next++
	}
	return string(out)
}
// unescape tries to decode a single "__0x<hex>__" escape sequence that
// starts at r[i].
//
// It returns a boolean indicating whether decoding succeeded and, if so,
// the decoded rune and the index of the last element of r that belongs to
// the sequence (the second trailing underscore). On failure the rune and
// the index are both zero.
//
// Only plain hexadecimal digits are accepted between the markers, and the
// value must fit in a non-negative int32. In particular a sign is rejected:
// HexEscape never emits one, so inputs such as "__0x-41__" or "__0x+41__"
// are not escape sequences and must be left untouched instead of being
// decoded to a negative rune or to 'A'.
//
// An index outside of r is reported as "not an escape sequence".
func unescape(r []rune, i int) (bool, rune, int) {
	const maxInt32 = 1<<31 - 1

	if i < 0 {
		return false, 0, 0
	}
	// Look for the leading "__0x".
	for _, want := range "__0x" {
		if i >= len(r) || r[i] != want {
			return false, 0, 0
		}
		i++
	}
	// The digits run up to the next "_" (if any).
	start := i
	for i < len(r) && r[i] != '_' {
		i++
	}
	digits := string(r[start:i])
	// Look for the trailing "__". If i is in range here, r[i] is already
	// known to be '_' because that is what stopped the loop above.
	if i+1 >= len(r) || r[i+1] != '_' {
		return false, 0, 0
	}
	i++ // i now points at the last rune of the sequence
	// ParseUint (unlike ParseInt) does not permit a sign prefix. It also
	// fails on an empty digit string, so "__0x__" is rejected here.
	v, err := strconv.ParseUint(digits, 16, 32)
	if err != nil || v > maxInt32 {
		return false, 0, 0
	}
	return true, rune(v), i
}
// HexUnescape reverses HexEscape: every "__0xXXX__" sequence recognised
// by unescape is replaced by the rune it encodes, and everything else is
// copied through unchanged. If s contains no such sequence, s itself is
// returned.
func HexUnescape(s string) string {
	in := []rune(s)
	// out stays nil until the first escape sequence is found, so that
	// strings without escapes are returned without building a copy.
	var out []rune
	for pos := 0; pos < len(in); pos++ {
		ok, decoded, last := unescape(in, pos)
		switch {
		case ok:
			if out == nil {
				// First escape sequence: start the output with
				// everything that preceded it.
				out = append(make([]rune, 0, len(in)), in[:pos]...)
			}
			out = append(out, decoded)
			// Skip the rest of the sequence; the loop's pos++ then moves
			// to the rune following it.
			pos = last
		case out != nil:
			out = append(out, in[pos])
		}
	}
	if out == nil {
		return s
	}
	return string(out)
}
// URLEscape escapes s by delegating to url.PathEscape, i.e. s is
// percent-encoded the way net/url encodes a single URL path segment.
// URLUnescape performs the reverse conversion.
func URLEscape(s string) string {
return url.PathEscape(s)
}
// URLUnescape reverses URLEscape by means of url.PathUnescape. Input that
// url.PathUnescape rejects (for example a malformed "%" sequence) is
// returned unchanged instead of reporting an error.
func URLUnescape(s string) string {
	decoded, err := url.PathUnescape(s)
	if err != nil {
		return s
	}
	return decoded
}
// makeASCIIString returns a string made of the bytes with values in the
// half-open range [start, end), in ascending order, leaving out the ASCII
// letters a-z and A-Z and the ASCII digits 0-9. An empty range yields "".
func makeASCIIString(start, end int) string {
	var out []byte
	for c := start; c < end; c++ {
		switch {
		case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
			// Alphanumeric characters are skipped.
		default:
			out = append(out, byte(c))
		}
	}
	return string(out)
}
// WeirdStrings are unusual/weird strings for use in testing escaping.
// The keys are descriptive names, the values are the weird strings
// themselves.
var WeirdStrings = map[string]string{
"fwdslashes": "foo/bar/baz",
"repeatedfwdslashes": "foo//bar///baz",
"dotdotslash": "../foo/../bar/../../baz../",
"backslashes": "foo\\bar\\baz",
"repeatedbackslashes": "..\\foo\\\\bar\\\\\\baz",
"dotdotbackslash": "..\\foo\\..\\bar\\..\\..\\baz..\\",
"quote": "foo\"bar\"baz",
"spaces": "foo bar baz",
"startwithdigit": "12345",
"unicode": strings.Repeat("☺", 3),
// The ASCII characters 0-127, split into eight chunks of 16 code points
// each so that the possibly-escaped versions do not get too long.
// makeASCIIString treats its end argument as exclusive and leaves out
// a-z, A-Z and 0-9, so only non-alphanumeric characters appear here.
"ascii-1": makeASCIIString(0, 16),
"ascii-2": makeASCIIString(16, 32),
"ascii-3": makeASCIIString(32, 48),
"ascii-4": makeASCIIString(48, 64),
"ascii-5": makeASCIIString(64, 80),
"ascii-6": makeASCIIString(80, 96),
"ascii-7": makeASCIIString(96, 112),
"ascii-8": makeASCIIString(112, 128),
}