Skip to content

Commit

Permalink
utf7: simplify implementation
Browse files: browse the repository at this point in the history
Closes: #609
  • Loading branch information
emersion committed Apr 17, 2024
1 parent 8a0126f commit 1e3a4df
Show file tree
Hide file tree
Showing 10 changed files with 54 additions and 164 deletions.
2 changes: 0 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ module github.com/emersion/go-imap/v2

go 1.18

require golang.org/x/text v0.14.0

require (
github.com/emersion/go-message v0.18.0
github.com/emersion/go-sasl v0.0.0-20231106173351-e73c9f7bad43
Expand Down
1 change: 0 additions & 1 deletion go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
Expand Down
2 changes: 1 addition & 1 deletion imapserver/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ func readListMailbox(dec *imapwire.Decoder) (string, error) {
return "", dec.Err()
}
}
return utf7.Encoding.NewDecoder().String(mailbox)
return utf7.Decode(mailbox)
}

func isListChar(ch byte) bool {
Expand Down
2 changes: 1 addition & 1 deletion internal/imapwire/decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ func (dec *Decoder) ExpectMailbox(ptr *string) bool {
*ptr = "INBOX"
return true
}
name, err := utf7.Encoding.NewDecoder().String(name)
name, err := utf7.Decode(name)
if err == nil {
*ptr = name
}
Expand Down
6 changes: 3 additions & 3 deletions internal/imapwire/encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,11 @@ func (enc *Encoder) Mailbox(name string) *Encoder {
if strings.EqualFold(name, "INBOX") {
return enc.Atom("INBOX")
} else {
utf7Enc := utf7.Encoding
if enc.QuotedUTF8 {
utf7Enc = utf7.AcceptUTF8Encoding
name = utf7.Escape(name)
} else {
name = utf7.Encode(name)
}
name, _ = utf7Enc.NewEncoder().String(name)
return enc.String(name)
}
}
Expand Down
91 changes: 29 additions & 62 deletions internal/utf7/decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,105 +2,72 @@ package utf7

import (
"errors"
"strings"
"unicode/utf16"
"unicode/utf8"

"golang.org/x/text/transform"
)

// ErrInvalidUTF7 means that a transformer encountered invalid UTF-7.
// ErrInvalidUTF7 means that a decoder encountered invalid UTF-7.
var ErrInvalidUTF7 = errors.New("utf7: invalid UTF-7")

type decoder struct {
ascii bool
}
// Decode decodes a string encoded with modified UTF-7.
//
// Note, raw UTF-8 is accepted.
func Decode(src string) (string, error) {
if !utf8.ValidString(src) {
return "", errors.New("invalid UTF-8")
}

func (d *decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
var sb strings.Builder
sb.Grow(len(src))

ascii := true
for i := 0; i < len(src); i++ {
ch := src[i]

if ch < min || (ch > max && ch < utf8.RuneSelf) {
// Illegal code point in ASCII mode. Note, UTF-8 codepoints are
// always allowed.
err = ErrInvalidUTF7
return
return "", ErrInvalidUTF7
}

if ch != '&' {
if nDst+1 > len(dst) {
err = transform.ErrShortDst
return
}

nSrc++

dst[nDst] = ch
nDst++

d.ascii = true
sb.WriteByte(ch)
ascii = true
continue
}

// Find the end of the Base64 or "&-" segment
start := i + 1
for i++; i < len(src) && src[i] != '-'; i++ {
if src[i] == '\r' || src[i] == '\n' { // base64 package ignores CR and LF
err = ErrInvalidUTF7
return
return "", ErrInvalidUTF7
}
}

if i == len(src) { // Implicit shift ("&...")
if atEOF {
err = ErrInvalidUTF7
} else {
err = transform.ErrShortSrc
}
return
return "", ErrInvalidUTF7
}

var b []byte
if i == start { // Escape sequence "&-"
b = []byte{'&'}
d.ascii = true
sb.WriteByte('&')
ascii = true
} else { // Control or non-ASCII code points in base64
if !d.ascii { // Null shift ("&...-&...-")
err = ErrInvalidUTF7
return
if !ascii { // Null shift ("&...-&...-")
return "", ErrInvalidUTF7
}

b = decode(src[start:i])
d.ascii = false
}

if len(b) == 0 { // Bad encoding
err = ErrInvalidUTF7
return
}

if nDst+len(b) > len(dst) {
d.ascii = true
err = transform.ErrShortDst
return
}

nSrc = i + 1
b := decode([]byte(src[start:i]))
if len(b) == 0 { // Bad encoding
return "", ErrInvalidUTF7
}
sb.Write(b)

for _, ch := range b {
dst[nDst] = ch
nDst++
ascii = false
}
}

if atEOF {
d.ascii = true
}

return
}

func (d *decoder) Reset() {
d.ascii = true
return sb.String(), nil
}

// Extracts UTF-16-BE bytes from base64 data and converts them to UTF-8.
Expand Down
4 changes: 1 addition & 3 deletions internal/utf7/decoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,8 @@ var decode = []struct {
}

func TestDecoder(t *testing.T) {
dec := utf7.Encoding.NewDecoder()

for _, test := range decode {
out, err := dec.String(test.in)
out, err := utf7.Decode(test.in)
if out != test.out {
t.Errorf("UTF7Decode(%+q) expected %+q; got %+q", test.in, test.out, out)
}
Expand Down
68 changes: 18 additions & 50 deletions internal/utf7/encoder.go
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
package utf7

import (
"strings"
"unicode/utf16"
"unicode/utf8"

"golang.org/x/text/transform"
)

type encoder struct{}
// Encode encodes a string with modified UTF-7.
func Encode(src string) string {
var sb strings.Builder
sb.Grow(len(src))

func (e *encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
for i := 0; i < len(src); {
ch := src[i]

var b []byte
if min <= ch && ch <= max {
b = []byte{ch}
sb.WriteByte(ch)
if ch == '&' {
b = append(b, '-')
sb.WriteByte('-')
}

i++
Expand All @@ -30,32 +30,13 @@ func (e *encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err er
i++
}

if !atEOF && i == len(src) {
err = transform.ErrShortSrc
return
}

b = encode(src[start:i])
}

if nDst+len(b) > len(dst) {
err = transform.ErrShortDst
return
}

nSrc = i

for _, ch := range b {
dst[nDst] = ch
nDst++
sb.Write(encode([]byte(src[start:i])))
}
}

return
return sb.String()
}

func (e *encoder) Reset() {}

// Converts string s from UTF-8 to UTF-16-BE, encodes the result as base64,
// removes the padding, and adds UTF-7 shifts.
func encode(s []byte) []byte {
Expand Down Expand Up @@ -90,31 +71,18 @@ func encode(s []byte) []byte {
return b64
}

type escaper struct{}
// Escape passes through raw UTF-8 as-is and escapes the special UTF-7 marker
// (the ampersand character).
func Escape(src string) string {
var sb strings.Builder
sb.Grow(len(src))

func (e *escaper) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
for i := 0; i < len(src); {
ch := src[i]
i++

b := []byte{ch}
for _, ch := range src {
sb.WriteRune(ch)
if ch == '&' {
b = append(b, '-')
}

if nDst+len(b) > len(dst) {
return nDst, nSrc, transform.ErrShortDst
}

nSrc = i

for _, ch := range b {
dst[nDst] = ch
nDst++
sb.WriteByte('-')
}
}

return nDst, nSrc, nil
return sb.String()
}

func (e *escaper) Reset() {}
4 changes: 1 addition & 3 deletions internal/utf7/encoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,8 @@ var encode = []struct {
}

func TestEncoder(t *testing.T) {
enc := utf7.Encoding.NewEncoder()

for _, test := range encode {
out, _ := enc.String(test.in)
out := utf7.Encode(test.in)
if out != test.out {
t.Errorf("UTF7Encode(%+q) expected %+q; got %+q", test.in, test.out, out)
}
Expand Down
38 changes: 0 additions & 38 deletions internal/utf7/utf7.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@ package utf7

import (
"encoding/base64"

"golang.org/x/text/encoding"
"golang.org/x/text/transform"
)

const (
Expand All @@ -16,38 +13,3 @@ const (
)

var b64Enc = base64.NewEncoding("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,")

type enc struct{}

func (e enc) NewDecoder() *encoding.Decoder {
return &encoding.Decoder{
Transformer: transform.Chain(encoding.UTF8Validator, &decoder{ascii: true}),
}
}

func (e enc) NewEncoder() *encoding.Encoder {
return &encoding.Encoder{
Transformer: &encoder{},
}
}

// Encoding is the modified UTF-7 encoding.
//
// Note, raw UTF-8 is accepted when decoding.
var Encoding encoding.Encoding = enc{}

type acceptUTF8Enc struct{}

func (e acceptUTF8Enc) NewDecoder() *encoding.Decoder {
return Encoding.NewDecoder()
}

func (e acceptUTF8Enc) NewEncoder() *encoding.Encoder {
return &encoding.Encoder{
Transformer: transform.Chain(encoding.UTF8Validator, &escaper{}),
}
}

// AcceptUTF8Encoding is an encoding whose encoder passes through raw UTF-8
// as-is, only escaping the special UTF-7 marker (ampersand).
var AcceptUTF8Encoding encoding.Encoding = acceptUTF8Enc{}

0 comments on commit 1e3a4df

Please sign in to comment.