From 5dc00375d9a19214a4d63eba99871d3dd6d42946 Mon Sep 17 00:00:00 2001 From: Neil Date: Fri, 6 Sep 2019 15:44:44 -0400 Subject: [PATCH 1/5] make line wrapping common for base64 and plain text --- encode.go | 50 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 11 deletions(-) diff --git a/encode.go b/encode.go index b1e90a2c..b577ae8e 100644 --- a/encode.go +++ b/encode.go @@ -18,6 +18,9 @@ import ( // from quoted-printable to base64 encoding. const b64Percent = 20 +// lineWrapLength is the length at which we wrap base64 and plain text content +const lineWrapLength = 76 + type transferEncoding byte const ( @@ -159,17 +162,10 @@ func (p *Part) encodeContent(b *bufio.Writer, cte transferEncoding) (err error) text := make([]byte, enc.EncodedLen(len(p.Content))) base64.StdEncoding.Encode(text, p.Content) // Wrap lines. - lineLen := 76 - for len(text) > 0 { - if lineLen > len(text) { - lineLen = len(text) - } - if _, err = b.Write(text[:lineLen]); err != nil { - return err - } - b.Write(crnl) - text = text[lineLen:] + if err := wrapContent(text, b); err != nil { + return err } + //b.Write(crnl) case teQuoted: qp := quotedprintable.NewWriter(b) if _, err = qp.Write(p.Content); err != nil { @@ -177,7 +173,14 @@ func (p *Part) encodeContent(b *bufio.Writer, cte transferEncoding) (err error) } err = qp.Close() default: - _, err = b.Write(p.Content) + if p.ContentType == ctTextPlain { + // Wrap lines. + if err := wrapContent(p.Content, b); err != nil { + return err + } + } else { + _, err = b.Write(p.Content) + } } return err } @@ -213,3 +216,28 @@ func setParamValue(p map[string]string, k, v string) { p[k] = v } } + +func wrapContent(text []byte, b *bufio.Writer) error { + lineLen := lineWrapLength + //if len(text) < lineLen { + // if _, err := b.Write(text); err != nil { + // return err + // } + // return nil + //} + beginning := true + for len(text) > 0 { + if !beginning { + b.Write(crnl) + } + if lineLen > len(text) { + lineLen = len(text) + } + if _, err := b.Write(text[:lineLen]); err != nil { + return err + } + text = text[lineLen:] + beginning = false + } + return nil +} From 39303e2b544fa60a3402fe3272019ca1570ecb95 Mon Sep 17 00:00:00 2001 From: Neil Date: Fri, 6 Sep 2019 16:23:42 -0400 Subject: [PATCH 2/5] RFC822 document does not require naked CRLF terminator --- encode_test.go | 4 ++-- testdata/encode/part-bin-content.golden | 2 +- testdata/encode/part-bin-header.golden | 2 +- testdata/encode/part-default-headers.golden | 2 +- testdata/encode/part-plain.golden | 2 +- testdata/encode/part-quoted-headers.golden | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/encode_test.go b/encode_test.go index fe039b72..f120ed12 100644 --- a/encode_test.go +++ b/encode_test.go @@ -77,7 +77,7 @@ func TestEncodePartBinaryHeader(t *testing.T) { 0xa2, 0xb2, 0xc0, 0x90, 0x59, 0xe3, 0x35, 0xf8, 0x60, 0xb7, 0xb1, 0x63, 0x77, 0xd7, 0x5f, 0x92, 0x58, 0xa8, 0x75, })) - p.Content = []byte("This is a test of a plain text part.\r\n\r\nAnother line.\r\n") + p.Content = []byte("This is a test of a plain text part.\r\n\r\nAnother line.") b := &bytes.Buffer{} err := p.Encode(b) @@ -101,7 +101,7 @@ func TestEncodePartContentOnly(t *testing.T) { func TestEncodePartPlain(t *testing.T) { p := enmime.NewPart("text/plain") - p.Content = []byte("This is a test of a plain text part.\r\n\r\nAnother line.\r\n") + p.Content = []byte("This is a test of a plain text part.\r\n\r\nAnother line.") b := &bytes.Buffer{} err := p.Encode(b) diff --git a/testdata/encode/part-bin-content.golden b/testdata/encode/part-bin-content.golden index 2dea714f..204348f8 100644 --- a/testdata/encode/part-bin-content.golden +++ b/testdata/encode/part-bin-content.golden @@ -36,4 +36,4 @@ rq+wsbKztLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm ICEiIyQlJicoKSorLC0uLzAxMjM0NTY3ODk6Ozw9Pj9AQUJDREVGR0hJSktMTU5PUFFSU1RVVldY WVpbXF1eX2BhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ent8fX5/gIGCg4SFhoeIiYqLjI2Oj5CR kpOUlZaXmJmam5ydnp+goaKjpKWmp6ipqqusra6vsLGys7S1tre4ubq7vL2+v8DBwsPExcbHyMnK -y8zNzs8= +y8zNzs8= \ No newline at end of file diff --git a/testdata/encode/part-bin-header.golden b/testdata/encode/part-bin-header.golden index 4f8f5d6a..d1b671e2 100644 --- a/testdata/encode/part-bin-header.golden +++ b/testdata/encode/part-bin-header.golden @@ -5,4 +5,4 @@ X-Data: =?utf-8?b?AxfhfujropadladnggnfjgwsaiubvnmkadiuhterqHJSFfuAjkfhrqpeorLA?= This is a test of a plain text part. -Another line. +Another line. \ No newline at end of file diff --git a/testdata/encode/part-default-headers.golden b/testdata/encode/part-default-headers.golden index 382dcfdf..97b495c9 100644 --- a/testdata/encode/part-default-headers.golden +++ b/testdata/encode/part-default-headers.golden @@ -5,4 +5,4 @@ Content-Transfer-Encoding: base64 Content-Type: application/zip; boundary=enmime-abcdefg0123456789; charset=binary; name=stuff.zip -WklQWklQWklQ +WklQWklQWklQ \ No newline at end of file diff --git a/testdata/encode/part-plain.golden b/testdata/encode/part-plain.golden index 1d41436a..7ff4ead9 100644 --- a/testdata/encode/part-plain.golden +++ b/testdata/encode/part-plain.golden @@ -2,4 +2,4 @@ Content-Type: text/plain; charset=utf-8 This is a test of a plain text part. -Another line. +Another line. \ No newline at end of file diff --git a/testdata/encode/part-quoted-headers.golden b/testdata/encode/part-quoted-headers.golden index d7c62934..e123c2eb 100644 --- a/testdata/encode/part-quoted-headers.golden +++ b/testdata/encode/part-quoted-headers.golden @@ -5,4 +5,4 @@ Content-Transfer-Encoding: base64 Content-Type: application/zip; boundary=enmime-abcdefg0123456789; charset=binary; name="arvizturo \"x\" tukorfurogep.zip" -WklQWklQWklQ +WklQWklQWklQ \ No newline at end of file From c8575e7c91ee8e598cbb704c54ac33b8315ad3df Mon Sep 17 00:00:00 2001 From: Neil Date: Fri, 6 Sep 2019 16:26:19 -0400 Subject: [PATCH 3/5] removed commented-out code --- encode.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/encode.go b/encode.go index b577ae8e..c8a45575 100644 --- a/encode.go +++ b/encode.go @@ -219,12 +219,6 @@ func setParamValue(p map[string]string, k, v string) { func wrapContent(text []byte, b *bufio.Writer) error { lineLen := lineWrapLength - //if len(text) < lineLen { - // if _, err := b.Write(text); err != nil { - // return err - // } - // return nil - //} beginning := true for len(text) > 0 { if !beginning { From 826f9c50c710b91b07abab77bd24318ec1fed486 Mon Sep 17 00:00:00 2001 From: Neil Date: Fri, 6 Sep 2019 16:38:57 -0400 Subject: [PATCH 4/5] check error when writing CRLF for line wrapping --- encode.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/encode.go b/encode.go index c8a45575..0fedce1e 100644 --- a/encode.go +++ b/encode.go @@ -222,7 +222,9 @@ func wrapContent(text []byte, b *bufio.Writer) error { beginning := true for len(text) > 0 { if !beginning { - b.Write(crnl) + if _, err := b.Write(crnl); err != nil { + return err + } } if lineLen > len(text) { lineLen = len(text) From ededc16878caa9fec74d6179c08d3d34ef2dc7c2 Mon Sep 17 00:00:00 2001 From: Neil Date: Tue, 26 Nov 2019 18:15:03 -0500 Subject: [PATCH 5/5] initial port of flowed-text utility functions --- internal/stringutil/flow.go | 232 +++++++++++++++++++++++++++++++ internal/stringutil/flow_test.go | 25 ++++ 2 files changed, 257 insertions(+) create mode 100644 internal/stringutil/flow.go create mode 100644 internal/stringutil/flow_test.go diff --git a/internal/stringutil/flow.go b/internal/stringutil/flow.go new file mode 100644 index 00000000..e26f94e1 --- /dev/null +++ b/internal/stringutil/flow.go @@ -0,0 +1,232 @@ +package stringutil + +import ( + "regexp" + "strings" +) + +const ( + rfc2646Space = " " + rfc2646Quote = ">" + rfc2646Signature = "-- " + rfc2646CRLF = "\r\n" + rfc2646From = "From " + rfc2646Width = 78 +) + +var lineTerm = regexp.MustCompile("\r\n|\n") + +// Deflow decodes a text previously wrapped using "format=flowed". +// +// In order to decode, the input text must belong to a mail with headers similar to: +// Content-Type: text/plain; charset="CHARSET"; [delsp="yes|no"; ]format="flowed" +// (the quotes around CHARSET are not mandatory). +// Furthermore the header Content-Transfer-Encoding MUST NOT BE Quoted-Printable +// (see RFC3676 paragraph 4.2).(In fact this happens often for non 7bit messages). +func Deflow(text string, delSp bool) string { + lines := regSplitAfter(text) + var result *strings.Builder + resultLine := &strings.Builder{} + resultLineQuoteDepth := 0 + resultLineFlowed := false + var line *string + for i := 0; i <= len(lines); i++ { + if i < len(lines) { + line = &lines[i] + } else { + line = nil + } + actualQuoteDepth := 0 + if line != nil && len(*line) > 0 { + tmpString := *line + if tmpString == rfc2646Signature { + // signature handling (the previous line is not flowed) + resultLineFlowed = false + } + if strings.HasPrefix(*line, rfc2646Quote) { + // Quote + actualQuoteDepth = 1 + for actualQuoteDepth < len(tmpString) && string(tmpString[actualQuoteDepth]) == rfc2646Quote { + actualQuoteDepth++ + } + // if quote-depth changes wrt the previous line then this is not flowed + if resultLineQuoteDepth != actualQuoteDepth { + resultLineFlowed = false + } + tmpString = tmpString[actualQuoteDepth:] + line = &tmpString + } else { + // id quote-depth changes wrt the first line then this is not flowed + if resultLineQuoteDepth > 0 { + resultLineFlowed = false + } + } + + if len(tmpString) > 0 && strings.HasPrefix(tmpString, rfc2646Space) { + // Line space-stuffed + tmpString = tmpString[1:] + line = &tmpString + } + } + + // If the previous line was the last then it was not flowed. + if line == nil { + resultLineFlowed = false + } + + // Add the PREVIOUS line. + // This often will find the flow looking for a space as the last char of the line. + // With quote changes or signatures it could be the following line to void the flow. + if !resultLineFlowed && i > 0 { + for j := 0; j < resultLineQuoteDepth; j++ { + resultLine.WriteString(rfc2646Quote) + } + if resultLineQuoteDepth > 0 { + resultLine.WriteString(rfc2646Space) + } + if result == nil { + result = &strings.Builder{} + } else { + result.WriteString(rfc2646CRLF) + } + result.WriteString(resultLine.String()) + resultLine = &strings.Builder{} + resultLineFlowed = false + } + resultLineQuoteDepth = actualQuoteDepth + + if line != nil { + if !(*line == rfc2646Signature) && strings.HasSuffix(*line, rfc2646Space) && i < len(lines)-1 { + // Line flowed (NOTE: for the split operation the line having i == len(lines) is the last that does not end with rfc2646CRLF) + if delSp { + tmpString := *line + tmpString = tmpString[:len(tmpString)-1] + line = &tmpString + } + resultLineFlowed = true + } else { + resultLineFlowed = false + } + + resultLine.WriteString(*line) + } + } + + if result == nil { + result = &strings.Builder{} + } + + return result.String() +} + +// Flow encodes a text (using standard width). +// +// When encoding the input text will be changed eliminating every space found before CRLF, +// otherwise it won't be possible to recognize hard breaks from soft breaks. +// In this scenario encoding and decoding a message will not return a message identical to +// the original (lines with hard breaks will be trimmed). +func Flow(text string, delSp bool) string { + return FlowN(text, delSp, rfc2646Width) +} + +// Flow encodes a text (using N with). +// +// When encoding the input text will be changed eliminating every space found before CRLF, +// otherwise it won't be possible to recognize hard breaks from soft breaks. +// In this scenario encoding and decoding a message will not return a message identical to +// the original (lines with hard breaks will be trimmed). +func FlowN(text string, delSp bool, n int) string { + result := &strings.Builder{} + lines := regSplitAfter(text) + for i := 0; i < len(lines); i++ { + line := lines[i] + notEmpty := len(line) > 0 + quoteDepth := 0 + for quoteDepth < len(line) && string(line[quoteDepth]) == rfc2646Quote { + quoteDepth++ + } + if quoteDepth > 0 { + if quoteDepth+1 < len(line) && string(line[quoteDepth]) == rfc2646Space { + line = line[quoteDepth+1:] + } else { + line = line[quoteDepth:] + } + } + for notEmpty { + extra := 0 + if quoteDepth == 0 { + if strings.HasPrefix(line, rfc2646Space) || strings.HasPrefix(line, rfc2646Quote) || strings.HasPrefix(line, rfc2646From) { + line = rfc2646Space + line + extra = 1 + } + } else { + line = rfc2646Space + line + for j := 0; j < quoteDepth; j++ { + line = rfc2646Space + line + } + extra = quoteDepth + 1 + } + + j := n - 1 + if j > len(line) { + j = len(line) - 1 + } else { + for j >= extra && (delSp && isAlphaChar(text, j)) || (!delSp && string(line[j]) != rfc2646Space) { + j-- + } + if j < extra { + // Not able to cut a word: skip to word end even if greater than the max width + j = n - 1 + for j < len(line)-1 && (delSp && isAlphaChar(text, j)) || (!delSp && string(line[j]) != rfc2646Space) { + j++ + } + } + } + + result.WriteString(line[:j+1]) + if j < len(line)-1 { + if delSp { + result.WriteString(rfc2646Space) + } + result.WriteString(rfc2646CRLF) + } + line = line[j+1:] + notEmpty = len(line) > 0 + } + + if i < len(lines)-1 { + // NOTE: Have to trim the spaces before, otherwise it won't recognize soft-break from hard break. + // Deflow of flowed message will not be identical to the original. + for result.Len() > 0 && string(result.String()[result.Len()-1]) == rfc2646Space { + result.WriteString(rfc2646CRLF) + } + } + } + + return result.String() +} + +// isAlphaChar checks whether the char is part of a word. +// RFC asserts a word cannot be split (even if the length is greater than the maximum length). +func isAlphaChar(text string, index int) bool { + // Note: a list of chars is available here: + // http://www.zvon.org/tmRFC/RFC2646/Output/index.html + c := text[index] + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') +} + +func regSplitAfter(s string) []string { + var ( + r []string + p int + ) + is := lineTerm.FindAllStringIndex(s, -1) + if is == nil { + return append(r, s) + } + for _, i := range is { + r = append(r, s[p:i[1]]) + p = i[1] + } + return append(r, s[p:]) +} diff --git a/internal/stringutil/flow_test.go b/internal/stringutil/flow_test.go new file mode 100644 index 00000000..aa166373 --- /dev/null +++ b/internal/stringutil/flow_test.go @@ -0,0 +1,25 @@ +package stringutil + +import "testing" + +func TestFlowN(t *testing.T) { + have := "`Take some more tea,' the March Hare said to Alice, very earnestly.\r\n\r\n`I've had nothing yet,' Alice replied in an offended tone, `so I can't take more.'\r\n\r\n`You mean you can't take LESS,' said the Hatter: `it's very easy to take MORE than nothing.'\r\n" + want := "`Take some more tea,' the March Hare said to Alice, very \r\nearnestly.\r\n\r\n`I've had nothing yet,' Alice replied in an offended tone, `so \r\nI can't take more.'\r\n\r\n`You mean you can't take LESS,' said the Hatter: `it's very \r\neasy to take MORE than nothing.'\r\n" + flowed := FlowN(have, false, 63) + t.Logf("%q", flowed) + t.Logf("\n%s", flowed) + if flowed != want { + t.Fatal("Flowed text output did not match expected result") + } +} + +//func TestDeflow(t *testing.T) { +// have := ">>>Take some more tea.\r\n>>T've had nothing yet, so I can't take more.\r\n>You mean you can't take LESS, it's very easy to take MORE than nothing.\r\n" +// want := " Take some more tea.\r\n T've had nothing yet, so I can't take more.\r\n You mean you can't take LESS, it's very easy to take \r\n MORE than nothing.\r\n" +// flowed := FlowN(have, false, 58) +// t.Logf("%q", flowed) +// t.Logf("\n%s", flowed) +// if flowed != want { +// t.Fatal("Flowed text output did not match expected result") +// } +//}