Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Line wrapping for text/plain content #135

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 35 additions & 11 deletions encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ import (
// from quoted-printable to base64 encoding.
const b64Percent = 20

// lineWrapLength is the maximum number of bytes written per line when wrapping base64 and plain text content.
const lineWrapLength = 76

type transferEncoding byte

const (
Expand Down Expand Up @@ -159,25 +162,25 @@ func (p *Part) encodeContent(b *bufio.Writer, cte transferEncoding) (err error)
text := make([]byte, enc.EncodedLen(len(p.Content)))
base64.StdEncoding.Encode(text, p.Content)
// Wrap lines.
lineLen := 76
for len(text) > 0 {
if lineLen > len(text) {
lineLen = len(text)
}
if _, err = b.Write(text[:lineLen]); err != nil {
return err
}
b.Write(crnl)
text = text[lineLen:]
if err := wrapContent(text, b); err != nil {
return err
}
//b.Write(crnl)
case teQuoted:
qp := quotedprintable.NewWriter(b)
if _, err = qp.Write(p.Content); err != nil {
return err
}
err = qp.Close()
default:
_, err = b.Write(p.Content)
if p.ContentType == ctTextPlain {
// Wrap lines.
if err := wrapContent(p.Content, b); err != nil {
return err
}
} else {
_, err = b.Write(p.Content)
}
}
return err
}
Expand Down Expand Up @@ -213,3 +216,24 @@ func setParamValue(p map[string]string, k, v string) {
p[k] = v
}
}

// wrapContent writes text to b in consecutive chunks of at most
// lineWrapLength bytes, writing crnl between chunks. No line break is written
// after the final chunk, and nothing at all is written for empty text.
//
// Chunks are cut purely by byte count; line breaks already present in text
// are not taken into account. The first write error is returned.
func wrapContent(text []byte, b *bufio.Writer) error {
	for start := 0; start < len(text); start += lineWrapLength {
		// Separator goes before every chunk except the first one.
		if start > 0 {
			if _, err := b.Write(crnl); err != nil {
				return err
			}
		}
		stop := start + lineWrapLength
		if stop > len(text) {
			stop = len(text)
		}
		if _, err := b.Write(text[start:stop]); err != nil {
			return err
		}
	}
	return nil
}
4 changes: 2 additions & 2 deletions encode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func TestEncodePartBinaryHeader(t *testing.T) {
0xa2, 0xb2, 0xc0, 0x90, 0x59, 0xe3, 0x35, 0xf8, 0x60, 0xb7, 0xb1, 0x63, 0x77, 0xd7,
0x5f, 0x92, 0x58, 0xa8, 0x75,
}))
p.Content = []byte("This is a test of a plain text part.\r\n\r\nAnother line.\r\n")
p.Content = []byte("This is a test of a plain text part.\r\n\r\nAnother line.")

b := &bytes.Buffer{}
err := p.Encode(b)
Expand All @@ -101,7 +101,7 @@ func TestEncodePartContentOnly(t *testing.T) {

func TestEncodePartPlain(t *testing.T) {
p := enmime.NewPart("text/plain")
p.Content = []byte("This is a test of a plain text part.\r\n\r\nAnother line.\r\n")
p.Content = []byte("This is a test of a plain text part.\r\n\r\nAnother line.")

b := &bytes.Buffer{}
err := p.Encode(b)
Expand Down
232 changes: 232 additions & 0 deletions internal/stringutil/flow.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
package stringutil

import (
"regexp"
"strings"
)

// Literals used by the format=flowed encoder (Flow/FlowN) and decoder (Deflow).
const (
// rfc2646Space is the space character. Deflow strips one leading space as
// space-stuffing and treats a trailing space as the mark of a flowed line.
rfc2646Space = " "
// rfc2646Quote is the quote mark; the number of leading quote marks on a
// line is that line's quote depth.
rfc2646Quote = ">"
// rfc2646Signature is the signature separator line, which Deflow never
// treats as flowed.
rfc2646Signature = "-- "
// rfc2646CRLF is the line break written to encoder and decoder output.
rfc2646CRLF = "\r\n"
// rfc2646From is a line prefix that FlowN space-stuffs on unquoted lines.
rfc2646From = "From "
// rfc2646Width is the line width Flow passes to FlowN.
rfc2646Width = 78
)

// lineTerm matches one line terminator: CRLF or a bare LF. It is used by
// regSplitAfter to locate split points.
var lineTerm = regexp.MustCompile("\r\n|\n")

// Deflow decodes a text previously wrapped using "format=flowed".
//
// In order to decode, the input text must belong to a mail with headers similar to:
// Content-Type: text/plain; charset="CHARSET"; [delsp="yes|no"; ]format="flowed"
// (the quotes around CHARSET are not mandatory).
// Furthermore the header Content-Transfer-Encoding MUST NOT BE Quoted-Printable
// (see RFC3676 paragraph 4.2). (In fact this happens often for non 7bit messages.)
//
// A physical line that ends in a space is a soft break and is joined with the
// line that follows it, unless it is the signature separator ("-- "), the
// following line is the signature separator or has a different quote depth, or
// it is the last line of text. When delSp is true the trailing space of a
// soft-broken line is removed before joining. Quote marks and one stuffing
// space are removed from every physical line; each reassembled line is emitted
// as its quote marks followed by a space (only when quoted) and its content.
// Output lines are separated by CRLF.
func Deflow(text string, delSp bool) string {
	lines := regSplitAfter(text)

	var result strings.Builder
	var resultLine strings.Builder // logical line currently being reassembled
	resultLineQuoteDepth := 0
	resultLineFlowed := false
	wroteLine := false // true once the first logical line has been emitted

	// Run one step past the last element so the final logical line is flushed.
	for i := 0; i <= len(lines); i++ {
		atEnd := i == len(lines)

		line := ""
		if !atEnd {
			line = lines[i]
			// regSplitAfter leaves the terminator on each element; remove it so
			// the signature and trailing-space checks see the content only.
			if strings.HasSuffix(line, "\n") {
				line = strings.TrimSuffix(line[:len(line)-1], "\r")
			}
		}

		// Nothing follows the last line, and a signature separator never
		// continues the line before it.
		if atEnd || line == rfc2646Signature {
			resultLineFlowed = false
		}

		// Strip the quote marks and remember how many there were.
		actualQuoteDepth := 0
		for strings.HasPrefix(line, rfc2646Quote) {
			line = line[len(rfc2646Quote):]
			actualQuoteDepth++
		}
		// A change of quote depth ends the paragraph: the previous line is
		// treated as fixed even if it ended in a space.
		if actualQuoteDepth != resultLineQuoteDepth {
			resultLineFlowed = false
		}

		// Undo space-stuffing (at most one leading space).
		line = strings.TrimPrefix(line, rfc2646Space)

		// Emit the PREVIOUS logical line once we know this line does not
		// continue it. The quote prefix goes in front of the content.
		if !resultLineFlowed && i > 0 {
			if wroteLine {
				result.WriteString(rfc2646CRLF)
			}
			wroteLine = true
			if resultLineQuoteDepth > 0 {
				result.WriteString(strings.Repeat(rfc2646Quote, resultLineQuoteDepth))
				result.WriteString(rfc2646Space)
			}
			result.WriteString(resultLine.String())
			resultLine.Reset()
		}
		resultLineQuoteDepth = actualQuoteDepth

		if atEnd {
			break
		}

		// The element after the final terminator (i == len(lines)-1) has no
		// following line, so it can never be flowed.
		resultLineFlowed = line != rfc2646Signature &&
			strings.HasSuffix(line, rfc2646Space) &&
			i < len(lines)-1
		if resultLineFlowed && delSp {
			// With DelSp the space before the soft break is not content.
			line = line[:len(line)-len(rfc2646Space)]
		}
		resultLine.WriteString(line)
	}

	return result.String()
}

// Flow encodes a text using the standard width: it returns
// FlowN(text, delSp, rfc2646Width).
//
// The stated intent of the encoding is that spaces found directly before a
// CRLF hard break are not preserved, since a trailing space is how a soft
// break is recognized. Encoding and then decoding a message is therefore not
// guaranteed to return a message identical to the original.
// NOTE(review): that trimming is the responsibility of FlowN; confirm it there
// before relying on it.
func Flow(text string, delSp bool) string {
return FlowN(text, delSp, rfc2646Width)
}

// FlowN encodes a text as "format=flowed", soft-wrapping at width n.
//
// Each input line (terminated by CRLF or a bare LF) is handled on its own:
//   - leading quote marks are counted and re-emitted, followed by a space, in
//     front of every wrapped piece of that line;
//   - unquoted pieces that start with a space, a quote mark or "From " are
//     space-stuffed;
//   - a piece is cut at the last break position that keeps it within n
//     characters. Without delSp a break position is a space, which stays at
//     the end of the piece. With delSp it is any character for which
//     isAlphaChar is false, and an extra space is added before the soft break;
//   - a word that does not fit is never split: the piece then extends to the
//     end of that word even if this exceeds n.
//
// Hard breaks are always written as CRLF. Spaces directly before a hard break
// are removed, otherwise the line would be read back as soft-broken; the
// signature separator "-- " is the one exception and keeps its space. Because
// of this trimming, decoding the result does not necessarily give back a text
// identical to the input.
func FlowN(text string, delSp bool, n int) string {
	var result strings.Builder
	lines := regSplitAfter(text)
	for i, line := range lines {
		// regSplitAfter leaves the terminator on each element; it is not part
		// of the content to wrap.
		if strings.HasSuffix(line, "\n") {
			line = strings.TrimSuffix(line[:len(line)-1], "\r")
		}
		// Decided before the quote marks are stripped, so that a line holding
		// nothing but quote marks is still emitted.
		notEmpty := len(line) > 0

		quoteDepth := 0
		for strings.HasPrefix(line, rfc2646Quote) {
			line = line[len(rfc2646Quote):]
			quoteDepth++
		}
		// Drop the space that separates the quote marks from the content.
		if quoteDepth > 0 && len(line) > 1 && strings.HasPrefix(line, rfc2646Space) {
			line = line[len(rfc2646Space):]
		}
		isSignature := line == rfc2646Signature

		var encoded strings.Builder
		for notEmpty {
			// extra is the length of the prefix added in front of the content;
			// a break inside that prefix would make no progress.
			extra := 0
			if quoteDepth > 0 {
				prefix := strings.Repeat(rfc2646Quote, quoteDepth) + rfc2646Space
				line = prefix + line
				extra = len(prefix)
			} else if strings.HasPrefix(line, rfc2646Space) ||
				strings.HasPrefix(line, rfc2646Quote) ||
				strings.HasPrefix(line, rfc2646From) {
				line = rfc2646Space + line
				extra = len(rfc2646Space)
			}

			// inWord reports whether line[k] is NOT a valid break position.
			inWord := func(k int) bool {
				if delSp {
					return isAlphaChar(line, k)
				}
				return line[k:k+1] != rfc2646Space
			}

			last := len(line) - 1
			j := n - 1
			if j >= len(line) {
				// The rest of the line fits.
				j = last
			} else {
				for j >= extra && inWord(j) {
					j--
				}
				if j < extra {
					// Not able to cut a word: skip to word end even if greater
					// than the max width. Start past the prefix so that at
					// least one content character is consumed.
					j = n - 1
					if j < extra {
						j = extra
					}
					if j > last {
						j = last
					}
					for j < last && inWord(j) {
						j++
					}
				}
			}

			encoded.WriteString(line[:j+1])
			if j < last {
				// Soft break.
				if delSp {
					encoded.WriteString(rfc2646Space)
				}
				encoded.WriteString(rfc2646CRLF)
			}
			line = line[j+1:]
			notEmpty = len(line) > 0
		}

		out := encoded.String()
		// Every element but the last one was followed by a terminator.
		hardBreak := i < len(lines)-1
		if hardBreak && !isSignature {
			// A space before a hard break would be read back as a soft break.
			out = strings.TrimRight(out, rfc2646Space)
		}
		result.WriteString(out)
		if hardBreak {
			result.WriteString(rfc2646CRLF)
		}
	}

	return result.String()
}

// isAlphaChar reports whether the byte at text[index] counts as part of a
// word, i.e. is an ASCII letter or an ASCII digit.
// RFC asserts a word cannot be split (even if the length is greater than the
// maximum length).
//
// Note: a list of chars is available here:
// http://www.zvon.org/tmRFC/RFC2646/Output/index.html
func isAlphaChar(text string, index int) bool {
	switch b := text[index]; {
	case 'a' <= b && b <= 'z':
		return true
	case 'A' <= b && b <= 'Z':
		return true
	case '0' <= b && b <= '9':
		return true
	default:
		return false
	}
}

// regSplitAfter splits s after every line terminator (CRLF or bare LF) and
// returns the pieces with their terminators still attached. The last element
// is whatever follows the final terminator, which is the empty string when s
// ends with one. A string without any terminator is returned as a single
// element.
func regSplitAfter(s string) []string {
	// Both terminators end in "\n", so cutting after each "\n" yields exactly
	// the split points of the pattern "\r\n|\n" without a regexp scan.
	return strings.SplitAfter(s, "\n")
}
25 changes: 25 additions & 0 deletions internal/stringutil/flow_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package stringutil

import "testing"

// TestFlowN checks that FlowN, without DelSp and at width 63, soft-wraps each
// paragraph at a space and keeps the hard breaks between paragraphs.
func TestFlowN(t *testing.T) {
	have := "`Take some more tea,' the March Hare said to Alice, very earnestly.\r\n\r\n`I've had nothing yet,' Alice replied in an offended tone, `so I can't take more.'\r\n\r\n`You mean you can't take LESS,' said the Hatter: `it's very easy to take MORE than nothing.'\r\n"
	want := "`Take some more tea,' the March Hare said to Alice, very \r\nearnestly.\r\n\r\n`I've had nothing yet,' Alice replied in an offended tone, `so \r\nI can't take more.'\r\n\r\n`You mean you can't take LESS,' said the Hatter: `it's very \r\neasy to take MORE than nothing.'\r\n"

	got := FlowN(have, false, 63)
	if got != want {
		// Quote both values so differences in spaces and line breaks are visible.
		t.Fatalf("FlowN output mismatch:\n got: %q\nwant: %q", got, want)
	}
}

//func TestDeflow(t *testing.T) {
// have := ">>>Take some more tea.\r\n>>T've had nothing yet, so I can't take more.\r\n>You mean you can't take LESS, it's very easy to take MORE than nothing.\r\n"
// want := " Take some more tea.\r\n T've had nothing yet, so I can't take more.\r\n You mean you can't take LESS, it's very easy to take \r\n MORE than nothing.\r\n"
// flowed := FlowN(have, false, 58)
// t.Logf("%q", flowed)
// t.Logf("\n%s", flowed)
// if flowed != want {
// t.Fatal("Flowed text output did not match expected result")
// }
//}
2 changes: 1 addition & 1 deletion testdata/encode/part-bin-content.golden
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,4 @@ rq+wsbKztLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm
ICEiIyQlJicoKSorLC0uLzAxMjM0NTY3ODk6Ozw9Pj9AQUJDREVGR0hJSktMTU5PUFFSU1RVVldY
WVpbXF1eX2BhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ent8fX5/gIGCg4SFhoeIiYqLjI2Oj5CR
kpOUlZaXmJmam5ydnp+goaKjpKWmp6ipqqusra6vsLGys7S1tre4ubq7vL2+v8DBwsPExcbHyMnK
y8zNzs8=
y8zNzs8=
2 changes: 1 addition & 1 deletion testdata/encode/part-bin-header.golden
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ X-Data: =?utf-8?b?AxfhfujropadladnggnfjgwsaiubvnmkadiuhterqHJSFfuAjkfhrqpeorLA?=

This is a test of a plain text part.

Another line.
Another line.
2 changes: 1 addition & 1 deletion testdata/encode/part-default-headers.golden
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ Content-Transfer-Encoding: base64
Content-Type: application/zip; boundary=enmime-abcdefg0123456789;
charset=binary; name=stuff.zip

WklQWklQWklQ
WklQWklQWklQ
2 changes: 1 addition & 1 deletion testdata/encode/part-plain.golden
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ Content-Type: text/plain; charset=utf-8

This is a test of a plain text part.

Another line.
Another line.
2 changes: 1 addition & 1 deletion testdata/encode/part-quoted-headers.golden
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ Content-Transfer-Encoding: base64
Content-Type: application/zip; boundary=enmime-abcdefg0123456789;
charset=binary; name="arvizturo \"x\" tukorfurogep.zip"

WklQWklQWklQ
WklQWklQWklQ