smtpsrv: Strict CRLF enforcement in DATA contents

The RFCs are very clear that in DATA contents:

> CR and LF MUST only occur together as CRLF; they MUST NOT appear
> independently in the body.

https://www.rfc-editor.org/rfc/rfc5322#section-2.3
https://www.rfc-editor.org/rfc/rfc5321#section-2.3.8

Allowing "independent" CR and LF can cause a number of problems.

In particular, there is a new "SMTP smuggling attack" published recently that involves the server incorrectly parsing the end of DATA marker `\r\n.\r\n`, which an attacker can exploit to impersonate a server when email is transmitted server-to-server.

https://www.postfix.org/smtp-smuggling.html
https://sec-consult.com/blog/detail/smtp-smuggling-spoofing-e-mails-worldwide/

Currently, chasquid is vulnerable to this attack, because Go's standard libraries net/textproto and net/mail do not enforce CRLF strictly.

This patch fixes the problem by introducing a new "dot reader" function that strictly enforces CRLF when reading dot-terminated data, used in the DATA input processing.

When an invalid newline terminator is found, the connection is aborted immediately because we cannot safely recover from that state.

We still keep the internal representation as LF-terminated for convenience and simplicity. However, the MDA courier is changed to pass CRLF-terminated lines, since that is an external program which could be strict when receiving email messages.

See #47 for more details and discussion.
albertito · Dec 24, 2023 · a996106 · a996106
1 parent e03594a
commit a996106
Show file tree

Hide file tree

Showing 15 changed files with 431 additions and 86 deletions.
diff --git a/internal/courier/mda.go b/internal/courier/mda.go
@@ -11,6 +11,7 @@ import (
 	"unicode"
 
 	"blitiri.com.ar/go/chasquid/internal/envelope"
+	"blitiri.com.ar/go/chasquid/internal/normalize"
 	"blitiri.com.ar/go/chasquid/internal/trace"
 )
 
@@ -60,7 +61,12 @@ func (p *MDA) Deliver(from string, to string, data []byte) (error, bool) {
 	ctx, cancel := context.WithTimeout(context.Background(), p.Timeout)
 	defer cancel()
 	cmd := exec.CommandContext(ctx, p.Binary, args...)
-	cmd.Stdin = bytes.NewReader(data)
+
+	// Pass the email data via stdin. Normalize it to CRLF which is what the
+	// RFC-compliant representation requires. By doing this at this end, we can
+	// keep a simpler internal representation and ensure there won't be any
+	// inconsistencies in newlines within the message (e.g. added headers).
+	cmd.Stdin = bytes.NewReader(normalize.ToCRLF(data))
 
 	output, err := cmd.CombinedOutput()
 	if ctx.Err() == context.DeadlineExceeded {

diff --git a/internal/normalize/normalize.go b/internal/normalize/normalize.go
@@ -3,6 +3,7 @@
 package normalize
 
 import (
+	"bytes"
 	"strings"
 
 	"blitiri.com.ar/go/chasquid/internal/envelope"
@@ -72,3 +73,23 @@ func DomainToUnicode(addr string) (string, error) {
 	domain, err := Domain(domain)
 	return user + "@" + domain, err
 }
+
+// ToCRLF returns a copy of in with every LF written out as CRLF. A line that
+// already ends in CRLF comes out unchanged. Every CR in the input is dropped,
+// so the input must not use CR on its own: such a CR is lost.
+func ToCRLF(in []byte) []byte {
+	b := bytes.NewBuffer(nil)
+	b.Grow(len(in))
+	for _, c := range in {
+		switch c {
+		case '\r':
+			// Drop the CR: the '\n' case below writes the whole "\r\n". It
+			// should never appear alone in the contexts where this is used.
+		case '\n':
+			b.Write([]byte("\r\n"))
+		default:
+			b.WriteByte(c)
+		}
+	}
+	return b.Bytes()
+}
diff --git a/internal/normalize/normalize_test.go b/internal/normalize/normalize_test.go
@@ -129,6 +129,22 @@ func TestDomainToUnicode(t *testing.T) {
 	}
 }
 
+func TestToCRLF(t *testing.T) {
+	cases := []struct {
+		in, out string
+	}{
+		{"", ""},
+		{"a\nb", "a\r\nb"},
+		{"a\r\nb", "a\r\nb"},
+	}
+	for _, c := range cases {
+		got := string(ToCRLF([]byte(c.in)))
+		if got != c.out {
+			t.Errorf("ToCRLF(%q) = %q, expected %q", c.in, got, c.out)
+		}
+	}
+}
+
 func FuzzUser(f *testing.F) {
 	f.Fuzz(func(t *testing.T, user string) {
 		User(user)

diff --git a/internal/smtpsrv/conn.go b/internal/smtpsrv/conn.go
@@ -11,7 +11,6 @@ import (
 	"math/rand"
 	"net"
 	"net/mail"
-	"net/textproto"
 	"os"
 	"os/exec"
 	"strconv"
@@ -312,6 +311,12 @@ loop:
 			if err != nil {
 				break
 			}
+		} else if code < 0 {
+			// Negative code means that we have to break the connection.
+			// TODO: This is hacky, it's probably worth it at this point to
+			// refactor this into using a custom response type.
+			c.tr.Errorf("%s closed the connection: %s", cmd, msg)
+			break
 		}
 	}
 
@@ -638,19 +643,19 @@ func (c *Conn) DATA(params string) (code int, msg string) {
 	// one, we don't want the command timeout to interfere.
 	c.conn.SetDeadline(c.deadline)
 
-	// Create a dot reader, limited to the maximum size.
-	dotr := textproto.NewReader(bufio.NewReader(
-		io.LimitReader(c.reader, c.maxDataSize))).DotReader()
-	c.data, err = io.ReadAll(dotr)
+	// Read the data. Enforce CRLF correctness, and maximum size.
+	c.data, err = readUntilDot(c.reader, c.maxDataSize)
 	if err != nil {
-		if err == io.ErrUnexpectedEOF {
-			// Message is too big already. But we need to keep reading until we see
-			// the "\r\n.\r\n", otherwise we will treat the remanent data that
-			// the user keeps sending as commands, and that's a security
-			// issue.
-			readUntilDot(c.reader)
+		if err == errMessageTooLarge {
+			// Message is too big; excess data has already been discarded.
 			return 552, "5.3.4 Message too big"
 		}
+		if err == errInvalidLineEnding {
+			// We can't properly recover from this, so we have to drop the
+			// connection.
+			c.writeResponse(521, "5.5.2 Error reading DATA: invalid line ending")
+			return -1, "Invalid line ending, closing connection"
+		}
 		return 554, fmt.Sprintf("5.4.0 Error reading DATA: %v", err)
 	}
 
@@ -952,24 +957,6 @@ func boolToStr(b bool) string {
 	return "0"
 }
 
-func readUntilDot(r *bufio.Reader) {
-	prevMore := false
-	for {
-		// The reader will not read more than the size of the buffer,
-		// so this doesn't cause increased memory consumption.
-		// The reader's data deadline will prevent this from continuing
-		// forever.
-		l, more, err := r.ReadLine()
-		if err != nil {
-			break
-		}
-		if !more && !prevMore && string(l) == "." {
-			break
-		}
-		prevMore = more
-	}
-}
-
 // STARTTLS SMTP command handler.
 func (c *Conn) STARTTLS(params string) (code int, msg string) {
 	if c.onTLS {

diff --git a/internal/smtpsrv/conn_test.go b/internal/smtpsrv/conn_test.go
@@ -1,10 +1,8 @@
 package smtpsrv
 
 import (
-	"bufio"
 	"net"
 	"os"
-	"strings"
 	"testing"
 
 	"blitiri.com.ar/go/chasquid/internal/domaininfo"
@@ -87,57 +85,6 @@ func TestIsHeader(t *testing.T) {
 	}
 }
 
-func TestReadUntilDot(t *testing.T) {
-	// This must be > than the minimum buffer size for bufio.Reader, which
-	// unfortunately is not available to us. The current value is 16, these
-	// tests will break if it gets increased, and the nonfinal cases will need
-	// to be adjusted.
-	size := 20
-	xs := "12345678901234567890"
-
-	final := []string{
-		"", ".", "..",
-		".\r\n", "\r\n.", "\r\n.\r\n",
-		".\n", "\n.", "\n.\n",
-		".\r", "\r.", "\r.\r",
-		xs + "\r\n.\r\n",
-		xs + "1234\r\n.\r\n",
-		xs + xs + "\r\n.\r\n",
-		xs + xs + xs + "\r\n.\r\n",
-		xs + "." + xs + "\n.",
-		xs + ".\n" + xs + "\n.",
-	}
-	for _, s := range final {
-		t.Logf("testing %q", s)
-		buf := bufio.NewReaderSize(strings.NewReader(s), size)
-		readUntilDot(buf)
-		if r := buf.Buffered(); r != 0 {
-			t.Errorf("%q: there are %d remaining bytes", s, r)
-		}
-	}
-
-	nonfinal := []struct {
-		s string
-		r int
-	}{
-		{".\na", 1},
-		{"\n.\na", 1},
-		{"\n.\nabc", 3},
-		{"\n.\n12345678", 8},
-		{"\n.\n" + xs, size - 3},
-		{"\n.\n" + xs + xs, size - 3},
-		{"\n.\n.\n", 2},
-	}
-	for _, c := range nonfinal {
-		t.Logf("testing %q", c.s)
-		buf := bufio.NewReaderSize(strings.NewReader(c.s), size)
-		readUntilDot(buf)
-		if r := buf.Buffered(); r != c.r {
-			t.Errorf("%q: expected %d remaining bytes, got %d", c.s, c.r, r)
-		}
-	}
-}
-
 func TestAddrLiteral(t *testing.T) {
 	// TCP addresses.
 	casesTCP := []struct {

diff --git a/internal/smtpsrv/dotreader.go b/internal/smtpsrv/dotreader.go
@@ -0,0 +1,111 @@
+package smtpsrv
+
+import (
+	"bufio"
+	"bytes"
+	"errors"
+	"io"
+)
+
+var (
+	// TODO: Include the line number and specific error, and have the
+	// caller add them to the trace.
+	errMessageTooLarge   = errors.New("message too large")
+	errInvalidLineEnding = errors.New("invalid line ending")
+)
+
+// readUntilDot reads from r up to the final "\r\n.\r\n" (or a leading
+// ".\r\n"). Lines must end in "\r\n"; a "lonely" "\r" or "\n" returns
+// errInvalidLineEnding. Input over max bytes is still read up to the final
+// dot, and then errMessageTooLarge is returned. The result has \n-terminated
+// lines, our internal representation (same as textproto DotReader does).
+func readUntilDot(r *bufio.Reader, max int64) ([]byte, error) {
+	buf := make([]byte, 0, 1024)
+	n := int64(0)
+
+	// Little state machine.
+	const (
+		prevOther = iota
+		prevCR
+		prevCRLF
+	)
+	// Start as if we just came from a '\r\n'; that way we avoid the need
+	// for special-casing the dot-stuffing at the very beginning.
+	prev := prevCRLF
+	last4 := make([]byte, 4)
+	skip := false
+
+loop:
+	for {
+		b, err := r.ReadByte()
+		if err == io.EOF {
+			return buf, io.ErrUnexpectedEOF
+		} else if err != nil {
+			return buf, err
+		}
+		n++
+
+		switch b {
+		case '\r':
+			if prev == prevCR {
+				return buf, errInvalidLineEnding
+			}
+			prev = prevCR
+			// We return a LF-terminated line, so skip the CR. This simplifies
+			// internal representation and makes it easier/less error prone to
+			// work with. It is converted back to CRLF on endpoints (e.g. in
+			// the couriers).
+			skip = true
+		case '\n':
+			if prev != prevCR {
+				return buf, errInvalidLineEnding
+			}
+			// If we come from a '\r\n.\r', we're done.
+			if bytes.Equal(last4, []byte("\r\n.\r")) {
+				break loop
+			}
+
+			// If we are only starting and see ".\r\n", we're also done; in
+			// that case the message is empty.
+			if n == 3 && bytes.Equal(last4, []byte("\x00\x00.\r")) {
+				return []byte{}, nil
+			}
+			prev = prevCRLF
+		default:
+			if prev == prevCR {
+				return buf, errInvalidLineEnding
+			}
+			if b == '.' && prev == prevCRLF {
+				// We come from "\r\n" and got a "."; as per dot-stuffing
+				// rules, we should skip that '.' in the output.
+				// https://www.rfc-editor.org/rfc/rfc5321#section-4.5.2
+				skip = true
+			}
+			prev = prevOther
+		}
+
+		// Keep the last 4 raw bytes separately: buf omits the "\r"s and
+		// stuffed dots, and stops growing on messages that are too large.
+		copy(last4, last4[1:])
+		last4[3] = b
+
+		if int64(len(buf)) < max && !skip {
+			buf = append(buf, b)
+		}
+		skip = false
+	}
+
+	// Return an error if the message is too large. It is important to do this
+	// _outside_ the loop, because we need to keep reading until we get to the
+	// final "." before we return an error, so the SMTP dialog can continue
+	// properly after that.
+	// If we return too early, the remainder of the email is interpreted as
+	// part of the SMTP dialog (and exposing ourselves to smuggling attacks).
+	if n > max {
+		return buf, errMessageTooLarge
+	}
+
+	// If we made it this far, buf naturally ends in "\n" because we skipped
+	// the '.' due to dot-stuffing, and skip "\r"s.
+	return buf, nil
+}