fix(GODT-2966): Allow permissive parsing of MediaType parameters for import.

This commit is contained in:
Romain Le Jeune 2023-10-09 15:14:51 +00:00
parent d7dadd7578
commit 5c7e4e04f9
7 changed files with 110 additions and 61 deletions

View File

@ -547,8 +547,8 @@ func parseAttachment(h message.Header, body []byte) (Attachment, error) {
return Attachment{}, err
}
att.Header = mimeHeader
mimeType, mimeTypeParams, err := pmmime.ParseMediaType(h.Get("Content-Type"))
mimeType, mimeTypeParams, err := h.ContentType()
if err != nil {
return Attachment{}, err
}
@ -558,7 +558,8 @@ func parseAttachment(h message.Header, body []byte) (Attachment, error) {
// Prefer attachment name from filename param in content disposition.
// If not available, try to get it from name param in content type.
// Otherwise fallback to attachment.bin.
if disp, dispParams, err := h.ContentDisposition(); err == nil {
disp, dispParams, err := pmmime.ParseMediaType(h.Get("Content-Disposition"))
if err == nil {
att.Disposition = proton.Disposition(disp)
if filename, ok := dispParams["filename"]; ok {
@ -585,7 +586,7 @@ func parseAttachment(h message.Header, body []byte) (Attachment, error) {
// (This is necessary because some clients don't set Content-Disposition at all,
// so we need to rely on other information to deduce if it's inline or attachment.)
if h.Has("Content-Disposition") {
disp, _, err := h.ContentDisposition()
disp, _, err := pmmime.ParseMediaType(h.Get("Content-Disposition"))
if err != nil {
return Attachment{}, err
}

View File

@ -539,6 +539,18 @@ func TestParseMultipartAlternativeLatin1(t *testing.T) {
assert.Equal(t, "*aoeuaoeu*\n\n", string(m.PlainBody))
}
func TestParseMultipartAttachmentEncodedButUnquoted(t *testing.T) {
f := getFileReader("multipart_attachment_encoded_no_quote.eml")
p, err := parser.New(f)
require.NoError(t, err)
m, err := ParseWithParser(p, false)
require.NoError(t, err)
assert.Equal(t, `"Bridge Test" <bridgetest@pm.test>`, m.Sender.String())
assert.Equal(t, `"Internal Bridge" <bridgetest@protonmail.com>`, m.ToList[0].String())
}
func TestParseWithTrailingEndOfMailIndicator(t *testing.T) {
f := getFileReader("text_html_trailing_end_of_mail.eml")

View File

@ -0,0 +1,27 @@
From: Bridge Test <bridgetest@pm.test>
Date: 01 Jan 1980 00:00:00 +0000
To: Internal Bridge <bridgetest@protonmail.com>
Subject: Message with attachment name
Content-type: multipart/mixed; boundary="boundary"
Received: by 2002:0:0:0:0:0:0:0 with SMTP id 0123456789abcdef; Wed, 30 Dec 2020 01:23:45 0000
This is a multi-part message in MIME format.
--boundary
Content-Type: text/plain
Hello
--boundary
Content-Type: text/html; charset=utf-8
Content-Transfer-Encoding: 7bit
<h1> HELLO </h1>
--boundary
Content-Type: application/pdf; name==?US-ASCII?Q?filename?=
Content-Disposition: attachment; filename==?US-ASCII?Q?filename?=
somebytes
--boundary--

View File

@ -256,6 +256,10 @@ func DecodeCharset(original []byte, contentType string) ([]byte, error) {
// ParseMediaType from MIME doesn't support RFC2231 for non asci / utf8 encodings so we have to pre-parse it.
func ParseMediaType(v string) (mediatype string, params map[string]string, err error) {
v, _ = changeEncodingAndKeepLastParamDefinition(v)
decoded, err := DecodeHeader(v)
if err != nil {
return "", nil, err
}
v, _ = changeEncodingAndKeepLastParamDefinition(decoded)
return mime.ParseMediaType(v)
}

View File

@ -93,9 +93,9 @@ Feature: IMAP import messages
},
{
"content-type": "application/pdf",
"content-type-name": <filename>,
"content-type-name": "filename",
"content-disposition": "attachment",
"content-disposition-filename": <filename>,
"content-disposition-filename": "filename",
"body-is": "somebytes"
}
]
@ -103,10 +103,11 @@ Feature: IMAP import messages
}
"""
Examples:
| message | filename |
| "multipart/mixed_with_attachment_encoded.eml" | "=?US-ASCII?Q?filename?=" |
# | "multipart/mixed_with_attachment_encoded_no_quote.eml" | =?US-ASCII?Q?filename?= | @todo GODT-2966
# | "multipart/mixed_with_attachment_no_quote.eml" | "filename" | @todo GODT-2966
| message |
| "multipart/mixed_with_attachment_encoded.eml" |
| "multipart/mixed_with_attachment_encoded_no_quote.eml" |
| "multipart/mixed_with_attachment_no_quote.eml" |
# The message is imported as UTF-8 and the content type is determined at build time.
Scenario: Import message as latin1 without content type

View File

@ -141,6 +141,28 @@ Feature: SMTP sending with attachment
}
}
"""
And IMAP client "1" eventually sees the following message in "Sent" with this structure:
"""
{
"subject": "Test with cyrillic attachment",
"body-contains": "Shake that body",
"content": {
"content-type": "multipart/mixed",
"sections":[
{
"content-type": "text/plain",
"body-is": "Shake that body"
},
{
"content-type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"content-type-name": "АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЧЏЗШ.docx",
"content-disposition": "attachment",
"content-disposition-filename": "АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЧЏЗШ.docx"
}
]
}
}
"""
Scenario Outline: Send message with attachment <UseCase>
@ -193,29 +215,5 @@ Feature: SMTP sending with attachment
Examples:
| UseCase | filename |
| encoded quoted | "=?US-ASCII?Q?filename?=" |
# | non quoted | filename | @todo GODT-2974
Scenario: Send message with attachment with name unquoted containing special character
When SMTP client "1" sends the following message from "[user:user1]@[domain]" to "[user:user2]@[domain]":
"""
Subject: Message with attachment name
Content-type: multipart/mixed; boundary="boundary"
Received: by 2002:0:0:0:0:0:0:0 with SMTP id 0123456789abcdef; Wed, 30 Dec 2020 01:23:45 0000
This is a multi-part message in MIME format.
--boundary
Content-Type: text/plain
Hello
--boundary
Content-Type: application/pdf; name==?US-ASCII?Q?filename?=
Content-Disposition: attachment; filename==?US-ASCII?Q?filename?=
somebytes
--boundary--
"""
Then it fails
And bridge reports a message with "failed to collect attachments: mime: invalid media parameter"
| encoded unquoted | =?US-ASCII?Q?filename?= |
| non quoted | filename |

View File

@ -29,6 +29,8 @@ import (
"github.com/ProtonMail/gluon/rfc822"
"github.com/ProtonMail/proton-bridge/v3/pkg/message"
"github.com/ProtonMail/proton-bridge/v3/pkg/message/parser"
pmmime "github.com/ProtonMail/proton-bridge/v3/pkg/mime"
"github.com/bradenaw/juniper/xslices"
"github.com/cucumber/messages-go/v16"
"github.com/emersion/go-imap"
@ -202,10 +204,16 @@ func newMessageStructFromIMAP(msg *imap.Message) MessageStruct {
panic(err)
}
m, err := message.Parse(bytes.NewReader(literal))
parser, err := parser.New(bytes.NewReader(literal))
if err != nil {
panic(err)
}
m, err := message.ParseWithParser(parser, true)
if err != nil {
panic(err)
}
var body string
switch {
case m.MIMEType == rfc822.TextPlain:
@ -245,34 +253,23 @@ func formatAddressList(list []*imap.Address) string {
}
func parseMessageSection(literal []byte, body string) MessageSection {
mimeType, boundary, charset, name := parseContentType(literal)
headers, err := rfc822.Parse(literal).ParseHeader()
if err != nil {
panic(err)
}
msgSect := MessageSection{
ContentType: string(mimeType),
ContentTypeBoundary: boundary,
ContentTypeCharset: charset,
ContentTypeName: name,
TransferEncoding: headers.Get("content-transfer-encoding"),
BodyIs: body,
}
mimeType, boundary, charset, name := parseContentType(headers.Get("Content-Type"))
disp, filename := parseContentDisposition(headers.Get("Content-Disposition"))
contentDisposition := bytes.Split([]byte(headers.Get("content-disposition")), []byte(";"))
for id, value := range contentDisposition {
if id == 0 {
msgSect.ContentDisposition = strings.TrimSpace(string(value))
continue
}
param := bytes.Split(value, []byte("="))
if strings.TrimSpace(string(param[0])) == "filename" && len(param) >= 2 {
_, filename, _ := strings.Cut(string(value), "filename=")
filename = strings.Trim(filename, "\"")
msgSect.ContentDispositionFilename = strings.TrimSpace(filename)
}
msgSect := MessageSection{
ContentType: mimeType,
ContentTypeBoundary: boundary,
ContentTypeCharset: charset,
ContentTypeName: name,
ContentDisposition: disp,
ContentDispositionFilename: filename,
TransferEncoding: headers.Get("content-transfer-encoding"),
BodyIs: body,
}
if msgSect.ContentTypeBoundary != "" {
@ -294,8 +291,8 @@ func parseMessageSection(literal []byte, body string) MessageSection {
return msgSect
}
func parseContentType(literal []byte) (rfc822.MIMEType, string, string, string) {
mimeType, params, err := rfc822.Parse(literal).ContentType()
func parseContentType(contentType string) (string, string, string, string) {
mimeType, params, err := pmmime.ParseMediaType(contentType)
if err != nil {
panic(err)
}
@ -314,6 +311,15 @@ func parseContentType(literal []byte) (rfc822.MIMEType, string, string, string)
return mimeType, boundary, charset, name
}
func parseContentDisposition(contentDisp string) (string, string) {
disp, params, _ := pmmime.ParseMediaType(contentDisp)
name, ok := params["filename"]
if !ok {
name = ""
}
return disp, name
}
func matchMessages(have, want []Message) error {
slices.SortFunc(have, func(a, b Message) bool {
return a.Subject < b.Subject