fix: duplicate charset param

This commit is contained in:
James Houlahan 2020-09-24 14:18:05 +02:00
parent a89a3f6612
commit 15c1d7bc24
9 changed files with 44 additions and 10 deletions

View File

@ -7,6 +7,7 @@ Changelog [format](http://keepachangelog.com/en/1.0.0/)
### Fixed
* GODT-752 Parsing message with empty CC.
* GODT-752 Parsing non-utf8 multipart/alternative message.
* GODT-752 Parsing message with duplicate charset parameter.
## [IE 1.1.x] Danube (v1.1.0 beta 2020-09-XX)

View File

@ -103,7 +103,7 @@ func convertForeignEncodings(p *parser.Parser) error {
return p.ConvertToUTF8()
}).
RegisterDefaultHandler(func(p *parser.Part) error {
t, params, _ := p.Header.ContentType()
t, params, _ := p.ContentType()
// multipart/alternative, for example, can contain extra charset.
if params != nil && params["charset"] != "" {
return p.ConvertToUTF8()
@ -297,7 +297,7 @@ func allPartsHaveContentType(parts parser.Parts, contentType string) bool {
}
for _, part := range parts {
t, _, err := part.Header.ContentType()
t, _, err := part.ContentType()
if err != nil {
return false
}
@ -333,7 +333,7 @@ func determineMIMEType(p *parser.Parser) (string, error) {
// getPlainBody returns the body of the given part, converting html to
// plaintext where possible.
func getPlainBody(part *parser.Part) []byte {
contentType, _, err := part.Header.ContentType()
contentType, _, err := part.ContentType()
if err != nil {
return part.Body
}

View File

@ -17,7 +17,9 @@
package parser
import "regexp"
import (
"regexp"
)
type HandlerFunc func(*Part) error
@ -35,7 +37,7 @@ func (h *handler) matchType(p *Part) bool {
return false
}
t, _, err := p.Header.ContentType()
t, _, err := p.ContentType()
if err != nil {
t = ""
}

View File

@ -40,6 +40,18 @@ type Part struct {
children Parts
}
// ContentType returns the media type and parameters from the part's
// Content-Type header.
//
// The header is first parsed with go-message's Header.ContentType. That
// implementation doesn't handle duplicate parameters, e.g.
//
//	Content-Type: text/plain; charset=utf-8; charset=UTF-8
//
// so if it reports an error, the raw header value is parsed again with
// pmmime's implementation, which does, and that result is returned instead.
func (p *Part) ContentType() (string, map[string]string, error) {
	if mediaType, mediaParams, err := p.Header.ContentType(); err == nil {
		return mediaType, mediaParams, nil
	}

	// Fall back to pmmime's parser on the raw header value.
	return pmmime.ParseMediaType(p.Header.Get("Content-Type"))
}
func (p *Part) Child(n int) (part *Part, err error) {
if len(p.children) < n {
return nil, errors.New("no such part")
@ -72,7 +84,7 @@ func (p *Part) AddChild(child *Part) {
func (p *Part) ConvertToUTF8() error {
logrus.Trace("Converting part to utf-8")
t, params, err := p.Header.ContentType()
t, params, err := p.ContentType()
if err != nil {
return err
}
@ -163,7 +175,7 @@ func (p *Part) is7BitClean() bool {
}
func (p *Part) isMultipartMixed() bool {
t, _, err := p.Header.ContentType()
t, _, err := p.ContentType()
if err != nil {
return false
}

View File

@ -49,7 +49,7 @@ func TestPart(t *testing.T) {
part, err := p.Section(getSectionNumber(partNumber))
require.NoError(t, err)
contType, _, err := part.Header.ContentType()
contType, _, err := part.ContentType()
require.NoError(t, err)
assert.Equal(t, wantContType, contType)
}

View File

@ -58,7 +58,7 @@ func (v *Visitor) Visit() (interface{}, error) {
}
func (v *Visitor) visit(p *Part) (interface{}, error) {
t, _, err := p.Header.ContentType()
t, _, err := p.ContentType()
if err != nil {
return nil, err
}

View File

@ -259,6 +259,21 @@ func TestParseTextPlainWithImageInline(t *testing.T) {
assert.Equal(t, 8, img.Height)
}
// TestParseTextPlainWithDuplicateCharset parses the fixture
// text_plain_duplicate_charset.eml and checks that parsing succeeds and that
// the sender, first recipient, body and plain body come out as expected, with
// no attachment readers.
func TestParseTextPlainWithDuplicateCharset(t *testing.T) {
	reader := getFileReader("text_plain_duplicate_charset.eml")

	msg, _, plainText, attachments, err := Parse(reader, "", "")
	require.NoError(t, err)

	// Addresses.
	assert.Equal(t, `"Sender" <sender@pm.me>`, msg.Sender.String())
	assert.Equal(t, `"Receiver" <receiver@pm.me>`, msg.ToList[0].String())

	// Body content.
	assert.Equal(t, "body", msg.Body)
	assert.Equal(t, "body", plainText)

	// Attachments.
	assert.Len(t, attachments, 0)
}
func TestParseWithMultipleTextParts(t *testing.T) {
f := getFileReader("multiple_text_parts.eml")

View File

@ -0,0 +1,5 @@
From: Sender <sender@pm.me>
To: Receiver <receiver@pm.me>
Content-Type: text/plain; charset=utf-8; charset=UTF-8
body

View File

@ -4,7 +4,6 @@ Feature: IMAP import messages
And there is IMAP client logged in as "user"
And there is IMAP client selected in "INBOX"
@ignore
Scenario: Import message with double charset in content type
When IMAP client imports message to "INBOX"
"""