Parsing non-utf8 multipart/alternative message

This commit is contained in:
Michal Horejsek 2020-09-24 13:09:19 +02:00
parent ef1671d4ab
commit d956b04062
4 changed files with 55 additions and 0 deletions

View File

@ -6,6 +6,7 @@ Changelog [format](http://keepachangelog.com/en/1.0.0/)
### Fixed
* GODT-752 Parsing message with empty CC.
* GODT-752 Parsing non-utf8 multipart/alternative message.
## [IE 1.1.x] Danube (v1.1.0 beta 2020-09-XX)

View File

@ -102,6 +102,9 @@ func convertForeignEncodings(p *parser.Parser) error {
RegisterContentTypeHandler("text/.*", func(p *parser.Part) error {
return p.ConvertToUTF8()
}).
RegisterContentTypeHandler("multipart/alternative", func(p *parser.Part) error {
return p.ConvertToUTF8()
}).
RegisterDefaultHandler(func(p *parser.Part) error {
t, _, _ := p.Header.ContentType()
logrus.WithField("type", t).Trace("Not converting part to utf-8")

View File

@ -431,6 +431,27 @@ func TestParseMultipartAlternativeNested(t *testing.T) {
assert.Equal(t, "*multipart 2.1*\n\n", plainBody)
}
// TestParseMultipartAlternativeLatin1 parses the fixture
// multipart_alternative_latin1.eml, a multipart/alternative message whose
// top-level Content-Type declares charset="iso-8859-1", and checks that
// Parse succeeds and yields the expected sender, recipient, HTML body and
// plain-text body. The expected HTML body carries a UTF-8 charset meta tag
// even though the fixture's HTML part declares iso-8859-1.
func TestParseMultipartAlternativeLatin1(t *testing.T) {
	f := getFileReader("multipart_alternative_latin1.eml")

	m, _, plainBody, _, err := Parse(f, "", "")
	require.NoError(t, err)

	assert.Equal(t, `"schizofrenic" <schizofrenic@pm.me>`, m.Sender.String())

	// Stop here with a readable failure if the recipient list does not hold
	// exactly the one address from the fixture; indexing an empty slice
	// below would panic instead of reporting an assertion failure.
	require.Len(t, m.ToList, 1)
	assert.Equal(t, `<pmbridgeietest@outlook.com>`, m.ToList[0].String())

	assert.Equal(t, `<html><head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
</head>
<body>
<b>aoeuaoeu</b>
</body></html>`, m.Body)

	assert.Equal(t, "*aoeuaoeu*\n\n", plainBody)
}
func getFileReader(filename string) io.Reader {
f, err := os.Open(filepath.Join("testdata", filename))
if err != nil {

View File

@ -0,0 +1,30 @@
To: pmbridgeietest@outlook.com
From: schizofrenic <schizofrenic@pm.me>
Subject: aoeuaoeu
Date: Thu, 30 Jul 2020 13:35:24 +0200
MIME-Version: 1.0
Content-Type: multipart/alternative; boundary="------------22BC647264E52252E386881A"; charset="iso-8859-1"
Content-Language: en-US
This is a multi-part message in MIME format.
--------------22BC647264E52252E386881A
Content-Type: text/plain
Content-Transfer-Encoding: 7bit
*aoeuaoeu*
--------------22BC647264E52252E386881A
Content-Type: text/html
Content-Transfer-Encoding: 7bit
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=iso-8859-1">
</head>
<body>
<b>aoeuaoeu</b>
</body>
</html>
--------------22BC647264E52252E386881A--