fix(GODT-3151): Only modify HTML Meta content if UTF-8 charset override is needed.

This commit is contained in:
Romain LE JEUNE 2023-12-04 10:37:19 +01:00
parent a8f270405f
commit 9efaf9184c
No known key found for this signature in database
GPG Key ID: 664A57E2F9CD8118
4 changed files with 60 additions and 15 deletions

View File

@ -137,15 +137,15 @@ func (p *Part) ConvertMetaCharset() error {
if val, ok := sel.Attr("content"); ok {
t, params, err := pmmime.ParseMediaType(val)
if err != nil {
logrus.WithField("pkg", "parser").WithError(err).Error("Meta tag parsing fails.")
return
}
if charset, ok := params["charset"]; ok && charset != utf8Charset {
params["charset"] = utf8Charset
sel.SetAttr("content", mime.FormatMediaType(t, params))
metaModified = true
}
sel.SetAttr("content", mime.FormatMediaType(t, params))
metaModified = true
}
if charset, ok := sel.Attr("charset"); ok && charset != utf8Charset {

View File

@ -18,6 +18,7 @@
package parser
import (
"reflect"
"strconv"
"strings"
"testing"
@ -71,3 +72,45 @@ func getSectionNumber(s string) (part []int) {
return
}
// TestPart_ConvertMetaCharset runs ConvertMetaCharset over a table of HTML
// bodies and checks two things per case: whether an error is returned, and
// whether the part body is byte-for-byte unchanged afterwards.
//
// The table expects the body to stay untouched when there is no meta tag,
// when the meta tag carries no charset, and when the charset is already
// UTF-8; it expects the body to differ when the charset is UTF-7.
func TestPart_ConvertMetaCharset(t *testing.T) {
	type testCase struct {
		name     string // subtest name
		body     string // HTML used as the part body
		wantErr  bool   // whether ConvertMetaCharset should return an error
		wantSame bool   // whether the body should be unchanged after the call
	}

	cases := []testCase{
		{
			name:     "html no meta",
			body:     "<body></body>",
			wantErr:  false,
			wantSame: true,
		},
		{
			name:     "html meta no charset",
			body:     "<header><meta name=ProgId content=Word.Document></header><body><meta></body>",
			wantErr:  false,
			wantSame: true,
		},
		{
			name:     "html meta UTF-8 charset",
			body:     "<header><meta charset=UTF-8></header><body><meta></body>",
			wantErr:  false,
			wantSame: true,
		},
		{
			name:     "html meta not UTF-8 charset",
			body:     "<header><meta charset=UTF-7></header><body><meta></body>",
			wantErr:  false,
			wantSame: false,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Keep an independent copy of the input to compare against after the call.
			original := []byte(tc.body)
			part := Part{Body: []byte(tc.body)}

			err := part.ConvertMetaCharset()
			gotErr := err != nil
			assert.Equal(t, tc.wantErr, gotErr)

			unchanged := reflect.DeepEqual(original, part.Body)
			assert.Equal(t, tc.wantSame, unchanged)
		})
	}
}

View File

@ -552,14 +552,15 @@ func TestParseMultipartAlternative(t *testing.T) {
assert.Equal(t, `"schizofrenic" <schizofrenic@pm.me>`, m.Sender.String())
assert.Equal(t, `<pmbridgeietest@outlook.com>`, m.ToList[0].String())
assert.Equal(t, `<html><head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
assert.Equal(t, `<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
</head>
<body>
<b>aoeuaoeu</b>
</body></html>`, string(m.RichBody))
</body>
</html>
`, string(m.RichBody))
assert.Equal(t, "*aoeuaoeu*\n\n", string(m.PlainBody))
}
@ -573,14 +574,15 @@ func TestParseMultipartAlternativeNested(t *testing.T) {
assert.Equal(t, `"schizofrenic" <schizofrenic@pm.me>`, m.Sender.String())
assert.Equal(t, `<pmbridgeietest@outlook.com>`, m.ToList[0].String())
assert.Equal(t, `<html><head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
assert.Equal(t, `<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
</head>
<body>
<b>multipart 2.2</b>
</body></html>`, string(m.RichBody))
</body>
</html>
`, string(m.RichBody))
assert.Equal(t, "*multipart 2.1*\n\n", string(m.PlainBody))
}

View File

@ -261,13 +261,13 @@ func ParseMediaType(v string) (string, map[string]string, error) {
}
decoded, err := DecodeHeader(v)
if err != nil {
logrus.WithField("value", v).WithError(err).Error("Media Type parsing error.")
logrus.WithField("value", v).WithField("pkg", "mime").WithError(err).Error("Cannot decode Headers.")
return "", nil, err
}
v, _ = changeEncodingAndKeepLastParamDefinition(decoded)
mediatype, params, err := mime.ParseMediaType(v)
if err != nil {
logrus.WithField("value", v).WithError(err).Error("Media Type parsing error.")
logrus.WithField("value", v).WithField("pkg", "mime").WithError(err).Error("Media Type parsing error.")
return "", nil, err
}
return mediatype, params, err