feat: [GODT-360] detect charset embedded in html and xml

2020-05-28 12:36:42 +02:00 · 2020-05-28 12:36:42 +02:00 · 9e633400b0
parent 84d344cb0a
commit 9e633400b0
8 changed files with 65 additions and 48 deletions
--- a/Changelog.md
+++ b/Changelog.md
@ -4,6 +4,9 @@ Changelog [format](http://keepachangelog.com/en/1.0.0/)

 ## Unreleased

+### Added
+* GODT-360 Detect charset embedded in html/xml.
+
 ### Changed
 * GODT-388 Support for both bridge and import/export credentials by package users.
 * GODT-387 Store factory to make store optional.
--- a/go.mod
+++ b/go.mod
@ -27,7 +27,6 @@ require (
 	github.com/chzyer/logex v1.1.10 // indirect
 	github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 // indirect
 	github.com/cucumber/godog v0.8.1
-	github.com/danieljoos/wincred v1.0.2 // indirect
 	github.com/emersion/go-imap v0.0.0-20200415151653-89df427d2794
 	github.com/emersion/go-imap-appendlimit v0.0.0-20190308131241-25671c986a6a
 	github.com/emersion/go-imap-idle v0.0.0-20190519112320-2704abd7050e
--- a/go.sum
+++ b/go.sum
@ -5,8 +5,8 @@ github.com/ProtonMail/bcrypt v0.0.0-20170924085257-7509ea014998 h1:YT2uVwQiRQZxC
 github.com/ProtonMail/bcrypt v0.0.0-20170924085257-7509ea014998/go.mod h1:HecWFHognK8GfRDGnFQbW/LiV7A3MX3gZVs45vk5h8I=
 github.com/ProtonMail/crypto v0.0.0-20190604143603-d3d8a14a4d4f h1:cFhATQTJGK2iZ0dc+jRhr75mh6bsc5Ug6NliaBya8Kw=
 github.com/ProtonMail/crypto v0.0.0-20190604143603-d3d8a14a4d4f/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
-github.com/ProtonMail/docker-credential-helpers v1.0.0 h1:0DQXbZNvUszWgXUuP7TzvQdwnkK1D5Zf/glBgCFJFCk=
-github.com/ProtonMail/docker-credential-helpers v1.0.0/go.mod h1:R1gQindzdYFcWJuuGXteYHDJzUCVtyU+EpEqp9aWcFs=
+github.com/ProtonMail/docker-credential-helpers v1.1.0 h1:+kvUIpwWcbtP3WFv5sSvkFn/XLzSqPOB5AAthuk9xPk=
+github.com/ProtonMail/docker-credential-helpers v1.1.0/go.mod h1:mK0aBveCxhnQ756AmaTfXMZDeULvheYVhF/MWMErN5g=
 github.com/ProtonMail/go-appdir v1.1.0 h1:9hdNDlU9kTqRKVNzmoqah8qqrj5QZyLByQdwQNlFWig=
 github.com/ProtonMail/go-appdir v1.1.0/go.mod h1:3d8Y9F5mbEUjrYbcJ3rcDxcWbqbttF+011nVZmdRdzc=
 github.com/ProtonMail/go-apple-mobileconfig v0.0.0-20160701194735-7ea9927a11f6 h1:YsSJ/mvZFYydQm/hRrt8R8UtgETixN2y3LK98f5LT60=
@ -43,8 +43,8 @@ github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d h1:U+s90UTSY
 github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
 github.com/cucumber/godog v0.8.1 h1:lVb+X41I4YDreE+ibZ50bdXmySxgRviYFgKY6Aw4XE8=
 github.com/cucumber/godog v0.8.1/go.mod h1:vSh3r/lM+psC1BPXvdkSEuNjmXfpVqrMGYAElF6hxnA=
-github.com/danieljoos/wincred v1.0.2 h1:zf4bhty2iLuwgjgpraD2E9UbvO+fe54XXGJbOwe23fU=
-github.com/danieljoos/wincred v1.0.2/go.mod h1:SnuYRW9lp1oJrZX/dXJqr0cPK5gYXqx3EJbmjhLdK9U=
+github.com/danieljoos/wincred v1.1.0 h1:3RNcEpBg4IhIChZdFRSdlQt1QjCp1sMAPIrOnm7Yf8g=
+github.com/danieljoos/wincred v1.1.0/go.mod h1:XYlo+eRTsVA9aHGp7NGjFkPla4m+DCL7hqDjlFjiygg=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
--- a/pkg/message/parser.go
+++ b/pkg/message/parser.go
@ -106,7 +106,7 @@ func combineParts(m *pmapi.Message, parts []io.Reader, headers []textproto.MIMEH
 			if b, err = ioutil.ReadAll(d); err != nil {
 				continue
 			}
-			b, err = pmmime.DecodeCharset(b, params)
+			b, err = pmmime.DecodeCharset(b, contentType)
 			if err != nil {
 				log.Warn("Decode charset error: ", err)
 				return false, err
--- a/pkg/mime/encoding.go
+++ b/pkg/mime/encoding.go
@ -29,8 +29,9 @@ import (
 	"encoding/base64"

 	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+	"golang.org/x/net/html/charset"
 	"golang.org/x/text/encoding"
-	"golang.org/x/text/encoding/charmap"
 	"golang.org/x/text/encoding/htmlindex"
 )

@ -197,18 +198,26 @@ func EncodeHeader(s string) string {
 }

 // DecodeCharset decodes the orginal using content type parameters.
-// When charset is missing it checks that the content is valid utf8.
-// If it isn't, it checks whether the content is valid latin1 (iso-8859-1), and if so,
-// reencodes it as utf-8.
-func DecodeCharset(original []byte, contentTypeParams map[string]string) ([]byte, error) {
-	// If the charset is specified, use that.
-	if charset, ok := contentTypeParams["charset"]; ok {
-		decoder, err := selectDecoder(charset)
+// If the charset parameter is missing, it checks whether the content is valid utf8.
+// If the content isn't valid utf8, it looks for a charset declared inside the html/xml itself.
+// If no charset can be determined, it falls back to windows-1252.
+// The content is then reencoded as utf-8.
+func DecodeCharset(original []byte, contentType string) ([]byte, error) {
+	// If the contentType itself is specified, use that.
+	if contentType != "" {
+		_, params, err := ParseMediaType(contentType)
 		if err != nil {
-			return original, errors.Wrap(err, "unknown charset was specified")
+			return nil, err
 		}

-		return decoder.Bytes(original)
+		if charset, ok := params["charset"]; ok {
+			decoder, err := selectDecoder(charset)
+			if err != nil {
+				return original, errors.Wrap(err, "unknown charset was specified")
+			}
+
+			return decoder.Bytes(original)
+		}
 	}

 	// The charset was not specified. First try utf8.
@ -216,16 +225,22 @@ func DecodeCharset(original []byte, contentTypeParams map[string]string) ([]byte
 		return original, nil
 	}

-	// Fallback to latin1.
-	// In future this should fallback to whatever default encoding user specified.
-	decoded, err := charmap.ISO8859_1.NewDecoder().Bytes(original)
-	if err != nil {
-		return original, errors.Wrap(err, "failed to decode as latin1")
+	// encoding will be windows-1252 if it can't be determined properly.
+	encoding, name, certain := charset.DetermineEncoding(original, contentType)
+
+	if !certain {
+		logrus.WithField("encoding", name).Warn("Determined encoding but was not certain")
 	}

-	// If the decoded string is not valid utf8, it wasn't latin1, so give up.
+	// Reencode as UTF-8.
+	decoded, err := encoding.NewDecoder().Bytes(original)
+	if err != nil {
+		return original, errors.Wrap(err, "failed to decode as windows-1252")
+	}
+
+	// If the decoded string is not valid utf8, it wasn't windows-1252, so give up.
 	if !utf8.Valid(decoded) {
-		return original, errors.Wrap(err, "failed to decode as latin1")
+		return original, errors.Wrap(err, "failed to decode as windows-1252")
 	}

 	return decoded, nil
--- a/pkg/mime/encoding_test.go
+++ b/pkg/mime/encoding_test.go
@ -330,81 +330,81 @@ func TestGetEncoding(t *testing.T) {
 func TestEncodeReader(t *testing.T) {
 	// define test data
 	testData := []struct {
-		params   map[string]string
+		charset  string
 		original []byte
 		message  string
 	}{
 		// russian
 		{
-			map[string]string{"charset": "koi8-r"},
+			"koi8-r",
 			//     а, з, б, у, к, а, а, б, в, г, д, е, ё
 			[]byte{0xC1, 0xDA, 0xC2, 0xD5, 0xCB, 0xC1, 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xA3},
 			"азбукаабвгдеё",
 		},
 		{
-			map[string]string{"charset": "KOI8-R"},
+			"KOI8-R",
 			[]byte{0xC1, 0xDA, 0xC2, 0xD5, 0xCB, 0xC1, 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xA3},
 			"азбукаабвгдеё",
 		},
 		{
-			map[string]string{"charset": "csKOI8R"},
+			"csKOI8R",
 			[]byte{0xC1, 0xDA, 0xC2, 0xD5, 0xCB, 0xC1, 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xA3},
 			"азбукаабвгдеё",
 		},
 		{
-			map[string]string{"charset": "koi8-u"},
+			"koi8-u",
 			[]byte{0xC1, 0xDA, 0xC2, 0xD5, 0xCB, 0xC1, 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xA3},
 			"азбукаабвгдеё",
 		},
 		{
-			map[string]string{"charset": "iso-8859-5"},
+			"iso-8859-5",
 			//     а    , з    , б    , у    , к    , а    , а    , б    , в    , г    , д    , е    , ё
 			[]byte{0xD0, 0xD7, 0xD1, 0xE3, 0xDA, 0xD0, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xF1},
 			"азбукаабвгдеё",
 		},
 		{
-			map[string]string{"charset": "csWrong"},
+			"csWrong",
 			[]byte{0xD0, 0xD7, 0xD1, 0xE3, 0xDA, 0xD0, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6},
 			"",
 		},
 		{
-			map[string]string{"charset": "utf8"},
+			"utf8",
 			[]byte{0xD0, 0xB0, 0xD0, 0xB7, 0xD0, 0xB1, 0xD1, 0x83, 0xD0, 0xBA, 0xD0, 0xB0, 0xD0, 0xB0, 0xD0, 0xB1, 0xD0, 0xB2, 0xD0, 0xB3, 0xD0, 0xB4, 0xD0, 0xB5, 0xD1, 0x91},
 			"азбукаабвгдеё",
 		},
 		// czechoslovakia
 		{
-			map[string]string{"charset": "windows-1250"},
+			"windows-1250",
 			[]byte{225, 228, 232, 233, 236, 244},
 			"áäčéěô",
 		},
 		// umlauts
 		{
-			map[string]string{"charset": "iso-8859-1"},
+			"iso-8859-1",
 			[]byte{196, 203, 214, 220, 228, 235, 246, 252},
 			"ÄËÖÜäëöü",
 		},
 		// latvia
 		{
-			map[string]string{"charset": "iso-8859-4"},
+			"iso-8859-4",
 			[]byte{224, 239, 243, 182, 254},
 			"āīķļū",
 		},
 		{ // encoded by https://www.motobit.com/util/charset-codepage-conversion.asp
-			map[string]string{"charset": "utf7"},
+			"utf7",
 			[]byte("He wes Leovena+APA-es sone -- li+APA-e him be Drihten.+A6QDtw- +A7MDuwPOA8MDwwOx- +A7wDvwPF- +A60DtAPJA8MDsQO9- +A7UDuwO7A7cDvQO5A7oDrg-. +BCcENQRABD0ENQQ7BDg- +BDgENwQxBEs- +BDcENAQ1BEEETA- +BDg- +BEIEMAQ8-,+BCcENQRABD0ENQQ7BDg- +BDgENwQxBEs- +BDcENAQ1BEEETA- +BDg- +BEIEMAQ8-,+C68LvguuC7ELvwuoC80LpA- +C64Lygu0C78LlQuzC78LsgvH- +C6QLrgu/C7QLzQuuC8oLtAu/- +C6oLywuyC80- +C4cLqQu/C6QLvgu1C6QLwQ- +C44LmQvNC5ULwQuuC80- +C5ULvgujC8sLrgvN-."),
 			"He wes Leovenaðes sone -- liðe him be Drihten.Τη γλώσσα μου έδωσαν ελληνική. Чернели избы здесь и там,Чернели избы здесь и там,யாமறிந்த மொழிகளிலே தமிழ்மொழி போல் இனிதாவது எங்கும் காணோம்.",
 		},

 		// iconv -f UTF8 -t GB2312 utf8.txt | hexdump -v -e '"0x" 1/1 "%x, "'
 		{ // encoded by iconv; dump by `cat gb2312.txt | hexdump -v -e '"0x" 1/1 "%x "'` and reformat; text from https://zh.wikipedia.org/wiki/GB_2312
-			map[string]string{"charset": "GB2312"},
+			"GB2312",
 			[]byte{0x47, 0x42, 0x20, 0x32, 0x33, 0x31, 0x32, 0xb5, 0xc4, 0xb3, 0xf6, 0xcf, 0xd6, 0xa3, 0xac, 0xbb, 0xf9, 0xb1, 0xbe, 0xc2, 0xfa, 0xd7, 0xe3, 0xc1, 0xcb, 0xba, 0xba, 0xd7, 0xd6, 0xb5, 0xc4, 0xbc, 0xc6, 0xcb, 0xe3, 0xbb, 0xfa, 0xb4, 0xa6, 0xc0, 0xed, 0xd0, 0xe8, 0xd2, 0xaa, 0xa3, 0xac, 0xcb, 0xfc, 0xcb, 0xf9, 0xca, 0xd5, 0xc2, 0xbc, 0xb5, 0xc4, 0xba, 0xba, 0xd7, 0xd6, 0xd2, 0xd1, 0xbe, 0xad, 0xb8, 0xb2, 0xb8, 0xc7, 0xd6, 0xd0, 0xb9, 0xfa, 0xb4, 0xf3, 0xc2, 0xbd, 0x39, 0x39, 0x2e, 0x37, 0x35, 0x25, 0xb5, 0xc4, 0xca, 0xb9, 0xd3, 0xc3, 0xc6, 0xb5, 0xc2, 0xca, 0xa1, 0xa3, 0xb5, 0xab, 0xb6, 0xd4, 0xd3, 0xda, 0xc8, 0xcb, 0xc3, 0xfb},
 			"GB 2312的出现，基本满足了汉字的计算机处理需要，它所收录的汉字已经覆盖中国大陆99.75%的使用频率。但对于人名",
 		},

 		{ // encoded by iconv; text from https://jp.wikipedia.org/wiki/Shift_JIS
-			map[string]string{"charset": "shift-jis"},
+			"shift-jis",
 			[]byte{0x95, 0xb6, 0x8e, 0x9a, 0x95, 0x84, 0x8d, 0x86, 0x89, 0xbb, 0x95, 0xfb, 0x8e, 0xae, 0x53, 0x68, 0x69, 0x66, 0x74, 0x5f, 0x4a, 0x49, 0x53, 0x82, 0xcc, 0x90, 0xdd, 0x8c, 0x76, 0x8e, 0xd2, 0x82, 0xe7, 0x82, 0xcd, 0x81, 0x41, 0x90, 0xe6, 0x8d, 0x73, 0x82, 0xb5, 0x82, 0xc4, 0x82, 0xe6, 0x82, 0xad, 0x97, 0x98, 0x97, 0x70, 0x82, 0xb3, 0x82, 0xea, 0x82, 0xc4, 0x82, 0xa2, 0x82, 0xbd, 0x4a, 0x49, 0x53, 0x20, 0x43, 0x20, 0x36, 0x32, 0x32, 0x30, 0x81, 0x69, 0x8c, 0xbb, 0x8d, 0xdd, 0x82, 0xcc, 0x4a, 0x49, 0x53, 0x20, 0x58, 0x20, 0x30, 0x32, 0x30, 0x31, 0x81, 0x6a, 0x82, 0xcc, 0x38, 0x83, 0x72, 0x83, 0x62, 0x83, 0x67, 0x95, 0x84, 0x8d, 0x86, 0x81, 0x69, 0x88, 0xc8, 0x89, 0xba, 0x81, 0x75, 0x89, 0x70, 0x90, 0x94, 0x8e, 0x9a, 0x81, 0x45, 0x94, 0xbc, 0x8a, 0x70, 0x83, 0x4a, 0x83, 0x69, 0x81, 0x76, 0x81, 0x6a, 0x82, 0xc6, 0x81, 0x41, 0x4a, 0x49, 0x53, 0x20, 0x43, 0x20, 0x36, 0x32, 0x32, 0x36, 0x81, 0x69, 0x8c, 0xbb, 0x8d, 0xdd, 0x82, 0xcc, 0x4a, 0x49, 0x53, 0x20, 0x58, 0x20, 0x30, 0x32, 0x30, 0x38, 0x81, 0x41, 0x88, 0xc8, 0x89, 0xba, 0x81, 0x75, 0x8a, 0xbf, 0x8e, 0x9a, 0x81, 0x76, 0x81, 0x6a, 0x82, 0xcc, 0x97, 0xbc, 0x95, 0xb6, 0x8e, 0x9a, 0x8f, 0x57, 0x8d, 0x87, 0x82, 0xf0, 0x95, 0x5c, 0x8c, 0xbb, 0x82, 0xb5, 0x82, 0xe6, 0x82, 0xa4, 0x82, 0xc6, 0x82, 0xb5, 0x82, 0xbd, 0x81, 0x42, 0x82, 0xdc, 0x82, 0xbd, 0x81, 0x41, 0x83, 0x74, 0x83, 0x40, 0x83, 0x43, 0x83, 0x8b, 0x82, 0xcc, 0x91, 0xe5, 0x82, 0xab, 0x82, 0xb3, 0x82, 0xe2, 0x8f, 0x88, 0x97, 0x9d, 0x8e, 0x9e, 0x8a, 0xd4, 0x82, 0xcc, 0x92, 0x5a, 0x8f, 0x6b, 0x82, 0xf0, 0x90, 0x7d, 0x82, 0xe9, 0x82, 0xbd, 0x82, 0xdf, 0x81, 0x41, 0x83, 0x47, 0x83, 0x58, 0x83, 0x50, 0x81, 0x5b, 0x83, 0x76, 0x83, 0x56, 0x81, 0x5b, 0x83, 0x50, 0x83, 0x93, 0x83, 0x58, 0x82, 0xc8, 0x82, 0xb5, 0x82, 0xc5, 0x8d, 0xac, 0x8d, 0xdd, 0x89, 0xc2, 0x94, 0x5c, 0x82, 0xc9, 0x82, 0xb7, 0x82, 0xe9, 0x82, 0xb1, 0x82, 0xc6, 0x82, 0xf0, 0x8a, 0xe9, 0x90, 0x7d, 0x82, 0xb5, 0x82, 0xbd, 0x81, 0x42},
 			"文字符号化方式Shift_JISの設計者らは、先行してよく利用されていたJIS C 6220（現在のJIS X 0201）の8ビット符号（以下「英数字・半角カナ」）と、JIS C 6226（現在のJIS X 0208、以下「漢字」）の両文字集合を表現しようとした。また、ファイルの大きさや処理時間の短縮を図るため、エスケープシーケンスなしで混在可能にすることを企図した。",
 		},
@ -417,7 +417,7 @@ func TestEncodeReader(t *testing.T) {
 	for _, val := range testData {
 		//fmt.Println("Testing ", val)
 		expected := []byte(val.message)
-		decoded, err := DecodeCharset(val.original, val.params)
+		decoded, err := DecodeCharset(val.original, "text/plain; charset="+val.charset)
 		if len(expected) == 0 {
 			if err == nil {
 				t.Error("Expected err but have ", err)
@ -434,10 +434,10 @@ func TestEncodeReader(t *testing.T) {
 		if bytes.Equal(decoded, expected) {
 			// fmt.Println("Succesfull decoding of ", val.params, ":", string(decoded))
 		} else {
-			t.Error("Wrong encoding of ", val.params, ".Expected\n", expected, "\nbut have\n", decoded)
+			t.Error("Wrong encoding of ", val.charset, ".Expected\n", expected, "\nbut have\n", decoded)
 		}
 		if strings.Compare(val.message, string(decoded)) != 0 {
-			t.Error("Wrong message for ", val.params, ".Expected\n", val.message, "\nbut have\n", string(decoded))
+			t.Error("Wrong message for ", val.charset, ".Expected\n", val.message, "\nbut have\n", string(decoded))
 		}
 	}
 }
--- a/pkg/mime/mediaType.go
+++ b/pkg/mime/mediaType.go
@ -174,7 +174,7 @@ func convertHexToUTF(charset, value string) (string, error) {
 	if err != nil {
 		return "", err
 	}
-	utf8, err := DecodeCharset(raw, map[string]string{"charset": charset})
+	utf8, err := DecodeCharset(raw, "text/plain; charset="+charset)
 	return "utf-8''" + percentHexEscape(utf8), err
 }

--- a/pkg/mime/parser.go
+++ b/pkg/mime/parser.go
@ -243,7 +243,7 @@ func getContentType(header textproto.MIMEHeader) (mediatype string, params map[s
 		contentType = "text/plain"
 	}

-	return mime.ParseMediaType(contentType)
+	return ParseMediaType(contentType)
 }

 // ===================== MIME Printer ===================================
@ -322,14 +322,14 @@ func NewPlainTextCollector(targetAccepter VisitAcceptor) *PlainTextCollector {
 func (ptc *PlainTextCollector) Accept(partReader io.Reader, header textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error) {
 	if isFirst {
 		if IsLeaf(header) {
-			mediaType, params, _ := getContentType(header)
+			mediaType, _, _ := getContentType(header)
 			disp, _, _ := mime.ParseMediaType(header.Get("Content-Disposition"))
 			if mediaType == "text/plain" && disp != "attachment" {
 				partData, _ := ioutil.ReadAll(partReader)
 				decodedPart := decodePart(bytes.NewReader(partData), header)

 				if buffer, err := ioutil.ReadAll(decodedPart); err == nil {
-					buffer, err = DecodeCharset(buffer, params)
+					buffer, err = DecodeCharset(buffer, header.Get("Content-Type"))
 					if err != nil {
 						log.Warnln("Decode charset error:", err)
 						return err
@ -377,13 +377,13 @@ func (bc *BodyCollector) Accept(partReader io.Reader, header textproto.MIMEHeade
 	// TODO: Collect html and plaintext - if there's html with plain sibling don't include plain/text.
 	if isFirst {
 		if IsLeaf(header) {
-			mediaType, params, _ := getContentType(header)
+			mediaType, _, _ := getContentType(header)
 			disp, _, _ := mime.ParseMediaType(header.Get("Content-Disposition"))
 			if disp != "attachment" {
 				partData, _ := ioutil.ReadAll(partReader)
 				decodedPart := decodePart(bytes.NewReader(partData), header)
 				if buffer, err := ioutil.ReadAll(decodedPart); err == nil {
-					buffer, err = DecodeCharset(buffer, params)
+					buffer, err = DecodeCharset(buffer, header.Get("Content-Type"))
 					if err != nil {
 						log.Warnln("Decode charset error:", err)
 						return err
@ -444,14 +444,14 @@ func NewAttachmentsCollector(targetAccepter VisitAcceptor) *AttachmentsCollector
 func (ac *AttachmentsCollector) Accept(partReader io.Reader, header textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error) {
 	if isFirst {
 		if IsLeaf(header) {
-			mediaType, params, _ := getContentType(header)
+			mediaType, _, _ := getContentType(header)
 			disp, _, _ := mime.ParseMediaType(header.Get("Content-Disposition"))
 			if (mediaType != "text/html" && mediaType != "text/plain") || disp == "attachment" {
 				partData, _ := ioutil.ReadAll(partReader)
 				decodedPart := decodePart(bytes.NewReader(partData), header)

 				if buffer, err := ioutil.ReadAll(decodedPart); err == nil {
-					buffer, err = DecodeCharset(buffer, params)
+					buffer, err = DecodeCharset(buffer, header.Get("Content-Type"))
 					if err != nil {
 						log.Warnln("Decode charset error:", err)
 						return err