// Copyright (c) 2023 Proton AG
//
// This file is part of Proton Mail Bridge.
//
// Proton Mail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Proton Mail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Proton Mail Bridge. If not, see <https://www.gnu.org/licenses/>.

package pmmime
import (
"bytes"
"strings"
"testing"
"golang.org/x/text/encoding/htmlindex"
a "github.com/stretchr/testify/assert"
)
// TestDecodeHeader feeds RFC 2047 encoded-word headers in several charsets to
// DecodeHeader and compares the result with the expected UTF-8 text.
//
// The last case uses a charset name that is not recognised ("uknown"); the
// header is expected to come back unchanged.
func TestDecodeHeader(t *testing.T) {
	testData := []struct{ raw, expected string }{
		{
			"",
			"",
		},
		{
			"=?iso-2022-jp?Q?=1B$B!Z=1B(BTimes_Car_PLUS=1B$B![JV5Q>Z=1B(B?=",
			"【Times Car PLUS】返却証",
		},
		{
			// Raw string literal because the payload contains a backslash.
			`=?iso-2022-jp?Q?iTunes_Movie_=1B$B%K%e!<%j%j!<%9$HCmL\:nIJ=1B(B?=`,
			"iTunes Movie ニューリリースと注目作品",
		},
		{
			"=?UTF-8?B?w4TDi8OPw5bDnA==?= =?UTF-8?B?IMOkw6vDr8O2w7w=?=",
			"ÄËÏÖÜ äëïöü",
		},
		{
			"=?ISO-8859-2?B?xMtJ1tw=?= =?ISO-8859-2?B?IOTrafb8?=",
			"ÄËIÖÜ äëiöü",
		},
		{
			"=?uknown?B?xMtJ1tw=?= =?ISO-8859-2?B?IOTrafb8?=",
			"=?uknown?B?xMtJ1tw=?= =?ISO-8859-2?B?IOTrafb8?=",
		},
	}

	for _, val := range testData {
		// Only the decoded text is asserted; err is reported for diagnostics
		// because the unknown-charset case may legitimately return one.
		decoded, err := DecodeHeader(val.raw)
		if decoded != val.expected {
			t.Errorf("Incorrect decoding of header %q expected %q but have %q; Error %v", val.raw, val.expected, decoded, err)
		}
	}
}
// testParseMediaTypeData is one table entry for TestParseMediaType.
type testParseMediaTypeData struct {
	// arg is the raw header value handed to ParseMediaType.
	arg string
	// wantMediaType is the media type expected back.
	wantMediaType string
	// wantParams holds the expected parameter map.
	wantParams map[string]string
}
// run parses d.arg with ParseMediaType and asserts that the call succeeds and
// yields d.wantMediaType and d.wantParams. Its signature matches what t.Run
// expects, so it can be passed directly as a subtest body.
func (d *testParseMediaTypeData) run(t *testing.T) {
	gotMediaType, params, err := ParseMediaType(d.arg)
	// NoError prints the error text on failure; stop here because the other
	// results carry no meaning once parsing has failed.
	if !a.NoError(t, err) {
		return
	}
	a.Equal(t, d.wantMediaType, gotMediaType)
	a.Equal(t, d.wantParams, params)
}
func TestParseMediaType ( t * testing . T ) {
testTable := map [ string ] testParseMediaTypeData {
"TwiceTheSameParameter" : {
arg : "attachment; filename=joy.txt; filename=JOY.TXT; title=hi;" ,
wantMediaType : "attachment" ,
wantParams : map [ string ] string { "filename" : "JOY.TXT" , "title" : "hi" } ,
} ,
"SingleLineUTF8" : {
arg : "attachment;\nfilename*=utf-8''%F0%9F%98%81%F0%9F%98%82.txt;\n title=smile" ,
wantMediaType : "attachment" ,
wantParams : map [ string ] string { "filename" : "😁😂.txt" , "title" : "smile" } ,
} ,
"MultiLineUTF8" : {
arg : "attachment;\nfilename*0*=utf-8''%F0%9F%98%81; title=smile;\nfilename*1*=%F0%9F%98%82;\nfilename*2=.txt" ,
wantMediaType : "attachment" ,
wantParams : map [ string ] string { "filename" : "😁😂.txt" , "title" : "smile" } ,
} ,
"MultiLineFirstNoEncNextUTF8" : {
arg : "attachment;\nfilename*0*=utf-8''joy ;\n title*=utf-8''smile; \nfilename*1*=%F0%9F%98%82;\nfilename*2=.txt" ,
wantMediaType : "attachment" ,
wantParams : map [ string ] string { "filename" : "joy😂.txt" , "title" : "smile" } ,
} ,
"SingleLineBig5" : {
arg : "attachment;\nfilename*=big5''%B3%C6%A7%D1%BF%FD.m4a; title*=utf8''memorandum" ,
wantMediaType : "attachment" ,
wantParams : map [ string ] string { "filename" : "備忘錄.m4a" , "title" : "memorandum" } ,
} ,
"MultiLineBig5" : {
arg : "attachment;\nfilename*0*=big5''%B3%C6a; title*0=utf8''memorandum; filename*2=%BF%FD.m4a; \nfilename*1*=%A7%D1b;" ,
wantMediaType : "attachment" ,
wantParams : map [ string ] string { "filename" : "備a忘b錄.m4a" , "title" : "memorandum" } ,
} ,
2020-06-30 14:33:29 +00:00
"SingleLineBadEncoding" : {
2020-06-29 10:51:39 +00:00
arg : "attachment;\nfilename*=utf-8'%F0%9F%98%81%F0%9F%98%82.txt;\n title=smile" ,
wantMediaType : "attachment" ,
wantParams : map [ string ] string { "title" : "smile" } ,
} ,
2020-06-30 14:33:29 +00:00
"MultiLineBadEncoding" : {
arg : "attachment;\nfilename*0*=utf-8'%F0%9F%98%81; title=smile;\nfilename*1*=%F0%9F%98%82;\nfilename*2=.txt" ,
wantMediaType : "attachment" ,
wantParams : map [ string ] string { "filename" : "😂.txt" , "title" : "smile" } ,
} ,
2020-04-08 10:59:16 +00:00
}
for name , testData := range testTable {
t . Run ( name , testData . run )
}
}
func TestGetEncoding ( t * testing . T ) {
// All MIME charsets with aliases can be found here:
// https://www.iana.org/assignments/character-sets/character-sets.xhtml
mimesets := map [ string ] [ ] string {
2022-05-31 13:54:04 +00:00
"utf-8" : { // MIB 16
2020-04-08 10:59:16 +00:00
"utf8" ,
"csutf8" ,
"unicode-1-1-utf-8" ,
"iso-utf-8" ,
"utf8mb4" ,
} ,
2022-05-31 13:54:04 +00:00
"gbk" : {
2020-04-08 10:59:16 +00:00
"gb2312" , // MIB 2025
//"euc-cn": []string{
"euccn" ,
"ibm-euccn" ,
} ,
//"utf7": []string{"utf-7", "unicode-1-1-utf-7"},
2022-05-31 13:54:04 +00:00
"iso-8859-2" : { // MIB 5
2020-04-08 10:59:16 +00:00
"iso-ir-101" ,
"iso_8859-2" ,
"iso8859-2" ,
"latin2" ,
"l2" ,
"csisolatin2" ,
"ibm852" ,
//"FAILEDibm852",
} ,
2022-05-31 13:54:04 +00:00
"iso-8859-3" : { // MIB 6
2020-04-08 10:59:16 +00:00
"iso-ir-109" ,
"iso_8859-3" ,
"latin3" ,
"l3" ,
"csisolatin3" ,
} ,
2022-05-31 13:54:04 +00:00
"iso-8859-4" : { // MIB 7
2020-04-08 10:59:16 +00:00
"iso-ir-110" ,
"iso_8859-4" ,
"latin4" ,
"l4" ,
"csisolatin4" ,
} ,
2022-05-31 13:54:04 +00:00
"iso-8859-5" : { // MIB 8
2020-04-08 10:59:16 +00:00
"iso-ir-144" ,
"iso_8859-5" ,
"cyrillic" ,
"csisolatincyrillic" ,
} ,
2022-05-31 13:54:04 +00:00
"iso-8859-6" : { // MIB 9
2020-04-08 10:59:16 +00:00
"iso-ir-127" ,
"iso_8859-6" ,
"ecma-114" ,
"asmo-708" ,
"arabic" ,
"csisolatinarabic" ,
//"iso-8859-6e": []string{ // MIB 81 just direction
"csiso88596e" ,
"iso-8859-6-e" ,
//"iso-8859-6i": []string{ // MIB 82
"csiso88596i" ,
2022-05-31 13:54:04 +00:00
"iso-8859-6-i" ,
} ,
"iso-8859-7" : { // MIB 10
2020-04-08 10:59:16 +00:00
"iso-ir-126" ,
"iso_8859-7" ,
"elot_928" ,
"ecma-118" ,
"greek" ,
"greek8" ,
2022-05-31 13:54:04 +00:00
"csisolatingreek" ,
} ,
"iso-8859-8" : { // MIB 11
2020-04-08 10:59:16 +00:00
"iso-ir-138" ,
"iso_8859-8" ,
"hebrew" ,
"csisolatinhebrew" ,
//"iso-8859-8e": []string{ // MIB 84 (directionality
"csiso88598e" ,
"iso-8859-8-e" ,
} ,
2022-05-31 13:54:04 +00:00
"iso-8859-8-i" : { // MIB 85
2020-04-08 10:59:16 +00:00
"logical" ,
"csiso88598i" ,
"iso-8859-8-i" , // Hebrew, the "i" means right-to-left, probably unnecessary with ISO cleaning above.
} ,
2022-05-31 13:54:04 +00:00
"iso-8859-10" : { // MIB 13
2020-04-08 10:59:16 +00:00
"iso-ir-157" ,
"l6" ,
"iso_8859-10:1992" ,
"csisolatin6" ,
2022-05-31 13:54:04 +00:00
"latin6" ,
} ,
"iso-8859-13" : { // MIB 109
2020-04-08 10:59:16 +00:00
"csiso885913" } ,
2022-05-31 13:54:04 +00:00
"iso-8859-14" : { // MIB 110
2020-04-08 10:59:16 +00:00
"iso-ir-199" ,
"iso_8859-14:1998" ,
"iso_8859-14" ,
"latin8" ,
"iso-celtic" ,
"l8" ,
2022-05-31 13:54:04 +00:00
"csiso885914" ,
} ,
"iso-8859-15" : { // MIB 111
2020-04-08 10:59:16 +00:00
"iso_8859-15" ,
"latin-9" ,
"csiso885915" ,
2022-05-31 13:54:04 +00:00
"ISO8859-15" ,
} ,
"iso-8859-16" : { // MIB 112
2020-04-08 10:59:16 +00:00
"iso-ir-226" ,
"iso_8859-16:2001" ,
"iso_8859-16" ,
"latin10" ,
"l10" ,
"csiso885916" ,
} ,
2022-05-31 13:54:04 +00:00
"windows-874" : { // MIB 2109
2020-04-08 10:59:16 +00:00
"cswindows874" ,
"cp874" ,
"iso-8859-11" ,
"tis-620" ,
} ,
2022-05-31 13:54:04 +00:00
"windows-1250" : { // MIB 2250
2020-04-08 10:59:16 +00:00
"cswindows1250" ,
"cp1250" ,
} ,
2022-05-31 13:54:04 +00:00
"windows-1251" : { // MIB 2251
2020-04-08 10:59:16 +00:00
"cswindows1251" ,
"cp1251" ,
} ,
2022-05-31 13:54:04 +00:00
"windows-1252" : { // MIB 2252
2020-04-08 10:59:16 +00:00
"cswindows1252" ,
"cp1252" ,
"3dwindows-1252" ,
"we8mswin1252" ,
"us-ascii" , // MIB 3
"ansi_x3.110-1983" , // MIB 74 // usascii
//"iso-8859-1": []string{ // MIB 4 succeed by win1252
"iso8859-1" ,
"iso-ir-100" ,
"iso_8859-1" ,
"latin1" ,
"l1" ,
"ibm819" ,
"cp819" ,
"csisolatin1" ,
"ansi_x3.4-1968" ,
"ansi_x3.4-1986" ,
"cp850" ,
"cp858" , // "cp850" Mostly correct except for the Euro sign.
"iso_646.irv:1991" ,
"iso646-us" ,
"us" ,
"ibm367" ,
"cp367" ,
"csascii" ,
"ascii" ,
"iso-ir-6" ,
"we8iso8859p1" ,
} ,
2022-05-31 13:54:04 +00:00
"windows-1253" : { "cswindows1253" , "cp1253" } , // MIB 2253
"windows-1254" : { "cswindows1254" , "cp1254" } , // MIB 2254
"windows-1255" : { "cSwindows1255" , "cp1255" } , // MIB 2255
"windows-1256" : { "cswIndows1256" , "cp1256" } , // MIB 2256
"windows-1257" : { "cswinDows1257" , "cp1257" } , // MIB 2257
"windows-1258" : { "cswindoWs1258" , "cp1258" } , // MIB 2257
"koi8-r" : { "cskoi8r" , "koi8r" } , // MIB 2084
"koi8-u" : { "cskoi8u" , "koi8u" } , // MIB 2088
"macintosh" : { "mac" , "macroman" , "csmacintosh" } , // MIB 2027
"big5" : {
2020-04-08 10:59:16 +00:00
"zht16mswin950" , // cp950
"cp950" ,
} ,
2022-05-31 13:54:04 +00:00
"euc-kr" : {
2020-04-08 10:59:16 +00:00
"euckr" , // MIB 38
"ibm-euckr" ,
//"uhc": []string{ // Korea
"ks_c_5601-1987" ,
"ksc5601" ,
"cp949" ,
} ,
2022-05-31 13:54:04 +00:00
"euc-jp" : {
2020-04-08 10:59:16 +00:00
"eucjp" ,
"ibm-eucjp" ,
} ,
2022-05-31 13:54:04 +00:00
"shift_jis" : {
2020-04-08 10:59:16 +00:00
"CP932" ,
"MS932" ,
"Windows-932" ,
"Windows-31J" ,
"MS_Kanji" ,
"IBM-943" ,
"CP943" ,
} ,
2022-05-31 13:54:04 +00:00
"iso-2022-jp" : { // MIB 39
2020-04-08 10:59:16 +00:00
"iso2022jp" ,
"csiso2022jp" ,
} ,
}
for expected , names := range mimesets {
expenc , _ := htmlindex . Get ( expected )
if canonical , err := htmlindex . Name ( expenc ) ; canonical != expected || err != nil {
t . Fatalf ( "Error while get canonical name. Expected '%v' but have %v `%#v`: %v" , expected , canonical , expenc , err )
}
for _ , name := range names {
enc , err := getEncoding ( name )
if err != nil || enc == nil {
t . Errorf ( "Error while getting encoding for %v returned: '%#v' and error: '%v'" , name , enc , err )
}
if expenc != enc {
t . Errorf ( "For %v expected %v '%v' but have '%v'" , name , expected , expenc , enc )
}
}
}
}
// sample text for UTF8 http://www.columbia.edu/~fdc/utf8/index.html
func TestEncodeReader ( t * testing . T ) {
// define test data
testData := [ ] struct {
2020-05-28 10:36:42 +00:00
charset string
2020-04-08 10:59:16 +00:00
original [ ] byte
message string
} {
// russian
{
2020-05-28 10:36:42 +00:00
"koi8-r" ,
2020-04-08 10:59:16 +00:00
// а , з, б , у , к, а , а , б , в, г , д, е , ё
[ ] byte { 0xC1 , 0xDA , 0xC2 , 0xD5 , 0xCB , 0xC1 , 0xC1 , 0xC2 , 0xD7 , 0xC7 , 0xC4 , 0xC5 , 0xA3 } ,
"азбукаабвгдеё" ,
} ,
{
2020-05-28 10:36:42 +00:00
"KOI8-R" ,
2020-04-08 10:59:16 +00:00
[ ] byte { 0xC1 , 0xDA , 0xC2 , 0xD5 , 0xCB , 0xC1 , 0xC1 , 0xC2 , 0xD7 , 0xC7 , 0xC4 , 0xC5 , 0xA3 } ,
"азбукаабвгдеё" ,
} ,
{
2020-05-28 10:36:42 +00:00
"csKOI8R" ,
2020-04-08 10:59:16 +00:00
[ ] byte { 0xC1 , 0xDA , 0xC2 , 0xD5 , 0xCB , 0xC1 , 0xC1 , 0xC2 , 0xD7 , 0xC7 , 0xC4 , 0xC5 , 0xA3 } ,
"азбукаабвгдеё" ,
} ,
{
2020-05-28 10:36:42 +00:00
"koi8-u" ,
2020-04-08 10:59:16 +00:00
[ ] byte { 0xC1 , 0xDA , 0xC2 , 0xD5 , 0xCB , 0xC1 , 0xC1 , 0xC2 , 0xD7 , 0xC7 , 0xC4 , 0xC5 , 0xA3 } ,
"азбукаабвгдеё" ,
} ,
{
2020-05-28 10:36:42 +00:00
"iso-8859-5" ,
2020-04-08 10:59:16 +00:00
// а , з , б , у , к , а , а , б , в , г , д , е , ё
[ ] byte { 0xD0 , 0xD7 , 0xD1 , 0xE3 , 0xDA , 0xD0 , 0xD0 , 0xD1 , 0xD2 , 0xD3 , 0xD4 , 0xD5 , 0xF1 } ,
"азбукаабвгдеё" ,
} ,
{
2020-05-28 10:36:42 +00:00
"csWrong" ,
2020-04-08 10:59:16 +00:00
[ ] byte { 0xD0 , 0xD7 , 0xD1 , 0xE3 , 0xDA , 0xD0 , 0xD0 , 0xD1 , 0xD2 , 0xD3 , 0xD4 , 0xD5 , 0xD6 } ,
"" ,
} ,
{
2020-05-28 10:36:42 +00:00
"utf8" ,
2020-04-08 10:59:16 +00:00
[ ] byte { 0xD0 , 0xB0 , 0xD0 , 0xB7 , 0xD0 , 0xB1 , 0xD1 , 0x83 , 0xD0 , 0xBA , 0xD0 , 0xB0 , 0xD0 , 0xB0 , 0xD0 , 0xB1 , 0xD0 , 0xB2 , 0xD0 , 0xB3 , 0xD0 , 0xB4 , 0xD0 , 0xB5 , 0xD1 , 0x91 } ,
"азбукаабвгдеё" ,
} ,
// czechoslovakia
{
2020-05-28 10:36:42 +00:00
"windows-1250" ,
2020-04-08 10:59:16 +00:00
[ ] byte { 225 , 228 , 232 , 233 , 236 , 244 } ,
"áäčéěô" ,
} ,
// umlauts
{
2020-05-28 10:36:42 +00:00
"iso-8859-1" ,
2020-04-08 10:59:16 +00:00
[ ] byte { 196 , 203 , 214 , 220 , 228 , 235 , 246 , 252 } ,
"ÄËÖÜäëöü" ,
} ,
// latvia
{
2020-05-28 10:36:42 +00:00
"iso-8859-4" ,
2020-04-08 10:59:16 +00:00
[ ] byte { 224 , 239 , 243 , 182 , 254 } ,
"āīķļū" ,
} ,
{ // encoded by https://www.motobit.com/util/charset-codepage-conversion.asp
2020-05-28 10:36:42 +00:00
"utf7" ,
2020-04-08 10:59:16 +00:00
[ ] byte ( "He wes Leovena+APA-es sone -- li+APA-e him be Drihten.+A6QDtw- +A7MDuwPOA8MDwwOx- +A7wDvwPF- +A60DtAPJA8MDsQO9- +A7UDuwO7A7cDvQO5A7oDrg-. +BCcENQRABD0ENQQ7BDg- +BDgENwQxBEs- +BDcENAQ1BEEETA- +BDg- +BEIEMAQ8-,+BCcENQRABD0ENQQ7BDg- +BDgENwQxBEs- +BDcENAQ1BEEETA- +BDg- +BEIEMAQ8-,+C68LvguuC7ELvwuoC80LpA- +C64Lygu0C78LlQuzC78LsgvH- +C6QLrgu/C7QLzQuuC8oLtAu/- +C6oLywuyC80- +C4cLqQu/C6QLvgu1C6QLwQ- +C44LmQvNC5ULwQuuC80- +C5ULvgujC8sLrgvN-." ) ,
"He wes Leovenaðes sone -- liðe him be Drihten.Τη γλώσσα μου έδωσαν ελληνική. Чернели избы здесь и там,Чернели избы здесь и там,யாமறிந்த மொழிகளிலே தமிழ்மொழி போல் இனிதாவது எங்கும் காணோம்." ,
} ,
// iconv -f UTF8 -t GB2312 utf8.txt | hexdump -v -e '"0x" 1/1 "%x, "'
{ // encoded by iconv; dump by `cat gb2312.txt | hexdump -v -e '"0x" 1/1 "%x "'` and reformat; text from https://zh.wikipedia.org/wiki/GB_2312
2020-05-28 10:36:42 +00:00
"GB2312" ,
2020-04-08 10:59:16 +00:00
[ ] byte { 0x47 , 0x42 , 0x20 , 0x32 , 0x33 , 0x31 , 0x32 , 0xb5 , 0xc4 , 0xb3 , 0xf6 , 0xcf , 0xd6 , 0xa3 , 0xac , 0xbb , 0xf9 , 0xb1 , 0xbe , 0xc2 , 0xfa , 0xd7 , 0xe3 , 0xc1 , 0xcb , 0xba , 0xba , 0xd7 , 0xd6 , 0xb5 , 0xc4 , 0xbc , 0xc6 , 0xcb , 0xe3 , 0xbb , 0xfa , 0xb4 , 0xa6 , 0xc0 , 0xed , 0xd0 , 0xe8 , 0xd2 , 0xaa , 0xa3 , 0xac , 0xcb , 0xfc , 0xcb , 0xf9 , 0xca , 0xd5 , 0xc2 , 0xbc , 0xb5 , 0xc4 , 0xba , 0xba , 0xd7 , 0xd6 , 0xd2 , 0xd1 , 0xbe , 0xad , 0xb8 , 0xb2 , 0xb8 , 0xc7 , 0xd6 , 0xd0 , 0xb9 , 0xfa , 0xb4 , 0xf3 , 0xc2 , 0xbd , 0x39 , 0x39 , 0x2e , 0x37 , 0x35 , 0x25 , 0xb5 , 0xc4 , 0xca , 0xb9 , 0xd3 , 0xc3 , 0xc6 , 0xb5 , 0xc2 , 0xca , 0xa1 , 0xa3 , 0xb5 , 0xab , 0xb6 , 0xd4 , 0xd3 , 0xda , 0xc8 , 0xcb , 0xc3 , 0xfb } ,
"GB 2312的出现, 基本满足了汉字的计算机处理需要, 它所收录的汉字已经覆盖中国大陆99.75%的使用频率。但对于人名" ,
} ,
{ // encoded by iconv; text from https://jp.wikipedia.org/wiki/Shift_JIS
2020-05-28 10:36:42 +00:00
"shift-jis" ,
2020-04-08 10:59:16 +00:00
[ ] byte { 0x95 , 0xb6 , 0x8e , 0x9a , 0x95 , 0x84 , 0x8d , 0x86 , 0x89 , 0xbb , 0x95 , 0xfb , 0x8e , 0xae , 0x53 , 0x68 , 0x69 , 0x66 , 0x74 , 0x5f , 0x4a , 0x49 , 0x53 , 0x82 , 0xcc , 0x90 , 0xdd , 0x8c , 0x76 , 0x8e , 0xd2 , 0x82 , 0xe7 , 0x82 , 0xcd , 0x81 , 0x41 , 0x90 , 0xe6 , 0x8d , 0x73 , 0x82 , 0xb5 , 0x82 , 0xc4 , 0x82 , 0xe6 , 0x82 , 0xad , 0x97 , 0x98 , 0x97 , 0x70 , 0x82 , 0xb3 , 0x82 , 0xea , 0x82 , 0xc4 , 0x82 , 0xa2 , 0x82 , 0xbd , 0x4a , 0x49 , 0x53 , 0x20 , 0x43 , 0x20 , 0x36 , 0x32 , 0x32 , 0x30 , 0x81 , 0x69 , 0x8c , 0xbb , 0x8d , 0xdd , 0x82 , 0xcc , 0x4a , 0x49 , 0x53 , 0x20 , 0x58 , 0x20 , 0x30 , 0x32 , 0x30 , 0x31 , 0x81 , 0x6a , 0x82 , 0xcc , 0x38 , 0x83 , 0x72 , 0x83 , 0x62 , 0x83 , 0x67 , 0x95 , 0x84 , 0x8d , 0x86 , 0x81 , 0x69 , 0x88 , 0xc8 , 0x89 , 0xba , 0x81 , 0x75 , 0x89 , 0x70 , 0x90 , 0x94 , 0x8e , 0x9a , 0x81 , 0x45 , 0x94 , 0xbc , 0x8a , 0x70 , 0x83 , 0x4a , 0x83 , 0x69 , 0x81 , 0x76 , 0x81 , 0x6a , 0x82 , 0xc6 , 0x81 , 0x41 , 0x4a , 0x49 , 0x53 , 0x20 , 0x43 , 0x20 , 0x36 , 0x32 , 0x32 , 0x36 , 0x81 , 0x69 , 0x8c , 0xbb , 0x8d , 0xdd , 0x82 , 0xcc , 0x4a , 0x49 , 0x53 , 0x20 , 0x58 , 0x20 , 0x30 , 0x32 , 0x30 , 0x38 , 0x81 , 0x41 , 0x88 , 0xc8 , 0x89 , 0xba , 0x81 , 0x75 , 0x8a , 0xbf , 0x8e , 0x9a , 0x81 , 0x76 , 0x81 , 0x6a , 0x82 , 0xcc , 0x97 , 0xbc , 0x95 , 0xb6 , 0x8e , 0x9a , 0x8f , 0x57 , 0x8d , 0x87 , 0x82 , 0xf0 , 0x95 , 0x5c , 0x8c , 0xbb , 0x82 , 0xb5 , 0x82 , 0xe6 , 0x82 , 0xa4 , 0x82 , 0xc6 , 0x82 , 0xb5 , 0x82 , 0xbd , 0x81 , 0x42 , 0x82 , 0xdc , 0x82 , 0xbd , 0x81 , 0x41 , 0x83 , 0x74 , 0x83 , 0x40 , 0x83 , 0x43 , 0x83 , 0x8b , 0x82 , 0xcc , 0x91 , 0xe5 , 0x82 , 0xab , 0x82 , 0xb3 , 0x82 , 0xe2 , 0x8f , 0x88 , 0x97 , 0x9d , 0x8e , 0x9e , 0x8a , 0xd4 , 0x82 , 0xcc , 0x92 , 0x5a , 0x8f , 0x6b , 0x82 , 0xf0 , 0x90 , 0x7d , 0x82 , 0xe9 , 0x82 , 0xbd , 0x82 , 0xdf , 0x81 , 0x41 , 0x83 , 0x47 , 0x83 , 0x58 , 0x83 , 0x50 , 0x81 , 0x5b , 0x83 , 0x76 , 0x83 , 0x56 , 0x81 , 0x5b , 0x83 , 0x50 , 0x83 , 0x93 , 0x83 , 0x58 , 
0x82 , 0xc8 , 0x82 , 0xb5 , 0x82 , 0xc5 , 0x8d , 0xac , 0x8d , 0xdd , 0x89 , 0xc2 , 0x94 , 0x5c , 0x82 , 0xc9 , 0x82 , 0xb7 , 0x82 , 0xe9 , 0x82 , 0xb1 , 0x82 , 0xc6 , 0x82 , 0xf0 , 0x8a , 0xe9 , 0x90 , 0x7d , 0x82 , 0xb5 , 0x82 , 0xbd , 0x81 , 0x42 } ,
"文字符号化方式Shift_JISの設計者らは、先行してよく利用されていたJIS C 6220( 現在のJIS X 0201) の8ビット符号( 以下「英数字・半角カナ」) と、JIS C 6226( 現在のJIS X 0208、以下「漢字」) の両文字集合を表現しようとした。また、ファイルの大きさや処理時間の短縮を図るため、エスケープシーケンスなしで混在可能にすることを企図した。" ,
} ,
// add more from mutations of https://en.wikipedia.org/wiki/World_Wide_Web
}
// run tests
for _ , val := range testData {
2022-05-31 13:54:04 +00:00
// fmt.Println("Testing ", val)
2020-04-08 10:59:16 +00:00
expected := [ ] byte ( val . message )
2020-05-28 10:36:42 +00:00
decoded , err := DecodeCharset ( val . original , "text/plain; charset=" + val . charset )
2020-04-08 10:59:16 +00:00
if len ( expected ) == 0 {
if err == nil {
t . Error ( "Expected err but have " , err )
} else {
2022-05-31 13:54:04 +00:00
// fmt.Println("Expected err: ", err)
2020-04-08 10:59:16 +00:00
continue
}
} else {
if err != nil {
t . Error ( "Expected ok but have " , err )
}
}
if bytes . Equal ( decoded , expected ) {
2023-07-07 12:41:10 +00:00
// fmt.Println("Successful decoding of ", val.params, ":", string(decoded))
2020-04-08 10:59:16 +00:00
} else {
2020-05-28 10:36:42 +00:00
t . Error ( "Wrong encoding of " , val . charset , ".Expected\n" , expected , "\nbut have\n" , decoded )
2020-04-08 10:59:16 +00:00
}
if strings . Compare ( val . message , string ( decoded ) ) != 0 {
2020-05-28 10:36:42 +00:00
t . Error ( "Wrong message for " , val . charset , ".Expected\n" , val . message , "\nbut have\n" , string ( decoded ) )
2020-04-08 10:59:16 +00:00
}
}
}