feat: fallback to latin1 if charset not specified and not utf8

This commit is contained in:
Michal Horejsek 2020-04-30 09:22:22 +02:00 committed by James Houlahan
parent a7b9572e6b
commit 37f4e46bdc
5 changed files with 135 additions and 6 deletions

View File

@ -9,6 +9,7 @@ Changelog [format](http://keepachangelog.com/en/1.0.0/)
* IMAP mailbox info update when new mailbox is created
* IMAP extension Unselect
* More logs about event loop activity
* GODT-72 Try ISO-8859-1 encoding if charset is not specified and the content is not valid UTF-8
### Changed
* GODT-162 User Agent does not contain bridge version, only client in format `client name/client version (os)`

View File

@ -30,6 +30,7 @@ import (
"encoding/base64"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/htmlindex"
"golang.org/x/text/transform"
)
@ -197,16 +198,20 @@ func EncodeHeader(s string) string {
}
// DecodeCharset decodes the original using content type parameters.
// When charset is missing it checks thaht the content is valid utf8.
// When charset is missing it checks that the content is valid utf8.
// If it isn't, it checks whether the content is valid latin1 (iso-8859-1), and if so,
// reencodes it as utf-8.
func DecodeCharset(original []byte, contentTypeParams map[string]string) ([]byte, error) {
var decoder *encoding.Decoder
var err error
if charset, ok := contentTypeParams["charset"]; ok {
decoder, err = selectDecoder(charset)
} else if utf8.Valid(original) {
return original, nil
} else if decoded, err = charmap.ISO8859_1.NewDecoder().Bytes(original); err == nil {
return decoded, nil
} else {
if utf8.Valid(original) {
return original, nil
}
err = fmt.Errorf("non-utf8 content without charset specification")
}

View File

@ -0,0 +1,87 @@
Feature: IMAP import messages
Background:
Given there is connected user "user"
And there is IMAP client logged in as "user"
And there is IMAP client selected in "INBOX"
@ignore
Scenario: Import message with double charset in content type
When IMAP client imports message to "INBOX"
"""
From: Bridge Test <bridgetest@pm.test>
To: Internal Bridge <bridgetest@protonmail.com>
Subject: Message with double charset in content type
Content-Type: text/plain; charset=utf-8; charset=utf-8
Content-Disposition: inline
Hello
"""
Then IMAP response is "OK"
@ignore
Scenario: Import message with attachment name encoded by RFC 2047 without quoting
When IMAP client imports message to "INBOX"
"""
From: Bridge Test <bridgetest@pm.test>
To: Internal Bridge <bridgetest@protonmail.com>
Subject: Message with attachment name encoded by RFC 2047 without quoting
Content-type: multipart/mixed; boundary="boundary"
--boundary
Content-Type: text/plain
Hello
--boundary
Content-Type: application/pdf; name==?US-ASCII?Q?filename?=
Content-Disposition: attachment; filename==?US-ASCII?Q?filename?=
somebytes
--boundary--
"""
Then IMAP response is "OK"
Scenario: Import message as latin1 without content type
When IMAP client imports message to "INBOX" with encoding "latin1"
"""
From: Bridge Test <bridgetest@pm.test>
To: Internal Bridge <bridgetest@protonmail.com>
Subject: Message in latin1 without content type
Content-Disposition: inline
Hello íááá
"""
Then IMAP response is "OK"
Scenario: Import message as latin1 with content type
When IMAP client imports message to "INBOX" with encoding "latin1"
"""
From: Bridge Test <bridgetest@pm.test>
To: Internal Bridge <bridgetest@protonmail.com>
Subject: Message in latin1 with content type
Content-Disposition: inline
Content-Type: text/plain; charset=latin1
Hello íááá
"""
Then IMAP response is "OK"
Scenario: Import message as latin1 with wrong content type
When IMAP client imports message to "INBOX" with encoding "latin1"
"""
From: Bridge Test <bridgetest@pm.test>
To: Internal Bridge <bridgetest@protonmail.com>
Subject: Message in latin1 with wrong content type
Content-Disposition: inline
Content-Type: text/plain; charset=KOI8R
Hello íááá
"""
Then IMAP response is "OK"

View File

@ -18,7 +18,11 @@
package tests
import (
"fmt"
"github.com/cucumber/godog"
"github.com/cucumber/godog/gherkin"
"golang.org/x/net/html/charset"
)
func IMAPActionsMessagesFeatureContext(s *godog.Suite) {
@ -29,6 +33,8 @@ func IMAPActionsMessagesFeatureContext(s *godog.Suite) {
s.Step(`^IMAP client "([^"]*)" deletes messages "([^"]*)"$`, imapClientNamedDeletesMessages)
s.Step(`^IMAP client copies messages "([^"]*)" to "([^"]*)"$`, imapClientCopiesMessagesTo)
s.Step(`^IMAP client moves messages "([^"]*)" to "([^"]*)"$`, imapClientMovesMessagesTo)
s.Step(`^IMAP client imports message to "([^"]*)"$`, imapClientCreatesMessage)
s.Step(`^IMAP client imports message to "([^"]*)" with encoding "([^"]*)"$`, imapClientCreatesMessageWithEncoding)
s.Step(`^IMAP client creates message "([^"]*)" from "([^"]*)" to "([^"]*)" with body "([^"]*)" in "([^"]*)"$`, imapClientCreatesMessageFromToWithBody)
s.Step(`^IMAP client creates message "([^"]*)" from "([^"]*)" to address "([^"]*)" of "([^"]*)" with body "([^"]*)" in "([^"]*)"$`, imapClientCreatesMessageFromToAddressOfUserWithBody)
s.Step(`^IMAP client creates message "([^"]*)" from address "([^"]*)" of "([^"]*)" to "([^"]*)" with body "([^"]*)" in "([^"]*)"$`, imapClientCreatesMessageFromAddressOfUserToWithBody)
@ -84,8 +90,33 @@ func imapClientMovesMessagesTo(messageRange, newMailboxName string) error {
return nil
}
// imapClientCreatesMessage is the step handler for importing a doc-string
// message into mailboxName. It delegates to
// imapClientCreatesMessageWithEncoding using the "utf8" encoding name.
func imapClientCreatesMessage(mailboxName string, message *gherkin.DocString) error {
	const encodingName = "utf8"
	return imapClientCreatesMessageWithEncoding(mailboxName, encodingName, message)
}
// imapClientCreatesMessageWithEncoding is the step handler that appends the
// doc-string message to mailboxName through the IMAP client named "imap".
//
// When encodingName is "utf8" the content is sent unchanged. For any other
// name the encoding is resolved with charset.Lookup and the content is
// re-encoded before sending; an unknown name or a failed conversion is
// returned as an error. The IMAP response is stored as the last response of
// the "imap" client, and the step itself then returns nil.
func imapClientCreatesMessageWithEncoding(mailboxName, encodingName string, message *gherkin.DocString) error {
	payload := message.Content

	if encodingName != "utf8" {
		// Only the encoding itself is needed; the canonical name is ignored.
		enc, _ := charset.Lookup(encodingName)
		if enc == nil {
			return fmt.Errorf("unsupported encoding %s", encodingName)
		}

		converted, err := enc.NewEncoder().String(message.Content)
		if err != nil {
			return internalError(err, "encoding message content")
		}
		payload = converted
	}

	response := ctx.GetIMAPClient("imap").Append(mailboxName, payload)
	ctx.SetIMAPLastResponse("imap", response)
	return nil
}
// imapClientCreatesMessageFromToWithBody is the step handler that creates a
// message from the given subject, sender, recipient and body in mailboxName
// using the IMAP client named "imap", and records the response as that
// client's last IMAP response. It always returns nil.
func imapClientCreatesMessageFromToWithBody(subject, from, to, body, mailboxName string) error {
// NOTE(review): the next two lines look like the old and the new side of a
// diff hunk; only the AppendBody call matches a five-argument client method
// visible here — confirm which line is live.
res := ctx.GetIMAPClient("imap").Append(mailboxName, subject, from, to, body)
res := ctx.GetIMAPClient("imap").AppendBody(mailboxName, subject, from, to, body)
ctx.SetIMAPLastResponse("imap", res)
return nil
}

View File

@ -162,7 +162,12 @@ func (c *IMAPClient) Search(query string) *IMAPResponse {
// Message
func (c *IMAPClient) Append(mailboxName, subject, from, to, body string) *IMAPResponse {
// Append sends an IMAP APPEND command that stores the raw message msg in
// mailboxName and returns the response from SendCommand.
//
// The command marks the message with the \Seen flag, uses a fixed date-time
// string, and passes msg as a literal whose announced size is len(msg),
// i.e. its length in bytes.
func (c *IMAPClient) Append(mailboxName, msg string) *IMAPResponse {
	const appendFormat = "APPEND \"%s\" (\\Seen) \"25-Mar-2021 00:30:00 +0100\" {%d}\r\n%s"

	literalSize := len(msg)
	return c.SendCommand(fmt.Sprintf(appendFormat, mailboxName, literalSize, msg))
}
func (c *IMAPClient) AppendBody(mailboxName, subject, from, to, body string) *IMAPResponse {
msg := fmt.Sprintf("Subject: %s\r\n", subject)
msg += fmt.Sprintf("From: %s\r\n", from)
msg += fmt.Sprintf("To: %s\r\n", to)