209 lines
4.8 KiB
Go
209 lines
4.8 KiB
Go
// Copyright (c) 2023 Proton AG
|
|
//
|
|
// This file is part of Proton Mail Bridge.
|
|
//
|
|
// Proton Mail Bridge is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// Proton Mail Bridge is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with Proton Mail Bridge. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
package parser
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"mime"
|
|
"unicode/utf8"
|
|
|
|
pmmime "github.com/ProtonMail/proton-bridge/v3/pkg/mime"
|
|
"github.com/PuerkitoBio/goquery"
|
|
"github.com/emersion/go-message"
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/net/html"
|
|
"golang.org/x/net/html/charset"
|
|
"golang.org/x/text/encoding"
|
|
)
|
|
|
|
// Parts is a list of pointers to Part values.
type Parts []*Part
|
|
|
|
type Part struct {
|
|
Header message.Header
|
|
Body []byte
|
|
children Parts
|
|
}
|
|
|
|
func (p *Part) ContentType() (string, map[string]string, error) {
|
|
t, params, err := p.Header.ContentType()
|
|
if err != nil {
|
|
// go-message's implementation of ContentType() doesn't handle duplicate parameters
|
|
// e.g. Content-Type: text/plain; charset=utf-8; charset=UTF-8
|
|
// so if it fails, we try again with pmmime's implementation, which does.
|
|
t, params, err = pmmime.ParseMediaType(p.Header.Get("Content-Type"))
|
|
}
|
|
|
|
return t, params, err
|
|
}
|
|
|
|
func (p *Part) Child(n int) (part *Part, err error) {
|
|
if len(p.children) < n {
|
|
return nil, errors.New("no such part")
|
|
}
|
|
|
|
return p.children[n-1], nil
|
|
}
|
|
|
|
func (p *Part) Children() Parts {
|
|
return p.children
|
|
}
|
|
|
|
func (p *Part) AddChild(child *Part) {
|
|
if p.isMultipartMixed() {
|
|
p.children = append(p.children, child)
|
|
} else {
|
|
root := &Part{
|
|
Header: getContentHeaders(p.Header),
|
|
Body: p.Body,
|
|
children: p.children,
|
|
}
|
|
|
|
p.Body = nil
|
|
p.children = Parts{root, child}
|
|
stripContentHeaders(&p.Header)
|
|
p.Header.Set("Content-Type", "multipart/mixed")
|
|
}
|
|
}
|
|
|
|
func (p *Part) ConvertToUTF8() error {
|
|
logrus.Trace("Converting part to utf-8")
|
|
|
|
t, params, err := p.ContentType()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
decoder := selectSuitableDecoder(p, t, params)
|
|
|
|
if p.Body, err = decoder.Bytes(p.Body); err != nil {
|
|
return err
|
|
}
|
|
|
|
if params == nil {
|
|
params = make(map[string]string)
|
|
}
|
|
|
|
params["charset"] = "UTF-8"
|
|
|
|
p.Header.SetContentType(t, params)
|
|
|
|
return nil
|
|
}
|
|
|
|
func (p *Part) ConvertMetaCharset() error {
|
|
doc, err := html.Parse(bytes.NewReader(p.Body))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
goquery.NewDocumentFromNode(doc).Find("meta").Each(func(n int, sel *goquery.Selection) {
|
|
if val, ok := sel.Attr("content"); ok {
|
|
t, params, err := pmmime.ParseMediaType(val)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
params["charset"] = "UTF-8"
|
|
|
|
sel.SetAttr("content", mime.FormatMediaType(t, params))
|
|
}
|
|
|
|
if _, ok := sel.Attr("charset"); ok {
|
|
sel.SetAttr("charset", "UTF-8")
|
|
}
|
|
})
|
|
|
|
buf := new(bytes.Buffer)
|
|
|
|
if err := html.Render(buf, doc); err != nil {
|
|
return err
|
|
}
|
|
|
|
p.Body = buf.Bytes()
|
|
|
|
return nil
|
|
}
|
|
|
|
func selectSuitableDecoder(p *Part, t string, params map[string]string) *encoding.Decoder {
|
|
if charset, ok := params["charset"]; ok {
|
|
logrus.WithField("charset", charset).Trace("The part has a specified charset")
|
|
|
|
if decoder, err := pmmime.SelectDecoder(charset); err == nil {
|
|
logrus.Trace("The charset is known; decoder has been selected")
|
|
return decoder
|
|
}
|
|
|
|
logrus.Warn("The charset is unknown; no decoder could be selected")
|
|
}
|
|
|
|
if utf8.Valid(p.Body) {
|
|
logrus.Trace("The part is already valid utf-8, returning noop encoder")
|
|
return encoding.Nop.NewDecoder()
|
|
}
|
|
|
|
encoding, name, _ := charset.DetermineEncoding(p.Body, t)
|
|
|
|
logrus.WithField("name", name).Warn("Determined encoding by reading body")
|
|
|
|
return encoding.NewDecoder()
|
|
}
|
|
|
|
func (p *Part) is7BitClean() bool {
|
|
for _, b := range p.Body {
|
|
if b > 1<<7 {
|
|
return false
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
func (p *Part) isMultipartMixed() bool {
|
|
t, _, err := p.ContentType()
|
|
if err != nil {
|
|
return false
|
|
}
|
|
|
|
return t == "multipart/mixed"
|
|
}
|
|
|
|
func getContentHeaders(header message.Header) message.Header {
|
|
var res message.Header
|
|
|
|
if contentType := header.Get("Content-Type"); contentType != "" {
|
|
res.Set("Content-Type", contentType)
|
|
}
|
|
|
|
if contentDisposition := header.Get("Content-Disposition"); contentDisposition != "" {
|
|
res.Set("Content-Disposition", contentDisposition)
|
|
}
|
|
|
|
if contentTransferEncoding := header.Get("Content-Transfer-Encoding"); contentTransferEncoding != "" {
|
|
res.Set("Content-Transfer-Encoding", contentTransferEncoding)
|
|
}
|
|
|
|
return res
|
|
}
|
|
|
|
func stripContentHeaders(header *message.Header) {
|
|
header.Del("Content-Type")
|
|
header.Del("Content-Disposition")
|
|
header.Del("Content-Transfer-Encoding")
|
|
}
|