2024-01-02 13:32:11 +00:00
|
|
|
// Copyright (c) 2024 Proton AG
|
2020-04-08 10:59:16 +00:00
|
|
|
//
|
2022-04-05 13:50:21 +00:00
|
|
|
// This file is part of Proton Mail Bridge.
|
2020-04-08 10:59:16 +00:00
|
|
|
//
|
2022-04-05 13:50:21 +00:00
|
|
|
// Proton Mail Bridge is free software: you can redistribute it and/or modify
|
2020-04-08 10:59:16 +00:00
|
|
|
// it under the terms of the GNU General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
2022-04-05 13:50:21 +00:00
|
|
|
// Proton Mail Bridge is distributed in the hope that it will be useful,
|
2020-04-08 10:59:16 +00:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU General Public License
|
2022-04-05 13:50:21 +00:00
|
|
|
// along with Proton Mail Bridge. If not, see <https://www.gnu.org/licenses/>.
|
2020-04-08 10:59:16 +00:00
|
|
|
|
|
|
|
package message
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
2020-07-02 14:17:04 +00:00
|
|
|
"fmt"
|
2020-04-08 10:59:16 +00:00
|
|
|
"io"
|
|
|
|
"mime"
|
|
|
|
"net/mail"
|
2021-01-29 11:04:02 +00:00
|
|
|
"regexp"
|
2020-04-08 10:59:16 +00:00
|
|
|
"strings"
|
|
|
|
|
2023-03-17 14:23:22 +00:00
|
|
|
"github.com/ProtonMail/gluon/rfc5322"
|
2022-08-26 15:00:21 +00:00
|
|
|
"github.com/ProtonMail/gluon/rfc822"
|
2022-11-23 14:17:56 +00:00
|
|
|
"github.com/ProtonMail/go-proton-api"
|
2022-11-23 14:25:41 +00:00
|
|
|
"github.com/ProtonMail/proton-bridge/v3/pkg/message/parser"
|
|
|
|
pmmime "github.com/ProtonMail/proton-bridge/v3/pkg/mime"
|
2020-07-02 08:59:15 +00:00
|
|
|
"github.com/emersion/go-message"
|
2023-10-31 10:41:50 +00:00
|
|
|
"github.com/google/uuid"
|
2020-04-08 10:59:16 +00:00
|
|
|
"github.com/jaytaylor/html2text"
|
2020-08-26 14:03:33 +00:00
|
|
|
"github.com/pkg/errors"
|
|
|
|
"github.com/sirupsen/logrus"
|
2020-04-08 10:59:16 +00:00
|
|
|
)
|
|
|
|
|
2022-08-26 15:00:21 +00:00
|
|
|
// MIMEBody is the string type of Message.MIMEBody, which parse fills with the
// serialized message produced by buildMIMEBody.
type MIMEBody string
|
|
|
|
|
|
|
|
// Body is the string type used for Message.RichBody and Message.PlainBody.
type Body string
|
|
|
|
|
|
|
|
type Message struct {
|
2022-10-02 11:28:41 +00:00
|
|
|
MIMEBody MIMEBody
|
|
|
|
RichBody Body
|
|
|
|
PlainBody Body
|
|
|
|
Attachments []Attachment
|
|
|
|
MIMEType rfc822.MIMEType
|
2022-10-28 14:46:56 +00:00
|
|
|
IsReply bool
|
2022-08-26 15:00:21 +00:00
|
|
|
|
|
|
|
Subject string
|
|
|
|
Sender *mail.Address
|
|
|
|
ToList []*mail.Address
|
|
|
|
CCList []*mail.Address
|
|
|
|
BCCList []*mail.Address
|
|
|
|
ReplyTos []*mail.Address
|
|
|
|
|
2022-10-02 11:28:41 +00:00
|
|
|
References []string
|
|
|
|
ExternalID string
|
2022-10-28 14:46:56 +00:00
|
|
|
InReplyTo string
|
2023-11-16 14:48:04 +00:00
|
|
|
XForward string
|
2022-08-26 15:00:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
type Attachment struct {
|
|
|
|
Header mail.Header
|
|
|
|
Name string
|
|
|
|
ContentID string
|
|
|
|
MIMEType string
|
2023-01-30 19:12:41 +00:00
|
|
|
MIMEParams map[string]string
|
2023-01-24 15:09:01 +00:00
|
|
|
Disposition proton.Disposition
|
2022-08-26 15:00:21 +00:00
|
|
|
Data []byte
|
|
|
|
}
|
|
|
|
|
|
|
|
// Parse parses an RFC822 message.
|
|
|
|
func Parse(r io.Reader) (m Message, err error) {
|
2023-03-17 12:11:07 +00:00
|
|
|
return parseIOReaderImpl(r, false)
|
|
|
|
}
|
|
|
|
|
|
|
|
// ParseAndAllowInvalidAddressLists parses an RFC822 message and allows email address lists to be invalid.
|
|
|
|
func ParseAndAllowInvalidAddressLists(r io.Reader) (m Message, err error) {
|
|
|
|
return parseIOReaderImpl(r, true)
|
|
|
|
}
|
|
|
|
|
|
|
|
func parseIOReaderImpl(r io.Reader, allowInvalidAddressLists bool) (m Message, err error) {
|
2020-11-23 10:56:57 +00:00
|
|
|
defer func() {
|
2022-08-26 15:00:21 +00:00
|
|
|
if r := recover(); r != nil {
|
|
|
|
err = fmt.Errorf("panic while parsing message: %v", r)
|
2020-11-23 10:56:57 +00:00
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2020-08-14 12:09:24 +00:00
|
|
|
p, err := parser.New(r)
|
2020-07-02 10:50:09 +00:00
|
|
|
if err != nil {
|
2022-08-26 15:00:21 +00:00
|
|
|
return Message{}, errors.Wrap(err, "failed to create new parser")
|
2020-12-16 10:17:00 +00:00
|
|
|
}
|
|
|
|
|
2023-03-17 12:11:07 +00:00
|
|
|
return parse(p, allowInvalidAddressLists)
|
2022-08-26 15:00:21 +00:00
|
|
|
}
|
2020-04-08 10:59:16 +00:00
|
|
|
|
2022-10-18 11:54:12 +00:00
|
|
|
// ParseWithParser parses an RFC822 message using an existing parser.
|
2023-03-17 12:11:07 +00:00
|
|
|
func ParseWithParser(p *parser.Parser, allowInvalidAddressLists bool) (m Message, err error) {
|
2022-08-26 15:00:21 +00:00
|
|
|
defer func() {
|
|
|
|
if r := recover(); r != nil {
|
|
|
|
err = fmt.Errorf("panic while parsing message: %v", r)
|
|
|
|
}
|
|
|
|
}()
|
2020-12-16 10:17:00 +00:00
|
|
|
|
2023-03-17 12:11:07 +00:00
|
|
|
return parse(p, allowInvalidAddressLists)
|
2020-12-16 10:17:00 +00:00
|
|
|
}
|
|
|
|
|
2023-03-17 12:11:07 +00:00
|
|
|
func parse(p *parser.Parser, allowInvalidAddressLists bool) (Message, error) {
|
2022-08-26 15:00:21 +00:00
|
|
|
if err := convertEncodedTransferEncoding(p); err != nil {
|
|
|
|
return Message{}, errors.Wrap(err, "failed to convert encoded transfer encoding")
|
2020-12-07 11:27:43 +00:00
|
|
|
}
|
|
|
|
|
2022-08-26 15:00:21 +00:00
|
|
|
if err := convertForeignEncodings(p); err != nil {
|
|
|
|
return Message{}, errors.Wrap(err, "failed to convert foreign encodings")
|
2020-08-06 08:27:08 +00:00
|
|
|
}
|
|
|
|
|
2023-10-31 10:41:50 +00:00
|
|
|
if err := patchInlineImages(p); err != nil {
|
2024-01-18 16:45:08 +00:00
|
|
|
return Message{}, errors.Wrap(err, "patching inline images failed")
|
2023-10-31 10:41:50 +00:00
|
|
|
}
|
|
|
|
|
2023-03-17 12:11:07 +00:00
|
|
|
m, err := parseMessageHeader(p.Root().Header, allowInvalidAddressLists)
|
2022-08-26 15:00:21 +00:00
|
|
|
if err != nil {
|
|
|
|
return Message{}, errors.Wrap(err, "failed to parse message header")
|
|
|
|
}
|
2020-04-08 10:59:16 +00:00
|
|
|
|
2022-08-26 15:00:21 +00:00
|
|
|
atts, err := collectAttachments(p)
|
|
|
|
if err != nil {
|
|
|
|
return Message{}, errors.Wrap(err, "failed to collect attachments")
|
2020-04-08 10:59:16 +00:00
|
|
|
}
|
|
|
|
|
2022-08-26 15:00:21 +00:00
|
|
|
m.Attachments = atts
|
|
|
|
|
|
|
|
richBody, plainBody, err := buildBodies(p)
|
|
|
|
if err != nil {
|
|
|
|
return Message{}, errors.Wrap(err, "failed to build bodies")
|
2020-04-08 10:59:16 +00:00
|
|
|
}
|
|
|
|
|
2022-08-26 15:00:21 +00:00
|
|
|
mimeBody, err := buildMIMEBody(p)
|
|
|
|
if err != nil {
|
|
|
|
return Message{}, errors.Wrap(err, "failed to build mime body")
|
2020-04-08 10:59:16 +00:00
|
|
|
}
|
|
|
|
|
2022-08-26 15:00:21 +00:00
|
|
|
m.RichBody = Body(richBody)
|
|
|
|
m.PlainBody = Body(plainBody)
|
|
|
|
m.MIMEBody = MIMEBody(mimeBody)
|
|
|
|
|
2023-11-03 08:55:01 +00:00
|
|
|
mimeType, err := determineBodyMIMEType(p)
|
2022-08-26 15:00:21 +00:00
|
|
|
if err != nil {
|
|
|
|
return Message{}, errors.Wrap(err, "failed to get mime type")
|
2020-07-02 14:31:12 +00:00
|
|
|
}
|
|
|
|
|
2022-08-26 15:00:21 +00:00
|
|
|
m.MIMEType = rfc822.MIMEType(mimeType)
|
|
|
|
|
|
|
|
return m, nil
|
2020-12-16 10:17:00 +00:00
|
|
|
}
|
2020-08-21 11:45:20 +00:00
|
|
|
|
2022-08-26 15:00:21 +00:00
|
|
|
// buildMIMEBody builds mime body from the parser returned by NewParser.
|
|
|
|
func buildMIMEBody(p *parser.Parser) (mimeBody string, err error) {
|
|
|
|
buf := new(bytes.Buffer)
|
2020-08-14 12:09:24 +00:00
|
|
|
|
2022-08-26 15:00:21 +00:00
|
|
|
if err := p.NewWriter().Write(buf); err != nil {
|
|
|
|
return "", fmt.Errorf("failed to write message: %w", err)
|
2020-08-14 12:09:24 +00:00
|
|
|
}
|
|
|
|
|
2022-08-26 15:00:21 +00:00
|
|
|
return buf.String(), nil
|
2020-08-06 08:27:08 +00:00
|
|
|
}
|
2020-04-08 10:59:16 +00:00
|
|
|
|
2020-12-07 11:27:43 +00:00
|
|
|
// convertEncodedTransferEncoding decodes any RFC2047-encoded content transfer encodings.
|
|
|
|
// Such content transfer encodings go against RFC but still exist in the wild anyway.
|
|
|
|
func convertEncodedTransferEncoding(p *parser.Parser) error {
|
|
|
|
logrus.Trace("Converting encoded transfer encoding")
|
|
|
|
|
|
|
|
return p.NewWalker().
|
|
|
|
RegisterDefaultHandler(func(p *parser.Part) error {
|
|
|
|
encoding := p.Header.Get("Content-Transfer-Encoding")
|
|
|
|
if encoding == "" {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
dec, err := pmmime.WordDec.DecodeHeader(encoding)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
p.Header.Set("Content-Transfer-Encoding", dec)
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}).
|
|
|
|
Walk()
|
|
|
|
}
|
|
|
|
|
2020-08-06 08:27:08 +00:00
|
|
|
func convertForeignEncodings(p *parser.Parser) error {
|
2020-09-14 13:27:48 +00:00
|
|
|
logrus.Trace("Converting foreign encodings")
|
2020-08-26 14:03:33 +00:00
|
|
|
|
|
|
|
return p.NewWalker().
|
2020-09-04 10:52:07 +00:00
|
|
|
RegisterContentTypeHandler("text/html", func(p *parser.Part) error {
|
|
|
|
if err := p.ConvertToUTF8(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return p.ConvertMetaCharset()
|
|
|
|
}).
|
2020-08-26 14:03:33 +00:00
|
|
|
RegisterContentTypeHandler("text/.*", func(p *parser.Part) error {
|
|
|
|
return p.ConvertToUTF8()
|
|
|
|
}).
|
|
|
|
Walk()
|
2020-04-08 10:59:16 +00:00
|
|
|
}
|
|
|
|
|
2022-08-26 15:00:21 +00:00
|
|
|
func collectAttachments(p *parser.Parser) ([]Attachment, error) {
|
2020-08-06 08:01:00 +00:00
|
|
|
var (
|
2022-08-26 15:00:21 +00:00
|
|
|
atts []Attachment
|
2020-08-06 08:01:00 +00:00
|
|
|
err error
|
|
|
|
)
|
|
|
|
|
2020-08-03 12:44:12 +00:00
|
|
|
w := p.NewWalker().
|
|
|
|
RegisterContentDispositionHandler("attachment", func(p *parser.Part) error {
|
2022-08-26 15:00:21 +00:00
|
|
|
att, err := parseAttachment(p.Header, p.Body)
|
2020-08-03 12:44:12 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
atts = append(atts, att)
|
2020-07-29 11:59:52 +00:00
|
|
|
|
2020-08-03 12:44:12 +00:00
|
|
|
return nil
|
|
|
|
}).
|
|
|
|
RegisterContentTypeHandler("text/calendar", func(p *parser.Part) error {
|
2022-08-26 15:00:21 +00:00
|
|
|
att, err := parseAttachment(p.Header, p.Body)
|
2020-07-02 10:50:09 +00:00
|
|
|
if err != nil {
|
2020-08-03 12:44:12 +00:00
|
|
|
return err
|
2020-04-08 10:59:16 +00:00
|
|
|
}
|
|
|
|
|
2020-07-02 10:50:09 +00:00
|
|
|
atts = append(atts, att)
|
2020-04-08 10:59:16 +00:00
|
|
|
|
2020-08-03 12:44:12 +00:00
|
|
|
return nil
|
|
|
|
}).
|
|
|
|
RegisterContentTypeHandler("text/.*", func(p *parser.Part) error {
|
|
|
|
return nil
|
|
|
|
}).
|
|
|
|
RegisterDefaultHandler(func(p *parser.Part) error {
|
|
|
|
if len(p.Children()) > 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-08-26 15:00:21 +00:00
|
|
|
att, err := parseAttachment(p.Header, p.Body)
|
2020-08-03 12:44:12 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
atts = append(atts, att)
|
|
|
|
|
|
|
|
return nil
|
2020-07-02 10:50:09 +00:00
|
|
|
})
|
2020-07-02 08:59:15 +00:00
|
|
|
|
2020-07-02 10:50:09 +00:00
|
|
|
if err = w.Walk(); err != nil {
|
2022-08-26 15:00:21 +00:00
|
|
|
return nil, err
|
2020-07-02 08:59:15 +00:00
|
|
|
}
|
|
|
|
|
2022-08-26 15:00:21 +00:00
|
|
|
return atts, nil
|
2020-07-02 08:59:15 +00:00
|
|
|
}
|
2020-04-08 10:59:16 +00:00
|
|
|
|
2020-08-18 08:55:26 +00:00
|
|
|
// buildBodies collects all text/html and text/plain parts and returns two bodies,
|
2022-10-17 09:02:56 +00:00
|
|
|
// - a rich text body (in which html is allowed), and
|
|
|
|
// - a plaintext body (in which html is converted to plaintext).
|
2020-08-18 08:55:26 +00:00
|
|
|
//
|
|
|
|
// text/html parts are converted to plaintext in order to build the plaintext body,
|
|
|
|
// unless there is already a plaintext part provided via multipart/alternative,
|
|
|
|
// in which case the provided alternative is chosen.
|
2020-07-30 12:51:34 +00:00
|
|
|
func buildBodies(p *parser.Parser) (richBody, plainBody string, err error) {
|
|
|
|
richParts, err := collectBodyParts(p, "text/html")
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
2020-07-02 14:17:04 +00:00
|
|
|
|
2020-07-30 12:51:34 +00:00
|
|
|
plainParts, err := collectBodyParts(p, "text/plain")
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
2020-07-29 11:59:52 +00:00
|
|
|
|
2020-07-30 12:51:34 +00:00
|
|
|
richBuilder, plainBuilder := strings.Builder{}, strings.Builder{}
|
2020-06-30 15:43:04 +00:00
|
|
|
|
2020-08-24 08:04:08 +00:00
|
|
|
for _, richPart := range richParts {
|
|
|
|
_, _ = richBuilder.Write(richPart.Body)
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, plainPart := range plainParts {
|
|
|
|
_, _ = plainBuilder.Write(getPlainBody(plainPart))
|
2020-07-30 12:51:34 +00:00
|
|
|
}
|
2020-07-29 11:59:52 +00:00
|
|
|
|
2020-07-30 12:51:34 +00:00
|
|
|
return richBuilder.String(), plainBuilder.String(), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// collectBodyParts collects all body parts in the parse tree, preferring
|
|
|
|
// parts of the given content type if alternatives exist.
|
|
|
|
func collectBodyParts(p *parser.Parser, preferredContentType string) (parser.Parts, error) {
|
2020-08-03 12:44:12 +00:00
|
|
|
v := p.
|
2020-07-30 12:51:34 +00:00
|
|
|
NewVisitor(func(p *parser.Part, visit parser.Visit) (interface{}, error) {
|
|
|
|
childParts, err := collectChildParts(p, visit)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2020-07-29 11:59:52 +00:00
|
|
|
}
|
|
|
|
|
2020-07-30 12:51:34 +00:00
|
|
|
return joinChildParts(childParts), nil
|
|
|
|
}).
|
|
|
|
RegisterRule("multipart/alternative", func(p *parser.Part, visit parser.Visit) (interface{}, error) {
|
|
|
|
childParts, err := collectChildParts(p, visit)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2020-08-06 08:01:00 +00:00
|
|
|
return bestChoice(childParts, preferredContentType), nil
|
2020-07-30 12:51:34 +00:00
|
|
|
}).
|
|
|
|
RegisterRule("text/plain", func(p *parser.Part, visit parser.Visit) (interface{}, error) {
|
2023-11-10 08:36:46 +00:00
|
|
|
if p.IsAttachment() {
|
2020-08-06 08:27:08 +00:00
|
|
|
return parser.Parts{}, nil
|
|
|
|
}
|
|
|
|
|
2020-07-30 12:51:34 +00:00
|
|
|
return parser.Parts{p}, nil
|
|
|
|
}).
|
|
|
|
RegisterRule("text/html", func(p *parser.Part, visit parser.Visit) (interface{}, error) {
|
2023-11-10 08:36:46 +00:00
|
|
|
if p.IsAttachment() {
|
2020-08-06 08:27:08 +00:00
|
|
|
return parser.Parts{}, nil
|
|
|
|
}
|
|
|
|
|
2020-07-30 12:51:34 +00:00
|
|
|
return parser.Parts{p}, nil
|
2020-06-30 15:43:04 +00:00
|
|
|
})
|
|
|
|
|
2020-08-03 12:44:12 +00:00
|
|
|
res, err := v.Visit()
|
2020-07-30 12:51:34 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2020-06-30 15:43:04 +00:00
|
|
|
}
|
|
|
|
|
2022-04-05 13:50:21 +00:00
|
|
|
return res.(parser.Parts), nil //nolint:forcetypeassert
|
2020-07-29 11:59:52 +00:00
|
|
|
}
|
|
|
|
|
2020-07-30 12:51:34 +00:00
|
|
|
func collectChildParts(p *parser.Part, visit parser.Visit) ([]parser.Parts, error) {
|
|
|
|
childParts := []parser.Parts{}
|
|
|
|
|
|
|
|
for _, child := range p.Children() {
|
|
|
|
res, err := visit(child)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2022-04-05 13:50:21 +00:00
|
|
|
childParts = append(childParts, res.(parser.Parts)) //nolint:forcetypeassert
|
2020-07-29 11:59:52 +00:00
|
|
|
}
|
|
|
|
|
2020-07-30 12:51:34 +00:00
|
|
|
return childParts, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func joinChildParts(childParts []parser.Parts) parser.Parts {
|
|
|
|
res := parser.Parts{}
|
|
|
|
|
|
|
|
for _, parts := range childParts {
|
|
|
|
res = append(res, parts...)
|
|
|
|
}
|
|
|
|
|
|
|
|
return res
|
|
|
|
}
|
|
|
|
|
2020-08-06 08:01:00 +00:00
|
|
|
func bestChoice(childParts []parser.Parts, preferredContentType string) parser.Parts {
|
2020-07-30 12:51:34 +00:00
|
|
|
// If one of the parts has preferred content type, use that.
|
|
|
|
for i := len(childParts) - 1; i >= 0; i-- {
|
2020-08-05 15:00:06 +00:00
|
|
|
if allPartsHaveContentType(childParts[i], preferredContentType) {
|
2020-08-06 08:01:00 +00:00
|
|
|
return childParts[i]
|
2020-07-30 12:51:34 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-13 15:42:10 +00:00
|
|
|
// Otherwise, choose the last one, if it exists.
|
|
|
|
if len(childParts) > 0 {
|
|
|
|
return childParts[len(childParts)-1]
|
|
|
|
}
|
|
|
|
|
|
|
|
return parser.Parts{}
|
2020-07-30 12:51:34 +00:00
|
|
|
}
|
|
|
|
|
2020-08-05 15:00:06 +00:00
|
|
|
func allPartsHaveContentType(parts parser.Parts, contentType string) bool {
|
2020-08-11 08:57:08 +00:00
|
|
|
if len(parts) == 0 {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2020-07-30 12:51:34 +00:00
|
|
|
for _, part := range parts {
|
2020-09-24 12:18:05 +00:00
|
|
|
t, _, err := part.ContentType()
|
2020-07-30 12:51:34 +00:00
|
|
|
if err != nil {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
if t != contentType {
|
|
|
|
return false
|
|
|
|
}
|
2020-07-29 11:59:52 +00:00
|
|
|
}
|
|
|
|
|
2020-07-30 12:51:34 +00:00
|
|
|
return true
|
2020-07-29 11:59:52 +00:00
|
|
|
}
|
|
|
|
|
2023-11-03 08:55:01 +00:00
|
|
|
func determineBodyMIMEType(p *parser.Parser) (string, error) {
|
2020-07-29 11:59:52 +00:00
|
|
|
var isHTML bool
|
|
|
|
|
2020-08-03 12:44:12 +00:00
|
|
|
w := p.NewWalker().
|
|
|
|
RegisterContentTypeHandler("text/html", func(p *parser.Part) (err error) {
|
2020-07-29 11:59:52 +00:00
|
|
|
isHTML = true
|
|
|
|
return
|
|
|
|
})
|
|
|
|
|
2023-11-03 08:55:01 +00:00
|
|
|
if err := w.WalkSkipAttachment(); err != nil {
|
2020-07-29 11:59:52 +00:00
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
if isHTML {
|
|
|
|
return "text/html", nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return "text/plain", nil
|
2020-07-02 10:50:09 +00:00
|
|
|
}
|
|
|
|
|
2020-08-18 08:55:26 +00:00
|
|
|
// getPlainBody returns the body of the given part, converting html to
|
|
|
|
// plaintext where possible.
|
2020-07-30 12:51:34 +00:00
|
|
|
func getPlainBody(part *parser.Part) []byte {
|
2020-09-24 12:18:05 +00:00
|
|
|
contentType, _, err := part.ContentType()
|
2020-07-30 12:51:34 +00:00
|
|
|
if err != nil {
|
|
|
|
return part.Body
|
|
|
|
}
|
|
|
|
|
|
|
|
switch contentType {
|
|
|
|
case "text/html":
|
|
|
|
text, err := html2text.FromReader(bytes.NewReader(part.Body))
|
|
|
|
if err != nil {
|
|
|
|
return part.Body
|
|
|
|
}
|
|
|
|
|
|
|
|
return []byte(text)
|
|
|
|
|
|
|
|
default:
|
|
|
|
return part.Body
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-17 12:11:07 +00:00
|
|
|
func parseMessageHeader(h message.Header, allowInvalidAddressLists bool) (Message, error) {
|
2022-08-26 15:00:21 +00:00
|
|
|
var m Message
|
2020-07-02 14:17:04 +00:00
|
|
|
|
2022-10-02 11:28:41 +00:00
|
|
|
for fields := h.Fields(); fields.Next(); {
|
2020-10-21 11:37:41 +00:00
|
|
|
switch strings.ToLower(fields.Key()) {
|
2020-07-02 10:50:09 +00:00
|
|
|
case "subject":
|
2020-10-21 11:37:41 +00:00
|
|
|
s, err := fields.Text()
|
|
|
|
if err != nil {
|
|
|
|
if s, err = pmmime.DecodeHeader(fields.Value()); err != nil {
|
2022-08-26 15:00:21 +00:00
|
|
|
return Message{}, errors.Wrap(err, "failed to parse subject")
|
2020-10-21 11:37:41 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
m.Subject = s
|
2020-04-08 10:59:16 +00:00
|
|
|
|
2020-07-02 10:50:09 +00:00
|
|
|
case "from":
|
2020-10-21 11:37:41 +00:00
|
|
|
sender, err := rfc5322.ParseAddressList(fields.Value())
|
2020-08-05 09:55:28 +00:00
|
|
|
if err != nil {
|
2023-03-17 12:11:07 +00:00
|
|
|
if !allowInvalidAddressLists {
|
|
|
|
return Message{}, errors.Wrap(err, "failed to parse from")
|
|
|
|
}
|
|
|
|
|
|
|
|
logrus.WithError(err).Warn("failed to parse from")
|
2020-07-02 10:50:09 +00:00
|
|
|
}
|
2022-10-02 11:28:41 +00:00
|
|
|
|
2020-09-14 12:41:14 +00:00
|
|
|
if len(sender) > 0 {
|
|
|
|
m.Sender = sender[0]
|
|
|
|
}
|
2020-04-08 10:59:16 +00:00
|
|
|
|
2020-07-02 10:50:09 +00:00
|
|
|
case "to":
|
2020-10-21 11:37:41 +00:00
|
|
|
toList, err := rfc5322.ParseAddressList(fields.Value())
|
2020-08-05 09:55:28 +00:00
|
|
|
if err != nil {
|
2023-03-17 12:11:07 +00:00
|
|
|
if !allowInvalidAddressLists {
|
|
|
|
return Message{}, errors.Wrap(err, "failed to parse to")
|
|
|
|
}
|
|
|
|
|
|
|
|
logrus.WithError(err).Warn("failed to parse to")
|
2020-07-02 10:50:09 +00:00
|
|
|
}
|
2022-10-02 11:28:41 +00:00
|
|
|
|
2020-08-05 09:55:28 +00:00
|
|
|
m.ToList = toList
|
2020-04-08 10:59:16 +00:00
|
|
|
|
2020-07-02 10:50:09 +00:00
|
|
|
case "reply-to":
|
2020-10-21 11:37:41 +00:00
|
|
|
replyTos, err := rfc5322.ParseAddressList(fields.Value())
|
2020-08-05 09:55:28 +00:00
|
|
|
if err != nil {
|
2023-03-17 12:11:07 +00:00
|
|
|
if !allowInvalidAddressLists {
|
|
|
|
return Message{}, errors.Wrap(err, "failed to parse reply-to")
|
|
|
|
}
|
|
|
|
|
|
|
|
logrus.WithError(err).Warn("failed to parse reply-to")
|
2020-07-02 10:50:09 +00:00
|
|
|
}
|
2022-10-02 11:28:41 +00:00
|
|
|
|
2020-08-05 09:55:28 +00:00
|
|
|
m.ReplyTos = replyTos
|
2020-04-08 10:59:16 +00:00
|
|
|
|
2020-07-02 10:50:09 +00:00
|
|
|
case "cc":
|
2020-10-21 11:37:41 +00:00
|
|
|
ccList, err := rfc5322.ParseAddressList(fields.Value())
|
2020-08-05 09:55:28 +00:00
|
|
|
if err != nil {
|
2023-03-17 12:11:07 +00:00
|
|
|
if !allowInvalidAddressLists {
|
|
|
|
return Message{}, errors.Wrap(err, "failed to parse cc")
|
|
|
|
}
|
|
|
|
|
|
|
|
logrus.WithError(err).Warn("failed to parse cc")
|
2020-07-02 10:50:09 +00:00
|
|
|
}
|
2022-10-02 11:28:41 +00:00
|
|
|
|
2020-08-05 09:55:28 +00:00
|
|
|
m.CCList = ccList
|
2020-04-08 10:59:16 +00:00
|
|
|
|
2020-07-02 10:50:09 +00:00
|
|
|
case "bcc":
|
2020-10-21 11:37:41 +00:00
|
|
|
bccList, err := rfc5322.ParseAddressList(fields.Value())
|
2020-08-05 09:55:28 +00:00
|
|
|
if err != nil {
|
2023-03-17 12:11:07 +00:00
|
|
|
if !allowInvalidAddressLists {
|
|
|
|
return Message{}, errors.Wrap(err, "failed to parse bcc")
|
|
|
|
}
|
|
|
|
|
|
|
|
logrus.WithError(err).Warn("failed to parse bcc")
|
2020-07-02 10:50:09 +00:00
|
|
|
}
|
2020-04-08 10:59:16 +00:00
|
|
|
|
2022-10-02 11:28:41 +00:00
|
|
|
m.BCCList = bccList
|
2021-01-14 11:55:29 +00:00
|
|
|
|
|
|
|
case "message-id":
|
2021-01-29 11:04:02 +00:00
|
|
|
m.ExternalID = regexp.MustCompile("<(.*)>").ReplaceAllString(fields.Value(), "$1")
|
2022-10-02 11:28:41 +00:00
|
|
|
|
2022-10-28 14:46:56 +00:00
|
|
|
case "in-reply-to":
|
|
|
|
m.InReplyTo = regexp.MustCompile("<(.*)>").ReplaceAllString(fields.Value(), "$1")
|
|
|
|
|
2023-11-16 14:48:04 +00:00
|
|
|
case "x-forwarded-message-id":
|
|
|
|
m.XForward = regexp.MustCompile("<(.*)>").ReplaceAllString(fields.Value(), "$1")
|
|
|
|
|
2022-10-02 11:28:41 +00:00
|
|
|
case "references":
|
2022-11-22 18:08:03 +00:00
|
|
|
for _, ref := range strings.Fields(fields.Value()) {
|
|
|
|
for _, ref := range strings.Split(ref, ",") {
|
|
|
|
m.References = append(m.References, strings.Trim(ref, "<>"))
|
|
|
|
}
|
|
|
|
}
|
2020-07-02 10:50:09 +00:00
|
|
|
}
|
2020-04-08 10:59:16 +00:00
|
|
|
}
|
|
|
|
|
2022-08-26 15:00:21 +00:00
|
|
|
return m, nil
|
2020-07-02 10:50:09 +00:00
|
|
|
}
|
2020-04-08 10:59:16 +00:00
|
|
|
|
2022-08-26 15:00:21 +00:00
|
|
|
func parseAttachment(h message.Header, body []byte) (Attachment, error) {
|
|
|
|
att := Attachment{
|
|
|
|
Data: body,
|
|
|
|
}
|
2020-04-08 10:59:16 +00:00
|
|
|
|
2022-08-26 15:00:21 +00:00
|
|
|
mimeHeader, err := toMailHeader(h)
|
2020-08-06 07:49:24 +00:00
|
|
|
if err != nil {
|
2022-08-26 15:00:21 +00:00
|
|
|
return Attachment{}, err
|
2020-08-06 07:49:24 +00:00
|
|
|
}
|
|
|
|
att.Header = mimeHeader
|
2023-10-09 15:14:51 +00:00
|
|
|
mimeType, mimeTypeParams, err := pmmime.ParseMediaType(h.Get("Content-Type"))
|
2020-08-06 07:49:24 +00:00
|
|
|
|
2020-08-06 08:01:00 +00:00
|
|
|
if err != nil {
|
2022-08-26 15:00:21 +00:00
|
|
|
return Attachment{}, err
|
2020-04-08 10:59:16 +00:00
|
|
|
}
|
2020-08-06 08:01:00 +00:00
|
|
|
att.MIMEType = mimeType
|
2023-01-30 19:12:41 +00:00
|
|
|
att.MIMEParams = mimeTypeParams
|
2020-04-08 10:59:16 +00:00
|
|
|
|
2021-04-19 06:21:18 +00:00
|
|
|
// Prefer attachment name from filename param in content disposition.
|
|
|
|
// If not available, try to get it from name param in content type.
|
|
|
|
// Otherwise fallback to attachment.bin.
|
2023-10-09 15:14:51 +00:00
|
|
|
disp, dispParams, err := pmmime.ParseMediaType(h.Get("Content-Disposition"))
|
|
|
|
if err == nil {
|
2023-01-24 15:09:01 +00:00
|
|
|
att.Disposition = proton.Disposition(disp)
|
2020-07-02 10:50:09 +00:00
|
|
|
|
2022-08-26 15:00:21 +00:00
|
|
|
if filename, ok := dispParams["filename"]; ok {
|
|
|
|
att.Name = filename
|
2020-07-02 10:50:09 +00:00
|
|
|
}
|
2021-07-19 12:23:46 +00:00
|
|
|
}
|
2022-08-26 15:00:21 +00:00
|
|
|
|
2021-04-19 06:21:18 +00:00
|
|
|
if att.Name == "" {
|
2022-08-26 15:00:21 +00:00
|
|
|
if filename, ok := mimeTypeParams["name"]; ok {
|
|
|
|
att.Name = filename
|
|
|
|
} else if mimeType == string(rfc822.MessageRFC822) {
|
|
|
|
att.Name = "message.eml"
|
|
|
|
} else if ext, err := mime.ExtensionsByType(att.MIMEType); err == nil && len(ext) > 0 {
|
|
|
|
att.Name = "attachment" + ext[0]
|
|
|
|
} else {
|
|
|
|
att.Name = "attachment.bin"
|
|
|
|
}
|
2020-04-08 10:59:16 +00:00
|
|
|
}
|
|
|
|
|
2020-12-09 14:08:48 +00:00
|
|
|
// Only set ContentID if it should be inline;
|
|
|
|
// API infers content disposition based on whether ContentID is present.
|
2021-01-28 13:53:08 +00:00
|
|
|
// If Content-Disposition is present, we base our decision on that.
|
|
|
|
// Otherwise, if Content-Disposition is missing but there is a ContentID, set it.
|
|
|
|
// (This is necessary because some clients don't set Content-Disposition at all,
|
|
|
|
// so we need to rely on other information to deduce if it's inline or attachment.)
|
|
|
|
if h.Has("Content-Disposition") {
|
2023-10-09 15:14:51 +00:00
|
|
|
disp, _, err := pmmime.ParseMediaType(h.Get("Content-Disposition"))
|
2022-08-26 15:00:21 +00:00
|
|
|
if err != nil {
|
|
|
|
return Attachment{}, err
|
|
|
|
}
|
|
|
|
|
2022-11-23 14:17:56 +00:00
|
|
|
if disp == string(proton.InlineDisposition) {
|
2021-01-28 13:53:08 +00:00
|
|
|
att.ContentID = strings.Trim(h.Get("Content-Id"), " <>")
|
|
|
|
}
|
|
|
|
} else if h.Has("Content-Id") {
|
2020-12-09 14:08:48 +00:00
|
|
|
att.ContentID = strings.Trim(h.Get("Content-Id"), " <>")
|
|
|
|
}
|
2020-08-03 12:44:12 +00:00
|
|
|
|
2020-08-06 08:01:00 +00:00
|
|
|
return att, nil
|
2020-04-08 10:59:16 +00:00
|
|
|
}
|
2020-08-06 07:49:24 +00:00
|
|
|
|
2020-09-02 13:17:01 +00:00
|
|
|
func toMailHeader(h message.Header) (mail.Header, error) {
|
|
|
|
mimeHeader := make(mail.Header)
|
|
|
|
|
|
|
|
if err := forEachDecodedHeaderField(h, func(key, val string) error {
|
|
|
|
mimeHeader[key] = []string{val}
|
|
|
|
return nil
|
|
|
|
}); err != nil {
|
|
|
|
return nil, err
|
2020-08-06 07:49:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return mimeHeader, nil
|
|
|
|
}
|
|
|
|
|
2020-10-21 11:37:41 +00:00
|
|
|
func forEachDecodedHeaderField(h message.Header, fn func(string, string) error) error {
|
|
|
|
fields := h.Fields()
|
|
|
|
|
|
|
|
for fields.Next() {
|
|
|
|
text, err := fields.Text()
|
|
|
|
if err != nil {
|
|
|
|
if !message.IsUnknownCharset(err) {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if text, err = pmmime.DecodeHeader(fields.Value()); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := fn(fields.Key(), text); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
2023-10-31 10:41:50 +00:00
|
|
|
|
|
|
|
func patchInlineImages(p *parser.Parser) error {
|
|
|
|
// This code will only attempt to patch the root level children. I tested with different email clients and as soon
|
|
|
|
// as you reply/forward a message the entire content gets converted into HTML (Apple Mail/Thunderbird/Evolution).
|
|
|
|
// If you are forcing text formatting (Evolution), the inline images of the original email are stripped.
|
|
|
|
// The only reason we need to apply this modification is that Apple Mail can send out text + inline image parts
|
|
|
|
// if the text does not exceed the 76 char column limit.
|
|
|
|
// Based on this, it's unlikely we will see any other variations.
|
|
|
|
root := p.Root()
|
|
|
|
|
|
|
|
children := root.Children()
|
|
|
|
|
|
|
|
if len(children) < 2 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
result := make([]inlinePatchJob, len(children))
|
|
|
|
|
|
|
|
var (
|
|
|
|
transformationNeeded bool
|
|
|
|
prevPart *parser.Part
|
|
|
|
prevContentType string
|
|
|
|
prevContentTypeMap map[string]string
|
|
|
|
)
|
|
|
|
|
|
|
|
for i := 0; i < len(children); i++ {
|
|
|
|
curPart := children[i]
|
|
|
|
|
|
|
|
contentType, contentTypeMap, err := curPart.ContentType()
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to get content type for for child %v:%w", i, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if rfc822.MIMEType(contentType) == rfc822.TextPlain {
|
|
|
|
result[i] = &inlinePatchBodyOnly{part: curPart, contentTypeMap: contentTypeMap}
|
|
|
|
} else if strings.HasPrefix(contentType, "image/") {
|
2024-01-18 16:45:08 +00:00
|
|
|
disposition, err := getImageContentDisposition(curPart)
|
2023-10-31 10:41:50 +00:00
|
|
|
if err != nil {
|
2024-01-18 16:45:08 +00:00
|
|
|
return fmt.Errorf("failed to get content disposition for child %v:%w", i, err)
|
2023-10-31 10:41:50 +00:00
|
|
|
}
|
|
|
|
if disposition == "inline" && !curPart.HasContentID() {
|
|
|
|
if rfc822.MIMEType(prevContentType) == rfc822.TextPlain {
|
|
|
|
result[i-1] = &inlinePatchBodyWithInlineImage{
|
|
|
|
textPart: prevPart,
|
|
|
|
imagePart: curPart,
|
|
|
|
textContentTypeMap: prevContentTypeMap,
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
result[i] = &inlinePatchInlineImageOnly{part: curPart, partIndex: i, root: root}
|
|
|
|
}
|
|
|
|
transformationNeeded = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
prevPart = curPart
|
|
|
|
prevContentType = contentType
|
|
|
|
prevContentTypeMap = contentTypeMap
|
|
|
|
}
|
|
|
|
|
|
|
|
if !transformationNeeded {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, t := range result {
|
|
|
|
if t != nil {
|
|
|
|
t.Patch()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2024-01-18 16:45:08 +00:00
|
|
|
func getImageContentDisposition(curPart *parser.Part) (string, error) {
|
|
|
|
disposition, _, err := curPart.ContentDisposition()
|
|
|
|
if err == nil {
|
|
|
|
return disposition, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if curPart.Header.Get("Content-Disposition") != "" {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
if curPart.HasContentID() {
|
|
|
|
return "inline", nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return "attachment", nil
|
|
|
|
}
|
|
|
|
|
2023-10-31 10:41:50 +00:00
|
|
|
// inlinePatchJob is a pending modification of the parse tree scheduled by
// patchInlineImages.
type inlinePatchJob interface {
	// Patch applies the modification.
	Patch()
}
|
|
|
|
|
|
|
|
// inlinePatchBodyOnly is meant to be used for standalone text parts that need to be converted to html once we applty
|
|
|
|
// one of the changes.
|
|
|
|
type inlinePatchBodyOnly struct {
|
|
|
|
part *parser.Part
|
|
|
|
contentTypeMap map[string]string
|
|
|
|
}
|
|
|
|
|
|
|
|
func (i *inlinePatchBodyOnly) Patch() {
|
|
|
|
newBody := []byte(`<html><body><p>`)
|
|
|
|
newBody = append(newBody, patchNewLineWithHTMLBreaks(i.part.Body)...)
|
|
|
|
newBody = append(newBody, []byte(`</p></body></html>`)...)
|
|
|
|
|
|
|
|
i.part.Body = newBody
|
|
|
|
i.part.Header.SetContentType("text/html", i.contentTypeMap)
|
|
|
|
}
|
|
|
|
|
|
|
|
// inlinePatchBodyWithInlineImage patches a previous text part so that it refers to that inline image.
|
|
|
|
type inlinePatchBodyWithInlineImage struct {
|
|
|
|
textPart *parser.Part
|
|
|
|
textContentTypeMap map[string]string
|
|
|
|
imagePart *parser.Part
|
|
|
|
}
|
|
|
|
|
|
|
|
// inlinePatchInlineImageOnly handle the case where the inline image is not proceeded by a text part. To avoid
|
|
|
|
// having to parse any possible previous part, we just inject a new part that references this image.
|
|
|
|
type inlinePatchInlineImageOnly struct {
|
|
|
|
part *parser.Part
|
|
|
|
partIndex int
|
|
|
|
root *parser.Part
|
|
|
|
}
|
|
|
|
|
|
|
|
func (i inlinePatchInlineImageOnly) Patch() {
|
|
|
|
contentID := uuid.NewString()
|
|
|
|
// Convert previous part to text/html && inject image.
|
|
|
|
newBody := []byte(fmt.Sprintf(`<html><body><img src="cid:%v"/></body></html>`, contentID))
|
|
|
|
|
|
|
|
i.part.Header.Set("content-id", contentID)
|
|
|
|
|
|
|
|
// create new text part
|
|
|
|
textPart := &parser.Part{
|
|
|
|
Header: message.Header{},
|
|
|
|
Body: newBody,
|
|
|
|
}
|
|
|
|
|
|
|
|
textPart.Header.SetContentType("text/html", map[string]string{"charset": "UTF-8"})
|
|
|
|
|
|
|
|
i.root.InsertChild(i.partIndex, textPart)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (i *inlinePatchBodyWithInlineImage) Patch() {
|
|
|
|
contentID := uuid.NewString()
|
|
|
|
// Convert previous part to text/html && inject image.
|
|
|
|
newBody := []byte(`<html><body><p>`)
|
|
|
|
newBody = append(newBody, patchNewLineWithHTMLBreaks(i.textPart.Body)...)
|
|
|
|
newBody = append(newBody, []byte(`</p>`)...)
|
|
|
|
newBody = append(newBody, []byte(fmt.Sprintf(`<img src="cid:%v"/>`, contentID))...)
|
|
|
|
newBody = append(newBody, []byte(`</body></html>`)...)
|
|
|
|
|
|
|
|
i.textPart.Body = newBody
|
|
|
|
i.textPart.Header.SetContentType("text/html", i.textContentTypeMap)
|
|
|
|
|
|
|
|
// Add content id to curPart
|
|
|
|
i.imagePart.Header.Set("content-id", contentID)
|
|
|
|
}
|
|
|
|
|
|
|
|
// patchNewLineWithHTMLBreaks returns a copy of input in which "<br/>" is
// placed in front of every line ending. A line ending is "\n", or "\r\n"
// when the '\n' is directly preceded by '\r'; the line ending itself is
// kept. The input is not modified and the result is never nil.
func patchNewLineWithHTMLBreaks(input []byte) []byte {
	const lineBreak = "<br/>"

	out := make([]byte, 0, len(input))
	rest := input

	for {
		nl := bytes.IndexByte(rest, '\n')
		if nl < 0 {
			// No further line ending: copy the tail and finish.
			return append(out, rest...)
		}

		// textEnd is where the line's text stops and its ending starts.
		textEnd := nl
		if nl > 0 && rest[nl-1] == '\r' {
			textEnd = nl - 1
		}

		out = append(out, rest[:textEnd]...)
		out = append(out, lineBreak...)
		out = append(out, rest[textEnd:nl+1]...)

		rest = rest[nl+1:]
	}
}
|