mirror of
https://github.com/go-gitea/gitea.git
synced 2026-06-17 19:10:22 +03:00
chore(deps): update chroma, regexp2 v2, replace dimiro1/reply (#37858)
- Update `github.com/alecthomas/chroma/v2` to `v2.25.0`. - Migrate `github.com/dlclark/regexp2` to `/v2` (incorporates https://github.com/go-gitea/gitea/pull/37664); drop the renovate pin. - Replace the unmaintained `github.com/dimiro1/reply` (the last consumer of `regexp2` v1 in our own code) with a small built-in reply parser for incoming mail. Signed-off-by: wxiaoguang <wxiaoguang@gmail.com> Co-authored-by: Claude (Opus 4.7) <noreply@anthropic.com> Co-authored-by: wxiaoguang <wxiaoguang@gmail.com> Co-authored-by: Giteabot <teabot@gitea.io> Co-authored-by: Nicolas <bircni@icloud.com>
This commit is contained in:
@@ -17,7 +17,6 @@ import (
|
||||
"gitea.dev/modules/setting"
|
||||
"gitea.dev/services/mailer/token"
|
||||
|
||||
"github.com/dimiro1/reply"
|
||||
"github.com/emersion/go-imap"
|
||||
"github.com/emersion/go-imap/client"
|
||||
"github.com/jhillyerd/enmime/v2"
|
||||
@@ -356,7 +355,7 @@ func getContentFromMailReader(env *enmime.Envelope) *MailContent {
|
||||
}
|
||||
|
||||
return &MailContent{
|
||||
Content: reply.FromText(env.Text),
|
||||
Content: extractReply(env.Text),
|
||||
Attachments: attachments,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -150,3 +150,56 @@ func TestGetContentFromMailReader(t *testing.T) {
|
||||
assert.Equal(t, "mail content without signature", content.Content)
|
||||
assert.Empty(t, content.Attachments)
|
||||
}
|
||||
|
||||
func TestExtractReply(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{"plain text", "Email with only text.", "Email with only text."},
|
||||
{"crlf normalized", "line one\r\nline two\r\n", "line one\nline two"},
|
||||
{"trim blank lines", "\n\n\nactual reply\n\n\n", "actual reply"},
|
||||
{"signature delimiter", "the reply\n--\nJohn Doe\nAcme", "the reply"},
|
||||
{"rfc signature delimiter", "the reply\n-- \nJohn Doe", "the reply"},
|
||||
{"mobile signature", "My answer is yes.\n\nSent from my iPhone", "My answer is yes."},
|
||||
{"quote only kept", "> Email with only quote.", "> Email with only quote."},
|
||||
{"leading quote kept", "> This is a quote.\n\nAnd this is some text.", "> This is a quote.\n\nAnd this is some text."},
|
||||
{"trailing quote stripped", "My reply.\n\n> original line 1\n> original line 2", "My reply."},
|
||||
{"attribution and quote", "Looks good.\n\nOn Mon, Jan 1, 2024 John <j@x.com> wrote:\n> please review", "Looks good."},
|
||||
{"attribution without quote marks", "My reply.\n\nOn Wed, Sep 25, 2013, richard wrote:\noriginal text", "My reply."},
|
||||
{"original message separator", "Foo\n\n-------- Original Message --------\n\nTHE END.", "Foo"},
|
||||
{"outlook header block", "This is the actual reply.\n\nFrom: Some One <a@b.com>\nSent: Monday\nTo: Someone\nSubject: hi\n\nquoted body", "This is the actual reply."},
|
||||
{"french attribution", "C'est super !\n\nLe 4 janv. 2016 19:03, \"Neil\" <a@b.com> a écrit :\n> quoted", "C'est super !"},
|
||||
{"german attribution", "Hey :)\n\nAm 03.02.2016 3:35 schrieb Max <a@b.com>:\n> quoted", "Hey :)"},
|
||||
{"cyrillic wrote verb", "Yes.\n\n6 октября 2014 lidel написал:\n> quoted", "Yes."},
|
||||
{"localized signature", "My answer.\n\nEnvoyé depuis mon iPhone", "My answer."},
|
||||
{"swedish header block", "Hi everyone!\n\nFrån: Foo <a@b.com>\nSkickat: den 5 juni\nTill: x@y.com\nÄmne: hi\n\nbody", "Hi everyone!"},
|
||||
{"attribution only is empty", "On Mon, Jan 1, 2024 at 10:00 John <j@x.com> wrote:\n> please review", ""},
|
||||
{"prose ending in wrote kept", "Hi Bob,\nThanks for the report you wrote\nI'll fix it.", "Hi Bob,\nThanks for the report you wrote\nI'll fix it."},
|
||||
{"on with year and no time kept", "Hi,\nOn the 2024 roadmap we have three items.\nPlease review.", "Hi,\nOn the 2024 roadmap we have three items.\nPlease review."},
|
||||
{"date prose kept", "Notes:\n5 issues 2024 fixed at 9:15 today\nmore notes", "Notes:\n5 issues 2024 fixed at 9:15 today\nmore notes"},
|
||||
{"header needs from first", "Quick note:\nTo: which server?\nFrom: tests pass.\nThanks", "Quick note:\nTo: which server?\nFrom: tests pass.\nThanks"},
|
||||
{"indented header block", "Reply text.\n\n From: A <a@b.com>\n Sent: Monday\n To: x\n Subject: hi\n\nbody", "Reply text."},
|
||||
{"chinese signature", "回复内容\n\n發自我的iPhone", "回复内容"},
|
||||
{"japanese signature", "返信します\n\niPhoneから送信", "返信します"},
|
||||
{"chinese header block", "回复内容\n\n发件人:张三\n收件人:李四\n主题:你好\n\n原文", "回复内容"},
|
||||
{"japanese header block", "本文です\n\n差出人:山田\n宛先:田中\n件名:こんにちは\n\n原文", "本文です"},
|
||||
{"name-first attribution", "Okay.\n\nErlend <meta@x.com> schrieb am Di., 16. Aug. 2016\num 12:52 Uhr:\n> quoted", "Okay."},
|
||||
{"chinese attribution", "你好,谢谢回复。\n\n在 2024年1月1日,张三 <z@x.com> 写道:\n> 原始内容", "你好,谢谢回复。"},
|
||||
{"japanese attribution", "了解しました。\n\n田中さんは書きました:\n> 引用", "了解しました。"},
|
||||
{"korean attribution", "감사합니다.\n\n홍길동님이 작성:\n> 인용", "감사합니다."},
|
||||
{"email mention kept", "I asked Bob <bob@x.com> and he wrote back yes.\nSo we proceed.", "I asked Bob <bob@x.com> and he wrote back yes.\nSo we proceed."},
|
||||
{"trailing mailbox glyph", "My reply here.\n\nᐧ", "My reply here."},
|
||||
{"on with year and time prose kept", "On the 2024 roadmap we should meet at 10:00.\nI'll send invites.", "On the 2024 roadmap we should meet at 10:00.\nI'll send invites."},
|
||||
{"spanish year and time prose kept", "El informe del 2024 estará listo a las 10:00.\nGracias.", "El informe del 2024 estará listo a las 10:00.\nGracias."},
|
||||
{"chinese prose kept", "谢谢,已测试。\n发自我的内心的感谢", "谢谢,已测试。\n发自我的内心的感谢"},
|
||||
{"korean prose kept", "확인했습니다.\n이 문서는 회사에서 보냄", "확인했습니다.\n이 문서는 회사에서 보냄"},
|
||||
{"japanese prose kept", "了解しました。\n資料は会議から送信", "了解しました。\n資料は会議から送信"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
assert.Equal(t, c.expected, extractReply(c.input))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,137 @@
|
||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package incoming
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"gitea.dev/modules/util"
|
||||
)
|
||||
|
||||
const (
|
||||
yearToken = `\b\d{4}\b` // 4-digit year
|
||||
timeToken = `\b\d{1,2}[:.]\d{2}\b` // HH:MM or HH.MM
|
||||
// "wrote" verbs ending an attribution line; CJK ones are matched without a
|
||||
// preceding word-separator since those scripts don't space their words
|
||||
wroteVerbs = `wrote|writes|schrieb|skrev|napisał|escreveu|escribió|написал|пише|a écrit`
|
||||
cjkWroteVerbs = `写道|寫道|書きました|작성`
|
||||
// device names anchoring CJK mobile signatures, so prose isn't mistaken for one
|
||||
cjkDevice = `iphone|ipad|ipod|android|galaxy|手机|手機|平板`
|
||||
)
|
||||
|
||||
// forwarded-mail header fields across the common mail clients/locales. headerFromFields
|
||||
// (the "From"-equivalents) must begin a block; headerFields is the full set allowed to
|
||||
// follow. Matched as a prefix by headerLine, so adding a locale is a one-line change.
|
||||
var (
|
||||
headerFromFields = []string{
|
||||
"from", "fra", "de", "von", "da", "van", "från", "expéditeur",
|
||||
"发件人", "寄件者", "差出人", "보낸사람",
|
||||
}
|
||||
headerFields = append([]string{
|
||||
"to", "cc", "bcc", "sent", "date", "subject", "reply-to",
|
||||
"til", "emne", "an", "betreff", "gesendet", "para", "assunto", "asunto",
|
||||
"risposta", "inviato", "oggetto", "destinataire", "objet", "répondre à",
|
||||
"aan", "onderwerp", "beantwoorden", "skickat", "till", "ämne",
|
||||
"收件人", "主题", "主旨", "主題", "收件者", "抄送", "日期", "宛先", "件名", "받는사람", "제목",
|
||||
}, headerFromFields...)
|
||||
)
|
||||
|
||||
// patterns are compiled on first use so the incoming-mail feature adds nothing to startup.
|
||||
var patterns = sync.OnceValue(func() (ret struct {
|
||||
signature, attribution, separator *regexp.Regexp
|
||||
},
|
||||
) {
|
||||
// "-- " delimiter and common mobile footers with frequent localizations. The CJK
|
||||
// forms require a device name so ordinary prose like "发自我的内心" or "会議から送信"
|
||||
// is not mistaken for a signature.
|
||||
ret.signature = regexp.MustCompile(`(?i)^(--|__|—` +
|
||||
`|sent (from|via|with) .+|get outlook for .+` +
|
||||
`|envoyé depuis mon .+|sendt fra min .+|von meinem .+|verzonden (met|vanaf) .+` +
|
||||
`|(發|发)自我的.*(` + cjkDevice + `).*` +
|
||||
`|.*(` + cjkDevice + `).*(から送信|에서 보냄|傳送|发送))$`)
|
||||
|
||||
// attribution introducing quoted history: a line ending in a "wrote:" verb
|
||||
// (Latin/Cyrillic or CJK), a "Name <email> wrote" line, a lead word directly
|
||||
// followed by a day number or weekday plus a year and a time, or an ISO-date-led
|
||||
// line. The date phrasing, trailing colon and the email before the verb guard
|
||||
// against prose (so "On the 2024 roadmap … at 10:00" is not an attribution).
|
||||
ret.attribution = regexp.MustCompile(`(?i)^>*\s*(` +
|
||||
`.*[\s">'](` + wroteVerbs + `)\s*[::]` +
|
||||
`|.*(` + cjkWroteVerbs + `)\s*[::]` +
|
||||
`|.*<\S+@\S+>\s+(` + wroteVerbs + `)\b.*` +
|
||||
`|(on|at|le|am|el|em|den|il|op|dnia|w dniu)\b[\s,]*(\d|(?:mon|tue|wed|thu|fri|sat|sun)\b).*` + yearToken + `.*` + timeToken + `.*` +
|
||||
`|\d{4}-\d{2}-\d{2}\b.*` + timeToken + `.*` +
|
||||
`)$`)
|
||||
|
||||
// a dash/underscore rule line, or text fenced by dashes such as
|
||||
// "-------- Original Message --------" or "-----Mensaje original-----"
|
||||
ret.separator = regexp.MustCompile(`(?i)^\s*\*?\s*([-_]{5,}|-{2,}.+-{2,}|original message|forwarded message)\s*\*?\s*$`)
|
||||
return ret
|
||||
})
|
||||
|
||||
// extractReply returns the user-written part of a plain-text email body, dropping
|
||||
// quoted history, the reply attribution, signatures and forwarded headers. It is a
|
||||
// slim, dependency-free reimplementation based on github.com/dimiro1/reply (MIT),
|
||||
// covering the common mail-client formats and languages; bottom posting and
|
||||
// forwarded bodies are not handled.
|
||||
func extractReply(text string) string {
|
||||
p := patterns()
|
||||
lines := strings.Split(util.NormalizeStringEOL(text), "\n")
|
||||
|
||||
// cut at the first line that begins quoted history, a signature or a header block
|
||||
for i := range lines {
|
||||
trimmed := strings.TrimSpace(lines[i])
|
||||
if p.signature.MatchString(trimmed) || p.attribution.MatchString(trimmed) ||
|
||||
p.separator.MatchString(trimmed) || headerBlock(trimmed, lines[i+1:]) {
|
||||
lines = lines[:i]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// drop the trailing block of quoted/blank lines, unless the whole body is quoted
|
||||
end := len(lines)
|
||||
for end > 0 {
|
||||
// "ᐧ" is the trailing marker some mobile clients (Mailbox) append
|
||||
if t := strings.TrimSpace(lines[end-1]); t != "" && t != "ᐧ" && !strings.HasPrefix(t, ">") {
|
||||
break
|
||||
}
|
||||
end--
|
||||
}
|
||||
if end > 0 {
|
||||
lines = lines[:end]
|
||||
}
|
||||
|
||||
return strings.TrimSpace(strings.Join(lines, "\n"))
|
||||
}
|
||||
|
||||
// headerBlock reports whether a forwarded-mail header block starts here: the
|
||||
// (already-trimmed) first line is a "From" field and the next non-blank line is
|
||||
// another field, so a lone "Subject:" sentence is not a boundary.
|
||||
func headerBlock(first string, rest []string) bool {
|
||||
if !headerLine(first, headerFromFields) {
|
||||
return false
|
||||
}
|
||||
for _, next := range rest {
|
||||
if t := strings.TrimSpace(next); t != "" {
|
||||
return headerLine(t, headerFields)
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// headerLine reports whether the already-trimmed line is a "Field:" header for one
|
||||
// of fields. An ASCII colon must be followed by a space so prose like "To:do this"
|
||||
// is ignored; the CJK fullwidth colon ":" needs no space.
|
||||
func headerLine(line string, fields []string) bool {
|
||||
lower := strings.ToLower(line)
|
||||
for _, field := range fields {
|
||||
if rest, ok := strings.CutPrefix(lower, field); ok &&
|
||||
(strings.HasPrefix(rest, ": ") || strings.HasPrefix(rest, ":")) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
Reference in New Issue
Block a user