From f7fbb79ccb040682d5fb188f6d9ae90244a8e0e3 Mon Sep 17 00:00:00 2001 From: tgallice Date: Thu, 6 Mar 2025 16:15:52 +0100 Subject: [PATCH 1/2] stringutil/decoder: Replace non-ASCII runes with a space to keep words separated --- stringutil/decoder.go | 13 +++++++++++-- stringutil/decoder_test.go | 10 +++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/stringutil/decoder.go b/stringutil/decoder.go index 3c56a39..eb875c4 100644 --- a/stringutil/decoder.go +++ b/stringutil/decoder.go @@ -1,6 +1,7 @@ package stringutil import ( + "regexp" "strings" "unicode" "unicode/utf8" @@ -75,6 +76,8 @@ func IsASCII(s string) bool { return true } +var spaceRegex = regexp.MustCompile(`\s+`) + func DecodeToASCII(s string, opts ...ASCIIDecodeOption) string { if IsASCII(s) { return s @@ -90,7 +93,13 @@ func DecodeToASCII(s string, opts ...ASCIIDecodeOption) string { t = transform.Chain( os.decomposer, runes.Remove(runes.In(unicode.Mn)), - runes.Remove(setFunc(isAboveASCII)), + runes.Map(func(r rune) rune { + if isAboveASCII(r) { + return rune(' ') + } + + return r + }), os.composer, ) @@ -102,5 +111,5 @@ func DecodeToASCII(s string, opts ...ASCIIDecodeOption) string { return "" } - return result + return strings.Trim(spaceRegex.ReplaceAllString(result, " "), " ") } diff --git a/stringutil/decoder_test.go b/stringutil/decoder_test.go index d27623d..2a7ec32 100644 --- a/stringutil/decoder_test.go +++ b/stringutil/decoder_test.go @@ -41,11 +41,15 @@ func TestDecodeToASCII(t *testing.T) { }, { in: "Collaboration: ๐•ธ๐–Ž๐–†๐–’๐–Ž ๐ŸŒž x KiwiKurve", - out: "Collaboration: x KiwiKurve", + out: "Collaboration: x KiwiKurve", }, { - in: "Collaboration: ๐•ธ๐–Ž๐–†๐–’๐–Ž ๐ŸŒž x KiwiKurve", - out: "Collaboration: Miami x KiwiKurve", + in: "back soon โœŒ๐Ÿฝ๐Ÿ“ashleyrchand@gmail.com", + out: "back soon ashleyrchand@gmail.com", + }, + { + in: "Golden Girl ๐ŸŒด\n๐ŸŒฟDiscounts/linksโฌ‡๏ธ\nPR/Collab๐Ÿ“งspfpleaseka๐“ie@gmail.com", + out: "Golden Girl Discounts/links PR/Collab 
spfpleasekaTie@gmail.com", opts: nfkd, }, { From 440474eeba210f1728ac437aa4e481eebc00c75f Mon Sep 17 00:00:00 2001 From: tgallice Date: Thu, 6 Mar 2025 18:27:07 +0100 Subject: [PATCH 2/2] decoder_test: Readd nfkd test --- stringutil/decoder_test.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/stringutil/decoder_test.go b/stringutil/decoder_test.go index 2a7ec32..93c1a65 100644 --- a/stringutil/decoder_test.go +++ b/stringutil/decoder_test.go @@ -43,6 +43,11 @@ func TestDecodeToASCII(t *testing.T) { in: "Collaboration: ๐•ธ๐–Ž๐–†๐–’๐–Ž ๐ŸŒž x KiwiKurve", out: "Collaboration: x KiwiKurve", }, + { + in: "Collaboration: ๐•ธ๐–Ž๐–†๐–’๐–Ž ๐ŸŒž x KiwiKurve", + out: "Collaboration: Miami x KiwiKurve", + opts: nfkd, + }, { in: "back soon โœŒ๐Ÿฝ๐Ÿ“ashleyrchand@gmail.com", out: "back soon ashleyrchand@gmail.com",