// itdoxy-lab/modules/charset/escape_test.go
//
// 203 lines
// 16 KiB
// Go
// Raw Blame History

// This file contains invisible Unicode characters
//
// This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//
// This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package charset
import (
"reflect"
"strings"
"testing"
)
// escapeControlTest describes one table-driven case for the escape tests:
// an input text together with the status and escaped output expected for it.
type escapeControlTest struct {
	// name labels the subtest; text is the raw input handed to the escaper.
	name, text string
	// status is the EscapeStatus the escaper is expected to report for text.
	status EscapeStatus
	// result is the escaped output expected for text.
	result string
}
// escapeControlTests is the table of cases shared by TestEscapeControlString,
// TestEscapeControlBytes and TestEscapeControlReader. Each entry pairs an
// input text with the escaped result and the EscapeStatus expected for it.
// TestEscapeControlReader additionally derives "(+Control)" and "(+Mark)"
// variants of every entry by prefixing U+001E and U+0300 respectively.
//
// NOTE: several literals below intentionally contain invisible or
// bidirectional-control code points; edit them only with an editor that
// preserves the exact bytes.
var escapeControlTests = []escapeControlTest{
{
name: "<empty>",
},
{
name: "single line western",
text: "single line western",
result: "single line western",
status: EscapeStatus{HasLTRScript: true},
},
{
name: "multi line western",
text: "single line western\nmulti line western\n",
result: "single line western\nmulti line western\n",
status: EscapeStatus{HasLTRScript: true},
},
{
name: "multi line western non-breaking space",
text: "single lineย western\nmulti lineย western\n",
result: `single line<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char">ย </span></span>western` + "\n" + `multi line<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char">ย </span></span>western` + "\n",
status: EscapeStatus{Escaped: true, HasLTRScript: true, HasSpaces: true},
},
{
name: "mixed scripts: western + japanese",
text: "ๆ—ฅๅฑž็ง˜ใžใ—ใกใ‚…ใ€‚Then some western.",
result: "ๆ—ฅๅฑž็ง˜ใžใ—ใกใ‚…ใ€‚Then some western.",
status: EscapeStatus{HasLTRScript: true},
},
{
name: "japanese",
text: "ๆ—ฅๅฑž็ง˜ใžใ—ใกใ‚…ใ€‚",
result: "ๆ—ฅๅฑž็ง˜ใžใ—ใกใ‚…ใ€‚",
status: EscapeStatus{HasLTRScript: true},
},
{
name: "hebrew",
text: "ืขื“ ืชืงื•ืคืช ื™ื•ื•ืŸ ื”ืขืชื™ืงื” ื”ื™ื” ื”ืขื™ืกื•ืง ื‘ืžืชืžื˜ื™ืงื” ืชื›ืœื™ืชื™ ื‘ืœื‘ื“: ื”ื™ื ืฉื™ืžืฉื” ื›ืื•ืกืฃ ืฉืœ ื ื•ืกื—ืื•ืช ืœื—ื™ืฉื•ื‘ ืงืจืงืข, ืื•ื›ืœื•ืกื™ืŸ ื•ื›ื•'. ืคืจื™ืฆืช ื”ื“ืจืš ืฉืœ ื”ื™ื•ื•ื ื™ื, ืคืจื˜ ืœืชืจื•ืžื•ืชื™ื”ื ื”ื’ื“ื•ืœื•ืช ืœื™ื“ืข ื”ืžืชืžื˜ื™, ื”ื™ื™ืชื” ื‘ืœื™ืžื•ื“ ื”ืžืชืžื˜ื™ืงื” ื›ืฉืœืขืฆืžื”, ืžืชื•ืงืฃ ืขืจื›ื” ื”ืจื•ื—ื ื™. ื™ื—ืกื ืฉืœ ื—ืœืง ืžื”ื™ื•ื•ื ื™ื ื”ืงื“ืžื•ื ื™ื ืœืžืชืžื˜ื™ืงื” ื”ื™ื” ื“ืชื™ - ืœืžืฉืœ, ื”ื›ืช ืฉืืกืฃ ืกื‘ื™ื‘ื• ืคื™ืชื’ื•ืจืก ื”ืืžื™ื ื” ื›ื™ ื”ืžืชืžื˜ื™ืงื” ื”ื™ื ื”ื‘ืกื™ืก ืœื›ืœ ื”ื“ื‘ืจื™ื. ื”ื™ื•ื•ื ื™ื ื ื—ืฉื‘ื™ื ืœื™ื•ืฆืจื™ ืžื•ืฉื’ ื”ื”ื•ื›ื—ื” ื”ืžืชืžื˜ื™ืช, ื•ื›ืŸ ืœืจืืฉื•ื ื™ื ืฉืขืกืงื• ื‘ืžืชืžื˜ื™ืงื” ืœืฉื ืขืฆืžื”, ื›ืœื•ืžืจ ื›ืชื—ื•ื ืžื—ืงืจื™ ืขื™ื•ื ื™ ื•ืžื•ืคืฉื˜ ื•ืœื ืจืง ื›ืขื–ืจ ืฉื™ืžื•ืฉื™. ืขื ื–ืืช, ืœืฆื“ื”",
result: "ืขื“ ืชืงื•ืคืช ื™ื•ื•ืŸ ื”ืขืชื™ืงื” ื”ื™ื” ื”ืขื™ืกื•ืง ื‘ืžืชืžื˜ื™ืงื” ืชื›ืœื™ืชื™ ื‘ืœื‘ื“: ื”ื™ื ืฉื™ืžืฉื” ื›ืื•ืกืฃ ืฉืœ ื ื•ืกื—ืื•ืช ืœื—ื™ืฉื•ื‘ ืงืจืงืข, ืื•ื›ืœื•ืกื™ืŸ ื•ื›ื•'. ืคืจื™ืฆืช ื”ื“ืจืš ืฉืœ ื”ื™ื•ื•ื ื™ื, ืคืจื˜ ืœืชืจื•ืžื•ืชื™ื”ื ื”ื’ื“ื•ืœื•ืช ืœื™ื“ืข ื”ืžืชืžื˜ื™, ื”ื™ื™ืชื” ื‘ืœื™ืžื•ื“ ื”ืžืชืžื˜ื™ืงื” ื›ืฉืœืขืฆืžื”, ืžืชื•ืงืฃ ืขืจื›ื” ื”ืจื•ื—ื ื™. ื™ื—ืกื ืฉืœ ื—ืœืง ืžื”ื™ื•ื•ื ื™ื ื”ืงื“ืžื•ื ื™ื ืœืžืชืžื˜ื™ืงื” ื”ื™ื” ื“ืชื™ - ืœืžืฉืœ, ื”ื›ืช ืฉืืกืฃ ืกื‘ื™ื‘ื• ืคื™ืชื’ื•ืจืก ื”ืืžื™ื ื” ื›ื™ ื”ืžืชืžื˜ื™ืงื” ื”ื™ื ื”ื‘ืกื™ืก ืœื›ืœ ื”ื“ื‘ืจื™ื. ื”ื™ื•ื•ื ื™ื ื ื—ืฉื‘ื™ื ืœื™ื•ืฆืจื™ ืžื•ืฉื’ ื”ื”ื•ื›ื—ื” ื”ืžืชืžื˜ื™ืช, ื•ื›ืŸ ืœืจืืฉื•ื ื™ื ืฉืขืกืงื• ื‘ืžืชืžื˜ื™ืงื” ืœืฉื ืขืฆืžื”, ื›ืœื•ืžืจ ื›ืชื—ื•ื ืžื—ืงืจื™ ืขื™ื•ื ื™ ื•ืžื•ืคืฉื˜ ื•ืœื ืจืง ื›ืขื–ืจ ืฉื™ืžื•ืฉื™. ืขื ื–ืืช, ืœืฆื“ื”",
status: EscapeStatus{HasRTLScript: true},
},
{
name: "more hebrew",
text: `ื‘ืชืงื•ืคื” ืžืื•ื—ืจืช ื™ื•ืชืจ, ื”ืฉืชืžืฉื• ื”ื™ื•ื•ื ื™ื ื‘ืฉื™ื˜ืช ืกื™ืžื•ืŸ ืžืชืงื“ืžืช ื™ื•ืชืจ, ืฉื‘ื” ื”ื•ืฆื’ื• ื”ืžืกืคืจื™ื ืœืคื™ 22 ืื•ืชื™ื•ืช ื”ืืœืคื‘ื™ืช ื”ื™ื•ื•ื ื™. ืœืกื™ืžื•ืŸ ื”ืžืกืคืจื™ื ื‘ื™ืŸ 1 ืœ-9 ื ืงื‘ืขื• ืชืฉืข ื”ืื•ืชื™ื•ืช ื”ืจืืฉื•ื ื•ืช, ื‘ืชื•ืกืคืช ื’ืจืฉ ( ' ) ื‘ืฆื“ ื™ืžื™ืŸ ืฉืœ ื”ืื•ืช, ืœืžืขืœื”; ืชืฉืข ื”ืื•ืชื™ื•ืช ื”ื‘ืื•ืช ื™ื™ืฆื’ื• ืืช ื”ืขืฉืจื•ืช ืž-10 ืขื“ 90, ื•ื”ื‘ืื•ืช ืืช ื”ืžืื•ืช. ืœืกื™ืžื•ืŸ ื”ืกืคืจื•ืช ื‘ื™ืŸ 1000 ืœ-900,000, ื”ืฉืชืžืฉื• ื”ื™ื•ื•ื ื™ื ื‘ืื•ืชืŸ ืื•ืชื™ื•ืช, ืืš ื”ื•ืกื™ืคื• ืœืื•ืชื™ื•ืช ืืช ื”ื’ืจืฉ ื“ื•ื•ืงื ืžืฆื“ ืฉืžืืœ ืฉืœ ื”ืื•ืชื™ื•ืช, ืœืžื˜ื”. ืžืžื™ืœื™ื•ืŸ ื•ืžืขืœื”, ื›ื ืจืื” ื”ืฉืชืžืฉื• ื”ื™ื•ื•ื ื™ื ื‘ืฉื ื™ ืชื’ื™ื ื‘ืžืงื•ื ืื—ื“.
ื”ืžืชืžื˜ื™ืงืื™ ื”ื‘ื•ืœื˜ ื”ืจืืฉื•ืŸ ื‘ื™ื•ื•ืŸ ื”ืขืชื™ืงื”, ื•ื™ืฉ ื”ืื•ืžืจื™ื ื‘ืชื•ืœื“ื•ืช ื”ืื ื•ืฉื•ืช, ื”ื•ื ืชืืœืก (624 ืœืคื ื”"ืก - 546 ืœืคื ื”"ืก ื‘ืงื™ืจื•ื‘).[1] ืœื ื™ื”ื™ื” ื–ื” ืžืฉื•ืœืœ ื™ืกื•ื“ ืœื”ื ื™ื— ืฉื”ื•ื ื”ืื“ื ื”ืจืืฉื•ืŸ ืฉื”ื•ื›ื™ื— ืžืฉืคื˜ ืžืชืžื˜ื™, ื•ืœื ืจืง ื’ื™ืœื” ืื•ืชื•. ืชืืœืก ื”ื•ื›ื™ื— ืฉื™ืฉืจื™ื ืžืงื‘ื™ืœื™ื ื—ื•ืชื›ื™ื ืžืฆื“ ืื—ื“ ืฉืœ ืฉื•ืงื™ ื–ื•ื•ื™ืช ืงื˜ืขื™ื ื‘ืขืœื™ ื™ื—ืกื™ื ืฉื•ื•ื™ื (ืžืฉืคื˜ ืชืืœืก ื”ืจืืฉื•ืŸ), ืฉื”ื–ื•ื•ื™ืช ื”ืžื•ื ื—ืช ืขืœ ืงื•ื˜ืจ ื‘ืžืขื’ืœ ื”ื™ื ื–ื•ื•ื™ืช ื™ืฉืจื” (ืžืฉืคื˜ ืชืืœืก ื”ืฉื ื™), ืฉื”ืงื•ื˜ืจ ืžื—ืœืง ืืช ื”ืžืขื’ืœ ืœืฉื ื™ ื—ืœืงื™ื ืฉื•ื•ื™ื, ื•ืฉื–ื•ื•ื™ื•ืช ื”ื‘ืกื™ืก ื‘ืžืฉื•ืœืฉ ืฉื•ื•ื”-ืฉื•ืงื™ื™ื ืฉื•ื•ืช ื–ื• ืœื–ื•. ืžื™ื•ื—ืกื•ืช ืœื• ื’ื ืฉื™ื˜ื•ืช ืœืžื“ื™ื“ืช ื’ื•ื‘ื”ืŸ ืฉืœ ื”ืคื™ืจืžื™ื“ื•ืช ื‘ืขื–ืจืช ืžื“ื™ื“ืช ืฆื™ืœืŸ ื•ืœืงื‘ื™ืขืช ืžื™ืงื•ืžื” ืฉืœ ืกืคื™ื ื” ื”ื ืจืื™ืช ืžืŸ ื”ื—ื•ืฃ.
ื‘ืฉื ื™ื 582 ืœืคื ื”"ืก ืขื“ 496 ืœืคื ื”"ืก, ื‘ืงื™ืจื•ื‘, ื—ื™ ืžืชืžื˜ื™ืงืื™ ื—ืฉื•ื‘ ื‘ืžื™ื•ื—ื“ - ืคื™ืชื’ื•ืจืก. ื”ืžืงื•ืจื•ืช ื”ืจืืฉื•ื ื™ื™ื ืขืœื™ื• ืžื•ืขื˜ื™ื, ื•ื”ื”ื™ืกื˜ื•ืจื™ื•ื ื™ื ืžืชืงืฉื™ื ืœื”ืคืจื™ื“ ืืช ื”ืขื•ื‘ื“ื•ืช ืžืฉื›ื‘ืช ื”ืžืกืชื•ืจื™ืŸ ื•ื”ืื’ื“ื•ืช ืฉื ืงืฉืจื• ื‘ื•. ื™ื“ื•ืข ืฉืกื‘ื™ื‘ื• ื”ืชืงื‘ืฆื” ื”ืืกื›ื•ืœื” ื”ืคื™ืชื’ื•ืจืื™ืช ืžืขื™ืŸ ื›ืช ืคืกื‘ื“ื•-ืžืชืžื˜ื™ืช ืฉื”ืืžื™ื ื” ืฉ"ื”ื›ืœ ืžืกืคืจ", ืื• ืœื™ืชืจ ื“ื™ื•ืง ื”ื›ืœ ื ื™ืชืŸ ืœื›ื™ืžื•ืช, ื•ื™ื™ื—ืกื” ืœืžืกืคืจื™ื ืžืฉืžืขื•ื™ื•ืช ืžื™ืกื˜ื™ื•ืช. ื›ื›ืœ ื”ื ืจืื” ื”ืคื™ืชื’ื•ืจืื™ื ื™ื“ืขื• ืœื‘ื ื•ืช ืืช ื”ื’ื•ืคื™ื ื”ืืคืœื˜ื•ื ื™ื™ื, ื”ื›ื™ืจื• ืืช ื”ืžืžื•ืฆืข ื”ืืจื™ืชืžื˜ื™, ื”ืžืžื•ืฆืข ื”ื’ืื•ืžื˜ืจื™ ื•ื”ืžืžื•ืฆืข ื”ื”ืจืžื•ื ื™ ื•ื”ื’ื™ืขื• ืœื”ื™ืฉื’ื™ื ื—ืฉื•ื‘ื™ื ื ื•ืกืคื™ื. ื ื™ืชืŸ ืœื•ืžืจ ืฉื”ืคื™ืชื’ื•ืจืื™ื ื’ื™ืœื• ืืช ื”ื™ื•ืชื• ืฉืœ ื”ืฉื•ืจืฉ ื”ืจื™ื‘ื•ืขื™ ืฉืœ 2, ืฉื”ื•ื ื’ื ื”ืืœื›ืกื•ืŸ ื‘ืจื™ื‘ื•ืข ืฉืื•ืจืš ืฆืœืขื•ืชื™ื• 1, ืื™ ืจืฆื™ื•ื ืœื™, ืืš ืชื’ืœื™ืชื ื”ื™ื™ืชื” ืœืžืขืฉื” ืจืง ืฉื”ืงื˜ืขื™ื "ื—ืกืจื™ ืžื™ื“ื” ืžืฉื•ืชืคืช", ื•ืžื•ืฉื’ ื”ืžืกืคืจ ื”ืื™ ืจืฆื™ื•ื ืœื™ ืžืื•ื—ืจ ื™ื•ืชืจ.[2] ืื–ื›ื•ืจ ืจืืฉื•ืŸ ืœืงื™ื•ืžื ืฉืœ ืงื˜ืขื™ื ื—ืกืจื™ ืžื™ื“ื” ืžืฉื•ืชืคืช ืžื•ืคื™ืข ื‘ื“ื™ืืœื•ื’ "ืชืื™ื˜ื™ื˜ื•ืก" ืฉืœ ืืคืœื˜ื•ืŸ, ืืš ืจืขื™ื•ืŸ ื–ื” ื”ื™ื” ืžื•ื›ืจ ืขื•ื“ ืงื•ื“ื ืœื›ืŸ, ื‘ืžืื” ื”ื—ืžื™ืฉื™ืช ืœืคื ื”"ืก ืœื”ื™ืคืืกื•ืก, ื‘ืŸ ื”ืืกื›ื•ืœื” ื”ืคื™ืชื’ื•ืจืื™ืช, ื•ืื•ืœื™ ืœืคื™ืชื’ื•ืจืก ืขืฆืžื•.[3]`,
result: `ื‘ืชืงื•ืคื” ืžืื•ื—ืจืช ื™ื•ืชืจ, ื”ืฉืชืžืฉื• ื”ื™ื•ื•ื ื™ื ื‘ืฉื™ื˜ืช ืกื™ืžื•ืŸ ืžืชืงื“ืžืช ื™ื•ืชืจ, ืฉื‘ื” ื”ื•ืฆื’ื• ื”ืžืกืคืจื™ื ืœืคื™ 22 ืื•ืชื™ื•ืช ื”ืืœืคื‘ื™ืช ื”ื™ื•ื•ื ื™. ืœืกื™ืžื•ืŸ ื”ืžืกืคืจื™ื ื‘ื™ืŸ 1 ืœ-9 ื ืงื‘ืขื• ืชืฉืข ื”ืื•ืชื™ื•ืช ื”ืจืืฉื•ื ื•ืช, ื‘ืชื•ืกืคืช ื’ืจืฉ ( ' ) ื‘ืฆื“ ื™ืžื™ืŸ ืฉืœ ื”ืื•ืช, ืœืžืขืœื”; ืชืฉืข ื”ืื•ืชื™ื•ืช ื”ื‘ืื•ืช ื™ื™ืฆื’ื• ืืช ื”ืขืฉืจื•ืช ืž-10 ืขื“ 90, ื•ื”ื‘ืื•ืช ืืช ื”ืžืื•ืช. ืœืกื™ืžื•ืŸ ื”ืกืคืจื•ืช ื‘ื™ืŸ 1000 ืœ-900,000, ื”ืฉืชืžืฉื• ื”ื™ื•ื•ื ื™ื ื‘ืื•ืชืŸ ืื•ืชื™ื•ืช, ืืš ื”ื•ืกื™ืคื• ืœืื•ืชื™ื•ืช ืืช ื”ื’ืจืฉ ื“ื•ื•ืงื ืžืฆื“ ืฉืžืืœ ืฉืœ ื”ืื•ืชื™ื•ืช, ืœืžื˜ื”. ืžืžื™ืœื™ื•ืŸ ื•ืžืขืœื”, ื›ื ืจืื” ื”ืฉืชืžืฉื• ื”ื™ื•ื•ื ื™ื ื‘ืฉื ื™ ืชื’ื™ื ื‘ืžืงื•ื ืื—ื“.
ื”ืžืชืžื˜ื™ืงืื™ ื”ื‘ื•ืœื˜ ื”ืจืืฉื•ืŸ ื‘ื™ื•ื•ืŸ ื”ืขืชื™ืงื”, ื•ื™ืฉ ื”ืื•ืžืจื™ื ื‘ืชื•ืœื“ื•ืช ื”ืื ื•ืฉื•ืช, ื”ื•ื ืชืืœืก (624 ืœืคื ื”"ืก - 546 ืœืคื ื”"ืก ื‘ืงื™ืจื•ื‘).[1] ืœื ื™ื”ื™ื” ื–ื” ืžืฉื•ืœืœ ื™ืกื•ื“ ืœื”ื ื™ื— ืฉื”ื•ื ื”ืื“ื ื”ืจืืฉื•ืŸ ืฉื”ื•ื›ื™ื— ืžืฉืคื˜ ืžืชืžื˜ื™, ื•ืœื ืจืง ื’ื™ืœื” ืื•ืชื•. ืชืืœืก ื”ื•ื›ื™ื— ืฉื™ืฉืจื™ื ืžืงื‘ื™ืœื™ื ื—ื•ืชื›ื™ื ืžืฆื“ ืื—ื“ ืฉืœ ืฉื•ืงื™ ื–ื•ื•ื™ืช ืงื˜ืขื™ื ื‘ืขืœื™ ื™ื—ืกื™ื ืฉื•ื•ื™ื (ืžืฉืคื˜ ืชืืœืก ื”ืจืืฉื•ืŸ), ืฉื”ื–ื•ื•ื™ืช ื”ืžื•ื ื—ืช ืขืœ ืงื•ื˜ืจ ื‘ืžืขื’ืœ ื”ื™ื ื–ื•ื•ื™ืช ื™ืฉืจื” (ืžืฉืคื˜ ืชืืœืก ื”ืฉื ื™), ืฉื”ืงื•ื˜ืจ ืžื—ืœืง ืืช ื”ืžืขื’ืœ ืœืฉื ื™ ื—ืœืงื™ื ืฉื•ื•ื™ื, ื•ืฉื–ื•ื•ื™ื•ืช ื”ื‘ืกื™ืก ื‘ืžืฉื•ืœืฉ ืฉื•ื•ื”-ืฉื•ืงื™ื™ื ืฉื•ื•ืช ื–ื• ืœื–ื•. ืžื™ื•ื—ืกื•ืช ืœื• ื’ื ืฉื™ื˜ื•ืช ืœืžื“ื™ื“ืช ื’ื•ื‘ื”ืŸ ืฉืœ ื”ืคื™ืจืžื™ื“ื•ืช ื‘ืขื–ืจืช ืžื“ื™ื“ืช ืฆื™ืœืŸ ื•ืœืงื‘ื™ืขืช ืžื™ืงื•ืžื” ืฉืœ ืกืคื™ื ื” ื”ื ืจืื™ืช ืžืŸ ื”ื—ื•ืฃ.
ื‘ืฉื ื™ื 582 ืœืคื ื”"ืก ืขื“ 496 ืœืคื ื”"ืก, ื‘ืงื™ืจื•ื‘, ื—ื™ ืžืชืžื˜ื™ืงืื™ ื—ืฉื•ื‘ ื‘ืžื™ื•ื—ื“ - ืคื™ืชื’ื•ืจืก. ื”ืžืงื•ืจื•ืช ื”ืจืืฉื•ื ื™ื™ื ืขืœื™ื• ืžื•ืขื˜ื™ื, ื•ื”ื”ื™ืกื˜ื•ืจื™ื•ื ื™ื ืžืชืงืฉื™ื ืœื”ืคืจื™ื“ ืืช ื”ืขื•ื‘ื“ื•ืช ืžืฉื›ื‘ืช ื”ืžืกืชื•ืจื™ืŸ ื•ื”ืื’ื“ื•ืช ืฉื ืงืฉืจื• ื‘ื•. ื™ื“ื•ืข ืฉืกื‘ื™ื‘ื• ื”ืชืงื‘ืฆื” ื”ืืกื›ื•ืœื” ื”ืคื™ืชื’ื•ืจืื™ืช ืžืขื™ืŸ ื›ืช ืคืกื‘ื“ื•-ืžืชืžื˜ื™ืช ืฉื”ืืžื™ื ื” ืฉ"ื”ื›ืœ ืžืกืคืจ", ืื• ืœื™ืชืจ ื“ื™ื•ืง ื”ื›ืœ ื ื™ืชืŸ ืœื›ื™ืžื•ืช, ื•ื™ื™ื—ืกื” ืœืžืกืคืจื™ื ืžืฉืžืขื•ื™ื•ืช ืžื™ืกื˜ื™ื•ืช. ื›ื›ืœ ื”ื ืจืื” ื”ืคื™ืชื’ื•ืจืื™ื ื™ื“ืขื• ืœื‘ื ื•ืช ืืช ื”ื’ื•ืคื™ื ื”ืืคืœื˜ื•ื ื™ื™ื, ื”ื›ื™ืจื• ืืช ื”ืžืžื•ืฆืข ื”ืืจื™ืชืžื˜ื™, ื”ืžืžื•ืฆืข ื”ื’ืื•ืžื˜ืจื™ ื•ื”ืžืžื•ืฆืข ื”ื”ืจืžื•ื ื™ ื•ื”ื’ื™ืขื• ืœื”ื™ืฉื’ื™ื ื—ืฉื•ื‘ื™ื ื ื•ืกืคื™ื. ื ื™ืชืŸ ืœื•ืžืจ ืฉื”ืคื™ืชื’ื•ืจืื™ื ื’ื™ืœื• ืืช ื”ื™ื•ืชื• ืฉืœ ื”ืฉื•ืจืฉ ื”ืจื™ื‘ื•ืขื™ ืฉืœ 2, ืฉื”ื•ื ื’ื ื”ืืœื›ืกื•ืŸ ื‘ืจื™ื‘ื•ืข ืฉืื•ืจืš ืฆืœืขื•ืชื™ื• 1, ืื™ ืจืฆื™ื•ื ืœื™, ืืš ืชื’ืœื™ืชื ื”ื™ื™ืชื” ืœืžืขืฉื” ืจืง ืฉื”ืงื˜ืขื™ื "ื—ืกืจื™ ืžื™ื“ื” ืžืฉื•ืชืคืช", ื•ืžื•ืฉื’ ื”ืžืกืคืจ ื”ืื™ ืจืฆื™ื•ื ืœื™ ืžืื•ื—ืจ ื™ื•ืชืจ.[2] ืื–ื›ื•ืจ ืจืืฉื•ืŸ ืœืงื™ื•ืžื ืฉืœ ืงื˜ืขื™ื ื—ืกืจื™ ืžื™ื“ื” ืžืฉื•ืชืคืช ืžื•ืคื™ืข ื‘ื“ื™ืืœื•ื’ "ืชืื™ื˜ื™ื˜ื•ืก" ืฉืœ ืืคืœื˜ื•ืŸ, ืืš ืจืขื™ื•ืŸ ื–ื” ื”ื™ื” ืžื•ื›ืจ ืขื•ื“ ืงื•ื“ื ืœื›ืŸ, ื‘ืžืื” ื”ื—ืžื™ืฉื™ืช ืœืคื ื”"ืก ืœื”ื™ืคืืกื•ืก, ื‘ืŸ ื”ืืกื›ื•ืœื” ื”ืคื™ืชื’ื•ืจืื™ืช, ื•ืื•ืœื™ ืœืคื™ืชื’ื•ืจืก ืขืฆืžื•.[3]`,
status: EscapeStatus{HasRTLScript: true},
},
{
name: "Mixed RTL+LTR",
text: `Many computer programs fail to display bidirectional text correctly.
For example, the Hebrew name Sarah (ืฉืจื”) is spelled: sin (ืฉ) (which appears rightmost),
then resh (ืจ), and finally heh (ื”) (which should appear leftmost).`,
result: `Many computer programs fail to display bidirectional text correctly.
For example, the Hebrew name Sarah (ืฉืจื”) is spelled: sin (ืฉ) (which appears rightmost),
then resh (ืจ), and finally heh (ื”) (which should appear leftmost).`,
status: EscapeStatus{
HasRTLScript: true,
HasLTRScript: true,
},
},
{
name: "Mixed RTL+LTR+BIDI",
text: `Many computer programs fail to display bidirectional text correctly.
For example, the Hebrew name Sarah ` + "\u2067" + `ืฉืจื”` + "\u2066\n" +
`sin (ืฉ) (which appears rightmost), then resh (ืจ), and finally heh (ื”) (which should appear leftmost).`,
result: `Many computer programs fail to display bidirectional text correctly.
For example, the Hebrew name Sarah <span class="escaped-code-point" data-escaped="[U+2067]"><span class="char">` + "\u2067" + `</span></span>ืฉืจื”<span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>` + "\n" +
`sin (ืฉ) (which appears rightmost), then resh (ืจ), and finally heh (ื”) (which should appear leftmost).`,
status: EscapeStatus{
Escaped: true,
HasBIDI: true,
HasRTLScript: true,
HasLTRScript: true,
},
},
{
name: "Accented characters",
text: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}),
result: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}),
status: EscapeStatus{HasLTRScript: true},
},
{
name: "Program",
text: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})",
result: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})",
status: EscapeStatus{HasLTRScript: true},
},
{
name: "CVE testcase",
text: "if access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {",
result: `if access_level != "user<span class="escaped-code-point" data-escaped="[U+202E]"><span class="char">` + "\u202e" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>// Check if admin<span class="escaped-code-point" data-escaped="[U+2069]"><span class="char">` + "\u2069" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>" {`,
status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true},
},
{
name: "Mixed testcase with fail",
text: `Many computer programs fail to display bidirectional text correctly.
For example, the Hebrew name Sarah ` + "\u2067" + `ืฉืจื”` + "\u2066\n" +
`sin (ืฉ) (which appears rightmost), then resh (ืจ), and finally heh (ื”) (which should appear leftmost).` +
"\nif access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {\n",
result: `Many computer programs fail to display bidirectional text correctly.
For example, the Hebrew name Sarah <span class="escaped-code-point" data-escaped="[U+2067]"><span class="char">` + "\u2067" + `</span></span>ืฉืจื”<span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>` + "\n" +
`sin (ืฉ) (which appears rightmost), then resh (ืจ), and finally heh (ื”) (which should appear leftmost).` +
"\n" + `if access_level != "user<span class="escaped-code-point" data-escaped="[U+202E]"><span class="char">` + "\u202e" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>// Check if admin<span class="escaped-code-point" data-escaped="[U+2069]"><span class="char">` + "\u2069" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>" {` + "\n",
status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true, HasRTLScript: true},
},
}
// TestEscapeControlString feeds every shared table case to
// EscapeControlString and verifies both the reported status and the
// escaped text against the expectations recorded in the table.
func TestEscapeControlString(t *testing.T) {
	for i := range escapeControlTests {
		tc := escapeControlTests[i]
		t.Run(tc.name, func(t *testing.T) {
			gotStatus, gotText := EscapeControlString(tc.text)

			// Status and text are checked independently so a single run
			// reports both mismatches.
			statusOK := reflect.DeepEqual(gotStatus, tc.status)
			if !statusOK {
				t.Errorf("EscapeControlString() status = %v, wanted= %v", gotStatus, tc.status)
			}
			if gotText != tc.result {
				t.Errorf("EscapeControlString()\nresult= %v,\nwanted= %v", gotText, tc.result)
			}
		})
	}
}
// TestEscapeControlBytes feeds every shared table case to EscapeControlBytes
// (as a byte slice) and verifies both the reported status and the escaped
// output against the expectations recorded in the table.
func TestEscapeControlBytes(t *testing.T) {
	for _, tt := range escapeControlTests {
		t.Run(tt.name, func(t *testing.T) {
			status, raw := EscapeControlBytes([]byte(tt.text))
			if !reflect.DeepEqual(status, tt.status) {
				t.Errorf("EscapeControlBytes() status = %v, wanted= %v", status, tt.status)
			}
			// Convert once and use the string both for the comparison and the
			// failure message: formatting the raw []byte with %v would print a
			// list of decimal byte values that cannot be compared by eye with
			// the expected string.
			result := string(raw)
			if result != tt.result {
				t.Errorf("EscapeControlBytes()\nresult= %v,\nwanted= %v", result, tt.result)
			}
		})
	}
}
// TestEscapeControlReader runs the shared table through EscapeControlReader.
// Besides the unmodified cases it derives two variants of every case: one
// prefixed with the control character U+001E and one prefixed with the
// combining mark U+0300, each with the expected result and status adjusted
// accordingly.
func TestEscapeControlReader(t *testing.T) {
	// Let's add some control characters to the tests.
	tests := make([]escapeControlTest, 0, len(escapeControlTests)*3)
	// Start with the unmodified cases. append is required here: the slice has
	// length 0, so copy(tests, escapeControlTests) would copy nothing and the
	// base cases would silently never run against the reader.
	tests = append(tests, escapeControlTests...)
	// test is a per-iteration copy, so the shared table is not mutated.
	for _, test := range escapeControlTests {
		test.name += " (+Control)"
		test.text = "\u001E" + test.text
		test.result = `<span class="escaped-code-point" data-escaped="[U+001E]"><span class="char">` + "\u001e" + `</span></span>` + test.result
		test.status.Escaped = true
		test.status.HasControls = true
		tests = append(tests, test)
	}
	for _, test := range escapeControlTests {
		test.name += " (+Mark)"
		test.text = "\u0300" + test.text
		test.result = `<span class="escaped-code-point" data-escaped="[U+0300]"><span class="char">` + "\u0300" + `</span></span>` + test.result
		test.status.Escaped = true
		test.status.HasMarks = true
		tests = append(tests, test)
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			input := strings.NewReader(tt.text)
			output := &strings.Builder{}
			status, err := EscapeControlReader(input, output)
			result := output.String()
			if err != nil {
				t.Errorf("EscapeControlReader(): err = %v", err)
			}
			if !reflect.DeepEqual(status, tt.status) {
				t.Errorf("EscapeControlReader() status = %v, wanted= %v", status, tt.status)
			}
			if result != tt.result {
				t.Errorf("EscapeControlReader()\nresult= %v,\nwanted= %v", result, tt.result)
			}
		})
	}
}