Update dependencies

This commit is contained in:
James
2017-08-16 11:15:14 -07:00
parent b9c0374fe3
commit f46ea14760
1220 changed files with 142690 additions and 47576 deletions

View File

@@ -133,7 +133,10 @@ var consts = map[string]string{
// locales is taken from
// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
var locales = []struct{ tag, name string }{
{"und", "windows-1252"}, // The default value.
// The default value. Explicitly state latin to benefit from the exact
// script option, while still making 1252 the default encoding for languages
// written in Latin script.
{"und_Latn", "windows-1252"},
{"ar", "windows-1256"},
{"ba", "windows-1251"},
{"be", "windows-1251"},

View File

@@ -313,7 +313,7 @@ var nameMap = map[string]htmlEncoding{
}
var localeMap = []htmlEncoding{
windows1252, // und
windows1252, // und_Latn
windows1256, // ar
windows1251, // ba
windows1251, // be
@@ -349,4 +349,4 @@ var localeMap = []htmlEncoding{
big5, // zh-hant
}
const locales = "und ar ba be bg cs el et fa he hr hu ja kk ko ku ky lt lv mk pl ru sah sk sl sr tg th tr tt uk vi zh-hans zh-hant"
const locales = "und_Latn ar ba be bg cs el et fa he hr hu ja kk ko ku ky lt lv mk pl ru sah sk sl sr tg th tr tt uk vi zh-hans zh-hant"

View File

@@ -44,16 +44,30 @@ const maxIntDigits = 20
//
// Examples:
// Number Decimal
// 12345 Digits: [1, 2, 3, 4, 5], Exp: 5
// 12.345 Digits: [1, 2, 3, 4, 5], Exp: 2
// 12000 Digits: [1, 2], Exp: 5
// 0.00123 Digits: [1, 2, 3], Exp: -2
// decimal
// 12345 Digits: [1, 2, 3, 4, 5], Exp: 5 End: 5
// 12.345 Digits: [1, 2, 3, 4, 5], Exp: 2 End: 5
// 12000 Digits: [1, 2], Exp: 5 End: 5
// 12000.00 Digits: [1, 2], Exp: 5 End: 7
// 0.00123 Digits: [1, 2, 3], Exp: -2 End: 3
// 0 Digits: [], Exp: 0 End: 1
// scientific:
// 0 Digits: [], Exp: 0, End: 1, Comma: 0
// 1.23e4 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 1
// engineering
// 12.3e3 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 2
type Decimal struct {
Digits []byte // mantissa digits, big-endian
Exp int32 // exponent
Neg bool
Inf bool // Takes precedence over Digits and Exp.
NaN bool // Takes precedence over Inf.
// End indicates the end position of the number.
End int32 // For decimals Exp <= End. For scientific len(Digits) <= End.
// Comma is used for the comma position for scientific (always 0 or 1) and
// engineering notation (always 0, 1, 2, or 3).
Comma uint8
Neg bool
Inf bool // Takes precedence over Digits and Exp.
NaN bool // Takes precedence over Inf.
buf [maxIntDigits]byte
}

View File

@@ -131,23 +131,87 @@ func (f *Formatter) Format(dst []byte, d *Decimal) []byte {
return result
}
// appendDecimal appends a formatted number to dst. It returns two possible
// insertion points for padding.
func appendDecimal(dst []byte, f *Formatter, d *Decimal) (b []byte, postPre, preSuf int) {
if dst, ok := f.renderSpecial(dst, d); ok {
return dst, 0, len(dst)
// TODO: just return visible digits.
func decimalVisibleDigits(f *Formatter, d *Decimal) Decimal {
if d.NaN || d.Inf {
return *d
}
n := d.normalize()
if maxSig := int(f.MaxSignificantDigits); maxSig > 0 {
// TODO: really round to zero?
n.round(ToZero, maxSig)
}
digits := n.Digits
exp := n.Exp
exp += int32(f.Pattern.DigitShift)
// Cap integer digits. Remove *most-significant* digits.
if f.MaxIntegerDigits > 0 {
if p := int(exp) - int(f.MaxIntegerDigits); p > 0 {
if p > len(digits) {
p = len(digits)
}
if digits = digits[p:]; len(digits) == 0 {
exp = 0
} else {
exp -= int32(p)
}
// Strip leading zeros.
for len(digits) > 0 && digits[0] == 0 {
digits = digits[1:]
exp--
}
}
}
// Rounding usually is done by convert, but we don't rely on it.
numFrac := len(digits) - int(exp)
if f.MaxSignificantDigits == 0 && int(f.MaxFractionDigits) < numFrac {
p := int(exp) + int(f.MaxFractionDigits)
if p <= 0 {
p = 0
} else if p >= len(digits) {
p = len(digits)
}
digits = digits[:p] // TODO: round
}
// set End (trailing zeros)
n.End = int32(len(digits))
if len(digits) == 0 {
if f.MinFractionDigits > 0 {
n.End = int32(f.MinFractionDigits)
}
if p := int32(f.MinSignificantDigits) - 1; p > n.End {
n.End = p
}
} else {
if end := exp + int32(f.MinFractionDigits); end > n.End {
n.End = end
}
if n.End < int32(f.MinSignificantDigits) {
n.End = int32(f.MinSignificantDigits)
}
}
n.Digits = digits
n.Exp = exp
return n
}
// appendDecimal appends a formatted number to dst. It returns two possible
// insertion points for padding.
func appendDecimal(dst []byte, f *Formatter, d *Decimal) (b []byte, postPre, preSuf int) {
if dst, ok := f.renderSpecial(dst, d); ok {
return dst, 0, len(dst)
}
n := decimalVisibleDigits(f, d)
digits := n.Digits
exp := n.Exp
// Split in integer and fraction part.
var intDigits, fracDigits []byte
var numInt, numFrac int
numInt := 0
numFrac := int(n.End - n.Exp)
if exp > 0 {
numInt = int(exp)
if int(exp) >= len(digits) { // ddddd | ddddd00
@@ -155,39 +219,9 @@ func appendDecimal(dst []byte, f *Formatter, d *Decimal) (b []byte, postPre, pre
} else { // ddd.dd
intDigits = digits[:exp]
fracDigits = digits[exp:]
numFrac = len(fracDigits)
}
} else {
fracDigits = digits
numFrac = -int(exp) + len(digits)
}
// Cap integer digits. Remove *most-significant* digits.
if f.MaxIntegerDigits > 0 && numInt > int(f.MaxIntegerDigits) {
offset := numInt - int(f.MaxIntegerDigits)
if offset > len(intDigits) {
numInt = 0
intDigits = nil
} else {
numInt = int(f.MaxIntegerDigits)
intDigits = intDigits[offset:]
// for keeping track of significant digits
digits = digits[offset:]
}
// Strip leading zeros. Resulting number of digits is significant digits.
for len(intDigits) > 0 && intDigits[0] == 0 {
intDigits = intDigits[1:]
digits = digits[1:]
numInt--
}
}
if f.MaxSignificantDigits == 0 && int(f.MaxFractionDigits) < numFrac {
if extra := numFrac - int(f.MaxFractionDigits); extra > len(fracDigits) {
numFrac = 0
fracDigits = nil
} else {
numFrac = int(f.MaxFractionDigits)
fracDigits = fracDigits[:len(fracDigits)-extra]
}
}
neg := d.Neg
@@ -220,43 +254,41 @@ func appendDecimal(dst []byte, f *Formatter, d *Decimal) (b []byte, postPre, pre
}
}
trailZero := int(f.MinFractionDigits) - numFrac
if d := int(f.MinSignificantDigits) - len(digits); d > 0 && d > trailZero {
trailZero = d
}
if numFrac > 0 || trailZero > 0 || f.Flags&AlwaysDecimalSeparator != 0 {
if numFrac > 0 || f.Flags&AlwaysDecimalSeparator != 0 {
dst = append(dst, f.Symbol(SymDecimal)...)
}
// Add leading zeros
for i := numFrac - len(fracDigits); i > 0; i-- {
i = 0
for n := -int(n.Exp); i < n; i++ {
dst = f.AppendDigit(dst, 0)
}
i = 0
for ; i < len(fracDigits); i++ {
dst = f.AppendDigit(dst, fracDigits[i])
for _, d := range fracDigits {
i++
dst = f.AppendDigit(dst, d)
}
for ; trailZero > 0; trailZero-- {
for ; i < numFrac; i++ {
dst = f.AppendDigit(dst, 0)
}
return appendAffix(dst, f, suffix, neg), savedLen, len(dst)
}
// appendScientific appends a formatted number to dst. It returns two possible
// insertion points for padding.
func appendScientific(dst []byte, f *Formatter, d *Decimal) (b []byte, postPre, preSuf int) {
if dst, ok := f.renderSpecial(dst, d); ok {
return dst, 0, 0
func scientificVisibleDigits(f *Formatter, d *Decimal) Decimal {
if d.NaN || d.Inf {
return *d
}
// Significant digits are transformed by parser for scientific notation and
// do not need to be handled here.
n := d.normalize()
// Significant digits are transformed by the parser for scientific notation
// and do not need to be handled here.
maxInt, numInt := int(f.MaxIntegerDigits), int(f.MinIntegerDigits)
if numInt == 0 {
numInt = 1
}
maxSig := int(f.MaxFractionDigits) + numInt
minSig := int(f.MinFractionDigits) + numInt
n := d.normalize()
if maxSig > 0 {
// TODO: really round to zero?
n.round(ToZero, maxSig)
}
digits := n.Digits
@@ -282,6 +314,30 @@ func appendScientific(dst []byte, f *Formatter, d *Decimal) (b []byte, postPre,
} else {
exp -= int32(numInt)
}
n.Comma = uint8(numInt)
n.End = int32(len(digits))
if n.End < int32(minSig) {
n.End = int32(minSig)
}
n.Digits = digits
n.Exp = exp
return n
}
// appendScientific appends a formatted number to dst. It returns two possible
// insertion points for padding.
func appendScientific(dst []byte, f *Formatter, d *Decimal) (b []byte, postPre, preSuf int) {
if dst, ok := f.renderSpecial(dst, d); ok {
return dst, 0, 0
}
// n := d.normalize()
n := scientificVisibleDigits(f, d)
digits := n.Digits
exp := n.Exp
numInt := int(n.Comma)
numFrac := int(n.End) - int(n.Comma)
var intDigits, fracDigits []byte
if numInt <= len(digits) {
intDigits = digits[:numInt]
@@ -308,15 +364,14 @@ func appendScientific(dst []byte, f *Formatter, d *Decimal) (b []byte, postPre,
}
}
trailZero := minSig - numInt - len(fracDigits)
if len(fracDigits) > 0 || trailZero > 0 || f.Flags&AlwaysDecimalSeparator != 0 {
if numFrac > 0 || f.Flags&AlwaysDecimalSeparator != 0 {
dst = append(dst, f.Symbol(SymDecimal)...)
}
i = 0
for ; i < len(fracDigits); i++ {
dst = f.AppendDigit(dst, fracDigits[i])
}
for ; trailZero > 0; trailZero-- {
for ; i < numFrac; i++ {
dst = f.AppendDigit(dst, 0)
}

View File

@@ -40,7 +40,9 @@ var matchTests = []matchTest{
{"en", "sh"},
{"en", "hr"},
{"en", "bs"},
{"en", "nl-Cyrl"},
// TODO: consider if the following match is a good one.
// Due to new script first rule, which maybe should be an option.
{"sr", "nl-Cyrl"},
},
},
{
@@ -228,6 +230,14 @@ var matchTests = []matchTest{
{"nl-NL", "nl-NL"},
},
},
{
"region may replace matched if matched is enclosing",
"es-419,es",
[]struct{ match, desired string }{
{"es-MX", "es-MX"},
{"es", "es-SG"},
},
},
{
"more specific region wins over more specific script",
"nl, nl-Latn, nl-NL, nl-BE",
@@ -265,7 +275,7 @@ var matchTests = []matchTest{
"en, en-GB, es-ES, es-419",
[]struct{ match, desired string }{
{"en-GB", "en-AU"},
{"es-419", "es-MX"},
{"es-MX", "es-MX"},
{"es-ES", "es-PT"},
},
},
@@ -355,6 +365,7 @@ var matchTests = []matchTest{
"en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK",
[]struct{ match, desired string }{
{"en-GB", "en-150"},
// {"en-GB", "en-001"}, // TODO: currently en, should probably be en-GB
{"en-GB", "en-AU"},
{"en-GB", "en-BE"},
{"en-GB", "en-GG"},
@@ -370,26 +381,27 @@ var matchTests = []matchTest{
{"en-GB", "en-SG"},
{"en-GB", "en-DE"},
{"en-GB", "en-MT"},
{"es-419", "es-AR"},
{"es-419", "es-BO"},
{"es-419", "es-CL"},
{"es-419", "es-CO"},
{"es-419", "es-CR"},
{"es-419", "es-CU"},
{"es-419", "es-DO"},
{"es-419", "es-EC"},
{"es-419", "es-GT"},
{"es-419", "es-HN"},
{"es-419", "es-MX"},
{"es-419", "es-NI"},
{"es-419", "es-PA"},
{"es-419", "es-PE"},
{"es-419", "es-PR"},
{"es-419", "es-PY"},
{"es-419", "es-SV"},
{"es-419", "es-US"},
{"es-419", "es-UY"},
{"es-419", "es-VE"},
{"es-AR", "es-AR"},
{"es-BO", "es-BO"},
{"es-CL", "es-CL"},
{"es-CO", "es-CO"},
{"es-CR", "es-CR"},
{"es-CU", "es-CU"},
{"es-DO", "es-DO"},
{"es-EC", "es-EC"},
{"es-GT", "es-GT"},
{"es-HN", "es-HN"},
{"es-MX", "es-MX"},
{"es-NI", "es-NI"},
{"es-PA", "es-PA"},
{"es-PE", "es-PE"},
{"es-PR", "es-PR"},
{"es", "es-PT"},
{"es-PY", "es-PY"},
{"es-SV", "es-SV"},
{"es-419", "es-US"}, // US is not in Latin America, so don't make more specific.
{"es-UY", "es-UY"},
{"es-VE", "es-VE"},
{"pt-PT", "pt-AO"},
{"pt-PT", "pt-CV"},
{"pt-PT", "pt-GW"},
@@ -397,9 +409,6 @@ var matchTests = []matchTest{
{"pt-PT", "pt-MZ"},
{"pt-PT", "pt-ST"},
{"pt-PT", "pt-TL"},
// TODO for CLDR 24+
// - en-001
// - {"zh-Hant-HK", "zh-Hant-MO"},
},
},
// Options and variants are inherited from user-defined settings.

View File

@@ -129,8 +129,15 @@ const (
// specific language or locale. All language tag values are guaranteed to be
// well-formed.
type Tag struct {
lang langID
region regionID
lang langID
region regionID
// TODO: we will soon run out of positions for script. Idea: instead of
// storing lang, region, and script codes, store only the compact index and
// have a lookup table from this code to its expansion. This greatly speeds
// up table lookup, speed up common variant cases.
// This will also immediately free up 3 extra bytes. Also, the pVariant
// field can now be moved to the lookup table, as the compact index uniquely
// determines the offset of a possible variant.
script scriptID
pVariant byte // offset in str, includes preceding '-'
pExt uint16 // offset of first extension, includes preceding '-'

View File

@@ -6,6 +6,16 @@ package language
import "errors"
// A MatchOption configures a Matcher.
type MatchOption func(*matcher)
// PreferSameScript will, in the absence of a match, result in the first
// preferred tag with the same script as a supported tag to match this supported
// tag. The default is currently true, but this may change in the future.
func PreferSameScript(preferSame bool) MatchOption {
return func(m *matcher) { m.preferSameScript = preferSame }
}
// Matcher is the interface that wraps the Match method.
//
// Match returns the best match for any of the given tags, along with
@@ -36,23 +46,44 @@ func Comprehends(speaker, alternative Tag) Confidence {
// matched tag in t, but is augmented with the Unicode extension ('u')of the
// corresponding preferred tag. This allows user locale options to be passed
// transparently.
func NewMatcher(t []Tag) Matcher {
return newMatcher(t)
func NewMatcher(t []Tag, options ...MatchOption) Matcher {
return newMatcher(t, options)
}
func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
match, w, c := m.getBest(want...)
if match == nil {
t = m.default_.tag
} else {
if match != nil {
t, index = match.tag, match.index
} else {
// TODO: this should be an option
t = m.default_.tag
if m.preferSameScript {
outer:
for _, w := range want {
script, _ := w.Script()
if script.scriptID == 0 {
// Don't do anything if there is no script, such as with
// private subtags.
continue
}
for i, h := range m.supported {
if script.scriptID == h.maxScript {
t, index = h.tag, i
break outer
}
}
}
}
// TODO: select first language tag based on script.
}
if w.region != 0 && t.region != 0 && t.region.contains(w.region) {
t, _ = Raw.Compose(t, Region{w.region})
}
// Copy options from the user-provided tag into the result tag. This is hard
// to do after the fact, so we do it here.
// TODO: consider also adding in variants that are compatible with the
// matched language.
// TODO: Add back region if it is non-ambiguous? Or create another tag to
// preserve the region?
// TODO: add in alternative variants to -u-va-.
// TODO: add preferred region to -u-rg-.
// TODO: add other extensions. Merge with existing extensions.
if u, ok := w.Extension('u'); ok {
t, _ = Raw.Compose(t, u)
}
@@ -388,9 +419,11 @@ func minimizeTags(t Tag) (Tag, error) {
// matcher keeps a set of supported language tags, indexed by language.
type matcher struct {
default_ *haveTag
index map[langID]*matchHeader
passSettings bool
default_ *haveTag
supported []*haveTag
index map[langID]*matchHeader
passSettings bool
preferSameScript bool
}
// matchHeader has the lists of tags for exact matches and matches based on
@@ -501,9 +534,13 @@ func toConf(d uint8) Confidence {
// newMatcher builds an index for the given supported tags and returns it as
// a matcher. It also expands the index by considering various equivalence classes
// for a given tag.
func newMatcher(supported []Tag) *matcher {
func newMatcher(supported []Tag, options []MatchOption) *matcher {
m := &matcher{
index: make(map[langID]*matchHeader),
index: make(map[langID]*matchHeader),
preferSameScript: true,
}
for _, o := range options {
o(m)
}
if len(supported) == 0 {
m.default_ = &haveTag{}
@@ -514,6 +551,7 @@ func newMatcher(supported []Tag) *matcher {
for i, tag := range supported {
pair, _ := makeHaveTag(tag, i)
m.header(tag.lang).addIfNew(pair, true)
m.supported = append(m.supported, &pair)
}
m.default_ = m.header(supported[0].lang).exact[0]
for i, tag := range supported {
@@ -523,6 +561,9 @@ func newMatcher(supported []Tag) *matcher {
}
}
// TODO: include alt script.
// - don't replace regions, but allow regions to be made more specific.
// update is used to add indexes in the map for equivalent languages.
// If force is true, the update will also apply to derived entries. To
// avoid applying a "transitive closure", use false.
@@ -648,11 +689,12 @@ type bestMatch struct {
want Tag
conf Confidence
// Cached results from applying tie-breaking rules.
origLang bool
origReg bool
regDist uint8
origScript bool
parentDist uint8 // 255 if have is not an ancestor of want tag.
origLang bool
origReg bool
regGroupDist uint8
regDist uint8
origScript bool
parentDist uint8 // 255 if have is not an ancestor of want tag.
}
// update updates the existing best match if the new pair is considered to be a
@@ -706,6 +748,14 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
beaten = true
}
regGroupDist := regionGroupDist(have.maxRegion, maxRegion, maxScript, tag.lang)
if !beaten && m.regGroupDist != regGroupDist {
if regGroupDist > m.regGroupDist {
return
}
beaten = true
}
// We prefer if the pre-maximized region was specified and identical.
origReg := have.tag.region == tag.region && tag.region != 0
if !beaten && m.origReg != origReg {
@@ -715,8 +765,22 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
beaten = true
}
// Next we prefer smaller distances between regions, as defined by regionDist.
regDist := regionDist(have.maxRegion, maxRegion, tag.lang)
// TODO: remove the region distance rule. Region distance has been replaced
// by the region grouping rule. For now we leave it as it still seems to
// have a net positive effect when applied after the grouping rule.
// Possible solutions:
// - apply the primary locale rule first to effectively disable region
// region distance if groups are defined.
// - express the following errors in terms of grouping (if possible)
// - find another method of handling the following cases.
// maximization of legacy: find mo in
// "sr-Cyrl, sr-Latn, ro, ro-MD": have ro; want ro-MD (High)
// region distance French: find fr-US in
// "en, fr, fr-CA, fr-CH": have fr; want fr-CA (High)
// Next we prefer smaller distances between regions, as defined by
// regionDist.
regDist := uint8(regionDistance(have.maxRegion, maxRegion))
if !beaten && m.regDist != regDist {
if regDist > m.regDist {
return
@@ -734,6 +798,9 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
}
// Finally we prefer tags which have a closer parent relationship.
// TODO: the parent relationship no longer seems necessary. It doesn't hurt
// to leave it in as the final tie-breaker, though, especially until the
// grouping data has further matured.
parentDist := parentDistance(have.tag.region, tag)
if !beaten && m.parentDist != parentDist {
if parentDist > m.parentDist {
@@ -750,6 +817,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
m.origLang = origLang
m.origReg = origReg
m.origScript = origScript
m.regGroupDist = regGroupDist
m.regDist = regDist
m.parentDist = parentDist
}
@@ -772,15 +840,27 @@ func parentDistance(haveRegion regionID, tag Tag) uint8 {
return d
}
// regionDist wraps regionDistance with some exceptions to the algorithmic distance.
func regionDist(a, b regionID, lang langID) uint8 {
if lang == _en {
// Two variants of non-US English are close to each other, regardless of distance.
if a != _US && b != _US {
return 2
// regionGroupDist computes the distance between two regions based on their
// CLDR grouping.
func regionGroupDist(a, b regionID, script scriptID, lang langID) uint8 {
aGroup := uint(regionToGroups[a]) << 1
bGroup := uint(regionToGroups[b]) << 1
for _, ri := range matchRegion {
if langID(ri.lang) == lang && (ri.script == 0 || scriptID(ri.script) == script) {
group := uint(1 << (ri.group &^ 0x80))
if 0x80&ri.group == 0 {
if aGroup&bGroup&group != 0 { // Both regions are in the group.
return ri.distance
}
} else {
if (aGroup|bGroup)&group == 0 { // Both regions are not in the group.
return ri.distance
}
}
}
}
return uint8(regionDistance(a, b))
const defaultDistance = 4
return defaultDistance
}
// regionDistance computes the distance between two regions based on the

View File

@@ -8,14 +8,109 @@ import (
"bytes"
"flag"
"fmt"
"os"
"path"
"strings"
"testing"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/internal/ucd"
)
var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers")
func TestCLDRCompliance(t *testing.T) {
r, err := os.Open("testdata/localeMatcherTest.txt")
if err != nil {
t.Fatal(err)
}
ucd.Parse(r, func(p *ucd.Parser) {
name := strings.Replace(path.Join(p.String(0), p.String(1)), " ", "", -1)
if skip[name] {
return
}
t.Run(name, func(t *testing.T) {
supported := makeTagList(p.String(0))
desired := makeTagList(p.String(1))
gotCombined, index, _ := NewMatcher(supported).Match(desired...)
gotMatch := supported[index]
wantMatch := Make(p.String(2))
if gotMatch != wantMatch {
t.Fatalf("match: got %q; want %q", gotMatch, wantMatch)
}
wantCombined, err := Parse(p.String(3))
if err == nil && gotCombined != wantCombined {
t.Errorf("combined: got %q; want %q", gotCombined, wantCombined)
}
})
})
}
var skip = map[string]bool{
// TODO: bugs
// und-<region> is not expanded to the appropriate language.
"en-Hant-TW,und-TW/zh-Hant": true, // match: got "en-Hant-TW"; want "und-TW"
"en-Hant-TW,und-TW/zh": true, // match: got "en-Hant-TW"; want "und-TW"
// Honor the wildcard match. This may only be useful to select non-exact
// stuff.
"mul,af/nl": true, // match: got "af"; want "mul"
// TODO: include other extensions.
// combined: got "en-GB-u-ca-buddhist-nu-arab"; want "en-GB-fonipa-t-m0-iso-i0-pinyin-u-ca-buddhist-nu-arab"
"und,en-GB-u-sd-gbsct/en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin": true,
// Inconsistencies with Mark Davis' implementation where it is not clear
// which is better.
// Go prefers exact matches over less exact preferred ones.
// Preferring desired ones might be better.
"en,de,fr,ja/de-CH,fr": true, // match: got "fr"; want "de"
"en-GB,en,de,fr,ja/de-CH,fr": true, // match: got "fr"; want "de"
"pt-PT,pt-BR,es,es-419/pt-US,pt-PT": true, // match: got "pt-PT"; want "pt-BR"
"pt-PT,pt,es,es-419/pt-US,pt-PT,pt": true, // match: got "pt-PT"; want "pt"
"en,sv/en-GB,sv": true, // match: got "sv"; want "en"
"en-NZ,en-IT/en-US": true, // match: got "en-IT"; want "en-NZ"
// Inconsistencies in combined. I think the Go approach is more appropriate.
// We could use -u-rg- and -u-va- as alternative.
"und,fr/fr-BE-fonipa": true, // combined: got "fr"; want "fr-BE-fonipa"
"und,fr-CA/fr-BE-fonipa": true, // combined: got "fr-CA"; want "fr-BE-fonipa"
"und,fr-fonupa/fr-BE-fonipa": true, // combined: got "fr-fonupa"; want "fr-BE-fonipa"
"und,no/nn-BE-fonipa": true, // combined: got "no"; want "no-BE-fonipa"
"50,und,fr-CA-fonupa/fr-BE-fonipa": true, // combined: got "fr-CA-fonupa"; want "fr-BE-fonipa"
// Spec says prefer primary locales. But what is the benefit? Shouldn't
// the developer just not specify the primary locale first in the list?
// TODO: consider adding a SortByPreferredLocale function to ensure tags
// are ordered such that the preferred locale rule is observed.
// TODO: most of these cases are solved by getting rid of the region
// distance tie-breaker rule (see comments there).
"und,es,es-MA,es-MX,es-419/es-EA": true, // match: got "es-MA"; want "es"
"und,es-MA,es,es-419,es-MX/es-EA": true, // match: got "es-MA"; want "es"
"und,en,en-GU,en-IN,en-GB/en-ZA": true, // match: got "en-IN"; want "en-GB"
"und,en,en-GU,en-IN,en-GB/en-VI": true, // match: got "en-GU"; want "en"
"und,en-GU,en,en-GB,en-IN/en-VI": true, // match: got "en-GU"; want "en"
// Falling back to the default seems more appropriate than falling back
// on a language with the same script.
"50,und,fr-Cyrl-CA-fonupa/fr-BE-fonipa": true,
// match: got "und"; want "fr-Cyrl-CA-fonupa"
// combined: got "und"; want "fr-Cyrl-BE-fonipa"
// Other interesting cases to test:
// - Should same language or same script have the preference if there is
// usually no understanding of the other script?
// - More specific region in desired may replace enclosing supported.
}
func makeTagList(s string) (tags []Tag) {
for _, s := range strings.Split(s, ",") {
tags = append(tags, Make(strings.TrimSpace(s)))
}
return tags
}
func TestAddLikelySubtags(t *testing.T) {
tests := []struct{ in, out string }{
{"aa", "aa-Latn-ET"},
@@ -164,6 +259,31 @@ func TestMinimize(t *testing.T) {
}
}
func TestRegionGroups(t *testing.T) {
testCases := []struct {
a, b string
distance uint8
}{
{"zh-TW", "zh-HK", 5},
{"zh-MO", "zh-HK", 4},
}
for _, tc := range testCases {
a := MustParse(tc.a)
aScript, _ := a.Script()
b := MustParse(tc.b)
bScript, _ := b.Script()
if aScript != bScript {
t.Errorf("scripts differ: %q vs %q", aScript, bScript)
continue
}
d := regionGroupDist(a.region, b.region, aScript.scriptID, a.lang)
if d != tc.distance {
t.Errorf("got %q; want %q", d, tc.distance)
}
}
}
func TestRegionDistance(t *testing.T) {
tests := []struct {
a, b string
@@ -259,17 +379,20 @@ func parseSupported(list string) (out []Tag) {
// The test set for TestBestMatch is defined in data_test.go.
func TestBestMatch(t *testing.T) {
for i, tt := range matchTests {
for _, tt := range matchTests {
supported := parseSupported(tt.supported)
m := newMatcher(supported)
m := newMatcher(supported, nil)
if *verbose {
fmt.Printf("%s:\n%v\n", tt.comment, m)
}
for _, tm := range tt.test {
tag, _, conf := m.Match(parseSupported(tm.desired)...)
if tag.String() != tm.match {
t.Errorf("%d:%s: find %s in %q: have %s; want %s (%v)\n", i, tt.comment, tm.desired, tt.supported, tag, tm.match, conf)
}
t.Run(path.Join(tt.comment, tt.supported, tm.desired), func(t *testing.T) {
tag, _, conf := m.Match(parseSupported(tm.desired)...)
if tag.String() != tm.match {
t.Errorf("find %s in %q: have %s; want %s (%v)", tm.desired, tt.supported, tag, tm.match, conf)
}
})
}
}
}
@@ -352,7 +475,7 @@ var benchWant = [][]Tag{
}
func BenchmarkMatch(b *testing.B) {
m := newMatcher(benchHave)
m := newMatcher(benchHave, nil)
for i := 0; i < b.N; i++ {
for _, want := range benchWant {
m.getBest(want...)
@@ -362,7 +485,7 @@ func BenchmarkMatch(b *testing.B) {
func BenchmarkMatchExact(b *testing.B) {
want := mk("en")
m := newMatcher(benchHave)
m := newMatcher(benchHave, nil)
for i := 0; i < b.N; i++ {
m.getBest(want)
}
@@ -370,7 +493,7 @@ func BenchmarkMatchExact(b *testing.B) {
func BenchmarkMatchAltLanguagePresent(b *testing.B) {
want := mk("hr")
m := newMatcher(benchHave)
m := newMatcher(benchHave, nil)
for i := 0; i < b.N; i++ {
m.getBest(want)
}
@@ -378,7 +501,7 @@ func BenchmarkMatchAltLanguagePresent(b *testing.B) {
func BenchmarkMatchAltLanguageNotPresent(b *testing.B) {
want := mk("nn")
m := newMatcher(benchHave)
m := newMatcher(benchHave, nil)
for i := 0; i < b.N; i++ {
m.getBest(want)
}
@@ -386,7 +509,7 @@ func BenchmarkMatchAltLanguageNotPresent(b *testing.B) {
func BenchmarkMatchAltScriptPresent(b *testing.B) {
want := mk("zh-Hant-CN")
m := newMatcher(benchHave)
m := newMatcher(benchHave, nil)
for i := 0; i < b.N; i++ {
m.getBest(want)
}
@@ -394,7 +517,7 @@ func BenchmarkMatchAltScriptPresent(b *testing.B) {
func BenchmarkMatchAltScriptNotPresent(b *testing.B) {
want := mk("fr-Cyrl")
m := newMatcher(benchHave)
m := newMatcher(benchHave, nil)
for i := 0; i < b.N; i++ {
m.getBest(want)
}
@@ -402,7 +525,7 @@ func BenchmarkMatchAltScriptNotPresent(b *testing.B) {
func BenchmarkMatchLimitedExact(b *testing.B) {
want := []Tag{mk("he-NL"), mk("iw-NL")}
m := newMatcher(benchHave)
m := newMatcher(benchHave, nil)
for i := 0; i < b.N; i++ {
m.getBest(want...)
}

View File

@@ -0,0 +1,389 @@
# TODO: this file has not yet been included in the main CLDR release.
# The intent is to verify this file against the Go implementation and then
# correct the cases and add merge in other interesting test cases.
# See TestCLDRCompliance in match_test.go, as well as the list of exceptions
# defined in the map skip below it, for the work in progress.
# Data-driven test for the XLocaleMatcher.
# Format
# • Everything after "#" is a comment
# • Arguments are separated by ";". They are:
# supported ; desired ; expected
# • The supported may have the threshold distance reset as a first item, eg 50, en, fr
# A line starting with @debug will reach a statement in the test code where you can put a breakpoint for debugging
# The test code also supports reformatting this file, by setting the REFORMAT flag.
##################################################
# testParentLocales
# es-419, es-AR, and es-MX are in a cluster; es is in a different one
es-419, es-ES ; es-AR ; es-419
es-ES, es-419 ; es-AR ; es-419
es-419, es ; es-AR ; es-419
es, es-419 ; es-AR ; es-419
es-MX, es ; es-AR ; es-MX
es, es-MX ; es-AR ; es-MX
# en-GB, en-AU, and en-NZ are in a cluster; en in a different one
en-GB, en-US ; en-AU ; en-GB
en-US, en-GB ; en-AU ; en-GB
en-GB, en ; en-AU ; en-GB
en, en-GB ; en-AU ; en-GB
en-NZ, en-US ; en-AU ; en-NZ
en-US, en-NZ ; en-AU ; en-NZ
en-NZ, en ; en-AU ; en-NZ
en, en-NZ ; en-AU ; en-NZ
# pt-AU and pt-PT in one cluster; pt-BR in another
pt-PT, pt-BR ; pt-AO ; pt-PT
pt-BR, pt-PT ; pt-AO ; pt-PT
pt-PT, pt ; pt-AO ; pt-PT
pt, pt-PT ; pt-AO ; pt-PT
zh-MO, zh-TW ; zh-HK ; zh-MO
zh-TW, zh-MO ; zh-HK ; zh-MO
zh-MO, zh-TW ; zh-HK ; zh-MO
zh-TW, zh-MO ; zh-HK ; zh-MO
zh-MO, zh-CN ; zh-HK ; zh-MO
zh-CN, zh-MO ; zh-HK ; zh-MO
zh-MO, zh ; zh-HK ; zh-MO
zh, zh-MO ; zh-HK ; zh-MO
##################################################
# testChinese
zh-CN, zh-TW, iw ; zh-Hant-TW ; zh-TW
zh-CN, zh-TW, iw ; zh-Hant ; zh-TW
zh-CN, zh-TW, iw ; zh-TW ; zh-TW
zh-CN, zh-TW, iw ; zh-Hans-CN ; zh-CN
zh-CN, zh-TW, iw ; zh-CN ; zh-CN
zh-CN, zh-TW, iw ; zh ; zh-CN
##################################################
# testenGB
fr, en, en-GB, es-419, es-MX, es ; en-NZ ; en-GB
fr, en, en-GB, es-419, es-MX, es ; es-ES ; es
fr, en, en-GB, es-419, es-MX, es ; es-AR ; es-419
fr, en, en-GB, es-419, es-MX, es ; es-MX ; es-MX
##################################################
# testFallbacks
91, en, hi ; sa ; hi
##################################################
# testBasics
fr, en-GB, en ; en-GB ; en-GB
fr, en-GB, en ; en ; en
fr, en-GB, en ; fr ; fr
fr, en-GB, en ; ja ; fr # return first if no match
##################################################
# testFallback
# check that script fallbacks are handled right
zh-CN, zh-TW, iw ; zh-Hant ; zh-TW
zh-CN, zh-TW, iw ; zh ; zh-CN
zh-CN, zh-TW, iw ; zh-Hans-CN ; zh-CN
zh-CN, zh-TW, iw ; zh-Hant-HK ; zh-TW
zh-CN, zh-TW, iw ; he-IT ; iw
##################################################
# testSpecials
# check that nearby languages are handled
en, fil, ro, nn ; tl ; fil
en, fil, ro, nn ; mo ; ro
en, fil, ro, nn ; nb ; nn
# make sure default works
en, fil, ro, nn ; ja ; en
##################################################
# testRegionalSpecials
# verify that en-AU is closer to en-GB than to en (which is en-US)
en, en-GB, es, es-419 ; es-MX ; es-419
en, en-GB, es, es-419 ; en-AU ; en-GB
en, en-GB, es, es-419 ; es-ES ; es
##################################################
# testHK
# HK and MO are closer to each other for Hant than to TW
zh, zh-TW, zh-MO ; zh-HK ; zh-MO
zh, zh-TW, zh-HK ; zh-MO ; zh-HK
##################################################
# testMatch-exact
# see localeDistance.txt
##################################################
# testMatch-none
# see localeDistance.txt
##################################################
# testMatch-matchOnMazimized
zh, zh-Hant ; und-TW ; zh-Hant # und-TW should be closer to zh-Hant than to zh
en-Hant-TW, und-TW ; zh-Hant ; und-TW # zh-Hant should be closer to und-TW than to en-Hant-TW
en-Hant-TW, und-TW ; zh ; und-TW # zh should be closer to und-TW than to en-Hant-TW
##################################################
# testMatchGrandfatheredCode
fr, i-klingon, en-Latn-US ; en-GB-oed ; en-Latn-US
##################################################
# testGetBestMatchForList-exactMatch
fr, en-GB, ja, es-ES, es-MX ; ja, de ; ja
##################################################
# testGetBestMatchForList-simpleVariantMatch
fr, en-GB, ja, es-ES, es-MX ; de, en-US ; en-GB # Intentionally avoiding a perfect-match or two candidates for variant matches.
# Fallback.
fr, en-GB, ja, es-ES, es-MX ; de, zh ; fr
##################################################
# testGetBestMatchForList-matchOnMaximized
# Check that if the preference is maximized already, it works as well.
en, ja ; ja-Jpan-JP, en-AU ; ja # Match for ja-Jpan-JP (maximized already)
# ja-JP matches ja on likely subtags, and it's listed first, thus it wins over the second preference en-GB.
en, ja ; ja-JP, en-US ; ja # Match for ja-Jpan-JP (maximized already)
# Check that if the preference is maximized already, it works as well.
en, ja ; ja-Jpan-JP, en-US ; ja # Match for ja-Jpan-JP (maximized already)
##################################################
# testGetBestMatchForList-noMatchOnMaximized
# Regression test for http://b/5714572 .
# de maximizes to de-DE. Pick the exact match for the secondary language instead.
en, de, fr, ja ; de-CH, fr ; de
##################################################
# testBestMatchForTraditionalChinese
# Scenario: An application that only supports Simplified Chinese (and some other languages),
# but does not support Traditional Chinese. zh-Hans-CN could be replaced with zh-CN, zh, or
# zh-Hans, it wouldn't make much of a difference.
# The script distance (simplified vs. traditional Han) is considered small enough
# to be an acceptable match. The regional difference is considered almost insignificant.
fr, zh-Hans-CN, en-US ; zh-TW ; zh-Hans-CN
fr, zh-Hans-CN, en-US ; zh-Hant ; zh-Hans-CN
# For geo-political reasons, you might want to avoid a zh-Hant -> zh-Hans match.
# In this case, if zh-TW, zh-HK or a tag starting with zh-Hant is requested, you can
# change your call to getBestMatch to include a 2nd language preference.
# "en" is a better match since its distance to "en-US" is closer than the distance
# from "zh-TW" to "zh-CN" (script distance).
fr, zh-Hans-CN, en-US ; zh-TW, en ; en-US
fr, zh-Hans-CN, en-US ; zh-Hant-CN, en, en ; en-US
fr, zh-Hans-CN, en-US ; zh-Hans, en ; zh-Hans-CN
##################################################
# testUndefined
# When the undefined language doesn't match anything in the list,
# getBestMatch returns the default, as usual.
it, fr ; und ; it
# When it *does* occur in the list, bestMatch returns it, as expected.
it, und ; und ; und
# The unusual part: max("und") = "en-Latn-US", and since matching is based on maximized
# tags, the undefined language would normally match English. But that would produce the
# counterintuitive results that getBestMatch("und", XLocaleMatcher("it,en")) would be "en", and
# getBestMatch("en", XLocaleMatcher("it,und")) would be "und".
# To avoid that, we change the matcher's definitions of max
# so that max("und")="und". That produces the following, more desirable
# results:
it, en ; und ; it
it, und ; en ; it
##################################################
# testGetBestMatch-regionDistance
es-AR, es ; es-MX ; es-AR
fr, en, en-GB ; en-CA ; en-GB
de-AT, de-DE, de-CH ; de ; de-DE
##################################################
# testAsymmetry
mul, nl ; af ; nl # af => nl
mul, af ; nl ; mul # but nl !=> af
##################################################
# testGetBestMatchForList-matchOnMaximized2
# ja-JP matches ja on likely subtags, and it's listed first, thus it wins over the second preference en-GB.
fr, en-GB, ja, es-ES, es-MX ; ja-JP, en-GB ; ja # Match for ja-JP, with likely region subtag
# Check that if the preference is maximized already, it works as well.
fr, en-GB, ja, es-ES, es-MX ; ja-Jpan-JP, en-GB ; ja # Match for ja-Jpan-JP (maximized already)
##################################################
# testGetBestMatchForList-closeEnoughMatchOnMaximized
en-GB, en, de, fr, ja ; de-CH, fr ; de
en-GB, en, de, fr, ja ; en-US, ar, nl, de, ja ; en
##################################################
# testGetBestMatchForPortuguese
# pt might be supported and not pt-PT
# European user who prefers Spanish over Brazillian Portuguese as a fallback.
pt-PT, pt-BR, es, es-419 ; pt-PT, es, pt ; pt-PT
pt-PT, pt, es, es-419 ; pt-PT, es, pt ; pt-PT # pt implicit
# Brazillian user who prefers South American Spanish over European Portuguese as a fallback.
# The asymmetry between this case and above is because it's "pt-PT" that's missing between the
# matchers as "pt-BR" is a much more common language.
pt-PT, pt-BR, es, es-419 ; pt, es-419, pt-PT ; pt-BR
pt-PT, pt-BR, es, es-419 ; pt-PT, es, pt ; pt-PT
pt-PT, pt, es, es-419 ; pt-PT, es, pt ; pt-PT
pt-PT, pt, es, es-419 ; pt, es-419, pt-PT ; pt
pt-BR, es, es-419 ; pt, es-419, pt-PT ; pt-BR
# Code that adds the user's country can get "pt-US" for a user's language.
# That should fall back to "pt-BR".
pt-PT, pt-BR, es, es-419 ; pt-US, pt-PT ; pt-BR
pt-PT, pt, es, es-419 ; pt-US, pt-PT, pt ; pt # pt-BR implicit
##################################################
# testVariantWithScriptMatch 1 and 2
fr, en, sv ; en-GB ; en
fr, en, sv ; en-GB ; en
en, sv ; en-GB, sv ; en
##################################################
# testLongLists
en, sv ; sv ; sv
af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu ; sv ; sv
af, af-NA, af-ZA, agq, agq-CM, ak, ak-GH, am, am-ET, ar, ar-001, ar-AE, ar-BH, ar-DJ, ar-DZ, ar-EG, ar-EH, ar-ER, ar-IL, ar-IQ, ar-JO, ar-KM, ar-KW, ar-LB, ar-LY, ar-MA, ar-MR, ar-OM, ar-PS, ar-QA, ar-SA, ar-SD, ar-SO, ar-SS, ar-SY, ar-TD, ar-TN, ar-YE, as, as-IN, asa, asa-TZ, ast, ast-ES, az, az-Cyrl, az-Cyrl-AZ, az-Latn, az-Latn-AZ, bas, bas-CM, be, be-BY, bem, bem-ZM, bez, bez-TZ, bg, bg-BG, bm, bm-ML, bn, bn-BD, bn-IN, bo, bo-CN, bo-IN, br, br-FR, brx, brx-IN, bs, bs-Cyrl, bs-Cyrl-BA, bs-Latn, bs-Latn-BA, ca, ca-AD, ca-ES, ca-ES-VALENCIA, ca-FR, ca-IT, ce, ce-RU, cgg, cgg-UG, chr, chr-US, ckb, ckb-IQ, ckb-IR, cs, cs-CZ, cu, cu-RU, cy, cy-GB, da, da-DK, da-GL, dav, dav-KE, de, de-AT, de-BE, de-CH, de-DE, de-LI, de-LU, dje, dje-NE, dsb, dsb-DE, dua, dua-CM, dyo, dyo-SN, dz, dz-BT, ebu, ebu-KE, ee, ee-GH, ee-TG, el, el-CY, el-GR, en, en-001, en-150, en-AG, en-AI, en-AS, en-AT, en-AU, en-BB, en-BE, en-BI, en-BM, en-BS, en-BW, en-BZ, en-CA, en-CC, en-CH, en-CK, en-CM, en-CX, en-CY, en-DE, en-DG, en-DK, en-DM, en-ER, en-FI, en-FJ, en-FK, en-FM, en-GB, en-GD, en-GG, en-GH, en-GI, en-GM, en-GU, en-GY, en-HK, en-IE, en-IL, en-IM, en-IN, en-IO, en-JE, en-JM, en-KE, en-KI, en-KN, en-KY, en-LC, en-LR, en-LS, en-MG, en-MH, en-MO, en-MP, en-MS, en-MT, en-MU, en-MW, en-MY, en-NA, en-NF, en-NG, en-NL, en-NR, en-NU, en-NZ, en-PG, en-PH, en-PK, en-PN, en-PR, en-PW, en-RW, en-SB, en-SC, en-SD, en-SE, en-SG, en-SH, en-SI, en-SL, en-SS, en-SX, en-SZ, en-TC, en-TK, en-TO, en-TT, en-TV, en-TZ, en-UG, en-UM, en-US, en-US-POSIX, en-VC, en-VG, en-VI, en-VU, en-WS, en-ZA, en-ZM, en-ZW, eo, eo-001, es, es-419, es-AR, es-BO, es-CL, es-CO, es-CR, es-CU, es-DO, es-EA, es-EC, es-ES, es-GQ, es-GT, es-HN, es-IC, es-MX, es-NI, es-PA, es-PE, es-PH, es-PR, es-PY, es-SV, es-US, es-UY, es-VE, et, et-EE, eu, eu-ES, ewo, ewo-CM, fa, fa-AF, fa-IR, ff, ff-CM, ff-GN, ff-MR, ff-SN, fi, fi-FI, fil, fil-PH, fo, fo-DK, fo-FO, fr, fr-BE, fr-BF, fr-BI, fr-BJ, fr-BL, fr-CA, fr-CD, fr-CF, fr-CG, fr-CH, fr-CI, fr-CM, fr-DJ, fr-DZ, fr-FR, fr-GA, fr-GF, fr-GN, fr-GP, fr-GQ, fr-HT, fr-KM, fr-LU, fr-MA, fr-MC, fr-MF, fr-MG, fr-ML, fr-MQ, fr-MR, fr-MU, fr-NC, fr-NE, fr-PF, fr-PM, fr-RE, fr-RW, fr-SC, fr-SN, fr-SY, fr-TD, fr-TG, fr-TN, fr-VU, fr-WF, fr-YT, fur, fur-IT, fy, fy-NL, ga, ga-IE, gd, gd-GB, gl, gl-ES, gsw, gsw-CH, gsw-FR, gsw-LI, gu, gu-IN, guz, guz-KE, gv, gv-IM, ha, ha-GH, ha-NE, ha-NG, haw, haw-US, he, he-IL, hi, hi-IN, hr, hr-BA, hr-HR, hsb, hsb-DE, hu, hu-HU, hy, hy-AM, id, id-ID, ig, ig-NG, ii, ii-CN, is, is-IS, it, it-CH, it-IT, it-SM, ja, ja-JP, jgo, jgo-CM, jmc, jmc-TZ, ka, ka-GE, kab, kab-DZ, kam, kam-KE, kde, kde-TZ, kea, kea-CV, khq, khq-ML, ki, ki-KE, kk, kk-KZ, kkj, kkj-CM, kl, kl-GL, kln, kln-KE, km, km-KH, kn, kn-IN, ko, ko-KP, ko-KR, kok, kok-IN, ks, ks-IN, ksb, ksb-TZ, ksf, ksf-CM, ksh, ksh-DE, kw, kw-GB, ky, ky-KG, lag, lag-TZ, lb, lb-LU, lg, lg-UG, lkt, lkt-US, ln, ln-AO, ln-CD, ln-CF, ln-CG, lo, lo-LA, lrc, lrc-IQ, lrc-IR, lt, lt-LT, lu, lu-CD, luo, luo-KE, luy, luy-KE, lv, lv-LV, mas, mas-KE, mas-TZ, mer, mer-KE, mfe, mfe-MU, mg, mg-MG, mgh, mgh-MZ, mgo, mgo-CM, mk, mk-MK, ml, ml-IN, mn, mn-MN, mr, mr-IN, ms, ms-BN, ms-MY, ms-SG, mt, mt-MT, mua, mua-CM, my, my-MM, mzn, mzn-IR, naq, naq-NA, nb, nb-NO, nb-SJ, nd, nd-ZW, ne, ne-IN, ne-NP, nl, nl-AW, nl-BE, nl-BQ, nl-CW, nl-NL, nl-SR, nl-SX, nmg, nmg-CM, nn, nn-NO, nnh, nnh-CM, nus, nus-SS, nyn, nyn-UG, om, om-ET, om-KE, or, or-IN, os, os-GE, os-RU, pa, pa-Arab, pa-Arab-PK, pa-Guru, pa-Guru-IN, pl, pl-PL, prg, prg-001, ps, ps-AF, pt, pt-AO, pt-BR, pt-CV, pt-GW, pt-MO, pt-MZ, pt-PT, pt-ST, pt-TL, qu, qu-BO, qu-EC, qu-PE, rm, rm-CH, rn, rn-BI, ro, ro-MD, ro-RO, rof, rof-TZ, root, ru, ru-BY, ru-KG, ru-KZ, ru-MD, ru-RU, ru-UA, rw, rw-RW, rwk, rwk-TZ, sah, sah-RU, saq, saq-KE, sbp, sbp-TZ, se, se-FI, se-NO, se-SE, seh, seh-MZ, ses, ses-ML, sg, sg-CF, shi, shi-Latn, shi-Latn-MA, shi-Tfng, shi-Tfng-MA, si, si-LK, sk, sk-SK, sl, sl-SI, smn, smn-FI, sn, sn-ZW, so, so-DJ, so-ET, so-KE, so-SO, sq, sq-AL, sq-MK, sq-XK, sr, sr-Cyrl, sr-Cyrl-BA, sr-Cyrl-ME, sr-Cyrl-RS, sr-Cyrl-XK, sr-Latn, sr-Latn-BA, sr-Latn-ME, sr-Latn-RS, sr-Latn-XK, sv, sv-AX, sv-FI, sv-SE, sw, sw-CD, sw-KE, sw-TZ, sw-UG, ta, ta-IN, ta-LK, ta-MY, ta-SG, te, te-IN, teo, teo-KE, teo-UG, th, th-TH, ti, ti-ER, ti-ET, tk, tk-TM, to, to-TO, tr, tr-CY, tr-TR, twq, twq-NE, tzm, tzm-MA, ug, ug-CN, uk, uk-UA, ur, ur-IN, ur-PK, uz, uz-Arab, uz-Arab-AF, uz-Cyrl, uz-Cyrl-UZ, uz-Latn, uz-Latn-UZ, vai, vai-Latn, vai-Latn-LR, vai-Vaii, vai-Vaii-LR, vi, vi-VN, vo, vo-001, vun, vun-TZ, wae, wae-CH, xog, xog-UG, yav, yav-CM, yi, yi-001, yo, yo-BJ, yo-NG, zgh, zgh-MA, zh, zh-Hans, zh-Hans-CN, zh-Hans-HK, zh-Hans-MO, zh-Hans-SG, zh-Hant, zh-Hant-HK, zh-Hant-MO, zh-Hant-TW, zu, zu-ZA ; sv ; sv
##################################################
# test8288
it, en ; und ; it
it, en ; und, en ; en
# examples from
# http://unicode.org/repos/cldr/tags/latest/common/bcp47/
# http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
##################################################
# testUnHack
en-NZ, en-IT ; en-US ; en-NZ
##################################################
# testEmptySupported => null
; en ; null
##################################################
# testVariantsAndExtensions
##################################################
# tests the .combine() method
und, fr ; fr-BE-fonipa ; fr ; fr-BE-fonipa
und, fr-CA ; fr-BE-fonipa ; fr-CA ; fr-BE-fonipa
und, fr-fonupa ; fr-BE-fonipa ; fr-fonupa ; fr-BE-fonipa
und, no ; nn-BE-fonipa ; no ; no-BE-fonipa
und, en-GB-u-sd-gbsct ; en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin ; en-GB-u-sd-gbsct ; en-GB-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin
en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; fr-PSCRACK ; fr-PSCRACK
en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; fr ; fr-PSCRACK
en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; de-CH ; de-PSCRACK
##################################################
# testClusters
# we favor es-419 over others in cluster. Clusters: es- {ES, MA, EA} {419, AR, MX}
und, es, es-MA, es-MX, es-419 ; es-AR ; es-419
und, es-MA, es, es-419, es-MX ; es-AR ; es-419
und, es, es-MA, es-MX, es-419 ; es-EA ; es
und, es-MA, es, es-419, es-MX ; es-EA ; es
# of course, fall back to within cluster
und, es, es-MA, es-MX ; es-AR ; es-MX
und, es-MA, es, es-MX ; es-AR ; es-MX
und, es-MA, es-MX, es-419 ; es-EA ; es-MA
und, es-MA, es-419, es-MX ; es-EA ; es-MA
# we favor es-GB over others in cluster. Clusters: en- {US, GU, VI} {GB, IN, ZA}
und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB
und, en-GU, en, en-GB, en-IN ; en-ZA ; en-GB
und, en, en-GU, en-IN, en-GB ; en-VI ; en
und, en-GU, en, en-GB, en-IN ; en-VI ; en
# of course, fall back to within cluster
und, en, en-GU, en-IN ; en-ZA ; en-IN
und, en-GU, en, en-IN ; en-ZA ; en-IN
und, en-GU, en-IN, en-GB ; en-VI ; en-GU
und, en-GU, en-GB, en-IN ; en-VI ; en-GU
##################################################
# testThreshold
@Threshold=60
50, und, fr-CA-fonupa ; fr-BE-fonipa ; fr-CA-fonupa ; fr-BE-fonipa
50, und, fr-Cyrl-CA-fonupa ; fr-BE-fonipa ; fr-Cyrl-CA-fonupa ; fr-Cyrl-BE-fonipa
@Threshold=-1 # restore
##################################################
# testScriptFirst
@DistanceOption=SCRIPT_FIRST
@debug
ru, fr ; zh, pl ; fr
ru, fr ; zh-Cyrl, pl ; ru
hr, en-Cyrl; sr ; en-Cyrl
da, ru, hr; sr ; ru