// // Driver.cs // // Authors: // Jackson Harper (jackson@ximian.com) // Atsushi Enomoto (atsushi@ximian.com) // Marek Safar // // (C) 2004-2005 Novell, Inc (http://www.novell.com) // Copyright (C) 2012 Xamarin Inc (http://www.xamarin.com) // // Permission is hereby granted, free of charge, to any person obtaining // a copy of this software and associated documentation files (the // "Software"), to deal in the Software without restriction, including // without limitation the rights to use, copy, modify, merge, publish, // distribute, sublicense, and/or sell copies of the Software, and to // permit persons to whom the Software is furnished to do so, subject to // the following conditions: // // The above copyright notice and this permission notice shall be // included in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
//

using System;
using System.IO;
using System.Text;
using System.Xml;
using System.Globalization;
using System.Text.RegularExpressions;
using System.Collections.Generic;
using System.Linq;

namespace Mono.Tools.LocaleBuilder
{
	/// <summary>
	/// Command line driver: reads the CLDR xml files below <c>CLDR/common</c> plus <c>lcids.xml</c>
	/// and writes the generated culture tables header file.
	/// </summary>
	public class Driver
	{
		static readonly string data_root = Path.Combine ("CLDR", "common");

		/// <summary>Program entry point: parses the arguments and runs the generator.</summary>
		public static void Main (string[] args)
		{
			Driver d = new Driver ();
			ParseArgs (args, d);
			d.Run ();
		}

		/// <summary>
		/// Copies the recognised options (--lang, --locales, --header, --compare) onto <paramref name="d"/>.
		/// An option that needs a value but is the last argument is ignored, as is any unknown argument.
		/// </summary>
		private static void ParseArgs (string[] args, Driver d)
		{
			for (int i = 0; i < args.Length; i++) {
				if (args[i] == "--lang" && i + 1 < args.Length)
					d.Lang = args[++i];
				else if (args[i] == "--locales" && i + 1 < args.Length)
					d.Locales = args[++i];
				else if (args[i] == "--header" && i + 1 < args.Length)
					d.HeaderFileName = args[++i];
				else if (args[i] == "--compare")
					d.OutputCompare = true;
			}
		}

		private string lang;
		private string locales;
		private string header_name;
		List<CultureInfoEntry> cultures;
		// territory -> ISO 4217 currency code
		Dictionary<string, string> region_currency;
		// ISO 4217 currency code -> "digits" attribute value
		Dictionary<string, string> currency_fractions;
		// locale -> explicit parent locale taken from supplementalData/parentLocales
		Dictionary<string, string> extra_parent_locales;

		// The lang is the language that display names will be displayed in
		public string Lang {
			get {
				if (lang == null)
					lang = "en";
				return lang;
			}
			set { lang = value; }
		}

		/// <summary>Optional regular expression; when set only matching lcids.xml names are processed.</summary>
		public string Locales {
			get { return locales; }
			set { locales = value; }
		}

		/// <summary>Output file name; "culture-info-tables.h" unless overridden.</summary>
		public string HeaderFileName {
			get {
				if (header_name == null)
					return "culture-info-tables.h";
				return header_name;
			}
			set { header_name = value; }
		}

		/// <summary>When true, <c>Run</c> also prints every culture to the console via <c>Print</c>.</summary>
		public bool OutputCompare { get; set; }

		/// <summary>
		/// Sorts the cultures by the numeric value of their LCID (hex digits after the first two characters)
		/// and writes a textual dump of every culture to standard output.
		/// </summary>
		void Print ()
		{
			cultures.Sort ((a, b) => int.Parse (a.LCID.Substring (2), NumberStyles.HexNumber).CompareTo (int.Parse (b.LCID.Substring (2), NumberStyles.HexNumber)));

			var writer = Console.Out;

			foreach (var c in cultures) {
				writer.WriteLine ("Name: {0}, LCID {1}", c.OriginalName, c.LCID);

				writer.WriteLine ("{0}: {1}", "DisplayName", c.DisplayName);
				writer.WriteLine ("{0}: {1}", "EnglishName", c.EnglishName);
				writer.WriteLine ("{0}: {1}", "NativeName", c.NativeName);
				// writer.WriteLine ("{0}: {1}", "OptionalCalendars", c.OptionalCalendars);
				writer.WriteLine ("{0}: {1}", "ThreeLetterISOLanguageName", c.ThreeLetterISOLanguageName);
				writer.WriteLine ("{0}: {1}", "ThreeLetterWindowsLanguageName", c.ThreeLetterWindowsLanguageName);
				writer.WriteLine ("{0}: {1}", "TwoLetterISOLanguageName", c.TwoLetterISOLanguageName);
				writer.WriteLine ("{0}: {1}", "Calendar", GetCalendarType (c.CalendarType));

				var df = c.DateTimeFormatEntry;
				writer.WriteLine ("-- DateTimeFormat --");
				Dump (writer, df.AbbreviatedDayNames, "AbbreviatedDayNames");
				Dump (writer, df.AbbreviatedMonthGenitiveNames, "AbbreviatedMonthGenitiveNames");
				Dump (writer, df.AbbreviatedMonthNames, "AbbreviatedMonthNames");
				writer.WriteLine ("{0}: {1}", "AMDesignator", df.AMDesignator);
				writer.WriteLine ("{0}: {1}", "CalendarWeekRule", (CalendarWeekRule) df.CalendarWeekRule);
				writer.WriteLine ("{0}: {1}", "DateSeparator", df.DateSeparator);
				Dump (writer, df.DayNames, "DayNames");
				writer.WriteLine ("{0}: {1}", "FirstDayOfWeek", (DayOfWeek) df.FirstDayOfWeek);
				// Dump (writer, df.GetAllDateTimePatterns (), "GetAllDateTimePatterns");
				// writer.WriteLine ("{0}: {1}", "LongDatePattern", df.LongDatePattern);
				// writer.WriteLine ("{0}: {1}", "LongTimePattern", df.LongTimePattern);
				writer.WriteLine ("{0}: {1}", "MonthDayPattern", df.MonthDayPattern);
				Dump (writer, df.MonthGenitiveNames, "MonthGenitiveNames");
				Dump (writer, df.MonthNames, "MonthNames");
				writer.WriteLine ("{0}: {1}", "NativeCalendarName", df.NativeCalendarName);
				writer.WriteLine ("{0}: {1}", "PMDesignator", df.PMDesignator);
				// writer.WriteLine ("{0}: {1}", "ShortDatePattern", df.ShortDatePattern);
				Dump (writer, df.ShortestDayNames, "ShortestDayNames");
				// writer.WriteLine ("{0}: {1}", "ShortTimePattern", df.ShortTimePattern);
				writer.WriteLine ("{0}: {1}", "TimeSeparator", df.TimeSeparator);
				// writer.WriteLine ("{0}: {1}", "YearMonthPattern", df.YearMonthPattern);

				var ti = c.TextInfoEntry;
				writer.WriteLine ("-- TextInfo --");
				writer.WriteLine ("{0}: {1}", "ANSICodePage", ti.ANSICodePage);
				writer.WriteLine ("{0}: {1}", "EBCDICCodePage", ti.EBCDICCodePage);
				writer.WriteLine ("{0}: {1}", "IsRightToLeft", ti.IsRightToLeft);
				writer.WriteLine ("{0}: {1}", "ListSeparator", ti.ListSeparator);
				writer.WriteLine ("{0}: {1}", "MacCodePage", ti.MacCodePage);
				writer.WriteLine ("{0}: {1}", "OEMCodePage", ti.OEMCodePage);

				var nf = c.NumberFormatEntry;
				writer.WriteLine ("-- NumberFormat --");
				writer.WriteLine ("{0}: {1}", "CurrencyDecimalDigits", nf.CurrencyDecimalDigits);
				writer.WriteLine ("{0}: {1}", "CurrencyDecimalSeparator", nf.CurrencyDecimalSeparator);
				writer.WriteLine ("{0}: {1}", "CurrencyGroupSeparator", nf.CurrencyGroupSeparator);
				Dump (writer, nf.CurrencyGroupSizes, "CurrencyGroupSizes", true);
				writer.WriteLine ("{0}: {1}", "CurrencyNegativePattern", nf.CurrencyNegativePattern);
				writer.WriteLine ("{0}: {1}", "CurrencyPositivePattern", nf.CurrencyPositivePattern);
				writer.WriteLine ("{0}: {1}", "CurrencySymbol", nf.CurrencySymbol);
				writer.WriteLine ("{0}: {1}", "DigitSubstitution", nf.DigitSubstitution);
				writer.WriteLine ("{0}: {1}", "NaNSymbol", nf.NaNSymbol);
				Dump (writer, nf.NativeDigits, "NativeDigits");
				writer.WriteLine ("{0}: {1}", "NegativeInfinitySymbol", nf.NegativeInfinitySymbol);
				writer.WriteLine ("{0}: {1}", "NegativeSign", nf.NegativeSign);
				writer.WriteLine ("{0}: {1}", "NumberDecimalDigits", nf.NumberDecimalDigits);
				writer.WriteLine ("{0}: {1}", "NumberDecimalSeparator", nf.NumberDecimalSeparator);
				writer.WriteLine ("{0}: {1}", "NumberGroupSeparator", nf.NumberGroupSeparator);
				Dump (writer, nf.NumberGroupSizes, "NumberGroupSizes", true);
				writer.WriteLine ("{0}: {1}", "NumberNegativePattern", nf.NumberNegativePattern);
				writer.WriteLine ("{0}: {1}", "PercentNegativePattern", nf.PercentNegativePattern);
				writer.WriteLine ("{0}: {1}", "PercentPositivePattern", nf.PercentPositivePattern);
				writer.WriteLine ("{0}: {1}", "PercentSymbol", nf.PercentSymbol);
				writer.WriteLine ("{0}: {1}", "PerMilleSymbol", nf.PerMilleSymbol);
				writer.WriteLine ("{0}: {1}", "PositiveInfinitySymbol", nf.PositiveInfinitySymbol);
				writer.WriteLine ("{0}: {1}", "PositiveSign", nf.PositiveSign);

				if (c.RegionInfoEntry != null) {
					var ri = c.RegionInfoEntry;
					writer.WriteLine ("-- RegionInfo --");
					writer.WriteLine ("{0}: {1}", "CurrencyEnglishName", ri.CurrencyEnglishName);
					writer.WriteLine ("{0}: {1}", "CurrencyNativeName", ri.CurrencyNativeName);
					writer.WriteLine ("{0}: {1}", "CurrencySymbol", ri.CurrencySymbol);
					writer.WriteLine ("{0}: {1}", "DisplayName", ri.DisplayName);
					writer.WriteLine ("{0}: {1}", "EnglishName", ri.EnglishName);
					writer.WriteLine ("{0}: {1}", "GeoId", ri.GeoId);
					writer.WriteLine ("{0}: {1}", "IsMetric", ri.IsMetric);
					writer.WriteLine ("{0}: {1}", "ISOCurrencySymbol", ri.ISOCurrencySymbol);
					writer.WriteLine ("{0}: {1}", "Name", ri.Name);
					writer.WriteLine ("{0}: {1}", "NativeName", ri.NativeName);
					writer.WriteLine ("{0}: {1}", "ThreeLetterISORegionName", ri.ThreeLetterISORegionName);
					writer.WriteLine ("{0}: {1}", "ThreeLetterWindowsRegionName", ri.ThreeLetterWindowsRegionName);
					writer.WriteLine ("{0}: {1}", "TwoLetterISORegionName", ri.TwoLetterISORegionName);
				}

				writer.WriteLine ();
			}
		}

		/// <summary>Maps a <c>CalendarType</c> value to the matching System.Globalization calendar class.</summary>
		/// <exception cref="NotImplementedException">The calendar type is not one of the four handled values.</exception>
		static Type GetCalendarType (CalendarType ct)
		{
			switch (ct) {
			case CalendarType.Gregorian:
				return typeof (GregorianCalendar);
			case CalendarType.HijriCalendar:
				return typeof (HijriCalendar);
			case CalendarType.ThaiBuddhist:
				return typeof (ThaiBuddhistCalendar);
			case CalendarType.UmAlQuraCalendar:
				return typeof (UmAlQuraCalendar);
			default:
				throw new NotImplementedException ();
			}
		}

		/// <summary>
		/// Writes "<paramref name="name"/>: v0, v1, ..." followed by a line break.
		/// With <paramref name="stopOnNull"/> set, output stops at the first null element.
		/// </summary>
		static void Dump<T> (TextWriter tw, IList<T> values, string name, bool stopOnNull = false) where T : class
		{
			tw.Write (name);
			tw.Write (": ");

			for (int i = 0; i < values.Count; ++i) {
				var v = values[i];

				if (stopOnNull && v == null)
					break;

				if (i > 0)
					tw.Write (", ");

				tw.Write (v);
			}

			tw.WriteLine ();
		}

		/// <summary>
		/// Loads the supplemental and per-locale data, fills in the culture and region entries
		/// and writes the generated header file.
		/// </summary>
		void Run ()
		{
			Regex locales_regex = null;
			if (Locales != null)
				locales_regex = new Regex (Locales);

			cultures = new List<CultureInfoEntry> ();
			var regions = new List<RegionInfoEntry> ();

			var supplemental = GetXmlDocument (Path.Combine
(data_root, "supplemental", "supplementalData.xml"));

			// Read currencies info
			region_currency = new Dictionary<string, string> (StringComparer.OrdinalIgnoreCase);
			foreach (XmlNode entry in supplemental.SelectNodes ("supplementalData/currencyData/region")) {
				// Only the first <currency> child of each region is used
				var child = entry.SelectSingleNode ("currency");
				region_currency.Add (entry.Attributes["iso3166"].Value, child.Attributes["iso4217"].Value);
			}

			// Parent locales
			extra_parent_locales = new Dictionary<string, string> (StringComparer.OrdinalIgnoreCase);
			foreach (XmlNode entry in supplemental.SelectNodes ("supplementalData/parentLocales/parentLocale")) {
				var parent = entry.Attributes["parent"].Value;

				if (parent == "root")
					continue;

				var locales = entry.Attributes["locales"].Value;
				// Split on any whitespace and drop empty entries (same approach as the firstDay
				// territories parsing) so padding inside the attribute value cannot yield an
				// empty key, which would make the second Add call throw
				foreach (var locale in locales.Split (new [] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries))
					extra_parent_locales.Add (locale, parent);
			}

			// CLDR has habits of completely removing cultures data between release but we don't want to break
			// existing code
			var knownLCIDs = new HashSet<string> () {
				"ar", "bg", "ca", "zh_Hans", "zh_CHS", "cs", "da", "de", "el", "en", "es", "fi", "fr", "he", "hu", "is", "it",
				"ja", "ko", "nl", "no", "pl", "pt", "rm", "ro", "ru", "hr", "sk", "sq", "sv", "th", "tr", "ur", "id", "uk",
				"be", "sl", "et", "lv", "lt", "tg", "fa", "vi", "hy", "az", "eu", "mk", "st", "ts", "tn", "xh", "zu", "af",
				"ka", "fo", "hi", "mt", "se", "ga", "ms", "kk", "ky", "sw", "uz", "bn", "pa", "gu", "or", "ta", "te", "kn",
				"ml", "as", "mr", "mn", "bo", "cy", "km", "lo", "my", "gl", "kok", "si", "chr", "am", "tzm", "ne", "ps",
				"fil", "ff", "ha", "yo", "nso", "kl", "ig", "om", "ti", "haw", "so", "ii", "br", "gsw", "sah", "rw", "gd",
				"ar_SA", "bg_BG", "ca_ES", "zh_TW", "cs_CZ", "da_DK", "de_DE", "el_GR", "en_US", "fi_FI", "fr_FR", "he_IL",
				"hu_HU", "is_IS", "it_IT", "ja_JP", "ko_KR", "nl_NL", "nb_NO", "pl_PL", "pt_BR", "rm_CH", "ro_RO", "ru_RU",
				"hr_HR", "sk_SK", "sq_AL", "sv_SE", "th_TH", "tr_TR", "ur_PK", "id_ID", "uk_UA", "be_BY", "sl_SI", "et_EE",
				"lv_LV", "lt_LT", "tg_Cyrl_TJ", "fa_IR", "vi_VN", "hy_AM", "az_Latn_AZ", "eu_ES",
				"mk_MK", "st_ZA", "ts_ZA", "tn_ZA", "xh_ZA", "zu_ZA", "af_ZA", "ka_GE", "fo_FO", "hi_IN", "mt_MT", "se_NO",
				"sw_KE", "uz_Latn_UZ", "bn_IN", "gu_IN", "or_IN", "ta_IN", "te_IN", "kn_IN", "ml_IN", "as_IN", "mr_IN",
				"bo_CN", "cy_GB", "km_KH", "lo_LA", "my_MM", "gl_ES", "kok_IN", "si_LK", "am_ET", "ne_NP", "ps_AF",
				"fil_PH", "ha_Latn_NG", "yo_NG", "nso_ZA", "kl_GL", "ig_NG", "om_ET", "ti_ET", "haw_US", "so_SO", "ii_CN",
				"br_FR", "sah_RU", "rw_RW", "gd_GB", "ar_IQ", "zh_CN", "de_CH", "en_GB", "es_MX", "fr_BE", "it_CH", "nl_BE",
				"nn_NO", "pt_PT", "ro_MD", "ru_MD", "sv_FI", "ur_IN", "az_Cyrl_AZ", "tn_BW", "ga_IE", "uz_Cyrl_UZ", "bn_BD",
				"pa_Arab_PK", "ta_LK", "ne_IN", "ti_ER", "ar_EG", "zh_HK", "de_AT", "en_AU", "es_ES", "fr_CA", "se_FI",
				"ar_LY", "zh_SG", "de_LU", "en_CA", "es_GT", "fr_CH", "hr_BA", "ar_DZ", "zh_MO", "de_LI", "en_NZ", "es_CR",
				"fr_LU", "bs_Latn_BA", "ar_MA", "en_IE", "es_PA", "fr_MC", "sr_Latn_BA", "ar_TN", "en_ZA", "es_DO",
				"sr_Cyrl_BA", "ar_OM", "en_JM", "es_VE", "fr_RE", "bs_Cyrl_BA", "ar_YE", "es_CO", "fr_CD", "sr_Latn_RS",
				"ar_SY", "en_BZ", "es_PE", "fr_SN", "sr_Cyrl_RS", "ar_JO", "en_TT", "es_AR", "fr_CM", "sr_Latn_ME", "ar_LB",
				"en_ZW", "es_EC", "fr_CI", "sr_Cyrl_ME", "ar_KW", "en_PH", "es_CL", "fr_ML", "ar_AE", "es_UY", "fr_MA",
				"ar_BH", "en_HK", "es_PY", "fr_HT", "ar_QA", "en_IN", "es_BO", "es_SV", "en_SG", "es_HN", "es_NI", "es_PR",
				"es_US", "es_CU", "bs_Cyrl", "bs_Latn", "sr_Cyrl", "sr_Latn", "az_Cyrl", "zh", "nn", "bs", "az_Latn",
				"uz_Cyrl", "mn_Cyrl", "zh_Hant", "zh_CHT", "nb", "sr", "tg_Cyrl", "uz_Latn", "pa_Arab", "tzm_Latn",
				"ha_Latn", "hsb", "tk", "fy", "lb", "ug", "hsb_DE", "ms_MY", "kk_KZ", "ky_KG", "tk_TM", "mn_MN", "fy_NL",
				"lb_LU", "ug_CN", "gsw_FR", "ca_ES_valencia", "dsb_DE", "se_SE", "ms_BN", "smn_FI", "en_MY", "smn", "dsb"
			};

			var lcdids = GetXmlDocument ("lcids.xml");
			foreach (XmlNode lcid in lcdids.SelectNodes ("lcids/lcid")) {
				var name = lcid.Attributes["name"].Value;

				// Skip entries not selected by the optional --locales regular expression
				if (locales_regex != null && !locales_regex.IsMatch (name))
continue;

				var ci = new CultureInfoEntry ();
				ci.LCID = lcid.Attributes["id"].Value;
				ci.ParentLcid = lcid.Attributes["parent"].Value;
				ci.TwoLetterISOLanguageName = lcid.Attributes["iso2"].Value;
				ci.ThreeLetterISOLanguageName = lcid.Attributes["iso3"].Value;
				ci.ThreeLetterWindowsLanguageName = lcid.Attributes["win"].Value;
				ci.OriginalName = name.Replace ('_', '-');
				ci.TextInfoEntry = new TextInfoEntry ();
				ci.NumberFormatEntry = new NumberFormatEntry ();

				if (!Import (ci, name)) {
					// Losing a culture that used to be generated aborts the whole run
					if (knownLCIDs.Contains (name)) {
						Console.WriteLine ($"Missing previously available culture `{ name }' data");
						return;
					}

					continue;
				}

				// A culture that imports fine but is not in knownLCIDs also aborts the run
				if (!knownLCIDs.Contains (name)) {
					Console.WriteLine ($"New culture `{ name }' data available");
					return;
				}

				cultures.Add (ci);
			}

			var doc_english = GetXmlDocument (Path.Combine (data_root, "main", "en.xml"));

			//
			// Fill all EnglishName values from en.xml language file
			//
			foreach (var ci in cultures) {
				var el = doc_english.SelectSingleNode (string.Format ("ldml/localeDisplayNames/languages/language[@type='{0}']", ci.Language));
				if (el != null)
					ci.EnglishName = el.InnerText;

				// s collects the "(script, territory)" suffix of the English name
				string s = null;
				if (ci.Script != null) {
					el = doc_english.SelectSingleNode (string.Format ("ldml/localeDisplayNames/scripts/script[@type='{0}']", ci.Script));
					if (el != null)
						s = el.InnerText;
				}

				if (ci.Territory != null) {
					el = doc_english.SelectSingleNode (string.Format ("ldml/localeDisplayNames/territories/territory[@type='{0}']", ci.Territory));
					if (el != null) {
						if (s == null)
							s = el.InnerText;
						else
							s = string.Join (", ", s, el.InnerText);
					}
				}

				switch (ci.ThreeLetterWindowsLanguageName) {
				case "CHT":
					s = "Traditional";
					break;
				case "CHS":
					s = "Simplified";
					break;
				}

				if (s != null)
					ci.EnglishName = string.Format ("{0} ({1})", ci.EnglishName, s);

				// Special case legacy chinese
				if (ci.OriginalName == "zh-CHS" || ci.OriginalName == "zh-CHT")
					ci.EnglishName += " Legacy";

				// Mono is not localized and supports english only, hence the name will always be same
				ci.DisplayName = ci.EnglishName;
			}

			//
			// Fill culture hierarchy for easier data manipulation
			//
			foreach (var ci in cultures) {
				foreach (var p in cultures.Where (l => ci.LCID == l.ParentLcid)) {
					ci.Children.Add (p);
				}
			}

			currency_fractions = new Dictionary<string, string> (StringComparer.OrdinalIgnoreCase);
			foreach (XmlNode entry in supplemental.SelectNodes ("supplementalData/currencyData/fractions/info")) {
				currency_fractions.Add (entry.Attributes["iso4217"].Value, entry.Attributes["digits"].Value);
			}

			var territory2dayofweek = new Dictionary<string, DayOfWeek> (StringComparer.OrdinalIgnoreCase);
			foreach (XmlNode entry in supplemental.SelectNodes ("supplementalData/weekData/firstDay")) {
				// Entries carrying an "alt" attribute are skipped
				if (entry.Attributes ["alt"] != null)
					continue;

				DayOfWeek dow;
				switch (entry.Attributes["day"].Value) {
				case "mon":
					dow = DayOfWeek.Monday;
					break;
				case "fri":
					dow = DayOfWeek.Friday;
					break;
				case "sat":
					dow = DayOfWeek.Saturday;
					break;
				case "sun":
					dow = DayOfWeek.Sunday;
					break;
				default:
					throw new NotImplementedException ();
				}

				var territories = entry.Attributes["territories"].Value.Split (new [] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);
				foreach (var t in territories) {
					var tr = t.Trim ();
					if (tr.Length == 0)
						continue;

					territory2dayofweek.Add (tr, dow);
				}
			}

			var territory2wr = new Dictionary<string, CalendarWeekRule> (StringComparer.OrdinalIgnoreCase);
			foreach (XmlNode entry in supplemental.SelectNodes ("supplementalData/weekData/minDays")) {
				CalendarWeekRule rule;
				switch (entry.Attributes["count"].InnerText) {
				case "1":
					rule = CalendarWeekRule.FirstDay;
					break;
				case "4":
					rule = CalendarWeekRule.FirstFourDayWeek;
					break;
				default:
					throw new NotImplementedException ();
				}

				var territories = entry.Attributes["territories"].InnerText.Split ();
				foreach (var t in territories)
					territory2wr[t] = rule;
			}

			//
			// Fill all territory speficic data where territory is available
			//
			var non_metric = new HashSet<string> ();
			foreach (XmlNode entry in supplemental.SelectNodes ("supplementalData/measurementData/measurementSystem[@type='US']")) {
				var territories = entry.Attributes["territories"].InnerText.Split ();
				foreach (var t in territories)
					non_metric.Add (t);
			}

			foreach (var ci in cultures) {
				if (ci.Territory == null)
					continue;

				DayOfWeek value;
				if (territory2dayofweek.TryGetValue (ci.Territory, out value)) {
					ci.DateTimeFormatEntry.FirstDayOfWeek = (int) value;
				}

				CalendarWeekRule rule;
				if (territory2wr.TryGetValue (ci.Territory, out rule)) {
					ci.DateTimeFormatEntry.CalendarWeekRule = (int) rule;
				}

				// One RegionInfoEntry is shared by all cultures of the same territory; it is built
				// from the first culture that references the territory
				RegionInfoEntry region = regions.Where (l => l.Name == ci.Territory).FirstOrDefault ();
				if (region == null) {
					region = new RegionInfoEntry () {
						CurrencySymbol = ci.NumberFormatEntry.CurrencySymbol,
						EnglishName = ci.EnglishName,
						NativeName = ci.NativeTerritoryName,
						Name = ci.Territory,
						TwoLetterISORegionName = ci.Territory,
						CurrencyNativeName = ci.NativeCurrencyName
					};

					var tc = supplemental.SelectSingleNode (string.Format ("supplementalData/codeMappings/territoryCodes[@type='{0}']", ci.Territory));
					region.ThreeLetterISORegionName = tc?.Attributes["alpha3"]?.Value ?? "---";
					region.ThreeLetterWindowsRegionName = region.ThreeLetterISORegionName;

					// NOTE(review): unlike the lookups around it this one is not null-checked, so a
					// territory missing from en.xml ends the run with a NullReferenceException
					var el = doc_english.SelectSingleNode (string.Format ("ldml/localeDisplayNames/territories/territory[@type='{0}']", ci.Territory));
					region.EnglishName = el.InnerText;
					region.DisplayName = region.EnglishName;

					string curr;
					if (!region_currency.TryGetValue (ci.Territory, out curr))
						curr = "---";
					region.ISOCurrencySymbol = curr;

					el = doc_english.SelectSingleNode (string.Format ("ldml/numbers/currencies/currency[@type='{0}']/displayName", region.ISOCurrencySymbol));
					region.CurrencyEnglishName = el?.InnerText ?? "---";

					if (non_metric.Contains (ci.Territory))
						region.IsMetric = false;

					var lcdid_value = int.Parse (ci.LCID.Substring (2), NumberStyles.HexNumber);
					Patterns.FillValues (lcdid_value, region);
					regions.Add (region);
				}

				string fraction_value;
				if (currency_fractions.TryGetValue (region.ISOCurrencySymbol, out fraction_value)) {
					ci.NumberFormatEntry.CurrencyDecimalDigits = fraction_value;
				}

				ci.RegionInfoEntry = region;
			}

			//
			// Fill neutral cultures territory data
			//
			foreach (var ci in cultures) {
				var dtf = ci.DateTimeFormatEntry;
				if (dtf.FirstDayOfWeek == null) {
					switch (ci.Name) {
					case "ar":
						dtf.FirstDayOfWeek = (int) DayOfWeek.Saturday;
						break;
					case "en":
					case "pt":
					case "zh-Hans":
						dtf.FirstDayOfWeek = (int) DayOfWeek.Sunday;
						break;
					case "es":
					case "fr":
					case "bn":
					case "sr-Cyrl":
					case "sr-Latn":
					case "ta":
						dtf.FirstDayOfWeek = (int) DayOfWeek.Monday;
						break;
					default:
						List<int?> all_fdow = new List<int?> ();
						GetAllChildrenValues (ci, all_fdow, l => l.DateTimeFormatEntry.FirstDayOfWeek);
						var children = all_fdow.Where (l => l != null).Distinct ().ToList ();

						if (children.Count == 1) {
							dtf.FirstDayOfWeek = children[0];
						} else if (children.Count == 0) {
							if (!ci.HasMissingLocale)
								Console.WriteLine ("No week data for `{0}'", ci.Name);

							// Default to Sunday
							dtf.FirstDayOfWeek = (int) DayOfWeek.Sunday;
						} else {
							// .NET has weird concept of territory data available for neutral cultures (e.g. en, es, pt)
							// We have to manually disambiguate the correct entry (which is artofficial anyway)
							throw new ApplicationException (string.Format ("Ambiguous week data for `{0}'", ci.Name));
						}

						break;
					}
				}

				if (dtf.CalendarWeekRule == null) {
					switch (ci.Name) {
					case "ar":
					case "en":
					case "es":
					case "zh-Hans":
					case "pt":
					case "fr":
					case "bn":
						dtf.CalendarWeekRule = (int) CalendarWeekRule.FirstDay;
						break;
					default:
						List<int?> all_cwr = new List<int?> ();
						GetAllChildrenValues (ci, all_cwr, l => l.DateTimeFormatEntry.CalendarWeekRule);
						var children = all_cwr.Where (l => l != null).Distinct ().ToList ();

						if (children.Count == 1) {
							dtf.CalendarWeekRule = children[0];
						} else if (children.Count == 0) {
							if (!ci.HasMissingLocale)
								Console.WriteLine ("No calendar week data for `{0}'", ci.Name);

							// Default to FirstDay
							dtf.CalendarWeekRule = (int) CalendarWeekRule.FirstDay;
						} else {
							// .NET has weird concept of territory data available for neutral cultures (e.g. en, es, pt)
							// We have to manually disambiguate the correct entry (which is artofficial anyway)
							throw new ApplicationException (string.Format ("Ambiguous calendar data for `{0}'", ci.Name));
						}

						break;
					}
				}

				var nfe = ci.NumberFormatEntry;
				if (nfe.CurrencySymbol == null) {
					switch (ci.Name) {
					case "ar":
						nfe.CurrencySymbol = "ر.س.‏";
						break;
					case "en":
						nfe.CurrencySymbol = "$";
						break;
					case "bs":
						nfe.CurrencySymbol = "KM";
						break;
					case "es":
					case "fr":
					case "de":
					case "it":
					case "se":
						nfe.CurrencySymbol = "€";
						break;
					case "hr":
						nfe.CurrencySymbol = "kn";
						break;
					case "pt":
						nfe.CurrencySymbol = "R$";
						break;
					case "sv":
						nfe.CurrencySymbol = "kr";
						break;
					case "ms":
						nfe.CurrencySymbol = "RM";
						break;
					case "bn":
						nfe.CurrencySymbol = "টা";
						break;
					case "sr-Cyrl":
						nfe.CurrencySymbol = "Дин.";
						break;
					case "sr-Latn":
					case "sr":
						nfe.CurrencySymbol = "Din.";
						break;
					case "zh":
					case "zh-Hans":
						nfe.CurrencySymbol = "¥";
						break;
					case "zh-Hant":
						nfe.CurrencySymbol = "HK$";
						break;
					case "ru":
						nfe.CurrencySymbol = "₽";
						break;
					case "ur":
						nfe.CurrencySymbol = "Rs";
						break;
case "tn":
						nfe.CurrencySymbol = "R";
						break;
					case "ta":
						nfe.CurrencySymbol = "₹";
						break;
					case "ne":
						nfe.CurrencySymbol = "रु";
						break;
					case "ti":
						nfe.CurrencySymbol = "Nfk";
						break;
					case "ro":
						nfe.CurrencySymbol = "RON";
						break;
					default:
						var all_currencies = new List<string> ();
						GetAllChildrenValues (ci, all_currencies, l => l.NumberFormatEntry.CurrencySymbol);
						var children = all_currencies.Where (l => l != null).Distinct ().ToList ();

						if (children.Count == 1) {
							nfe.CurrencySymbol = children[0];
						} else if (children.Count == 0) {
							// Unlike the week data above, no default symbol is assigned here
							if (!ci.HasMissingLocale)
								Console.WriteLine ("No currency data for `{0}'", ci.Name);
						} else {
							// .NET has weird concept of territory data available for neutral cultures (e.g. en, es, pt)
							// We have to manually disambiguate the correct entry (which is artofficial anyway)
							throw new ApplicationException (string.Format ("Ambiguous currency data for `{0}'. Possible values '{1}'", ci.Name, string.Join (", ", children)));
						}

						break;
					}
				}
			}

			if (OutputCompare)
				Print ();

			regions.Sort (new RegionComparer ());
			for (int i = 0; i < regions.Count; ++i)
				regions[i].Index = i;

			/**
			 * Dump each table individually. Using StringBuilders
			 * because it is easier to debug, should switch to just
			 * writing to streams eventually.
			 */
			using (StreamWriter writer = new StreamWriter (HeaderFileName, false, new UTF8Encoding (false, true))) {
				writer.NewLine = "\n";
				writer.WriteLine ();
				writer.WriteLine ("/* This is a generated file. Do not edit. See tools/locale-builder. */");
				writer.WriteLine ("#ifndef MONO_METADATA_CULTURE_INFO_TABLES");
				writer.WriteLine ("#define MONO_METADATA_CULTURE_INFO_TABLES 1");
				writer.WriteLine ("\n");

				writer.WriteLine ("#define NUM_CULTURE_ENTRIES {0}", cultures.Count);
				writer.WriteLine ("#define NUM_REGION_ENTRIES {0}", regions.Count);

				writer.WriteLine ("\n");

				// Sort the cultures by lcid
				cultures.Sort (new LcidComparer ());

				StringBuilder builder = new StringBuilder ();
				int row = 0;
				int count = cultures.Count;
				for (int i = 0; i < count; i++) {
					CultureInfoEntry ci = cultures[i];
					if (ci.DateTimeFormatEntry == null)
						continue;

					ci.DateTimeFormatEntry.AppendTableRow (builder);
					// Row records the position of this entry inside the emitted table
					ci.DateTimeFormatEntry.Row = row++;
					if (i + 1 < count)
						builder.Append (',');

					builder.Append ('\n');
				}

				writer.WriteLine ("static const DateTimeFormatEntry datetime_format_entries [] = {");
				writer.Write (builder);
				writer.WriteLine ("};\n\n");

				builder = new StringBuilder ();
				row = 0;
				for (int i = 0; i < count; i++) {
					CultureInfoEntry ci = cultures[i];
					if (ci.NumberFormatEntry == null)
						continue;

					ci.NumberFormatEntry.AppendTableRow (builder);
					ci.NumberFormatEntry.Row = row++;
					if (i + 1 < count)
						builder.Append (',');

					builder.Append ('\n');
				}

				writer.WriteLine ("static const NumberFormatEntry number_format_entries [] = {");
				writer.Write (builder);
				writer.WriteLine ("};\n\n");

				builder = new StringBuilder ();
				row = 0;
				for (int i = 0; i < count; i++) {
					CultureInfoEntry ci = cultures[i];
					ci.AppendTableRow (builder);
					ci.Row = row++;
					if (i + 1 < count)
						builder.Append (',');

					builder.Append ('\n');
				}

				writer.WriteLine ("static const CultureInfoEntry culture_entries [] = {");
				writer.Write (builder);
				writer.WriteLine ("};\n\n");

				cultures.Sort (new ExportNameComparer ()); // Sort based on name
				builder = new StringBuilder ();
				for (int i = 0; i < count; i++) {
					CultureInfoEntry ci = cultures[i];
					var name = ci.GetExportName ().ToLowerInvariant ();
					builder.Append ("\t{" + Entry.EncodeStringIdx (name) + ", ");
					builder.Append (ci.Row + "}");
					if (i + 1 < count)
builder.Append (',');

					builder.AppendFormat ("\t /* {0} */", name);
					builder.Append ('\n');
				}

				writer.WriteLine ("static const CultureInfoNameEntry culture_name_entries [] = {");
				writer.Write (builder);
				writer.WriteLine ("};\n\n");

				builder = new StringBuilder ();
				int rcount = 0;
				foreach (RegionInfoEntry r in regions) {
					r.AppendTableRow (builder);
					if (++rcount != regions.Count)
						builder.Append (',');

					builder.Append ('\n');
				}

				writer.WriteLine ("static const RegionInfoEntry region_entries [] = {");
				writer.Write (builder);
				writer.WriteLine ("};\n\n");

				builder = new StringBuilder ();
				rcount = 0;
				foreach (RegionInfoEntry ri in regions) {
					builder.Append ("\t{" + Entry.EncodeStringIdx (ri.TwoLetterISORegionName) + ", ");
					builder.Append (ri.Index + "}");
					if (++rcount != regions.Count)
						builder.Append (',');

					builder.AppendFormat ("\t /* {0} */", ri.TwoLetterISORegionName);
					builder.Append ('\n');
				}

				writer.WriteLine ("static const RegionInfoNameEntry region_name_entries [] = {");
				writer.Write (builder);
				writer.WriteLine ("};\n\n");

				writer.WriteLine ("static const char locale_strings [] = {");
				writer.Write (Entry.General.GetStrings ());
				writer.WriteLine ("};\n\n");

				writer.WriteLine ("static const char patterns [] = {");
				writer.Write (Entry.Patterns.GetStrings ());
				writer.WriteLine ("};\n\n");

				writer.WriteLine ("static const char datetime_strings [] = {");
				writer.Write (Entry.DateTimeStrings.GetStrings ());
				writer.WriteLine ("};\n\n");

				writer.WriteLine ("#endif\n");
			}
		}

		/// <summary>
		/// Appends to <paramref name="values"/> the selected value of every direct child of
		/// <paramref name="entry"/> and then recurses into each child's children.
		/// </summary>
		static void GetAllChildrenValues<T> (CultureInfoEntry entry, List<T> values, Func<CultureInfoEntry, T> selector)
		{
			foreach (var e in entry.Children) {
				// Guard against an entry listed as its own child
				if (e == entry)
					continue;

				values.Add (selector (e));

				// NOTE(review): the recursive call collects the values of e2's children only, the
				// value of e2 itself (a grandchild of entry) is never added - confirm this is intended
				foreach (var e2 in e.Children) {
					GetAllChildrenValues (e2, values, selector);
				}
			}
		}

		/// <summary>Loads the xml file at <paramref name="path"/> into a new <c>XmlDocument</c>.</summary>
		static XmlDocument GetXmlDocument (string path)
		{
			var doc = new XmlDocument ();
			// Dispose the reader so the underlying file is closed once the document is loaded;
			// this method is called for many files during a single run
			using (var reader = new XmlTextReader (path) { /*DtdProcessing = DtdProcessing.Ignore*/ }) {
				doc.Load (reader);
			}

			return doc;
		}

		/// <summary>
		/// Fills <paramref name="data"/> from the CLDR main files that make up <paramref name="locale"/>
		/// (underscore separated name as used in lcids.xml).
		/// </summary>
		/// <returns>false when the CLDR file for the (remapped) locale does not exist, true otherwise.</returns>
		bool Import (CultureInfoEntry data, string locale)
		{
			string fname = null;

			var sep = locale.Split ('_');
			data.Language = sep[0];

			// CLDR strictly follow ISO names, .NET does not
			// Replace names where non-iso2 is used, e.g. Norway
			if (data.Language != data.TwoLetterISOLanguageName) {
				locale = data.TwoLetterISOLanguageName;
				if (sep.Length > 1) {
					// The remaining parts have to be separated from the language by '_' as well,
					// otherwise the result is e.g. "nbNO" which never matches a CLDR file name
					locale += "_" + string.Join ("_", sep.Skip (1));
				}
			}

			// Convert broken Chinese names to correct one
			switch (locale) {
			case "zh_CHS":
				locale = "zh_Hans";
				break;
			case "zh_CHT":
				locale = "zh_Hant";
				break;
			case "zh_CN":
				locale = "zh_Hans_CN";
				break;
			case "zh_HK":
				locale = "zh_Hant_HK";
				break;
			case "zh_SG":
				locale = "zh_Hans_SG";
				break;
			case "zh_TW":
				locale = "zh_Hant_TW";
				break;
			case "zh_MO":
				locale = "zh_Hant_MO";
				break;
			}

			sep = locale.Split ('_');

			string full_name = Path.Combine (data_root, "main", locale + ".xml");
			if (!File.Exists (full_name)) {
				Console.WriteLine ("Missing locale file for `{0}'", locale);

				// We could fill default values but that's not as simple as it seems. For instance for non-neutral
				// cultures the next part could be territory or not.
				return false;
			} else {
				XmlDocument doc = null;

				/*
				 * Locale generation is done in several steps, first we
				 * read the root file which is the base invariant data
				 * then the supplemental root data,
				 * then the language file, the supplemental languages
				 * file then the locale file, then the supplemental
				 * locale file. Values in each descending file can
				 * overwrite previous values.
				 */
				foreach (var part in sep) {
					if (fname != null)
						fname += "_";

					fname += part;

					XmlDocument xml;
					string extra;
					// An explicit parent locale is imported before the file of the current prefix
					if (extra_parent_locales.TryGetValue (fname, out extra)) {
						xml = GetXmlDocument (Path.Combine (data_root, "main", extra + ".xml"));
						if (doc == null)
							doc = xml;

						Import (xml, data);
					}

					xml = GetXmlDocument (Path.Combine (data_root, "main", fname + ".xml"));
					// doc keeps the first document loaded; display names are looked up in it below
					if (doc == null)
						doc = xml;

					Import (xml, data);
				}

				//
				// Extract localized locale name from language xml file. Have to do it after both language and territory are read
				//
				var el = doc.SelectSingleNode (string.Format ("ldml/localeDisplayNames/languages/language[@type='{0}']", data.Language));
				if (el != null)
					data.NativeName = el.InnerText;

				if (data.Territory != null) {
					el = doc.SelectSingleNode (string.Format ("ldml/localeDisplayNames/territories/territory[@type='{0}']", data.Territory));
					if (el != null) {
						// TODO: Should read the display pattern from the locale data instead of
						// hard-coding it (the element name is missing from the original comment)
						data.NativeName = string.Format ("{0} ({1})", data.NativeName, el.InnerText);
						data.NativeTerritoryName = el.InnerText;
					}

					string currency;
					// We have territory now we have to run the process again to extract currency symbol
					if (region_currency.TryGetValue (data.Territory, out currency)) {
						fname = null;

						var xml = GetXmlDocument (Path.Combine (data_root, "main", "root.xml"));
						el = xml.SelectSingleNode (string.Format ("ldml/numbers/currencies/currency[@type='{0}']/symbol", currency));
						if (el != null)
							data.NumberFormatEntry.CurrencySymbol = el.InnerText;

						foreach (var part in sep) {
							if (fname != null)
								fname += "_";

							fname += part;

							xml = GetXmlDocument (Path.Combine (data_root, "main", fname + ".xml"));
							el = xml.SelectSingleNode (string.Format ("ldml/numbers/currencies/currency[@type='{0}']/symbol", currency));
							if (el != null)
								data.NumberFormatEntry.CurrencySymbol = el.InnerText;

							el = xml.SelectSingleNode (string.Format ("ldml/numbers/currencies/currency[@type='{0}']/displayName", currency));
							if (el != null)
								data.NativeCurrencyName = el.InnerText;
						}
					}
				}
			}

			// TODO: Don't have input data available but most values are 2 with few exceptions for 1 and 3
			// We don't add 3 as it's for some arabic states only
			switch (data.ThreeLetterISOLanguageName) {
			case "amh":
				data.NumberFormatEntry.NumberDecimalDigits = 1;
				break;
			default:
				data.NumberFormatEntry.NumberDecimalDigits = 2;
				break;
			}

			// TODO: For now we capture only native name for default calendar
			data.NativeCalendarNames[((int) data.CalendarType & 0xFF) - 1] = data.DateTimeFormatEntry.NativeCalendarName;

			var lcdid_value = int.Parse (data.LCID.Substring (2), NumberStyles.HexNumber);
			Patterns.FillValues (lcdid_value, data);

			return true;
		}

		void Import (XmlDocument doc, CultureInfoEntry ci)
		{
			XmlNodeList nodes;
			XmlNode el;

			//
			// Extract script & teritory
			//
			el = doc.SelectSingleNode ("ldml/identity/script");
			if (el != null)
				ci.Script = el.Attributes["type"].Value;

			el = doc.SelectSingleNode ("ldml/identity/territory");
			if (el != null)
				ci.Territory = el.Attributes["type"].Value;

			var df = ci.DateTimeFormatEntry;

			string calendar;
			// Default calendar is for now always "gregorian"
			switch (ci.OriginalName) {
			case "th":
			case "th-TH":
				calendar = "buddhist";
				ci.CalendarType = CalendarType.ThaiBuddhist; // typeof (ThaiBuddhistCalendar);
				break;
			case "ar":
			case "ar-SA":
				calendar = "islamic";
				ci.CalendarType = CalendarType.UmAlQuraCalendar; // typeof (UmAlQuraCalendar);
				break;
			case "ps":
			case "ps-AF":
			case "prs":
			case "prs-AF":
			case "dv":
			case "dv-MV":
				calendar = "persian";
				ci.CalendarType = CalendarType.HijriCalendar; // typeof (HijriCalendar);
				break;
			default:
				calendar = "gregorian";
				ci.CalendarType = CalendarType.Gregorian; // typeof (GregorianCalendar);
				ci.GregorianCalendarType = GregorianCalendarTypes.Localized;
				break;
			}

			var node = doc.SelectSingleNode (string.Format ("ldml/dates/calendars/calendar[@type='{0}']", calendar));
			if (node != null) {
				el = doc.SelectSingleNode (string.Format ("ldml/localeDisplayNames/types/type[@type='{0}']", calendar));
				if (el != null)
					df.NativeCalendarName = el.InnerText;

				// Apply global rule first
				nodes = node.SelectNodes ("months/monthContext[@type='format']/monthWidth[@type='wide']/month");
				ProcessAllNodes (nodes, df.MonthNames, AddOrReplaceValue);
				nodes = node.SelectNodes ("months/monthContext[@type='stand-alone']/monthWidth[@type='wide']/month");
				ProcessAllNodes (nodes, df.MonthNames, AddOrReplaceValue);

				if (df.MonthNames != null) {
					if (ci.Name == "sv" || ci.Name == "sv-SE") {
						ToLower (df.MonthNames);
					}
				}

				// Apply global rule first
				if (ci.Name ==
"ja" || ci.Name == "ja-JP") { // Use common number style } else { nodes = node.SelectNodes ("months/monthContext[@type='format']/monthWidth[@type='abbreviated']/month"); ProcessAllNodes (nodes, df.AbbreviatedMonthNames, AddOrReplaceValue); nodes = node.SelectNodes ("months/monthContext[@type='stand-alone']/monthWidth[@type='abbreviated']/month"); ProcessAllNodes (nodes, df.AbbreviatedMonthNames, AddOrReplaceValue); } if (df.AbbreviatedMonthNames != null) { if (ci.Name == "sv" || ci.Name == "sv-SE") { ToLower (df.AbbreviatedMonthNames); } } nodes = node.SelectNodes ("months/monthContext[@type='format']/monthWidth[@type='wide']/month"); if (nodes != null) { ProcessAllNodes (nodes, df.MonthGenitiveNames, AddOrReplaceValue); } // All values seem to match Array.Copy (df.AbbreviatedMonthNames, df.AbbreviatedMonthGenitiveNames, df.AbbreviatedMonthNames.Length); nodes = node.SelectNodes ("days/dayContext[@type='format']/dayWidth[@type='wide']/day"); ProcessAllNodes (nodes, df.DayNames, AddOrReplaceDayValue); // Apply global rule first nodes = node.SelectNodes ("days/dayContext[@type='format']/dayWidth[@type='abbreviated']/day"); ProcessAllNodes (nodes, df.AbbreviatedDayNames, AddOrReplaceDayValue); nodes = node.SelectNodes ("days/dayContext[@type='stand-alone']/dayWidth[@type='abbreviated']/day"); ProcessAllNodes (nodes, df.AbbreviatedDayNames, AddOrReplaceDayValue); if (df.AbbreviatedDayNames != null) { if (ci.Name == "sv" || ci.Name == "sv-SE") { ToLower (df.AbbreviatedDayNames); } } // TODO: This is not really ShortestDayNames as .NET uses it // Apply global rules first nodes = node.SelectNodes ("days/dayContext[@type='format']/dayWidth[@type='narrow']/day"); ProcessAllNodes (nodes, df.ShortestDayNames, AddOrReplaceDayValue); nodes = node.SelectNodes ("days/dayContext[@type='stand-alone']/dayWidth[@type='narrow']/day"); ProcessAllNodes (nodes, df.ShortestDayNames, AddOrReplaceDayValue); /* Cannot really be used it's too different to .NET and most app rely on it el = 
node.SelectSingleNode ("dateFormats/dateFormatLength[@type='full']/dateFormat/pattern"); if (el != null) df.LongDatePattern = ConvertDatePatternFormat (el.InnerText); // Medium is our short el = node.SelectSingleNode ("dateFormats/dateFormatLength[@type='medium']/dateFormat/pattern"); if (el != null) df.ShortDatePattern = ConvertDatePatternFormat (el.InnerText); // Medium is our Long el = node.SelectSingleNode ("timeFormats/timeFormatLength[@type='medium']/timeFormat/pattern"); if (el != null) df.LongTimePattern = ConvertTimePatternFormat (el.InnerText); el = node.SelectSingleNode ("timeFormats/timeFormatLength[@type='short']/timeFormat/pattern"); if (el != null) df.ShortTimePattern = ConvertTimePatternFormat (el.InnerText); el = node.SelectSingleNode ("dateTimeFormats/availableFormats/dateFormatItem[@id='yyyyMMMM']"); if (el != null) df.YearMonthPattern = ConvertDatePatternFormat (el.InnerText); el = node.SelectSingleNode ("dateTimeFormats/availableFormats/dateFormatItem[@id='MMMMdd']"); if (el != null) df.MonthDayPattern = ConvertDatePatternFormat (el.InnerText); */ el = node.SelectSingleNode ("dayPeriods/dayPeriodContext/dayPeriodWidth[@type='abbreviated']/dayPeriod[@type='am']"); if (el == null) // Apply global rule first el = node.SelectSingleNode ("dayPeriods/dayPeriodContext/dayPeriodWidth[@type='wide']/dayPeriod[@type='am']"); // Manual edits for exact .net compatiblity switch (ci.Name) { case "en-AU": df.AMDesignator = "AM"; break; case "en-NZ": df.AMDesignator = "a.m."; break; case "ko": case "ko-KP": case "ko-KR": df.AMDesignator = "오전"; break; default: if (el != null) df.AMDesignator = el.InnerText; break; } el = node.SelectSingleNode ("dayPeriods/dayPeriodContext/dayPeriodWidth[@type='abbreviated']/dayPeriod[@type='pm']"); if (el == null) // Apply global rule first el = node.SelectSingleNode ("dayPeriods/dayPeriodContext/dayPeriodWidth[@type='wide']/dayPeriod[@type='pm']"); switch (ci.Name) { case "en-AU": df.PMDesignator = "PM"; break; case "en-NZ": 
df.PMDesignator = "p.m."; break; case "ko": case "ko-KP": case "ko-KR": df.PMDesignator = "오후"; break; default: if (el != null) df.PMDesignator = el.InnerText; break; } } var ni = ci.NumberFormatEntry; node = doc.SelectSingleNode ("ldml/numbers/symbols"); if (node != null) { el = node.SelectSingleNode ("plusSign"); if (el != null) ni.PositiveSign = el.InnerText; // CLDR uses unicode negative sign for some culture (e.g sv, is, lt, don't kwnow why) but .NET always // uses simple "-" sign and what is worse the parsing code cannot deal with non-ASCII values ni.NegativeSign = "-"; /* el = node.SelectSingleNode ("minusSign"); if (el != null) { switch (el.InnerText) { case "\u2212": case "\u200F\u002D": // Remove any right-to-left mark characters case "\u200E\u002D": case "\u061C\u2212": case "\u200F\u2212": ni.NegativeSign = "-"; break; default: ni.NegativeSign = el.InnerText; break; } } */ el = node.SelectSingleNode ("infinity"); // We cannot use the value from CLDR because many broken // .NET serializers (e.g. 
JSON) use text value of NegativeInfinity // and different value would break interoperability with .NET var inf = GetInfinitySymbol (ci); if (inf != null) ni.InfinitySymbol = inf; else if (el != null && el.InnerText != "∞") { ni.InfinitySymbol = el.InnerText; } el = node.SelectSingleNode ("perMille"); if (el != null) ni.PerMilleSymbol = el.InnerText; el = node.SelectSingleNode ("nan"); if (el != null) ni.NaNSymbol = el.InnerText; el = node.SelectSingleNode ("percentSign"); if (el != null) ni.PercentSymbol = el.InnerText; } } static void ToLower (string[] values) { if (values == null) return; for (int i = 0; i < values.Length; ++i) { if (values [i] == null) continue; values [i] = values [i].ToLower (); } } string GetInfinitySymbol (CultureInfoEntry ci) { // TODO: Add more switch (ci.TwoLetterISOLanguageName) { case "ca": return "Infinit"; case "cs": case "sk": return "+nekonečno"; case "de": return "+unendlich"; case "el": return "Άπειρο"; case "es": case "gl": return "Infinito"; case "it": case "pt": return "+Infinito"; case "nl": return "oneindig"; case "fr": case "tzm": return "+Infini"; case "pl": return "+nieskończoność"; case "ru": case "tg": return "бесконечность"; case "sl": return "neskončnost"; case "rm": return "+infinit"; case "lv": return "bezgalība"; case "lt": return "begalybė"; case "eu": return "Infinitu"; } return null; } static string ConvertDatePatternFormat (string format) { // // LDMR uses different characters for some fields // http://unicode.org/reports/tr35/#Date_Format_Patterns // format = format.Replace ("EEEE", "dddd"); // The full name of the day of the week format = format.Replace ("LLLL", "MMMM"); // The full month name if (format.EndsWith (" y", StringComparison.Ordinal)) format += "yyy"; return format; } static string ConvertTimePatternFormat (string format) { format = format.Replace ("a", "tt"); // AM or PM return format; } static void ProcessAllNodes (XmlNodeList list, IList values, Action, string, string> convertor) { foreach 
(XmlNode entry in list) { var index = entry.Attributes["type"].Value; var value = entry.InnerText; convertor (values, index, value); } } // All text indexes are 1-based static void AddOrReplaceValue (IList list, string oneBasedIndex, string value) { int index = int.Parse (oneBasedIndex); AddOrReplaceValue (list, index - 1, value); } static readonly string[] day_types = new string[] { "sun", "mon", "tue", "wed", "thu", "fri", "sat" }; static void AddOrReplaceDayValue (IList list, string dayType, string value) { int index = Array.IndexOf (day_types, dayType); AddOrReplaceValue (list, index, value); } static void AddOrReplaceValue (IList list, int index, string value) { if (list.Count <= index) ((List) list).AddRange (new string[index - list.Count + 1]); list[index] = value; } sealed class LcidComparer : IComparer { public int Compare (CultureInfoEntry x, CultureInfoEntry y) { return x.LCID.CompareTo (y.LCID); } } sealed class ExportNameComparer : IComparer { public int Compare (CultureInfoEntry x, CultureInfoEntry y) { return String.Compare (x.GetExportName (), y.GetExportName (), StringComparison.OrdinalIgnoreCase); } } class RegionComparer : IComparer { public int Compare (RegionInfoEntry x, RegionInfoEntry y) { return x.TwoLetterISORegionName.CompareTo (y.TwoLetterISORegionName); } } } }