diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index c4dac339be66af..c8387eb07b52eb 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -895,7 +895,7 @@ locale * :func:`~locale.setlocale` now supports language codes with ``@``-modifiers. ``@``-modifiers are no longer silently removed in :func:`~locale.getlocale`, but included in the language code. - (Contributed by Serhiy Storchaka in :gh:`137729`.) + (Contributed by Serhiy Storchaka in :gh:`137729` and :gh:`148792`.) * Undeprecate the :func:`locale.getdefaultlocale` function. (Contributed by Victor Stinner in :gh:`130796`.) diff --git a/Lib/locale.py b/Lib/locale.py index 4ff6f8c0f0a775..1c1db041493014 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -10,6 +10,7 @@ """ +import os import sys import encodings import encodings.aliases @@ -468,6 +469,28 @@ def normalize(localename): return localename +def _conv_to_windows(locale): + locale = locale.replace('_', '-') + if '@' in locale: + locale, _, modifier = locale.partition('@') + locale, _, encoding = locale.partition('.') + locale, _, territory = locale.partition('-') + suffix = '' + if modifier: + modifier_lower = modifier.lower() + if modifier_lower == 'valencia': + suffix = '-' + modifier_lower + else: + modifier = _modifier_to_script.get(modifier_lower, modifier) + locale += '-' + modifier + if territory: + locale += '-' + territory + if suffix: + locale += suffix + if encoding: + locale += '.' + encoding + return locale + def _parse_localename(localename): """ Parses the locale code for localename and returns the @@ -505,6 +528,7 @@ def _parse_localename(localename): # On macOS "LC_CTYPE=UTF-8" is a valid locale setting # for getting UTF-8 handling for text. return None, 'UTF-8' + return code, None raise ValueError('unknown locale: %s' % localename) def _build_localename(localetuple): @@ -618,10 +642,34 @@ def setlocale(category, locale=None): category may be given as one of the LC_* values. 
""" - if locale and not isinstance(locale, _builtin_str): - # convert to string + if not locale or isinstance(locale, _builtin_str): + return _setlocale(category, locale) + + # convert to string + if os.name == 'nt': + locale = _build_localename(locale) + try: + return _setlocale(category, locale) + except Error: + locale2 = _conv_to_windows(locale) + try: + return _setlocale(category, locale2) + except Error: + pass + locale = normalize(locale) + try: + return _setlocale(category, locale) + except Error: + pass + locale2 = _conv_to_windows(locale) + try: + return _setlocale(category, locale2) + except Error: + pass + raise + else: locale = normalize(_build_localename(locale)) - return _setlocale(category, locale) + return _setlocale(category, locale) try: @@ -1546,9 +1594,9 @@ def getpreferredencoding(do_setlocale=True): 0x004d: "as", # Assamese 0x044d: "as_IN", # Assamese - India 0x002c: "az", # Azerbaijani (Latin) - 0x742c: "az", # Azerbaijani (Cyrillic) - 0x782c: "az", # Azerbaijani (Latin) - 0x042c: "az_AZ", # Azerbaijani (Latin) - Azerbaijan + 0x742c: "az@cyrillic", # Azerbaijani (Cyrillic) + 0x782c: "az@latin", # Azerbaijani (Latin) + 0x042c: "az_AZ@latin", # Azerbaijani (Latin) - Azerbaijan 0x0045: "bn", # Bangla 0x0445: "bn_IN", # Bangla - India 0x0845: "bn_BD", # Bangla - Bangladesh @@ -1558,10 +1606,10 @@ def getpreferredencoding(do_setlocale=True): 0x042d: "eu_ES", # Basque - Spain 0x0023: "be", # Belarusian 0x0423: "be_BY", # Belarusian - Belarus - 0x641a: "bs", # Bosnian (Cyrillic) - 0x681a: "bs", # Bosnian (Latin) - 0x141a: "bs_BA", # Bosnian (Latin) - Bosnia and Herzegovina - 0x201a: "bs_BA", # Bosnian (Cyrillic) - Bosnia and Herzegovina + 0x641a: "bs@cyrillic", # Bosnian (Cyrillic) + 0x681a: "bs@latin", # Bosnian (Latin) + 0x141a: "bs_BA@latin", # Bosnian (Latin) - Bosnia and Herzegovina + 0x201a: "bs_BA@cyrillic", # Bosnian (Cyrillic) - Bosnia and Herzegovina 0x781a: "bs", # Bosnian (Latin) 0x007e: "br", # Breton 0x047e: "br_FR", # Breton - France 
@@ -1571,16 +1619,16 @@ def getpreferredencoding(do_setlocale=True): 0x0455: "my_MM", # Burmese - Myanmar 0x0003: "ca", # Catalan 0x0403: "ca_ES", # Catalan - Spain - 0x0803: "ca_ES", # Valencian - Spain + 0x0803: "ca_ES@valencia", # Valencian - Spain 0x0092: "ku", # Central Kurdish - 0x7c92: "ku", # Central Kurdish - 0x0492: "ku_IQ", # Central Kurdish - Iraq + 0x7c92: "ku@arabic", # Central Kurdish + 0x0492: "ku_IQ@arabic", # Central Kurdish - Iraq 0x005c: "chr", # Cherokee - 0x7c5c: "chr", # Cherokee - 0x045c: "chr_US", # Cherokee - United States - 0x0004: "zh", # Chinese (Simplified) + 0x7c5c: "chr@cherokee", # Cherokee + 0x045c: "chr_US@cherokee", # Cherokee - United States + 0x0004: "zh@Hans", # Chinese (Simplified) 0x7804: "zh", # Chinese (Simplified) - 0x7c04: "zh", # Chinese (Traditional) + 0x7c04: "zh@Hant", # Chinese (Traditional) 0x0404: "zh_TW", # Chinese (Traditional) - Taiwan 0x0804: "zh_CN", # Chinese (Simplified) - People's Republic of China 0x0c04: "zh_HK", # Chinese (Traditional) - Hong Kong S.A.R. 
@@ -1648,9 +1696,9 @@ def getpreferredencoding(do_setlocale=True): 0x0062: "fy", # Frisian 0x0462: "fy_NL", # Frisian - Netherlands 0x0067: "ff", # Fulah - 0x7c67: "ff", # Fulah (Latin) - 0x0467: "ff_NG", - 0x0867: "ff_SN", # Fulah - Senegal + 0x7c67: "ff@latin", # Fulah (Latin) + 0x0467: "ff_NG@latin", + 0x0867: "ff_SN@latin", # Fulah - Senegal 0x0056: "gl", # Galician 0x0456: "gl_ES", # Galician - Spain 0x0037: "ka", # Georgian @@ -1670,8 +1718,8 @@ def getpreferredencoding(do_setlocale=True): 0x0047: "gu", # Gujarati 0x0447: "gu_IN", # Gujarati - India 0x0068: "ha", # Hausa (Latin) - 0x7c68: "ha", # Hausa (Latin) - 0x0468: "ha_NG", # Hausa (Latin) - Nigeria + 0x7c68: "ha@latin", # Hausa (Latin) + 0x0468: "ha_NG@latin", # Hausa (Latin) - Nigeria 0x0075: "haw", # Hawaiian 0x0475: "haw_US", # Hawaiian - United States 0x000d: "he", # Hebrew @@ -1687,10 +1735,10 @@ def getpreferredencoding(do_setlocale=True): 0x0021: "id", # Indonesian 0x0421: "id_ID", # Indonesian - Indonesia 0x005d: "iu", # Inuktitut (Latin) - 0x785d: "iu", # Inuktitut (Syllabics) - 0x7c5d: "iu", # Inuktitut (Latin) - 0x045d: "iu_CA", # Inuktitut (Syllabics) - Canada - 0x085d: "iu_CA", # Inuktitut (Latin) - Canada + 0x785d: "iu@canadian_aboriginal", # Inuktitut (Syllabics) + 0x7c5d: "iu@latin", # Inuktitut (Latin) + 0x045d: "iu_CA@canadian_aboriginal", # Inuktitut (Syllabics) - Canada + 0x085d: "iu_CA@latin", # Inuktitut (Latin) - Canada 0x003c: "ga", # Irish 0x083c: "ga_IE", # Irish - Ireland 0x0010: "it", # Italian @@ -1700,10 +1748,10 @@ def getpreferredencoding(do_setlocale=True): 0x0411: "ja_JP", # Japanese - Japan 0x004b: "kn", # Kannada 0x044b: "kn_IN", # Kannada - India - 0x0471: "kr_NG", # Kanuri (Latin) - Nigeria + 0x0471: "kr_NG@latin", # Kanuri (Latin) - Nigeria 0x0060: "ks", # Kashmiri - 0x0460: "ks", # Kashmiri - Perso_Arabic - 0x0860: "ks_IN", # Kashmiri (Devanagari) - India + 0x0460: "ks@arabic", # Kashmiri - Perso_Arabic + 0x0860: "ks_IN@devanagari", # Kashmiri (Devanagari) - India 
0x003f: "kk", # Kazakh 0x043f: "kk_KZ", # Kazakh - Kazakhstan 0x0053: "km", # Khmer @@ -1747,10 +1795,10 @@ def getpreferredencoding(do_setlocale=True): 0x007c: "moh", # Mohawk 0x047c: "moh_CA", # Mohawk - Canada 0x0050: "mn", # Mongolian (Cyrillic) - 0x7850: "mn", # Mongolian (Cyrillic) - 0x7c50: "mn", # Mongolian (Traditional Mongolian) + 0x7850: "mn@cyrillic", # Mongolian (Cyrillic) + 0x7c50: "mn@mongolian", # Mongolian (Traditional Mongolian) 0x0450: "mn_MN", # Mongolian (Cyrillic) - Mongolia - 0x0c50: "mn_MN", # Mongolian (Traditional Mongolian) - Mongolia + 0x0c50: "mn_MN@mongolian", # Mongolian (Traditional Mongolian) - Mongolia 0x0061: "ne", # Nepali 0x0461: "ne_NP", # Nepali - Nepal 0x0861: "ne_IN", # Nepali - India @@ -1775,9 +1823,9 @@ def getpreferredencoding(do_setlocale=True): 0x0416: "pt_BR", # Portuguese - Brazil 0x0816: "pt_PT", # Portuguese - Portugal 0x0046: "pa", # Punjabi - 0x7c46: "pa", # Punjabi + 0x7c46: "pa@arabic", # Punjabi 0x0446: "pa_IN", # Punjabi - India - 0x0846: "pa_PK", # Punjabi - Islamic Republic of Pakistan + 0x0846: "pa_PK@arabic", # Punjabi - Islamic Republic of Pakistan 0x006b: "quz", # Quechua 0x046b: "quz_BO", # Quechua - Bolivia 0x086b: "quz_EC", # Quechua - Ecuador @@ -1810,25 +1858,25 @@ def getpreferredencoding(do_setlocale=True): 0x044f: "sa_IN", # Sanskrit - India 0x0091: "gd", # Scottish Gaelic 0x0491: "gd_GB", # Scottish Gaelic - United Kingdom - 0x6c1a: "sr", # Serbian (Cyrillic) - 0x701a: "sr", # Serbian (Latin) + 0x6c1a: "sr@cyrillic", # Serbian (Cyrillic) + 0x701a: "sr@latin", # Serbian (Latin) 0x7c1a: "sr", # Serbian (Latin) - 0x081a: "sr_CS", # Serbian (Latin) - Serbia and Montenegro (Former) - 0x0c1a: "sr_CS", # Serbian (Cyrillic) - Serbia and Montenegro (Former) - 0x181a: "sr_BA", # Serbian (Latin) - Bosnia and Herzegovina - 0x1c1a: "sr_BA", # Serbian (Cyrillic) - Bosnia and Herzegovina - 0x241a: "sr_RS", # Serbian (Latin) - Serbia - 0x281a: "sr_RS", # Serbian (Cyrillic) - Serbia - 0x2c1a: "sr_ME", # Serbian 
(Latin) - Montenegro - 0x301a: "sr_ME", # Serbian (Cyrillic) - Montenegro + 0x081a: "sr_CS@latin", # Serbian (Latin) - Serbia and Montenegro (Former) + 0x0c1a: "sr_CS@cyrillic", # Serbian (Cyrillic) - Serbia and Montenegro (Former) + 0x181a: "sr_BA@latin", # Serbian (Latin) - Bosnia and Herzegovina + 0x1c1a: "sr_BA@cyrillic", # Serbian (Cyrillic) - Bosnia and Herzegovina + 0x241a: "sr_RS@latin", # Serbian (Latin) - Serbia + 0x281a: "sr_RS@cyrillic", # Serbian (Cyrillic) - Serbia + 0x2c1a: "sr_ME@latin", # Serbian (Latin) - Montenegro + 0x301a: "sr_ME@cyrillic", # Serbian (Cyrillic) - Montenegro 0x006c: "nso", # Sesotho sa Leboa 0x046c: "nso_ZA", # Sesotho sa Leboa - South Africa 0x0032: "tn", # Setswana 0x0432: "tn_ZA", # Setswana - South Africa 0x0832: "tn_BW", # Setswana - Botswana 0x0059: "sd", # Sindhi - 0x7c59: "sd", # Sindhi - 0x0859: "sd_PK", # Sindhi - Islamic Republic of Pakistan + 0x7c59: "sd@arabic", # Sindhi + 0x0859: "sd_PK@arabic", # Sindhi - Islamic Republic of Pakistan 0x005b: "si", # Sinhala 0x045b: "si_LK", # Sinhala - Sri Lanka 0x001b: "sk", # Slovak @@ -1867,14 +1915,14 @@ def getpreferredencoding(do_setlocale=True): 0x005a: "syr", # Syriac 0x045a: "syr_SY", # Syriac - Syria 0x0028: "tg", # Tajik (Cyrillic) - 0x7c28: "tg", # Tajik (Cyrillic) - 0x0428: "tg_TJ", # Tajik (Cyrillic) - Tajikistan + 0x7c28: "tg@cyrillic", # Tajik (Cyrillic) + 0x0428: "tg_TJ@cyrillic", # Tajik (Cyrillic) - Tajikistan 0x005f: "tzm", # Tamazight (Latin) - 0x785f: "tzm", - 0x7c5f: "tzm", # Tamazight (Latin) - 0x085f: "tzm_DZ", # Tamazight (Latin) - Algeria - 0x045f: "tzm_MA", # Central Atlas Tamazight (Arabic) - Morocco - 0x105f: "tzm_MA", + 0x785f: "tzm@tifinagh", + 0x7c5f: "tzm@latin", # Tamazight (Latin) + 0x085f: "tzm_DZ@latin", # Tamazight (Latin) - Algeria + 0x045f: "tzm_MA@arabic", # Central Atlas Tamazight (Arabic) - Morocco + 0x105f: "tzm_MA@tifinagh", 0x0049: "ta", # Tamil 0x0449: "ta_IN", # Tamil - India 0x0849: "ta_LK", # Tamil - Sri Lanka @@ -1905,9 +1953,9 
# Maps Unix-like locale modifiers (the part after "@") to four-letter
# ISO 15924 script codes.
# https://www.unicode.org/iso15924/iso15924.txt
#
# Keys are lower case and use "-" rather than "_" because
# _conv_to_windows() replaces "_" with "-" in the whole locale name and
# lower-cases the modifier before looking it up here (so
# "iu_CA@canadian_aboriginal" is looked up as "canadian-aboriginal").
# A modifier with no entry here is used unchanged by _conv_to_windows().

_modifier_to_script = {
    'arabic': 'Arab',
    'canadian-aboriginal': 'Cans',
    'cherokee': 'Cher',
    'cyrillic': 'Cyrl',
    'devanagari': 'Deva',
    'latin': 'Latn',
    'mongolian': 'Mong',
    'tifinagh': 'Tfng',
}
@unittest.skipUnless(os.name == 'nt', 'requires Windows')
@support.subTests('localename,localetuple', [
    # NOTE(review): the two cases below are disabled in the original;
    # presumably 'ca-ES-valencia' is not accepted together with an
    # explicit UTF-8 encoding -- confirm before re-enabling.
    # ('ca-ES-valencia', ('ca_ES@valencia', 'UTF-8')),
    # ('ca-ES-valencia', ('ca_ES@valencia', 'utf8')),
    ('ca-ES-valencia', ('ca_ES@valencia', None)),
    ('ks-Deva-IN.UTF-8', ('ks_IN@devanagari', 'UTF-8')),
    ('ks-Deva-IN.utf8', ('ks_IN@devanagari', 'utf8')),
    ('ks-Deva-IN', ('ks_IN@devanagari', None)),
    ('sd-Deva-IN', ('sd_IN@devanagari', None)),
    ('be-Latn-BY.UTF-8', ('be_BY@latin', 'UTF-8')),
    ('be-Latn-BY', ('be_BY@latin', None)),
    ('sr-Latn-RS.UTF-8', ('sr_RS@latin', 'UTF-8')),
    ('sr-Latn-RS', ('sr_RS@latin', None)),
    ('ug-Latn-CN', ('ug_CN@latin', None)),
    ('uz-Cyrl-UZ', ('uz_UZ@cyrillic', None)),
    ('zh-Hant', ('zh@Hant', None)),
    ('tzm-Arab-MA', ('tzm_MA@arabic', None)),
    ('tzm-Tfng-MA', ('tzm_MA@tifinagh', None)),
    ('chr-Cher-US', ('chr_US@cherokee', None)),
    ('iu-Cans-CA', ('iu_CA@canadian_aboriginal', None)),
    ('mn-Mong', ('mn@mongolian', None)),
])
def test_setlocale_with_modifier(self, localename, localetuple):
    # A Unix-like (language code, encoding) tuple with an @-modifier
    # must be accepted on Windows and yield the Windows locale name.
    category = locale.LC_CTYPE
    self.assertEqual(locale.setlocale(category, localetuple), localename)

    # Round trip: whatever getlocale() reports must select the same
    # Windows locale again.
    reported = locale.getlocale(category)
    self.assertEqual(locale.setlocale(category, reported), localename)
a/Misc/NEWS.d/next/Library/2026-04-20-14-58-31.gh-issue-148792.gAJWeb.rst b/Misc/NEWS.d/next/Library/2026-04-20-14-58-31.gh-issue-148792.gAJWeb.rst new file mode 100644 index 00000000000000..54220300e74bdc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-20-14-58-31.gh-issue-148792.gAJWeb.rst @@ -0,0 +1,3 @@ +:func:`locale.setlocale` now supports Unix-like language codes with +``@``-modifiers on Windows when the locale is given as a tuple. For example: +"ca_ES@valencia", "sr_RS@latin", "uz_UZ@cyrillic" and "ks_IN@devanagari".