esp8266 · mcspr · Feb 22, 2023 · Feb 16, 2023 · Feb 20, 2023 · Feb 20, 2023
diff --git a/tools/mkbuildoptglobals.py b/tools/mkbuildoptglobals.py
@@ -187,10 +187,13 @@
 import glob
 import os
 import platform
+import traceback
 import sys
 import textwrap
 import time
 
+import locale
+
 # Need to work on signature line used for match to avoid conflicts with
 # existing embedded documentation methods.
 build_opt_signature = "/*@create-file:build.opt@"
@@ -201,6 +204,7 @@
 err_print_flag = False
 msg_print_buf = ""
 debug_enabled = False
+default_encoding = None
 
 # Issues trying to address through buffered printing
 # 1. Arduino IDE 2.0 RC5 does not show stderr text in color. Text printed does
@@ -295,16 +299,16 @@ def copy_create_build_file(source_fqfn, build_target_fqfn):
                 pass
     return True     # file changed
 
-
 def add_include_line(build_opt_fqfn, include_fqfn):
+    global default_encoding
     if not os.path.exists(include_fqfn):
         # If file is missing, we need an place holder
-        with open(include_fqfn, 'w', encoding="utf-8"):
+        with open(include_fqfn, 'w', encoding=default_encoding):
-        with open(include_fqfn, 'w', encoding=default_encoding):
+        with open(include_fqfn, 'w'):
-        with open(include_fqfn, 'w', encoding=default_encoding):
+        with open(include_fqfn, 'w'):
             pass
-        print("add_include_line: Created " + include_fqfn)
-    with open(build_opt_fqfn, 'a', encoding="utf-8") as build_opt:
-        build_opt.write('-include "' + include_fqfn.replace('\\', '\\\\') + '"\n')
+        print_msg("add_include_line: Created " + include_fqfn)
 
+    with open(build_opt_fqfn, 'a', encoding=default_encoding) as build_opt:
+        build_opt.write('-include "' + include_fqfn.replace('\\', '\\\\') + '"\n')
 
 def extract_create_build_opt_file(globals_h_fqfn, file_name, build_opt_fqfn):
     """
@@ -313,8 +317,9 @@ def extract_create_build_opt_file(globals_h_fqfn, file_name, build_opt_fqfn):
     copy of Sketch.ino.globals.h.
     """
     global build_opt_signature
+    global default_encoding
 
-    build_opt = open(build_opt_fqfn, 'w', encoding="utf-8")
+    build_opt = open(build_opt_fqfn, 'w', encoding=default_encoding)
     if not os.path.exists(globals_h_fqfn) or (0 == os.path.getsize(globals_h_fqfn)):
         build_opt.close()
         return False
@@ -605,12 +610,63 @@ def parse_args():
     # ref nargs='*'', https://stackoverflow.com/a/4480202
     # ref no '--n' parameter, https://stackoverflow.com/a/21998252
 
+
+# retrieve *system* encoding, not the one used by python internally
+if sys.version_info >= (3, 11):
+    def get_encoding():
+        return locale.getencoding()
+else:
+    def get_encoding():
+        return locale.getdefaultlocale()[1]
+
+
+def show_value(desc, value):
+    print_dbg(f'{desc:<40} {value}')
+    return
+
+def locale_dbg():
+    show_value("get_encoding()", get_encoding())
+    show_value("locale.getdefaultlocale()", locale.getdefaultlocale())
+    show_value('sys.getfilesystemencoding()', sys.getfilesystemencoding())
+    show_value("sys.getdefaultencoding()", sys.getdefaultencoding())
+    show_value("locale.getpreferredencoding(False)", locale.getpreferredencoding(False))
+    try:
+        show_value("locale.getpreferredencoding()", locale.getpreferredencoding())
+    except:
+        pass
+    show_value("sys.stdout.encoding", sys.stdout.encoding)
+
+    # use current setting
+    show_value("locale.setlocale(locale.LC_ALL, None)", locale.setlocale(locale.LC_ALL, None))
+    try:
+        show_value("locale.getencoding()", locale.getencoding())
+    except:
+        pass
+    show_value("locale.getlocale()", locale.getlocale())
+
+    # use user setting
+    show_value("locale.setlocale(locale.LC_ALL, '')", locale.setlocale(locale.LC_ALL, ''))
+    # show_value("locale.getencoding()", locale.getencoding())
+    show_value("locale.getlocale()", locale.getlocale())
+    return
+
+
 def main():
     global build_opt_signature
     global docs_url
     global debug_enabled
+    global default_encoding
     num_include_lines = 1
 
+    # Given that GCC will handle lines from an @file as if they were on
+    # the command line. I assume that the contents of @file need to be encoded
+    # to match that of the shell running GCC runs. I am not 100% sure this API
+    # gives me that, but it appears to work.
+    #
+    # However, elsewhere when dealing with source code we continue to use 'utf-8',
+    # ref. https://gcc.gnu.org/onlinedocs/cpp/Character-sets.html
+    default_encoding = get_encoding()
+
     args = parse_args()
     debug_enabled = args.debug
     runtime_ide_path = os.path.normpath(args.runtime_ide_path)
@@ -623,6 +679,13 @@ def main():
     build_path_core, build_opt_name = os.path.split(build_opt_fqfn)
     globals_h_fqfn = os.path.join(build_path_core, globals_name)
 
+    if debug_enabled:
+        locale_dbg()
+
+    default_locale = locale.getdefaultlocale()
+    print_msg(f'default locale:         {default_locale}')
+    print_msg(f'default_encoding:       {default_encoding}')
+
     print_dbg(f"runtime_ide_path:       {runtime_ide_path}")
     print_dbg(f"runtime_ide_version:    {args.runtime_ide_version}")
     print_dbg(f"build_path:             {build_path}")
@@ -655,6 +718,10 @@ def main():
     print_dbg(f"first_time:             {first_time}")
     print_dbg(f"use_aggressive_caching_workaround: {use_aggressive_caching_workaround}")
 
+    if not os.path.exists(build_path_core):
+        os.makedirs(build_path_core)
+        print_msg("Clean build, created dir " + build_path_core)
+
     if first_time or \
     not use_aggressive_caching_workaround or \
     not os.path.exists(commonhfile_fqfn):
@@ -667,10 +734,6 @@ def main():
         touch(commonhfile_fqfn)
         print_err(f"Neutralized future timestamp on build file: {commonhfile_fqfn}")
 
-    if not os.path.exists(build_path_core):
-        os.makedirs(build_path_core)
-        print_msg("Clean build, created dir " + build_path_core)
-
     if os.path.exists(source_globals_h_fqfn):
         print_msg("Using global include from " + source_globals_h_fqfn)
 
@@ -750,4 +813,10 @@ def main():
     handle_error(0)   # commit print buffer
 
 if __name__ == '__main__':
-    sys.exit(main())
+    rc = 1
+    try:
+        rc = main()
+    except:
+        print_err(traceback.format_exc())
+        handle_error(0)
+    sys.exit(rc)
diff --git a/tools/sizes.py b/tools/sizes.py
@@ -21,9 +21,20 @@
 import os
 import subprocess
 import sys
+import locale
 
 sys.stdout = sys.stderr
 
+
+# retrieve *system* encoding, not the one used by python internally
+if sys.version_info >= (3, 11):
+    def get_encoding():
+        return locale.getencoding()
+else:
+    def get_encoding():
+        return locale.getdefaultlocale()[1]
+
+
 def get_segment_sizes(elf, path, mmu):
     iram_size = 0
     iheap_size = 0
@@ -73,7 +84,7 @@ def get_segment_sizes(elf, path, mmu):
     )
 
     cmd = [os.path.join(path, "xtensa-lx106-elf-size"), "-A", elf]
-    with subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True) as proc:
+    with subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True, encoding=get_encoding()) as proc:
         lines = proc.stdout.readlines()
         for line in lines:
             words = line.split()