karlicoss · karlicoss · Mar 3, 2023 · Feb 28, 2023 · Mar 1, 2023 · Mar 2, 2023
diff --git a/my/core/kompress.py b/my/core/kompress.py
@@ -18,39 +18,54 @@ class Ext:
     zip   = '.zip'
     lz4   = '.lz4'
     zstd  = '.zstd'
+    zst   = '.zst'
     targz = '.tar.gz'
 
 
 def is_compressed(p: Path) -> bool:
     # todo kinda lame way for now.. use mime ideally?
     # should cooperate with kompress.kopen?
-    return any(p.name.endswith(ext) for ext in {Ext.xz, Ext.zip, Ext.lz4, Ext.zstd, Ext.targz})
+    return any(p.name.endswith(ext) for ext in {Ext.xz, Ext.zip, Ext.lz4, Ext.zstd, Ext.zst, Ext.targz})
 
 
-def _zstd_open(path: Path, *args, **kwargs) -> IO[str]:
+def _zstd_open(path: Path, *args, **kwargs) -> IO:
     import zstandard as zstd # type: ignore
     fh = path.open('rb')
     dctx = zstd.ZstdDecompressor()
     reader = dctx.stream_reader(fh)
-    return io.TextIOWrapper(reader, **kwargs) # meh
+
+    mode = kwargs.get('mode', 'rt')
+    if mode == 'rb':
+        return reader
+    else:
+        # must be text mode
+        kwargs.pop('mode') # TextIOWrapper doesn't like it
+        return io.TextIOWrapper(reader, **kwargs) # meh
 
 
-# TODO returns protocol that we can call 'read' against?
-# TODO use the 'dependent type' trick?
-def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO[str]:
-    # TODO handle mode in *rags?
-    encoding = kwargs.get('encoding', 'utf8')
+# TODO use the 'dependent type' trick for return type?
+def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO:
+    # just in case, but I think this shouldn't be necessary anymore
+    # since when we cann .read_text, encoding is passed already
+    if mode in {'r', 'rt'}:
+        encoding = kwargs.get('encoding', 'utf8')
+    else:
+        encoding = None
     kwargs['encoding'] = encoding
 
     pp = Path(path)
     name = pp.name
     if name.endswith(Ext.xz):
         import lzma
-        r = lzma.open(pp, mode, *args, **kwargs)
-        # should only happen for binary mode?
-        # file:///usr/share/doc/python3/html/library/lzma.html?highlight=lzma#lzma.open
-        assert not isinstance(r, lzma.LZMAFile), r
-        return r
+
+        # ugh. for lzma, 'r' means 'rb'
+        # https://github.com/python/cpython/blob/d01cf5072be5511595b6d0c35ace6c1b07716f8d/Lib/lzma.py#L97
+        # whereas for regular open, 'r' means 'rt'
+        # https://docs.python.org/3/library/functions.html#open
+        if mode == 'r':
+            mode = 'rt'
+        kwargs['mode'] = mode
+        return lzma.open(pp, *args, **kwargs)
     elif name.endswith(Ext.zip):
         # eh. this behaviour is a bit dodgy...
         from zipfile import ZipFile
@@ -70,8 +85,9 @@ def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO[str]:
     elif name.endswith(Ext.lz4):
         import lz4.frame # type: ignore
         return lz4.frame.open(str(pp), mode, *args, **kwargs)
-    elif name.endswith(Ext.zstd):
-        return _zstd_open(pp, mode, *args, **kwargs)
+    elif name.endswith(Ext.zstd) or name.endswith(Ext.zst):
+        kwargs['mode'] = mode
+        return _zstd_open(pp, *args, **kwargs)
     elif name.endswith(Ext.targz):
         import tarfile
         # FIXME pass mode?
@@ -103,8 +119,15 @@ class CPath(BasePath):
     _accessor.open has to return file descriptor, doesn't work for compressed stuff.
     """
     def open(self, *args, **kwargs):
+        kopen_kwargs = {}
+        mode = kwargs.get('mode')
+        if mode is not None:
+            kopen_kwargs['mode'] = mode
+        encoding = kwargs.get('encoding')
+        if encoding is not None:
+            kopen_kwargs['encoding'] = encoding
         # TODO assert read only?
-        return kopen(str(self))
+        return kopen(str(self), **kopen_kwargs)
 
 
 open = kopen # TODO deprecate

diff --git a/my/core/logging.py b/my/core/logging.py
@@ -1,24 +1,21 @@
 #!/usr/bin/env python3
 '''
 Default logger is a bit meh, see 'test'/run this file for a demo
-TODO name 'klogging' to avoid possible conflict with default 'logging' module
-TODO shit. too late already? maybe use fallback & deprecate
 '''
 
-
 def test() -> None:
-    from typing import Callable
     import logging
     import sys
+    from typing import Callable
 
     M: Callable[[str], None] = lambda s: print(s, file=sys.stderr)
 
     M("   Logging module's defaults are not great...'")
     l = logging.getLogger('test_logger')
-    # todo why is mypy unhappy about these???
     l.error("For example, this should be logged as error. But it's not even formatted properly, doesn't have logger name or level")
 
     M("   The reason is that you need to remember to call basicConfig() first")
+    logging.basicConfig()
     l.error("OK, this is better. But the default format kinda sucks, I prefer having timestamps and the file/line number")
 
     M("")
@@ -32,8 +29,9 @@ def test() -> None:
 
 
 import logging
-from typing import Union, Optional
+from typing import Union, Optional, cast
 import os
+import warnings
 
 Level = int
 LevelIsh = Optional[Union[Level, str]]
@@ -56,42 +54,102 @@ def mklevel(level: LevelIsh) -> Level:
 FORMAT_NOCOLOR = FORMAT.format(start='', end='')
 DATEFMT = '%Y-%m-%d %H:%M:%S'
 
+COLLAPSE_DEBUG_LOGS = os.environ.get('COLLAPSE_DEBUG_LOGS', False)
+
+_init_done = 'lazylogger_init_done'
 
 def setup_logger(logger: logging.Logger, level: LevelIsh) -> None:
     lvl = mklevel(level)
     try:
         import logzero  # type: ignore[import]
-    except ModuleNotFoundError:
-        import warnings
-
-        warnings.warn("You might want to install 'logzero' for nice colored logs!")
-        logger.setLevel(lvl)
-        h = logging.StreamHandler()
-        h.setLevel(lvl)
-        h.setFormatter(logging.Formatter(fmt=FORMAT_NOCOLOR, datefmt=DATEFMT))
-        logger.addHandler(h)
-        logger.propagate = False  # ugh. otherwise it duplicates log messages? not sure about it..
-    else:
         formatter = logzero.LogFormatter(
             fmt=FORMAT_COLOR,
             datefmt=DATEFMT,
         )
+        use_logzero = True
+    except ModuleNotFoundError:
+        warnings.warn("You might want to install 'logzero' for nice colored logs!")
+        formatter = logging.Formatter(fmt=FORMAT_NOCOLOR, datefmt=DATEFMT)
+        use_logzero = False
+
+    logger.addFilter(AddExceptionTraceback())
+    if use_logzero and not COLLAPSE_DEBUG_LOGS: # all set, nothing to do
+        # 'simple' setup
         logzero.setup_logger(logger.name, level=lvl, formatter=formatter)
+        return
+
+    h = CollapseDebugHandler() if COLLAPSE_DEBUG_LOGS else logging.StreamHandler()
+    logger.setLevel(lvl)
+    h.setLevel(lvl)
+    h.setFormatter(formatter)
+    logger.addHandler(h)
+    logger.propagate = False  # ugh. otherwise it duplicates log messages? not sure about it..
 
 
 class LazyLogger(logging.Logger):
     def __new__(cls, name: str, level: LevelIsh = 'INFO') -> 'LazyLogger':
         logger = logging.getLogger(name)
+
         # this is called prior to all _log calls so makes sense to do it here?
-        def isEnabledFor_lazyinit(*args, logger=logger, orig=logger.isEnabledFor, **kwargs):
-            att = 'lazylogger_init_done'
-            if not getattr(logger, att, False):  # init once, if necessary
+        def isEnabledFor_lazyinit(*args, logger=logger, orig=logger.isEnabledFor, **kwargs) -> bool:
+            if not getattr(logger, _init_done, False):  # init once, if necessary
                 setup_logger(logger, level=level)
-                setattr(logger, att, True)
+                setattr(logger, _init_done, True)
+                logger.isEnabledFor = orig # restore the callback
             return orig(*args, **kwargs)
 
-        logger.isEnabledFor = isEnabledFor_lazyinit  # type: ignore[assignment]
-        return logger  # type: ignore[return-value]
+        # oh god.. otherwise might go into an inf loop
+        if not hasattr(logger, _init_done):
+            setattr(logger, _init_done, False) # will setup on the first call
+            logger.isEnabledFor = isEnabledFor_lazyinit  # type: ignore[assignment]
+        return cast(LazyLogger, logger)
+
+
+# by default, logging.exception isn't logging traceback
+# which is a bit annoying since we have to
+# also see https://stackoverflow.com/questions/75121925/why-doesnt-python-logging-exception-method-log-traceback-by-default
+# tod also amend by post about defensive error handling?
+class AddExceptionTraceback(logging.Filter):
+    def filter(self, record):
+        s = super().filter(record)
+        if s is False:
+            return False
+        if record.levelname == 'ERROR':
+            exc = record.msg
+            if isinstance(exc, BaseException):
+                if record.exc_info is None or record.exc_info == (None, None, None):
+                    exc_info = (type(exc), exc, exc.__traceback__)
+                    record.exc_info = exc_info
+        return s
+
+
+# todo also save full log in a file?
+class CollapseDebugHandler(logging.StreamHandler):
+    '''
+    Collapses subsequent debug log lines and redraws on the same line.
+    Hopefully this gives both a sense of progress and doesn't clutter the terminal as much?
+    '''
+    last = False
+
+    def emit(self, record: logging.LogRecord) -> None:
+        try:
+            msg = self.format(record)
+            cur = record.levelno == logging.DEBUG and '\n' not in msg
+            if cur:
+                if self.last:
+                    self.stream.write('\033[K' + '\r') # clear line + return carriage
+            else:
+                if self.last:
+                    self.stream.write('\n') # clean up after the last debug line
+            self.last = cur
+            import os
+            columns, _ = os.get_terminal_size(0)
+            # ugh. the columns thing is meh. dunno I guess ultimately need curses for that
+            # TODO also would be cool to have a terminal post-processor? kinda like tail but aware of logging keyworkds (INFO/DEBUG/etc)
+            self.stream.write(msg + ' ' * max(0, columns - len(msg)) + ('' if cur else '\n'))
+            self.flush()
+        except:
+            self.handleError(record)
 
 
 if __name__ == '__main__':

diff --git a/my/fbmessenger/android.py b/my/fbmessenger/android.py
@@ -41,7 +41,7 @@ def inputs() -> Sequence[Path]:
 @dataclass(unsafe_hash=True)
 class Sender:
     id: str
-    name: str
+    name: Optional[str]
 
 
 @dataclass(unsafe_hash=True)
@@ -103,7 +103,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
     for r in db.execute('''SELECT * FROM thread_users'''):
         # for messaging_actor_type == 'REDUCED_MESSAGING_ACTOR', name is None
         # but they are still referenced, so need to keep
-        name = r['name'] or '<NAME UNAVAILABLE>'
+        name = r['name']
         user_key = r['user_key']
         s = Sender(
             id=_normalise_user_id(user_key),
@@ -135,7 +135,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
         name = r['name']  # seems that it's only set for some groups
         if name is None:
             users = thread_users[thread_key]
-            name = ', '.join([u.name for u in users])
+            name = ', '.join([u.name or u.id for u in users])
         yield Thread(
             id=_normalise_thread_id(thread_key),
             name=name,