From 6db056cbafbac7859ffdd569754ac1a51e00dc85 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Tue, 28 Feb 2023 22:53:06 +0000 Subject: [PATCH 1/4] core.kompress: support .zst extension, seems more conventional than .zstd --- my/core/kompress.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/my/core/kompress.py b/my/core/kompress.py index 26e0bbd1..9b8a40fb 100644 --- a/my/core/kompress.py +++ b/my/core/kompress.py @@ -18,13 +18,14 @@ class Ext: zip = '.zip' lz4 = '.lz4' zstd = '.zstd' + zst = '.zst' targz = '.tar.gz' def is_compressed(p: Path) -> bool: # todo kinda lame way for now.. use mime ideally? # should cooperate with kompress.kopen? - return any(p.name.endswith(ext) for ext in {Ext.xz, Ext.zip, Ext.lz4, Ext.zstd, Ext.targz}) + return any(p.name.endswith(ext) for ext in {Ext.xz, Ext.zip, Ext.lz4, Ext.zstd, Ext.zst, Ext.targz}) def _zstd_open(path: Path, *args, **kwargs) -> IO[str]: @@ -70,7 +71,7 @@ def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO[str]: elif name.endswith(Ext.lz4): import lz4.frame # type: ignore return lz4.frame.open(str(pp), mode, *args, **kwargs) - elif name.endswith(Ext.zstd): + elif name.endswith(Ext.zstd) or name.endswith(Ext.zst): return _zstd_open(pp, mode, *args, **kwargs) elif name.endswith(Ext.targz): import tarfile From 1139758a9c743453652140256b5f25768dbc2f92 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Wed, 1 Mar 2023 00:45:20 +0000 Subject: [PATCH 2/4] core.kompress: proper support for read_text/read_bytes against zstd/xz archives --- my/core/kompress.py | 50 ++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/my/core/kompress.py b/my/core/kompress.py index 9b8a40fb..0274e6c4 100644 --- a/my/core/kompress.py +++ b/my/core/kompress.py @@ -28,30 +28,44 @@ def is_compressed(p: Path) -> bool: return any(p.name.endswith(ext) for ext in {Ext.xz, Ext.zip, Ext.lz4, Ext.zstd, Ext.zst, Ext.targz}) -def _zstd_open(path: Path, *args, **kwargs) -> IO[str]: +def _zstd_open(path: Path, *args, **kwargs) -> IO: import zstandard as zstd # type: ignore fh = path.open('rb') dctx = zstd.ZstdDecompressor() reader = dctx.stream_reader(fh) - return io.TextIOWrapper(reader, **kwargs) # meh + + mode = kwargs.get('mode', 'rt') + if mode == 'rb': + return reader + else: + # must be text mode + kwargs.pop('mode') # TextIOWrapper doesn't like it + return io.TextIOWrapper(reader, **kwargs) # meh -# TODO returns protocol that we can call 'read' against? -# TODO use the 'dependent type' trick? -def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO[str]: - # TODO handle mode in *rags? - encoding = kwargs.get('encoding', 'utf8') +# TODO use the 'dependent type' trick for return type? +def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO: + # just in case, but I think this shouldn't be necessary anymore + # since when we cann .read_text, encoding is passed already + if mode in {'r', 'rt'}: + encoding = kwargs.get('encoding', 'utf8') + else: + encoding = None kwargs['encoding'] = encoding pp = Path(path) name = pp.name if name.endswith(Ext.xz): import lzma - r = lzma.open(pp, mode, *args, **kwargs) - # should only happen for binary mode? - # file:///usr/share/doc/python3/html/library/lzma.html?highlight=lzma#lzma.open - assert not isinstance(r, lzma.LZMAFile), r - return r + + # ugh. for lzma, 'r' means 'rb' + # https://github.com/python/cpython/blob/d01cf5072be5511595b6d0c35ace6c1b07716f8d/Lib/lzma.py#L97 + # whereas for regular open, 'r' means 'rt' + # https://docs.python.org/3/library/functions.html#open + if mode == 'r': + mode = 'rt' + kwargs['mode'] = mode + return lzma.open(pp, *args, **kwargs) elif name.endswith(Ext.zip): # eh. this behaviour is a bit dodgy... from zipfile import ZipFile @@ -72,7 +86,8 @@ def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO[str]: import lz4.frame # type: ignore return lz4.frame.open(str(pp), mode, *args, **kwargs) elif name.endswith(Ext.zstd) or name.endswith(Ext.zst): - return _zstd_open(pp, mode, *args, **kwargs) + kwargs['mode'] = mode + return _zstd_open(pp, *args, **kwargs) elif name.endswith(Ext.targz): import tarfile # FIXME pass mode? @@ -104,8 +119,15 @@ class CPath(BasePath): _accessor.open has to return file descriptor, doesn't work for compressed stuff. """ def open(self, *args, **kwargs): + kopen_kwargs = {} + mode = kwargs.get('mode') + if mode is not None: + kopen_kwargs['mode'] = mode + encoding = kwargs.get('encoding') + if encoding is not None: + kopen_kwargs['encoding'] = encoding # TODO assert read only? - return kopen(str(self)) + return kopen(str(self), **kopen_kwargs) open = kopen # TODO deprecate From 472f11616fe7fe3cc77257fca3181197ca0e294f Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Thu, 2 Mar 2023 23:06:45 +0000 Subject: [PATCH 3/4] fbmessenger.android: use Optional name, best to leave for the consumer to decide how to behave when it's unavailable e.g. using was causing issues when used as zulip contact name --- my/fbmessenger/android.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/my/fbmessenger/android.py b/my/fbmessenger/android.py index 69555cb5..b8bdbdab 100644 --- a/my/fbmessenger/android.py +++ b/my/fbmessenger/android.py @@ -41,7 +41,7 @@ def inputs() -> Sequence[Path]: @dataclass(unsafe_hash=True) class Sender: id: str - name: str + name: Optional[str] @dataclass(unsafe_hash=True) @@ -103,7 +103,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Res[Entity]]: for r in db.execute('''SELECT * FROM thread_users'''): # for messaging_actor_type == 'REDUCED_MESSAGING_ACTOR', name is None # but they are still referenced, so need to keep - name = r['name'] or '' + name = r['name'] user_key = r['user_key'] s = Sender( id=_normalise_user_id(user_key), @@ -135,7 +135,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Res[Entity]]: name = r['name'] # seems that it's only set for some groups if name is None: users = thread_users[thread_key] - name = ', '.join([u.name for u in users]) + name = ', '.join([u.name or u.id for u in users]) yield Thread( id=_normalise_thread_id(thread_key), name=name, From e397db761a02c55a7b1a904c390bbc146cdf24a5 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Thu, 2 Mar 2023 23:10:15 +0000 Subject: [PATCH 4/4] core.logging: sync logging helper with Promnesia, adds more goodies - print exception traceback by default when using logger.exception - COLLAPSE_DEBUG_LOGS env variable --- my/core/logging.py | 104 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 81 insertions(+), 23 deletions(-) diff --git a/my/core/logging.py b/my/core/logging.py index 03484bfd..6cfa12b1 100644 --- a/my/core/logging.py +++ b/my/core/logging.py @@ -1,24 +1,21 @@ #!/usr/bin/env python3 ''' Default logger is a bit meh, see 'test'/run this file for a demo -TODO name 'klogging' to avoid possible conflict with default 'logging' module -TODO shit. too late already? maybe use fallback & deprecate ''' - def test() -> None: - from typing import Callable import logging import sys + from typing import Callable M: Callable[[str], None] = lambda s: print(s, file=sys.stderr) M(" Logging module's defaults are not great...'") l = logging.getLogger('test_logger') - # todo why is mypy unhappy about these??? l.error("For example, this should be logged as error. But it's not even formatted properly, doesn't have logger name or level") M(" The reason is that you need to remember to call basicConfig() first") + logging.basicConfig() l.error("OK, this is better. But the default format kinda sucks, I prefer having timestamps and the file/line number") M("") @@ -32,8 +29,9 @@ def test() -> None: import logging -from typing import Union, Optional +from typing import Union, Optional, cast import os +import warnings Level = int LevelIsh = Optional[Union[Level, str]] @@ -56,42 +54,102 @@ def mklevel(level: LevelIsh) -> Level: FORMAT_NOCOLOR = FORMAT.format(start='', end='') DATEFMT = '%Y-%m-%d %H:%M:%S' +COLLAPSE_DEBUG_LOGS = os.environ.get('COLLAPSE_DEBUG_LOGS', False) + +_init_done = 'lazylogger_init_done' def setup_logger(logger: logging.Logger, level: LevelIsh) -> None: lvl = mklevel(level) try: import logzero # type: ignore[import] - except ModuleNotFoundError: - import warnings - - warnings.warn("You might want to install 'logzero' for nice colored logs!") - logger.setLevel(lvl) - h = logging.StreamHandler() - h.setLevel(lvl) - h.setFormatter(logging.Formatter(fmt=FORMAT_NOCOLOR, datefmt=DATEFMT)) - logger.addHandler(h) - logger.propagate = False # ugh. otherwise it duplicates log messages? not sure about it.. - else: formatter = logzero.LogFormatter( fmt=FORMAT_COLOR, datefmt=DATEFMT, ) + use_logzero = True + except ModuleNotFoundError: + warnings.warn("You might want to install 'logzero' for nice colored logs!") + formatter = logging.Formatter(fmt=FORMAT_NOCOLOR, datefmt=DATEFMT) + use_logzero = False + + logger.addFilter(AddExceptionTraceback()) + if use_logzero and not COLLAPSE_DEBUG_LOGS: # all set, nothing to do + # 'simple' setup logzero.setup_logger(logger.name, level=lvl, formatter=formatter) + return + + h = CollapseDebugHandler() if COLLAPSE_DEBUG_LOGS else logging.StreamHandler() + logger.setLevel(lvl) + h.setLevel(lvl) + h.setFormatter(formatter) + logger.addHandler(h) + logger.propagate = False # ugh. otherwise it duplicates log messages? not sure about it.. class LazyLogger(logging.Logger): def __new__(cls, name: str, level: LevelIsh = 'INFO') -> 'LazyLogger': logger = logging.getLogger(name) + # this is called prior to all _log calls so makes sense to do it here? - def isEnabledFor_lazyinit(*args, logger=logger, orig=logger.isEnabledFor, **kwargs): - att = 'lazylogger_init_done' - if not getattr(logger, att, False): # init once, if necessary + def isEnabledFor_lazyinit(*args, logger=logger, orig=logger.isEnabledFor, **kwargs) -> bool: + if not getattr(logger, _init_done, False): # init once, if necessary setup_logger(logger, level=level) - setattr(logger, att, True) + setattr(logger, _init_done, True) + logger.isEnabledFor = orig # restore the callback return orig(*args, **kwargs) - logger.isEnabledFor = isEnabledFor_lazyinit # type: ignore[assignment] - return logger # type: ignore[return-value] + # oh god.. otherwise might go into an inf loop + if not hasattr(logger, _init_done): + setattr(logger, _init_done, False) # will setup on the first call + logger.isEnabledFor = isEnabledFor_lazyinit # type: ignore[assignment] + return cast(LazyLogger, logger) + + +# by default, logging.exception isn't logging traceback +# which is a bit annoying since we have to +# also see https://stackoverflow.com/questions/75121925/why-doesnt-python-logging-exception-method-log-traceback-by-default +# tod also amend by post about defensive error handling? +class AddExceptionTraceback(logging.Filter): + def filter(self, record): + s = super().filter(record) + if s is False: + return False + if record.levelname == 'ERROR': + exc = record.msg + if isinstance(exc, BaseException): + if record.exc_info is None or record.exc_info == (None, None, None): + exc_info = (type(exc), exc, exc.__traceback__) + record.exc_info = exc_info + return s + + +# todo also save full log in a file? +class CollapseDebugHandler(logging.StreamHandler): + ''' + Collapses subsequent debug log lines and redraws on the same line. + Hopefully this gives both a sense of progress and doesn't clutter the terminal as much? + ''' + last = False + + def emit(self, record: logging.LogRecord) -> None: + try: + msg = self.format(record) + cur = record.levelno == logging.DEBUG and '\n' not in msg + if cur: + if self.last: + self.stream.write('\033[K' + '\r') # clear line + return carriage + else: + if self.last: + self.stream.write('\n') # clean up after the last debug line + self.last = cur + import os + columns, _ = os.get_terminal_size(0) + # ugh. the columns thing is meh. dunno I guess ultimately need curses for that + # TODO also would be cool to have a terminal post-processor? kinda like tail but aware of logging keyworkds (INFO/DEBUG/etc) + self.stream.write(msg + ' ' * max(0, columns - len(msg)) + ('' if cur else '\n')) + self.flush() + except: + self.handleError(record) if __name__ == '__main__':