Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

core.logging/core.kompress updates #277

Merged
merged 4 commits into from
Mar 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 39 additions & 16 deletions my/core/kompress.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,39 +18,54 @@ class Ext:
zip = '.zip'
lz4 = '.lz4'
zstd = '.zstd'
zst = '.zst'
targz = '.tar.gz'


def is_compressed(p: Path) -> bool:
    """Return True if the file name of ``p`` ends with a known compressed/archive extension.

    The check is purely name-based (``.xz``, ``.zip``, ``.lz4``, ``.zstd``, ``.zst``,
    ``.tar.gz`` as listed on ``Ext``); the file contents are never inspected.
    """
    # todo kinda lame way for now.. use mime ideally?
    # should cooperate with kompress.kopen?
    # str.endswith accepts a tuple, so a single call covers every extension
    return p.name.endswith((Ext.xz, Ext.zip, Ext.lz4, Ext.zstd, Ext.zst, Ext.targz))


def _zstd_open(path: Path, *args, **kwargs) -> IO:
    """Open a zstd-compressed file for reading.

    ``mode`` is read from the keyword arguments (default ``'rt'``):

    - ``'rb'``: return the decompressing stream reader itself (bytes);
    - anything else: wrap the reader in ``io.TextIOWrapper``, forwarding the
      remaining keyword arguments (e.g. ``encoding``) to it.

    Positional ``*args`` are accepted for signature compatibility but unused.
    """
    import zstandard as zstd # type: ignore

    # pop with a default: 'mode' is optional here, and TextIOWrapper doesn't like it
    mode = kwargs.pop('mode', 'rt')

    dctx = zstd.ZstdDecompressor()
    fh = path.open('rb')
    reader = dctx.stream_reader(fh)

    if mode == 'rb':
        return reader
    # must be text mode
    return io.TextIOWrapper(reader, **kwargs) # meh


# TODO returns protocol that we can call 'read' against?
# TODO use the 'dependent type' trick?
def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO[str]:
# TODO handle mode in *rags?
encoding = kwargs.get('encoding', 'utf8')
# TODO use the 'dependent type' trick for return type?
def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO:
# just in case, but I think this shouldn't be necessary anymore
# since when we call .read_text, encoding is passed already
if mode in {'r', 'rt'}:
encoding = kwargs.get('encoding', 'utf8')
else:
encoding = None
kwargs['encoding'] = encoding

pp = Path(path)
name = pp.name
if name.endswith(Ext.xz):
import lzma
r = lzma.open(pp, mode, *args, **kwargs)
# should only happen for binary mode?
# file:///usr/share/doc/python3/html/library/lzma.html?highlight=lzma#lzma.open
assert not isinstance(r, lzma.LZMAFile), r
return r

# ugh. for lzma, 'r' means 'rb'
# https://github.com/python/cpython/blob/d01cf5072be5511595b6d0c35ace6c1b07716f8d/Lib/lzma.py#L97
# whereas for regular open, 'r' means 'rt'
# https://docs.python.org/3/library/functions.html#open
if mode == 'r':
mode = 'rt'
kwargs['mode'] = mode
return lzma.open(pp, *args, **kwargs)
elif name.endswith(Ext.zip):
# eh. this behaviour is a bit dodgy...
from zipfile import ZipFile
Expand All @@ -70,8 +85,9 @@ def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO[str]:
elif name.endswith(Ext.lz4):
import lz4.frame # type: ignore
return lz4.frame.open(str(pp), mode, *args, **kwargs)
elif name.endswith(Ext.zstd):
return _zstd_open(pp, mode, *args, **kwargs)
elif name.endswith(Ext.zstd) or name.endswith(Ext.zst):
kwargs['mode'] = mode
return _zstd_open(pp, *args, **kwargs)
elif name.endswith(Ext.targz):
import tarfile
# FIXME pass mode?
Expand Down Expand Up @@ -103,8 +119,15 @@ class CPath(BasePath):
_accessor.open has to return file descriptor, doesn't work for compressed stuff.
"""
def open(self, *args, **kwargs):
    """Open this path through ``kopen``.

    Only the ``mode`` and ``encoding`` keyword arguments are forwarded, and only
    when they are not None; positional arguments and any other keyword
    arguments are ignored.
    """
    kopen_kwargs = {
        key: kwargs[key]
        for key in ('mode', 'encoding')
        if kwargs.get(key) is not None
    }
    # TODO assert read only?
    return kopen(str(self), **kopen_kwargs)


open = kopen # TODO deprecate
Expand Down
104 changes: 81 additions & 23 deletions my/core/logging.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,21 @@
#!/usr/bin/env python3
'''
Default logger is a bit meh, see 'test'/run this file for a demo
TODO name 'klogging' to avoid possible conflict with default 'logging' module
TODO shit. too late already? maybe use fallback & deprecate
'''


def test() -> None:
from typing import Callable
import logging
import sys
from typing import Callable

M: Callable[[str], None] = lambda s: print(s, file=sys.stderr)

M(" Logging module's defaults are not great...'")
l = logging.getLogger('test_logger')
# todo why is mypy unhappy about these???
l.error("For example, this should be logged as error. But it's not even formatted properly, doesn't have logger name or level")

M(" The reason is that you need to remember to call basicConfig() first")
logging.basicConfig()
l.error("OK, this is better. But the default format kinda sucks, I prefer having timestamps and the file/line number")

M("")
Expand All @@ -32,8 +29,9 @@ def test() -> None:


import logging
from typing import Union, Optional
from typing import Union, Optional, cast
import os
import warnings

Level = int
LevelIsh = Optional[Union[Level, str]]
Expand All @@ -56,42 +54,102 @@ def mklevel(level: LevelIsh) -> Level:
FORMAT_NOCOLOR = FORMAT.format(start='', end='')
DATEFMT = '%Y-%m-%d %H:%M:%S'

COLLAPSE_DEBUG_LOGS = os.environ.get('COLLAPSE_DEBUG_LOGS', False)

_init_done = 'lazylogger_init_done'

def setup_logger(logger: logging.Logger, level: LevelIsh) -> None:
    """Configure ``logger`` with the module's format, level and handlers.

    - ``level`` is normalised through ``mklevel``.
    - If ``logzero`` is importable its colored ``LogFormatter`` is used; otherwise a
      warning is emitted and a plain ``logging.Formatter`` is used instead.
    - An ``AddExceptionTraceback`` filter is always attached to the logger.
    - With logzero available and ``COLLAPSE_DEBUG_LOGS`` unset, setup is delegated to
      ``logzero.setup_logger``. Otherwise a single stream handler is installed
      (``CollapseDebugHandler`` when ``COLLAPSE_DEBUG_LOGS`` is set) and propagation
      to parent loggers is disabled.
    """
    lvl = mklevel(level)
    try:
        import logzero # type: ignore[import]
        formatter = logzero.LogFormatter(
            fmt=FORMAT_COLOR,
            datefmt=DATEFMT,
        )
        use_logzero = True
    except ModuleNotFoundError:
        warnings.warn("You might want to install 'logzero' for nice colored logs!")
        formatter = logging.Formatter(fmt=FORMAT_NOCOLOR, datefmt=DATEFMT)
        use_logzero = False

    logger.addFilter(AddExceptionTraceback())
    if use_logzero and not COLLAPSE_DEBUG_LOGS: # all set, nothing to do
        # 'simple' setup
        logzero.setup_logger(logger.name, level=lvl, formatter=formatter)
        return

    h = CollapseDebugHandler() if COLLAPSE_DEBUG_LOGS else logging.StreamHandler()
    logger.setLevel(lvl)
    h.setLevel(lvl)
    h.setFormatter(formatter)
    logger.addHandler(h)
    logger.propagate = False # ugh. otherwise it duplicates log messages? not sure about it..


class LazyLogger(logging.Logger):
    """Logger whose handler/formatter setup is deferred until it is first used.

    ``LazyLogger(name, level)`` returns the regular ``logging.getLogger(name)``
    object (not a new instance of this class). Its ``isEnabledFor`` is temporarily
    replaced by a wrapper that runs ``setup_logger`` once, restores the original
    method, and then delegates to it.
    """
    def __new__(cls, name: str, level: LevelIsh = 'INFO') -> 'LazyLogger':
        logger = logging.getLogger(name)

        # this is called prior to all _log calls so makes sense to do it here?
        def isEnabledFor_lazyinit(*args, logger=logger, orig=logger.isEnabledFor, **kwargs) -> bool:
            if not getattr(logger, _init_done, False): # init once, if necessary
                setup_logger(logger, level=level)
                setattr(logger, _init_done, True)
                logger.isEnabledFor = orig # restore the callback
            return orig(*args, **kwargs)

        # oh god.. otherwise might go into an inf loop
        # (only wrap a logger that hasn't been wrapped/initialised yet)
        if not hasattr(logger, _init_done):
            setattr(logger, _init_done, False) # will setup on the first call
            logger.isEnabledFor = isEnabledFor_lazyinit # type: ignore[assignment]
        return cast(LazyLogger, logger)


# by default, logging.exception isn't logging traceback
# which is a bit annoying since we have to
# also see https://stackoverflow.com/questions/75121925/why-doesnt-python-logging-exception-method-log-traceback-by-default
# todo also amend my post about defensive error handling?
class AddExceptionTraceback(logging.Filter):
    """Attach traceback info to ERROR records whose message is an exception object.

    When a record at ERROR level has an exception instance as its ``msg`` and
    carries no ``exc_info`` yet, ``exc_info`` is filled in from that exception so
    the formatter can render its traceback. Records are never rejected by this
    filter beyond what the base ``logging.Filter`` already rejects.
    """
    def filter(self, record: logging.LogRecord) -> bool:
        passed = super().filter(record)
        if passed is False:
            return False
        # compare the numeric level: the level *name* can be remapped via logging.addLevelName
        if record.levelno == logging.ERROR:
            exc = record.msg
            if isinstance(exc, BaseException):
                if record.exc_info is None or record.exc_info == (None, None, None):
                    record.exc_info = (type(exc), exc, exc.__traceback__)
        return passed


# todo also save full log in a file?
class CollapseDebugHandler(logging.StreamHandler):
    '''
    Collapses subsequent debug log lines and redraws on the same line.
    Hopefully this gives both a sense of progress and doesn't clutter the terminal as much?

    A record is "collapsible" when it is at DEBUG level and its formatted message
    is a single line. Consecutive collapsible records overwrite each other; any
    other record is written on its own line.
    '''
    # whether the previously emitted record was a collapsible one
    last = False

    def emit(self, record: logging.LogRecord) -> None:
        try:
            msg = self.format(record)
            cur = record.levelno == logging.DEBUG and '\n' not in msg
            if cur:
                if self.last:
                    self.stream.write('\033[K' + '\r') # clear line + return carriage
            else:
                if self.last:
                    self.stream.write('\n') # clean up after the last debug line
            self.last = cur
            import os
            try:
                columns, _ = os.get_terminal_size(0)
            except (OSError, ValueError):
                # stdin isn't a terminal (pipe, cron, tests, ...): emit without padding
                # rather than losing the record
                columns = 0
            # ugh. the columns thing is meh. dunno I guess ultimately need curses for that
            # TODO also would be cool to have a terminal post-processor? kinda like tail but aware of logging keywords (INFO/DEBUG/etc)
            self.stream.write(msg + ' ' * max(0, columns - len(msg)) + ('' if cur else '\n'))
            self.flush()
        except RecursionError:
            # same policy as logging.StreamHandler.emit
            raise
        except Exception:
            # don't swallow KeyboardInterrupt/SystemExit like a bare except would
            self.handleError(record)


if __name__ == '__main__':
Expand Down
6 changes: 3 additions & 3 deletions my/fbmessenger/android.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def inputs() -> Sequence[Path]:
@dataclass(unsafe_hash=True)
class Sender:
    """A message sender, identified by ``id``; hashable via ``unsafe_hash``."""
    id: str
    # may be None: the source row does not always carry a name
    name: Optional[str]


@dataclass(unsafe_hash=True)
Expand Down Expand Up @@ -103,7 +103,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
for r in db.execute('''SELECT * FROM thread_users'''):
# for messaging_actor_type == 'REDUCED_MESSAGING_ACTOR', name is None
# but they are still referenced, so need to keep
name = r['name'] or '<NAME UNAVAILABLE>'
name = r['name']
user_key = r['user_key']
s = Sender(
id=_normalise_user_id(user_key),
Expand Down Expand Up @@ -135,7 +135,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
name = r['name'] # seems that it's only set for some groups
if name is None:
users = thread_users[thread_key]
name = ', '.join([u.name for u in users])
name = ', '.join([u.name or u.id for u in users])
yield Thread(
id=_normalise_thread_id(thread_key),
name=name,
Expand Down