From 844f904eabbd27fde622e9dfe2337db056eb8179 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 21 Aug 2025 16:13:27 +0930 Subject: [PATCH 1/6] pyln-testing: catch special CI string so we can have non-BROKEN CI warnings. Signed-off-by: Rusty Russell --- common/status_levels.h | 3 +++ contrib/pyln-testing/pyln/testing/fixtures.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/common/status_levels.h b/common/status_levels.h index 79f7bc57313a..baa04ebb4720 100644 --- a/common/status_levels.h +++ b/common/status_levels.h @@ -20,6 +20,9 @@ enum log_level { }; #define LOG_LEVEL_MAX LOG_BROKEN +/* Things that can happen in real life, but we don't expect under CI. */ +#define CI_UNEXPECTED "That's weird: " + const char *log_level_name(enum log_level level); bool log_level_parse(const char *levelstr, size_t len, enum log_level *level); diff --git a/contrib/pyln-testing/pyln/testing/fixtures.py b/contrib/pyln-testing/pyln/testing/fixtures.py index 215c54022325..4bd062ccc327 100644 --- a/contrib/pyln-testing/pyln/testing/fixtures.py +++ b/contrib/pyln-testing/pyln/testing/fixtures.py @@ -609,7 +609,7 @@ def checkBadGossip(node): def checkBroken(node): node.daemon.logs_catchup() - broken_lines = [l for l in node.daemon.logs if '**BROKEN**' in l] + broken_lines = [l for l in node.daemon.logs if '**BROKEN**' in l or "That's weird: " in l] if node.broken_log: ex = re.compile(node.broken_log) broken_lines = [l for l in broken_lines if not ex.search(l)] From 817936de5df7bef32916d0b8c4865e9d9f25ab49 Mon Sep 17 00:00:00 2001 From: Matt Whitlock Date: Thu, 21 Aug 2025 16:14:27 +0930 Subject: [PATCH 2/6] connectd: demote "Peer did not close, forcing close" to UNUSUAL This message is logged when connectd tries to shut down a peer connection but the transmit buffer remains full for too long, maybe because the peer has crashed or has lost connectivity. Logging this message at the BROKEN level is inappropriate because BROKEN is intended to flag logic errors that imply incorrect code in CLN. The error in question here is actually a runtime error, which does not imply incorrect code (at least on our side), so demote the log message to the UNUSUAL level. (Even this is still probably too severe, as this message is logged rather more frequently than "unusual" would suggest.) Changelog-None Closes: https://github.com/ElementsProject/lightning/issues/5678 --- connectd/multiplex.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/connectd/multiplex.c b/connectd/multiplex.c index 8740905b1137..305aef805f8f 100644 --- a/connectd/multiplex.c +++ b/connectd/multiplex.c @@ -106,8 +106,7 @@ static void maybe_free_peer(struct peer *peer) * not reading, we have to give up. */ static void close_peer_io_timeout(struct peer *peer) { - /* BROKEN means we'll trigger CI if we see it, though it's possible */ - status_peer_broken(&peer->id, "Peer did not close, forcing close"); + status_peer_unusual(&peer->id, CI_UNEXPECTED "Peer did not close, forcing close"); io_close(peer->to_peer); } From 058e8c83598836c9d8b7a4c2052e2910b2ce65da Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 26 Aug 2025 14:30:49 +0930 Subject: [PATCH 3/6] pytest: fix flake in tests/test_bookkeeper.py::test_bookkeeping_missed_chans_leases ``` FAILED tests/test_bookkeeper.py::test_bookkeeping_missed_chans_leases - AssertionError: assert [{'tag': 'channel_open', 'credit_msat': 506268000, 'debit_msat': 0}, {'tag': 'lease_fee', 'credit_msat': 0, 'debit_msat': 6268000}, {'tag': 'invoice', 'credit_msat': 0, 'debit_msat': 11000000}, {'tag': 'onchain_fee', 'credit_msat': 1314000, 'debit_msat': 0}] == [{'tag': 'channel_open', 'credit_msat': 506268000, 'debit_msat': 0}, {'tag': 'lease_fee', 'credit_msat': 0, 'debit_msat': 6268000}, {'tag': 'onchain_fee', 'credit_msat': 1314000, 'debit_msat': 0}, {'tag': 'invoice', 'credit_msat': 0, 'debit_msat': 11000000}] At index 2 diff: {'tag': 'invoice', 'credit_msat': 0, 'debit_msat': 11000000} != {'tag': 'onchain_fee', 'credit_msat': 1314000, 'debit_msat': 0} Full diff: [ { 'credit_msat': 506268000, 'debit_msat': 0, 'tag': 'channel_open', }, { 'credit_msat': 0, 'debit_msat': 6268000, 'tag': 'lease_fee', }, { + 'credit_msat': 0, + 'debit_msat': 11000000, + 'tag': 'invoice', + }, + { 'credit_msat': 1314000, 'debit_msat': 0, 'tag': 'onchain_fee', }, - { - 'credit_msat': 0, - 'debit_msat': 11000000, - 'tag': 'invoice', - }, ] ``` Signed-off-by: Rusty Russell --- tests/test_bookkeeper.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/test_bookkeeper.py b/tests/test_bookkeeper.py index edb57c0aa04f..1b4d086f9aec 100644 --- a/tests/test_bookkeeper.py +++ b/tests/test_bookkeeper.py @@ -318,8 +318,14 @@ def test_bookkeeping_missed_chans_leases(node_factory, bitcoind): l1.wait_local_channel_active(scid) channel_id = first_channel_id(l1, l2) + # Sigh. bookkeeper sorts events by timestamp. If the invoice event happens + # too close, it can change the order, so sleep here. + time.sleep(2) + + # Send l2 funds via the channel l1.pay(l2, invoice_msat) - l1.daemon.wait_for_log(r'coin movement:.*\'invoice\'') + # Make sure they're completely settled, so accounting correct. + wait_for(lambda: only_one(l1.rpc.listpeerchannels()['channels'])['htlcs'] == []) # Now turn the bookkeeper on and restart l1.stop() From dea44ef0dd7fa6951f7171c2bf0c31026cd12d9f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 26 Aug 2025 14:31:04 +0930 Subject: [PATCH 4/6] pytest: Don't test installing a plugin under valgrind. There's no way to increase reckless' (completely reasonable) 15 seconds timeout, and that can happen under valgrind & CI: ``` def test_reckless_uv_install(node_factory): node = get_reckless_node(node_factory) node.start() r = reckless([f"--network={NETWORK}", "-v", "install", "testpluguv"], dir=node.lightning_dir) > assert r.returncode == 0 E assert 1 == 0 E + where 1 = self.returncode, self.stdout, self.stderr.returncode tests/test_reckless.py:359: AssertionError ... ***RECKLESS STDERR*** config file not found: /tmp/ltests-tui1vmrg/test_reckless_uv_install_1/lightning-1/regtest/config press [Y] to create one now. config file not found: /tmp/ltests-tui1vmrg/test_reckless_uv_install_1/lightning-1/reckless/regtest-reckless.conf config file not found: /tmp/ltests-tui1vmrg/test_reckless_uv_install_1/lightning-1/reckless/.sources Traceback (most recent call last): File "/home/runner/work/lightning/lightning/tools/reckless", line 2091, in log.add_result(args.func(target)) File "/home/runner/work/lightning/lightning/tools/reckless", line 1524, in install return _enable_installed(installed, plugin_name) File "/home/runner/work/lightning/lightning/tools/reckless", line 1476, in _enable_installed if enable(installed.name): File "/home/runner/work/lightning/lightning/tools/reckless", line 1647, in enable lightning_cli('plugin', 'start', path) File "/home/runner/work/lightning/lightning/tools/reckless", line 1613, in lightning_cli clncli = run(cmd, stdout=PIPE, stderr=PIPE, check=False, timeout=timeout) File "/opt/hostedtoolcache/Python/3.10.18/x64/lib/python3.10/subprocess.py", line 505, in run stdout, stderr = process.communicate(input, timeout=timeout) File "/opt/hostedtoolcache/Python/3.10.18/x64/lib/python3.10/subprocess.py", line 1154, in communicate stdout, stderr = self._communicate(input, endtime, timeout) File "/opt/hostedtoolcache/Python/3.10.18/x64/lib/python3.10/subprocess.py", line 2022, in _communicate self._check_timeout(endtime, orig_timeout, stdout, stderr) File "/opt/hostedtoolcache/Python/3.10.18/x64/lib/python3.10/subprocess.py", line 1198, in _check_timeout raise TimeoutExpired( subprocess.TimeoutExpired: Command '['/home/runner/work/lightning/lightning/cli/lightning-cli', '--network=regtest', '--lightning-dir=/tmp/ltests-tui1vmrg/test_reckless_uv_install_1/lightning-1', 'plugin', 'start', '/tmp/ltests-tui1vmrg/test_reckless_uv_install_1/lightning-1/reckless/testpluguv/testpluguv.py']' timed out after 15 seconds ``` Signed-off-by: Rusty Russell --- tests/test_reckless.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_reckless.py b/tests/test_reckless.py index 64efa6ab6418..10ade3624795 100644 --- a/tests/test_reckless.py +++ b/tests/test_reckless.py @@ -2,7 +2,7 @@ import subprocess from pathlib import PosixPath, Path import socket -from pyln.testing.utils import VALGRIND +from pyln.testing.utils import VALGRIND, SLOW_MACHINE import pytest import os import re @@ -351,6 +351,7 @@ def test_tag_install(node_factory): header = line +@unittest.skipIf(VALGRIND and SLOW_MACHINE, "node too slow for starting plugin under valgrind") def test_reckless_uv_install(node_factory): node = get_reckless_node(node_factory) node.start() From ef44e3cb0d95299719936183331ca64d2de53221 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 26 Aug 2025 14:31:04 +0930 Subject: [PATCH 5/6] pytest: fix flake in test_coinmoves_unilateral_htlc_fulfill DER sigs! Normally, the commitment weight is: ``` Anchorspend for local commit tx fee 9751sat (w=722), commit_tx fee 4866sat (w=1284): package feerate 7286 perkw Creating anchor spend for local commit tx 6a0816ca60d499edc70bfb786ebd164fb7a55d234c84d926102f5bd35087fd45: we're paying fee 9751sat ``` But if we're "lucky" the commitment tx is shorter: ``` Anchorspend for local commit tx fee 9744sat (w=722), commit_tx fee 4866sat (w=1283): package feerate 7286 perkw Creating anchor spend for local commit tx acf78532a9448dd62a4e6319a3d2712189a88b6e59abc637260067d60df70782: we're paying fee 9744sat ``` The resulting failure: ``` 2025-08-21T02:30:34.1906751Z > assert moves == expected ... ... 2025-08-21T02:30:34.1965346Z E { 2025-08-21T02:30:34.1965529Z E 'account_id': 'wallet', 2025-08-21T02:30:34.1965767Z E 'blockheight': 104, 2025-08-21T02:30:34.1965997Z E 'created_index': 6, 2025-08-21T02:30:34.1966229Z E - 'credit_msat': 15579000, 2025-08-21T02:30:34.1966467Z E ? ^^ 2025-08-21T02:30:34.1966698Z E + 'credit_msat': 15586000, 2025-08-21T02:30:34.1966927Z E ? ^^ 2025-08-21T02:30:34.1967150Z E 'debit_msat': 0, 2025-08-21T02:30:34.1967376Z E 'extra_tags': [], 2025-08-21T02:30:34.1967599Z E - 'output_msat': 15579000, 2025-08-21T02:30:34.1967832Z E ? ^^ 2025-08-21T02:30:34.1968061Z E + 'output_msat': 15586000, 2025-08-21T02:30:34.1968294Z E ? ^^ 2025-08-21T02:30:34.1968540Z E 'primary_tag': 'deposit', 2025-08-21T02:30:34.1968908Z E 'utxo': 'acf78532a9448dd62a4e6319a3d2712189a88b6e59abc637260067d60df70782:0', 2025-08-21T02:30:34.1969366Z E }, ``` Signed-off-by: Rusty Russell --- tests/test_coinmoves.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test_coinmoves.py b/tests/test_coinmoves.py index 0cdefe83fc73..bf5407fd6685 100644 --- a/tests/test_coinmoves.py +++ b/tests/test_coinmoves.py @@ -1221,6 +1221,11 @@ def test_coinmoves_unilateral_htlc_fulfill(node_factory, bitcoind): line = l1.daemon.is_in_log('Tracking output.*/OUR_HTLC') htlc = int(re.search(r'output [0-9a-f]{64}:([0-9]):', line).group(1)) + # commitment tx weight can vary (DER sigs, FML) and so even though the feerate target + # is fixed, the amount of the child tx we create will vary, hence the change varies. + # So it's usually 15579000, but one in 128 it will be 15586000... + anchor_change_msats = bitcoind.rpc.gettxout(anchor_spend_txid, 0)['value'] * 100_000_000_000 + expected_chain1 += [{'account_id': 'wallet', # Anchor spend from fundchannel change 'blockheight': 104, 'credit_msat': 0, @@ -1232,10 +1237,10 @@ def test_coinmoves_unilateral_htlc_fulfill(node_factory, bitcoind): 'utxo': f"{fundchannel['txid']}:{fundchannel['outnum'] ^ 1}"}, {'account_id': 'wallet', # Change from anchor spend 'blockheight': 104, - 'credit_msat': 15579000, + 'credit_msat': anchor_change_msats, 'debit_msat': 0, 'extra_tags': [], - 'output_msat': 15579000, + 'output_msat': anchor_change_msats, 'primary_tag': 'deposit', 'utxo': f"{anchor_spend_txid}:0"}, {'account_id': fundchannel['channel_id'], From 6be09828ef07a0331810cfe7f2786b24c0f8d2db Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 26 Aug 2025 16:01:27 +0930 Subject: [PATCH 6/6] pytest: clean up JSON sql test. 1. Establish a channel with l3; we already have one with l2. 2. Don't bother generating 6 more blocks (fundchannel ensures it's mined). 3. Allow htlcs to be empty: Whitslack reported that happens for him 4. Use only_one() to access where we insist there is only one element in the list. 5. Tighten tests to assert the exact contents, not just test some. Signed-off-by: Rusty Russell Fixes: https://github.com/ElementsProject/lightning/issues/8497 --- tests/test_plugin.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/tests/test_plugin.py b/tests/test_plugin.py index b01005479322..9f1bed283ff2 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -4045,19 +4045,29 @@ def test_sql(node_factory, bitcoind): wait_for(lambda: l3.rpc.sql("SELECT * FROM nodes WHERE alias = '{}'".format(alias))['rows'] != []) # Test json functions - l1.fundchannel(l2) - bitcoind.generate_block(6) - l1.rpc.pay(l2.rpc.invoice(amount_msat=1000000, label='inv1000', description='description 1000 msat')['bolt11']) - ret = l1.rpc.sql("SELECT json_object('peer_id', hex(pc.peer_id), 'alias', alias, 'htlcs'," + scidl1l3, _ = l1.fundchannel(l3) + l1.rpc.pay(l3.rpc.invoice(amount_msat=1000000, label='inv1000', description='description 1000 msat')['bolt11']) + + # Two channels, l1->l3 *may* have an HTLC in flight. + ret = l1.rpc.sql("SELECT json_object('peer_id', hex(pc.peer_id), 'alias', alias, 'scid', short_channel_id, 'htlcs'," " (SELECT json_group_array(json_object('id', hex(id), 'amount_msat', amount_msat))" " FROM peerchannels_htlcs ph WHERE ph.row = pc.rowid)) FROM peerchannels pc JOIN nodes n" " ON pc.peer_id = n.nodeid ORDER BY n.alias, pc.peer_id;") assert len(ret['rows']) == 2 - row1 = json.loads(ret['rows'][0][0]) - row2 = json.loads(ret['rows'][1][0]) - assert row1['peer_id'] == format(l2.info['id'].upper()) - assert len(row2['htlcs']) == 1 - assert row2['htlcs'][0]['amount_msat'] == 1000000 + row1 = json.loads(only_one(ret['rows'][0])) + row2 = json.loads(only_one(ret['rows'][1])) + assert row1 in ({"peer_id": format(l3.info['id'].upper()), + "alias": l3.rpc.getinfo()['alias'], + "scid": scidl1l3, + "htlcs": []}, + {"peer_id": format(l3.info['id'].upper()), + "alias": l3.rpc.getinfo()['alias'], + "scid": scidl1l3, + "htlcs": [{"id": "30", "amount_msat": 1000000}]}) + assert row2 == {"peer_id": format(l2.info['id'].upper()), + "alias": l2.rpc.getinfo()['alias'], + "scid": scid, + "htlcs": []} def test_sql_deprecated(node_factory, bitcoind):