forked from haproxy/haproxy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquic_rx.c
2238 lines (1952 loc) · 69.1 KB
/
quic_rx.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* QUIC protocol implementation. Lower layer with internal features implemented
* here such as QUIC encryption, idle timeout, acknowledgement and
* retransmission.
*
* Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*/
#include <haproxy/quic_rx.h>
#include <haproxy/h3.h>
#include <haproxy/list.h>
#include <haproxy/ncbuf.h>
#include <haproxy/proto_quic.h>
#include <haproxy/quic_ack.h>
#include <haproxy/quic_cid.h>
#include <haproxy/quic_retransmit.h>
#include <haproxy/quic_retry.h>
#include <haproxy/quic_sock.h>
#include <haproxy/quic_stream.h>
#include <haproxy/quic_ssl.h>
#include <haproxy/quic_tls.h>
#include <haproxy/quic_trace.h>
#include <haproxy/quic_tx.h>
#include <haproxy/ssl_sock.h>
#include <haproxy/trace.h>
/* Memory pools declared for the RX path. Each DECLARE_POOL() takes the pool
 * head variable, the pool name and the size of the objects it serves:
 *  - "quic_conn_rxbuf": buffers of QUIC_CONN_RX_BUFSZ bytes,
 *  - "quic_dgram":      objects of type struct quic_dgram,
 *  - "quic_rx_packet":  objects of type struct quic_rx_packet.
 */
DECLARE_POOL(pool_head_quic_conn_rxbuf, "quic_conn_rxbuf", QUIC_CONN_RX_BUFSZ);
DECLARE_POOL(pool_head_quic_dgram, "quic_dgram", sizeof(struct quic_dgram));
DECLARE_POOL(pool_head_quic_rx_packet, "quic_rx_packet", sizeof(struct quic_rx_packet));
/* Rebuild a full packet number from <truncated_pn>, its truncated on-wire
 * value. <largest_pn> is the largest packet number received so far and
 * <pn_nbits> is the number of bits used to encode the truncated value (its
 * length in bytes * 8).
 *
 * The candidate is built by replacing the <pn_nbits> low-order bits of
 * <largest_pn> + 1 with <truncated_pn>. It is then moved up or down by one
 * window (2^<pn_nbits>) when it lies at least half a window below, or more
 * than half a window above, <largest_pn> + 1.
 *
 * See https://quicwg.org/base-drafts/draft-ietf-quic-transport.html#packet-encoding
 */
static uint64_t decode_packet_number(uint64_t largest_pn,
                                     uint32_t truncated_pn, unsigned int pn_nbits)
{
	const uint64_t window = (uint64_t)1 << pn_nbits;
	const uint64_t half_window = window / 2;
	const uint64_t expected = largest_pn + 1;
	uint64_t candidate = (expected & ~(window - 1)) | truncated_pn;

	/* Too far below the expected value: use the next window, provided the
	 * candidate is below QUIC_MAX_PACKET_NUM - window.
	 */
	if (candidate + half_window <= expected &&
	    candidate < QUIC_MAX_PACKET_NUM - window) {
		candidate += window;
	}
	/* Too far above the expected value: use the previous window, unless
	 * this would underflow.
	 */
	else if (candidate >= window && candidate > expected + half_window) {
		candidate -= window;
	}

	return candidate;
}
/* Remove the header protection of <pkt> QUIC packet for <qc> connection using
 * the RX header protection context of <tls_ctx>.
 *
 * <byte0> is the address of the first byte of the packet and <pn> the address
 * of its packet number field. Both are unmasked in place. <largest_pn> is the
 * largest received packet number, used to rebuild the full packet number from
 * its truncated value.
 *
 * On success, <pkt->pn> and <pkt->pnl> are set to the decoded packet number
 * and to its encoded length in bytes.
 *
 * Returns 1 if succeeded, 0 if not (packet too short or mask computation
 * failure).
 */
static int qc_do_rm_hp(struct quic_conn *qc,
                       struct quic_rx_packet *pkt, struct quic_tls_ctx *tls_ctx,
                       int64_t largest_pn, unsigned char *pn, unsigned char *byte0)
{
	unsigned char mask[5] = {0};
	unsigned char *sample;
	uint32_t truncated_pn = 0;
	int pnlen, idx;
	int ret = 0;

	TRACE_ENTER(QUIC_EV_CONN_RMHP, qc);

	/* The sample starts QUIC_PACKET_PN_MAXLEN bytes after the packet
	 * number field: make sure the packet is long enough to hold it.
	 */
	if (pkt->len - (pn - byte0) < QUIC_PACKET_PN_MAXLEN + sizeof mask) {
		TRACE_PROTO("too short packet", QUIC_EV_CONN_RMHP, qc, pkt);
		goto leave;
	}

	sample = pn + QUIC_PACKET_PN_MAXLEN;
	if (!quic_tls_aes_decrypt(mask, sample, sizeof mask, tls_ctx->rx.hp_ctx)) {
		TRACE_ERROR("HP removing failed", QUIC_EV_CONN_RMHP, qc, pkt);
		goto leave;
	}

	/* Fewer bits of the first byte are protected for long header packets
	 * than for short header ones.
	 */
	if (*byte0 & QUIC_PACKET_LONG_HEADER_BIT)
		*byte0 ^= mask[0] & 0xf;
	else
		*byte0 ^= mask[0] & 0x1f;

	/* The packet number length is only readable once the first byte has
	 * been unmasked.
	 */
	pnlen = (*byte0 & QUIC_PACKET_PNL_BITMASK) + 1;
	for (idx = 0; idx < pnlen; idx++) {
		pn[idx] ^= mask[idx + 1];
		truncated_pn <<= 8;
		truncated_pn |= pn[idx];
	}

	/* Store remaining information for this unprotected header */
	pkt->pn = decode_packet_number(largest_pn, truncated_pn, pnlen * 8);
	pkt->pnl = pnlen;
	ret = 1;

 leave:
	TRACE_LEAVE(QUIC_EV_CONN_RMHP, qc);
	return ret;
}
/* Decrypt <pkt> packet using encryption level <qel> for <qc> connection.
* Decryption is done in place in packet buffer.
*
* Returns 1 on success else 0.
*/
static int qc_pkt_decrypt(struct quic_conn *qc, struct quic_enc_level *qel,
struct quic_rx_packet *pkt)
{
int ret, kp_changed;
unsigned char iv[QUIC_TLS_IV_LEN];
struct quic_tls_ctx *tls_ctx =
qc_select_tls_ctx(qc, qel, pkt->type, pkt->version);
EVP_CIPHER_CTX *rx_ctx = tls_ctx->rx.ctx;
unsigned char *rx_iv = tls_ctx->rx.iv;
size_t rx_iv_sz = tls_ctx->rx.ivlen;
unsigned char *rx_key = tls_ctx->rx.key;
TRACE_ENTER(QUIC_EV_CONN_RXPKT, qc);
ret = 0;
kp_changed = 0;
if (pkt->type == QUIC_PACKET_TYPE_SHORT) {
/* The two tested bits are not at the same position,
* this is why they are first both inversed.
*/
if (!(*pkt->data & QUIC_PACKET_KEY_PHASE_BIT) ^ !(tls_ctx->flags & QUIC_FL_TLS_KP_BIT_SET)) {
if (pkt->pn < tls_ctx->rx.pn) {
/* The lowest packet number of a previous key phase
* cannot be null if it really stores previous key phase
* secrets.
*/
// TODO: check if BUG_ON() more suitable
if (!qc->ku.prv_rx.pn) {
TRACE_ERROR("null previous packet number", QUIC_EV_CONN_RXPKT, qc);
goto leave;
}
rx_ctx = qc->ku.prv_rx.ctx;
rx_iv = qc->ku.prv_rx.iv;
rx_key = qc->ku.prv_rx.key;
}
else if (pkt->pn > qel->pktns->rx.largest_pn) {
/* Next key phase */
TRACE_PROTO("Key phase changed", QUIC_EV_CONN_RXPKT, qc);
kp_changed = 1;
rx_ctx = qc->ku.nxt_rx.ctx;
rx_iv = qc->ku.nxt_rx.iv;
rx_key = qc->ku.nxt_rx.key;
}
}
}
quic_aead_iv_build(iv, sizeof iv, rx_iv, rx_iv_sz, pkt->pn);
ret = quic_tls_decrypt(pkt->data + pkt->aad_len, pkt->len - pkt->aad_len,
pkt->data, pkt->aad_len,
rx_ctx, tls_ctx->rx.aead, rx_key, iv);
if (!ret) {
TRACE_ERROR("quic_tls_decrypt() failed", QUIC_EV_CONN_RXPKT, qc);
goto leave;
}
/* Update the keys only if the packet decryption succeeded. */
if (kp_changed) {
quic_tls_rotate_keys(qc);
/* Toggle the Key Phase bit */
tls_ctx->flags ^= QUIC_FL_TLS_KP_BIT_SET;
/* Store the lowest packet number received for the current key phase */
tls_ctx->rx.pn = pkt->pn;
/* Prepare the next key update */
if (!quic_tls_key_update(qc)) {
TRACE_ERROR("quic_tls_key_update() failed", QUIC_EV_CONN_RXPKT, qc);
goto leave;
}
}
/* Update the packet length (required to parse the frames). */
pkt->len -= QUIC_TLS_TAG_LEN;
ret = 1;
leave:
TRACE_LEAVE(QUIC_EV_CONN_RXPKT, qc);
return ret;
}
/* Remove from <stream> the acknowledged frames.
 *
 * The frames stored in <stream->acked_frms> are visited by increasing offset,
 * stopping at the first one whose offset is greater than <stream->ack_offset>.
 * Each visited frame is passed to qc_stream_desc_ack(), then removed from the
 * tree and released.
 *
 * Returns 1 if qc_stream_desc_ack() returned a non-null value for at least
 * one frame, or if <stream> was reset to NULL by this function (in which case
 * the walk stops immediately), else 0.
 */
static int quic_stream_try_to_consume(struct quic_conn *qc,
struct qc_stream_desc *stream)
{
int ret;
struct eb64_node *frm_node;
TRACE_ENTER(QUIC_EV_CONN_ACKSTRM, qc);
ret = 0;
frm_node = eb64_first(&stream->acked_frms);
while (frm_node) {
struct qf_stream *strm_frm;
struct quic_frame *frm;
size_t offset, len;
strm_frm = eb64_entry(frm_node, struct qf_stream, offset);
/* Copy the frame range before calling qc_stream_desc_ack(): the frame
 * may not be usable anymore afterwards.
 */
offset = strm_frm->offset.key;
len = strm_frm->len;
/* Frames are sorted by offset: nothing more can be consumed. */
if (offset > stream->ack_offset)
break;
if (qc_stream_desc_ack(&stream, offset, len)) {
/* cf. next comment: the frame may be freed at this stage, this is
 * why it is passed to the trace only if <stream> is still set.
 */
TRACE_DEVEL("stream consumed", QUIC_EV_CONN_ACKSTRM,
qc, stream ? strm_frm : NULL, stream);
ret = 1;
}
/* If <stream> is NULL after qc_stream_desc_ack(), it means the frame
 * has been freed along with the stream frames tree. Nothing to do
 * anymore in here.
 */
if (!stream) {
qc_check_close_on_released_mux(qc);
ret = 1;
goto leave;
}
/* Move to the next node before removing the current one from the tree. */
frm_node = eb64_next(frm_node);
eb64_delete(&strm_frm->offset);
frm = container_of(strm_frm, struct quic_frame, stream);
qc_release_frm(qc, frm);
}
leave:
TRACE_LEAVE(QUIC_EV_CONN_ACKSTRM, qc);
return ret;
}
/* Handle <frm> frame whose packet it is attached to has just been
 * acknowledged.
 *
 * For STREAM frames, the stream is looked up by its ID in
 * <qc->streams_by_id>:
 *  - if not found, the frame is released,
 *  - if the frame offset is not greater than <stream->ack_offset>, the range
 *    is passed to qc_stream_desc_ack() and the frame is released,
 *  - otherwise the frame is stored into <stream->acked_frms> tree.
 * Unless the stream was not found or was freed, quic_stream_try_to_consume()
 * is then called for this stream.
 *
 * All the other frame types are released. Never fail.
 */
static void qc_handle_newly_acked_frm(struct quic_conn *qc, struct quic_frame *frm)
{
TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
TRACE_PROTO("RX ack TX frm", QUIC_EV_CONN_PRSAFRM, qc, frm);
switch (frm->type) {
case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F:
{
struct qf_stream *strm_frm = &frm->stream;
struct eb64_node *node = NULL;
struct qc_stream_desc *stream = NULL;
const size_t offset = strm_frm->offset.key;
const size_t len = strm_frm->len;
/* do not use strm_frm->stream as the qc_stream_desc instance
 * might be freed at this stage. Use the id to do a proper
 * lookup.
 *
 * TODO if lookup operation impact on the perf is noticeable,
 * implement a refcount on qc_stream_desc instances.
 */
node = eb64_lookup(&qc->streams_by_id, strm_frm->id);
if (!node) {
TRACE_DEVEL("acked stream for released stream", QUIC_EV_CONN_ACKSTRM, qc, strm_frm);
qc_release_frm(qc, frm);
/* early return */
goto leave;
}
stream = eb64_entry(node, struct qc_stream_desc, by_id);
TRACE_DEVEL("acked stream", QUIC_EV_CONN_ACKSTRM, qc, strm_frm, stream);
if (offset <= stream->ack_offset) {
/* qc_stream_desc_ack() takes the address of <stream> and may reset it
 * to NULL (see the test below).
 */
if (qc_stream_desc_ack(&stream, offset, len)) {
TRACE_DEVEL("stream consumed", QUIC_EV_CONN_ACKSTRM,
qc, strm_frm, stream);
}
if (!stream) {
/* no need to continue if stream freed. */
TRACE_DEVEL("stream released and freed", QUIC_EV_CONN_ACKSTRM, qc);
qc_release_frm(qc, frm);
qc_check_close_on_released_mux(qc);
break;
}
/* NOTE(review): this trace is emitted whatever the value returned by
 * qc_stream_desc_ack() and duplicates the one above when it succeeded.
 * To be confirmed if this is intended.
 */
TRACE_DEVEL("stream consumed", QUIC_EV_CONN_ACKSTRM,
qc, strm_frm, stream);
qc_release_frm(qc, frm);
}
else {
/* Frame ahead of the acked offset: keep it in the tree of acked
 * frames of this stream.
 */
eb64_insert(&stream->acked_frms, &strm_frm->offset);
}
quic_stream_try_to_consume(qc, stream);
}
break;
default:
qc_release_frm(qc, frm);
}
leave:
TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
}
/* Move the TX packets of <pkts> ebtree whose packet numbers belong to the
 * acknowledged range [<smallest>, <largest>] announced by an ACK frame to the
 * tail of <newly_acked_pkts> list. Each moved packet is removed from <pkts>.
 *
 * <largest_node> may be provided by the caller as the upper bound node of the
 * range. If NULL, this node is looked up from <largest>.
 */
static void qc_newly_acked_pkts(struct quic_conn *qc, struct eb_root *pkts,
                                struct list *newly_acked_pkts,
                                struct eb64_node *largest_node,
                                uint64_t largest, uint64_t smallest)
{
	struct eb64_node *cur;

	TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);

	/* First packet with a number greater than or equal to <smallest>. */
	cur = eb64_lookup_ge(pkts, smallest);
	if (cur) {
		/* Last packet with a number lower than or equal to <largest>. */
		if (!largest_node)
			largest_node = eb64_lookup_le(pkts, largest);

		if (largest_node) {
			while (cur && cur->key <= largest_node->key) {
				struct quic_tx_packet *acked;

				acked = eb64_entry(cur, struct quic_tx_packet, pn_node);
				LIST_APPEND(newly_acked_pkts, &acked->list);
				/* Step forward before unlinking the current node. */
				cur = eb64_next(cur);
				eb64_delete(&acked->pn_node);
			}
		}
	}

	TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
}
/* Handle <newly_acked_pkts> list of newly acknowledged TX packets for <qc>
 * connection: the flags of each packet are accumulated into <*pkt_flags>, all
 * its frames are handled as newly acknowledged, then the packet is detached
 * from its datagram neighbours and removed from its packet number tree.
 *
 * The packets are left attached to <newly_acked_pkts> list.
 */
static void qc_handle_newly_acked_pkts(struct quic_conn *qc,
                                       unsigned int *pkt_flags, struct list *newly_acked_pkts)
{
	struct quic_tx_packet *acked, *acked_next;

	TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);

	list_for_each_entry_safe(acked, acked_next, newly_acked_pkts, list) {
		struct quic_frame *cur_frm, *next_frm;

		*pkt_flags |= acked->flags;
		TRACE_DEVEL("Removing packet #", QUIC_EV_CONN_PRSAFRM, qc, NULL, &acked->pn_node.key);

		list_for_each_entry_safe(cur_frm, next_frm, &acked->frms, list) {
			qc_handle_newly_acked_frm(qc, cur_frm);
		}

		/* If other packets share the datagram this packet is attached
		 * to, unlink its previous and next neighbours from it.
		 */
		quic_tx_packet_dgram_detach(acked);
		eb64_delete(&acked->pn_node);
	}

	TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
}
/* Handle all frames sent from <pkt> packet and reinsert them in the same order
 * they have been sent into <pktns_frm_list>. The loss counter of each
 * reinserted frame is incremented and checked against the retransmission
 * limit (global.tune.quic_max_frame_loss).
 *
 * Every frame is first detached from <pkt>. It is then freed instead of being
 * reinserted in the following cases:
 *  - STREAM frame whose stream is not found in <qc->streams_by_id>,
 *  - STREAM frame whose data are entirely below the stream <ack_offset>,
 *  - <pkt> is flagged as a probing packet built with old data,
 *  - frame flagged as already acknowledged.
 * A STREAM frame partially below the stream <ack_offset> is moved forward to
 * this offset before being reinserted.
 *
 * Returns 1 on success, 0 if a frame loss limit is exceeded. A
 * CONNECTION_CLOSE is scheduled in this case. Note that the remaining frames
 * are still handled after the limit has been reached.
 */
int qc_handle_frms_of_lost_pkt(struct quic_conn *qc,
struct quic_tx_packet *pkt,
struct list *pktns_frm_list)
{
struct quic_frame *frm, *frmbak;
struct list *pkt_frm_list = &pkt->frms;
/* Packet number, only used by the traces. */
uint64_t pn = pkt->pn_node.key;
int close = 0;
TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
list_for_each_entry_safe(frm, frmbak, pkt_frm_list, list) {
/* First remove this frame from the packet it was attached to */
LIST_DEL_INIT(&frm->list);
quic_tx_packet_refdec(pkt);
/* At this time, this frame is not freed but removed from its packet */
frm->pkt = NULL;
/* Remove any reference to this frame */
qc_frm_unref(frm, qc);
switch (frm->type) {
case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F:
{
struct qf_stream *strm_frm = &frm->stream;
struct eb64_node *node = NULL;
struct qc_stream_desc *stream_desc;
node = eb64_lookup(&qc->streams_by_id, strm_frm->id);
if (!node) {
TRACE_DEVEL("released stream", QUIC_EV_CONN_PRSAFRM, qc, frm);
TRACE_DEVEL("freeing frame from packet", QUIC_EV_CONN_PRSAFRM,
qc, frm, &pn);
qc_frm_free(qc, &frm);
continue;
}
stream_desc = eb64_entry(node, struct qc_stream_desc, by_id);
/* Do not resend this frame if in the "already acked range" */
if (strm_frm->offset.key + strm_frm->len <= stream_desc->ack_offset) {
TRACE_DEVEL("ignored frame in already acked range",
QUIC_EV_CONN_PRSAFRM, qc, frm);
qc_frm_free(qc, &frm);
continue;
}
else if (strm_frm->offset.key < stream_desc->ack_offset) {
/* Only the beginning of this frame has been acked: move it forward
 * by the number of bytes located below the acked offset.
 */
uint64_t diff = stream_desc->ack_offset - strm_frm->offset.key;
qc_stream_frm_mv_fwd(frm, diff);
TRACE_DEVEL("updated partially acked frame",
QUIC_EV_CONN_PRSAFRM, qc, frm);
}
break;
}
default:
break;
}
/* Do not resend probing packet with old data */
if (pkt->flags & QUIC_FL_TX_PACKET_PROBE_WITH_OLD_DATA) {
TRACE_DEVEL("ignored frame with old data from packet", QUIC_EV_CONN_PRSAFRM,
qc, frm, &pn);
if (frm->origin)
LIST_DEL_INIT(&frm->ref);
qc_frm_free(qc, &frm);
continue;
}
if (frm->flags & QUIC_FL_TX_FRAME_ACKED) {
TRACE_DEVEL("already acked frame", QUIC_EV_CONN_PRSAFRM, qc, frm);
TRACE_DEVEL("freeing frame from packet", QUIC_EV_CONN_PRSAFRM,
qc, frm, &pn);
qc_frm_free(qc, &frm);
}
else {
if (++frm->loss_count >= global.tune.quic_max_frame_loss) {
TRACE_ERROR("retransmission limit reached, closing the connection", QUIC_EV_CONN_PRSAFRM, qc);
quic_set_connection_close(qc, quic_err_transport(QC_ERR_INTERNAL_ERROR));
qc_notify_err(qc);
close = 1;
}
/* The frame is requeued even if the loss limit has just been reached. */
LIST_APPEND(pktns_frm_list, &frm->list);
TRACE_DEVEL("frame requeued", QUIC_EV_CONN_PRSAFRM, qc, frm);
}
}
end:
TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
return !close;
}
/* Send a packet ack event nofication for each newly acked packet of
* <newly_acked_pkts> list and free them.
* Always succeeds.
*/
static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc,
struct list *newly_acked_pkts)
{
struct quic_tx_packet *pkt, *tmp;
struct quic_cc_event ev = { .type = QUIC_CC_EVT_ACK, };
TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
list_for_each_entry_safe(pkt, tmp, newly_acked_pkts, list) {
pkt->pktns->tx.in_flight -= pkt->in_flight_len;
qc->path->prep_in_flight -= pkt->in_flight_len;
qc->path->in_flight -= pkt->in_flight_len;
if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING)
qc->path->ifae_pkts--;
/* If this packet contained an ACK frame, proceed to the
* acknowledging of range of acks from the largest acknowledged
* packet number which was sent in an ACK frame by this packet.
*/
if (pkt->largest_acked_pn != -1)
qc_treat_ack_of_ack(qc, &pkt->pktns->rx.arngs, pkt->largest_acked_pn);
ev.ack.acked = pkt->in_flight_len;
ev.ack.time_sent = pkt->time_sent;
ev.ack.pn = pkt->pn_node.key;
quic_cc_event(&qc->path->cc, &ev);
LIST_DEL_INIT(&pkt->list);
quic_tx_packet_refdec(pkt);
}
TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
}
/* Handle <frm> ACK frame received at <qel> encryption level for <qc>
 * connection. The first range is taken from <frm> (largest acked packet
 * number and first ACK range). The additional ranges (gap, ACK range length)
 * are decoded from the buffer at <*pos> address with <end> being at one byte
 * past the end of this buffer. <*pos> is moved forward accordingly and the
 * <ack_range_num> field of the frame is decremented while the ranges are
 * consumed.
 *
 * Also update <rtt_sample> if needed, i.e. if the largest acked packet was
 * newly acked and if there was at least one newly acked ack-eliciting packet.
 *
 * Return 1, if succeeded, 0 if not. If the frame is malformed, the packets
 * already collected as newly acked are moved back into their tree.
 */
static int qc_parse_ack_frm(struct quic_conn *qc,
struct quic_frame *frm,
struct quic_enc_level *qel,
unsigned int *rtt_sample,
const unsigned char **pos, const unsigned char *end)
{
struct qf_ack *ack_frm = &frm->ack;
uint64_t smallest, largest;
struct eb_root *pkts;
struct eb64_node *largest_node;
unsigned int time_sent, pkt_flags;
struct list newly_acked_pkts = LIST_HEAD_INIT(newly_acked_pkts);
struct list lost_pkts = LIST_HEAD_INIT(lost_pkts);
int ret = 0, new_largest_acked_pn = 0;
struct quic_tx_packet *pkt, *tmp;
TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
pkts = &qel->pktns->tx.pkts;
if (ack_frm->largest_ack > qel->pktns->tx.next_pn) {
TRACE_DEVEL("ACK for not sent packet", QUIC_EV_CONN_PRSAFRM,
qc, NULL, &ack_frm->largest_ack);
goto err;
}
if (ack_frm->first_ack_range > ack_frm->largest_ack) {
TRACE_DEVEL("too big first ACK range", QUIC_EV_CONN_PRSAFRM,
qc, NULL, &ack_frm->first_ack_range);
goto err;
}
/* First range: [largest_ack - first_ack_range, largest_ack] */
largest = ack_frm->largest_ack;
smallest = largest - ack_frm->first_ack_range;
pkt_flags = 0;
largest_node = NULL;
time_sent = 0;
/* Look for the largest acked packet only if it is greater than the
 * largest packet number already acknowledged: its sent time is needed
 * to compute the RTT sample.
 */
if ((int64_t)ack_frm->largest_ack > qel->pktns->rx.largest_acked_pn) {
largest_node = eb64_lookup(pkts, largest);
if (!largest_node) {
TRACE_DEVEL("Largest acked packet not found",
QUIC_EV_CONN_PRSAFRM, qc);
}
else {
time_sent = eb64_entry(largest_node,
struct quic_tx_packet, pn_node)->time_sent;
new_largest_acked_pn = 1;
}
}
TRACE_PROTO("RX ack range", QUIC_EV_CONN_PRSAFRM,
qc, NULL, &largest, &smallest);
do {
uint64_t gap, ack_range;
/* Collect the packets of the current range. */
qc_newly_acked_pkts(qc, pkts, &newly_acked_pkts,
largest_node, largest, smallest);
/* No more additional range to decode. */
if (!ack_frm->ack_range_num--)
break;
if (!quic_dec_int(&gap, pos, end)) {
TRACE_ERROR("quic_dec_int(gap) failed", QUIC_EV_CONN_PRSAFRM, qc);
goto err;
}
/* The next largest value below would underflow. */
if (smallest < gap + 2) {
TRACE_DEVEL("wrong gap value", QUIC_EV_CONN_PRSAFRM,
qc, NULL, &gap, &smallest);
goto err;
}
largest = smallest - gap - 2;
if (!quic_dec_int(&ack_range, pos, end)) {
TRACE_ERROR("quic_dec_int(ack_range) failed", QUIC_EV_CONN_PRSAFRM, qc);
goto err;
}
/* The next smallest value below would underflow. */
if (largest < ack_range) {
TRACE_DEVEL("wrong ack range value", QUIC_EV_CONN_PRSAFRM,
qc, NULL, &largest, &ack_range);
goto err;
}
/* Do not use this node anymore. */
largest_node = NULL;
/* Next range */
smallest = largest - ack_range;
TRACE_PROTO("RX next ack range", QUIC_EV_CONN_PRSAFRM,
qc, NULL, &largest, &smallest);
} while (1);
if (!LIST_ISEMPTY(&newly_acked_pkts)) {
qc_handle_newly_acked_pkts(qc, &pkt_flags, &newly_acked_pkts);
if (new_largest_acked_pn && (pkt_flags & QUIC_FL_TX_PACKET_ACK_ELICITING)) {
*rtt_sample = tick_remain(time_sent, now_ms);
qel->pktns->rx.largest_acked_pn = ack_frm->largest_ack;
}
if (!eb_is_empty(&qel->pktns->tx.pkts)) {
qc_packet_loss_lookup(qel->pktns, qc, &lost_pkts);
/* NOTE(review): on failure, this function returns without calling
 * qc_notify_cc_of_newly_acked_pkts(): the packets collected into
 * <newly_acked_pkts>, a list whose head is local to this function,
 * are neither released nor moved back into their tree. To be
 * confirmed if something else takes care of them.
 */
if (!qc_release_lost_pkts(qc, qel->pktns, &lost_pkts, now_ms))
goto leave;
}
qc_notify_cc_of_newly_acked_pkts(qc, &newly_acked_pkts);
if (quic_peer_validated_addr(qc))
qc->path->loss.pto_count = 0;
qc_set_timer(qc);
qc_notify_send(qc);
}
ret = 1;
leave:
TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
return ret;
err:
/* Move back these packets into their tree. */
list_for_each_entry_safe(pkt, tmp, &newly_acked_pkts, list) {
LIST_DEL_INIT(&pkt->list);
eb64_insert(pkts, &pkt->pn_node);
}
goto leave;
}
/* Hand over to the upper layer of <qc> connection the data of <strm_frm>
 * STREAM frame received in <pkt> packet. <fin> is true if FIN bit is set on
 * frame type.
 *
 * Return 1 on success, i.e. if qcc_recv() returned 0. On error, 0 is
 * returned. In this case, the packet containing the frame must not be
 * acknowledged.
 */
static int qc_handle_strm_frm(struct quic_rx_packet *pkt,
                              struct qf_stream *strm_frm,
                              struct quic_conn *qc, char fin)
{
	int rejected;

	TRACE_ENTER(QUIC_EV_CONN_PRSFRM, qc);

	/* RFC9000 13.1. Packet Processing
	 *
	 * A packet MUST NOT be acknowledged until packet protection has been
	 * successfully removed and all frames contained in the packet have
	 * been processed. For STREAM frames, this means the data has been
	 * enqueued in preparation to be received by the application protocol,
	 * but it does not require that data be delivered and consumed.
	 */
	rejected = qcc_recv(qc->qcc, strm_frm->id, strm_frm->len,
	                    strm_frm->offset.key, fin, (char *)strm_frm->data);

	TRACE_LEAVE(QUIC_EV_CONN_PRSFRM, qc);

	/* A rejected frame means its packet must not be acknowledged. */
	return !rejected;
}
/* Handle <crypto_frm> CRYPTO frame coming with <pkt> packet at <qel>
 * encryption level for <qc> connection.
 *
 * Data located before the current RX offset of the CRYPTO stream are ignored:
 * the frame is dropped if entirely below this offset, or trimmed in place
 * (<crypto_frm> offset, length and data pointer are updated) if partially
 * below. The remaining data are then stored into the non-contiguous buffer of
 * the CRYPTO stream and the connection tasklet is woken up if some data are
 * available at the beginning of this buffer.
 *
 * Returns 1 if succeeded, 0 if not. Also set <*fast_retrans> to 1 if the
 * speed up handshake completion may be run after having received duplicated
 * CRYPTO data.
 */
static int qc_handle_crypto_frm(struct quic_conn *qc,
struct qf_crypto *crypto_frm, struct quic_rx_packet *pkt,
struct quic_enc_level *qel, int *fast_retrans)
{
int ret = 0;
enum ncb_ret ncb_ret;
/* XXX TO DO: <cfdebug> is used only for the traces. */
struct quic_rx_crypto_frm cfdebug = {
.offset_node.key = crypto_frm->offset,
.len = crypto_frm->len,
};
struct quic_cstream *cstream = qel->cstream;
struct ncbuf *ncbuf = &qel->cstream->rx.ncbuf;
TRACE_ENTER(QUIC_EV_CONN_PRSHPKT, qc);
if (unlikely(crypto_frm->offset < cstream->rx.offset)) {
size_t diff;
if (crypto_frm->offset + crypto_frm->len <= cstream->rx.offset) {
/* Nothing to do */
TRACE_PROTO("Already received CRYPTO data",
QUIC_EV_CONN_RXPKT, qc, pkt, &cfdebug);
/* Duplicated data received by a listener at <qc->iel> encryption
 * level while the handshake speed up has not been flagged yet.
 */
if (qc_is_listener(qc) && qel == qc->iel &&
!(qc->flags & QUIC_FL_CONN_HANDSHAKE_SPEED_UP))
*fast_retrans = 1;
goto done;
}
TRACE_PROTO("Partially already received CRYPTO data",
QUIC_EV_CONN_RXPKT, qc, pkt, &cfdebug);
/* Skip the bytes located below the current RX offset. */
diff = cstream->rx.offset - crypto_frm->offset;
crypto_frm->len -= diff;
crypto_frm->data += diff;
crypto_frm->offset = cstream->rx.offset;
}
if (!quic_get_ncbuf(ncbuf) || ncb_is_null(ncbuf)) {
TRACE_ERROR("CRYPTO ncbuf allocation failed", QUIC_EV_CONN_PRSHPKT, qc);
goto leave;
}
/* From here, crypto_frm->offset >= cstream->rx.offset: the data are
 * inserted at their offset relative to the current RX offset.
 */
ncb_ret = ncb_add(ncbuf, crypto_frm->offset - cstream->rx.offset,
(const char *)crypto_frm->data, crypto_frm->len, NCB_ADD_COMPARE);
if (ncb_ret != NCB_RET_OK) {
if (ncb_ret == NCB_RET_DATA_REJ) {
TRACE_ERROR("overlapping data rejected", QUIC_EV_CONN_PRSHPKT, qc);
quic_set_connection_close(qc, quic_err_transport(QC_ERR_PROTOCOL_VIOLATION));
qc_notify_err(qc);
}
else if (ncb_ret == NCB_RET_GAP_SIZE) {
TRACE_ERROR("cannot bufferize frame due to gap size limit",
QUIC_EV_CONN_PRSHPKT, qc);
}
/* Any insertion failure makes this function fail. */
goto leave;
}
/* Reschedule with TASK_HEAVY if CRYPTO data ready for decoding. */
if (ncb_data(ncbuf, 0)) {
HA_ATOMIC_OR(&qc->wait_event.tasklet->state, TASK_HEAVY);
tasklet_wakeup(qc->wait_event.tasklet);
}
done:
ret = 1;
leave:
TRACE_LEAVE(QUIC_EV_CONN_PRSHPKT, qc);
return ret;
}
/* Handle RETIRE_CONNECTION_ID frame from <frm> frame for <qc> connection.
 * <dcid> is the destination connection ID of the packet carrying this frame.
 *
 * Return 1 if succeeded, 0 if not. If succeeded, also set <to_retire>
 * to the CID to be retired if not already retired. On failure, the connection
 * is closed with PROTOCOL_VIOLATION as transport error.
 */
static int qc_handle_retire_connection_id_frm(struct quic_conn *qc,
                                              struct quic_frame *frm,
                                              struct quic_cid *dcid,
                                              struct quic_connection_id **to_retire)
{
	struct qf_retire_connection_id *retire = &frm->retire_connection_id;
	struct quic_connection_id *cid_entry;
	struct eb64_node *seq_node;
	int violation = 0;
	int ret = 0;

	TRACE_ENTER(QUIC_EV_CONN_PRSHPKT, qc);

	if (retire->seq_num >= qc->next_cid_seq_num) {
		/* RFC 9000 19.16. RETIRE_CONNECTION_ID Frames:
		 * Receipt of a RETIRE_CONNECTION_ID frame containing a sequence number greater
		 * than any previously sent to the peer MUST be treated as a connection error
		 * of type PROTOCOL_VIOLATION.
		 */
		TRACE_PROTO("CID seq. number too big", QUIC_EV_CONN_PSTRM, qc, frm);
		violation = 1;
	}
	else {
		seq_node = eb64_lookup(qc->cids, retire->seq_num);
		if (!seq_node) {
			/* Unknown sequence number: nothing to retire. */
			TRACE_PROTO("CID already retired", QUIC_EV_CONN_PSTRM, qc, frm);
			ret = 1;
		}
		else {
			cid_entry = eb64_entry(seq_node, struct quic_connection_id, seq_num);
			/* RFC 9000 19.16. RETIRE_CONNECTION_ID Frames:
			 * The sequence number specified in a RETIRE_CONNECTION_ID frame MUST NOT refer to
			 * the Destination Connection ID field of the packet in which the frame is contained.
			 * The peer MAY treat this as a connection error of type PROTOCOL_VIOLATION.
			 *
			 * Note that the length of <dcid> has already been checked. It must match the
			 * length of the CIDs which have been provided to the peer.
			 */
			if (!memcmp(dcid->data, cid_entry->cid.data, QUIC_HAP_CID_LEN)) {
				TRACE_PROTO("cannot retire the current CID", QUIC_EV_CONN_PSTRM, qc, frm);
				violation = 1;
			}
			else {
				*to_retire = cid_entry;
				ret = 1;
			}
		}
	}

	if (violation) {
		quic_set_connection_close(qc, quic_err_transport(QC_ERR_PROTOCOL_VIOLATION));
		qc_notify_err(qc);
	}

	TRACE_LEAVE(QUIC_EV_CONN_PRSHPKT, qc);
	return ret;
}
/* Returns the <ack_delay> field value in milliseconds from <ack_frm> ACK frame
 * for <conn> QUIC connection. The value coming from the ACK frame is first
 * scaled by shifting it left by the <ack_delay_exponent> of <conn> TX
 * parameters, which gives a value in microseconds, then divided by 1000.
 *
 * NOTE(review): the result is returned as an unsigned int and the left shift
 * is not range checked; presumably the frame parser bounds <ack_delay> -
 * to be confirmed.
 */
static inline unsigned int quic_ack_delay_ms(struct qf_ack *ack_frm,
struct quic_conn *conn)
{
return (ack_frm->ack_delay << conn->tx.params.ack_delay_exponent) / 1000;
}
/* Parse all the frames of <pkt> QUIC packet for QUIC connection <qc> and <qel>
* as encryption level.
* Returns 1 if succeeded, 0 if failed.
*/
static int qc_parse_pkt_frms(struct quic_conn *qc, struct quic_rx_packet *pkt,
struct quic_enc_level *qel)
{
struct quic_frame frm;
const unsigned char *pos, *end;
int fast_retrans = 0, ret = 0;
TRACE_ENTER(QUIC_EV_CONN_PRSHPKT, qc);
/* Skip the AAD */
pos = pkt->data + pkt->aad_len;
end = pkt->data + pkt->len;
/* Packet with no frame. */
if (pos == end) {
/* RFC9000 12.4. Frames and Frame Types
*
* The payload of a packet that contains frames MUST contain at least
* one frame, and MAY contain multiple frames and multiple frame types.
* An endpoint MUST treat receipt of a packet containing no frames as a
* connection error of type PROTOCOL_VIOLATION. Frames always fit within
* a single QUIC packet and cannot span multiple packets.
*/
quic_set_connection_close(qc, quic_err_transport(QC_ERR_PROTOCOL_VIOLATION));
goto leave;
}
while (pos < end) {
if (!qc_parse_frm(&frm, pkt, &pos, end, qc)) {
// trace already emitted by function above
goto leave;
}
switch (frm.type) {
case QUIC_FT_PADDING:
break;
case QUIC_FT_PING:
break;
case QUIC_FT_ACK:
case QUIC_FT_ACK_ECN:
{
unsigned int rtt_sample;
rtt_sample = UINT_MAX;
if (!qc_parse_ack_frm(qc, &frm, qel, &rtt_sample, &pos, end)) {
// trace already emitted by function above
goto leave;
}
if (rtt_sample != UINT_MAX) {
unsigned int ack_delay;
ack_delay = !quic_application_pktns(qel->pktns, qc) ? 0 :
qc->state >= QUIC_HS_ST_CONFIRMED ?
MS_TO_TICKS(QUIC_MIN(quic_ack_delay_ms(&frm.ack, qc), qc->max_ack_delay)) :
MS_TO_TICKS(quic_ack_delay_ms(&frm.ack, qc));
quic_loss_srtt_update(&qc->path->loss, rtt_sample, ack_delay, qc);
}
break;
}
case QUIC_FT_RESET_STREAM:
if (qc->mux_state == QC_MUX_READY) {
struct qf_reset_stream *rs_frm = &frm.reset_stream;
qcc_recv_reset_stream(qc->qcc, rs_frm->id, rs_frm->app_error_code, rs_frm->final_size);
}
break;
case QUIC_FT_STOP_SENDING:
{
struct qf_stop_sending *ss_frm = &frm.stop_sending;
if (qc->mux_state == QC_MUX_READY) {
if (qcc_recv_stop_sending(qc->qcc, ss_frm->id,
ss_frm->app_error_code)) {
TRACE_ERROR("qcc_recv_stop_sending() failed", QUIC_EV_CONN_PRSHPKT, qc);
goto leave;
}
}
break;
}
case QUIC_FT_CRYPTO:
if (!qc_handle_crypto_frm(qc, &frm.crypto, pkt, qel, &fast_retrans))
goto leave;
break;
case QUIC_FT_NEW_TOKEN:
/* TODO */
break;
case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F:
{
struct qf_stream *strm_frm = &frm.stream;
unsigned nb_streams = qc->rx.strms[qcs_id_type(strm_frm->id)].nb_streams;
const char fin = frm.type & QUIC_STREAM_FRAME_TYPE_FIN_BIT;
/* The upper layer may not be allocated. */
if (qc->mux_state != QC_MUX_READY) {
if ((strm_frm->id >> QCS_ID_TYPE_SHIFT) < nb_streams) {
TRACE_DATA("Already closed stream", QUIC_EV_CONN_PRSHPKT, qc);
}
else {
TRACE_DEVEL("No mux for new stream", QUIC_EV_CONN_PRSHPKT, qc);
if (qc->app_ops == &h3_ops) {
if (!qc_h3_request_reject(qc, strm_frm->id)) {
TRACE_ERROR("error on request rejection", QUIC_EV_CONN_PRSHPKT, qc);
/* This packet will not be acknowledged */
goto leave;
}
}
else {
/* This packet will not be acknowledged */
goto leave;
}
}
break;
}
if (!qc_handle_strm_frm(pkt, strm_frm, qc, fin)) {
TRACE_ERROR("qc_handle_strm_frm() failed", QUIC_EV_CONN_PRSHPKT, qc);
goto leave;
}
break;
}
case QUIC_FT_MAX_DATA:
if (qc->mux_state == QC_MUX_READY) {
struct qf_max_data *md_frm = &frm.max_data;
qcc_recv_max_data(qc->qcc, md_frm->max_data);
}
break;
case QUIC_FT_MAX_STREAM_DATA:
if (qc->mux_state == QC_MUX_READY) {
struct qf_max_stream_data *msd_frm = &frm.max_stream_data;
if (qcc_recv_max_stream_data(qc->qcc, msd_frm->id,
msd_frm->max_stream_data)) {
TRACE_ERROR("qcc_recv_max_stream_data() failed", QUIC_EV_CONN_PRSHPKT, qc);
goto leave;
}
}
break;
case QUIC_FT_MAX_STREAMS_BIDI:
case QUIC_FT_MAX_STREAMS_UNI:
break;
case QUIC_FT_DATA_BLOCKED:
qc->cntrs.data_blocked++;
break;
case QUIC_FT_STREAM_DATA_BLOCKED:
qc->cntrs.stream_data_blocked++;
break;
case QUIC_FT_STREAMS_BLOCKED_BIDI:
qc->cntrs.streams_blocked_bidi++;
break;
case QUIC_FT_STREAMS_BLOCKED_UNI:
qc->cntrs.streams_blocked_uni++;
break;
case QUIC_FT_NEW_CONNECTION_ID:
/* XXX TO DO XXX */
break;
case QUIC_FT_RETIRE_CONNECTION_ID:
{
struct quic_cid_tree *tree __maybe_unused;
struct quic_connection_id *conn_id = NULL;
if (!qc_handle_retire_connection_id_frm(qc, &frm, &pkt->dcid, &conn_id))
goto leave;
if (!conn_id)
break;
tree = &quic_cid_trees[quic_cid_tree_idx(&conn_id->cid)];
HA_RWLOCK_WRLOCK(QC_CID_LOCK, &tree->lock);
ebmb_delete(&conn_id->node);
HA_RWLOCK_WRUNLOCK(QC_CID_LOCK, &tree->lock);
eb64_delete(&conn_id->seq_num);
pool_free(pool_head_quic_connection_id, conn_id);
TRACE_PROTO("CID retired", QUIC_EV_CONN_PSTRM, qc);