-
Notifications
You must be signed in to change notification settings - Fork 800
/
Copy pathipmasq_nftables_linux.go
229 lines (201 loc) · 7.43 KB
/
ipmasq_nftables_linux.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
// Copyright 2023 CNI authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ip
import (
"context"
"fmt"
"net"
"strings"
"sigs.k8s.io/knftables"
"github.com/containernetworking/cni/pkg/types"
"github.com/containernetworking/plugins/pkg/utils"
)
// Names of the nftables objects managed by this file.
const (
// ipMasqTableName is the table name passed to knftables.New by the setup,
// teardown, and GC entry points.
ipMasqTableName = "cni_plugins_masquerade"
// ipMasqChainName is the chain that holds the per-attachment masquerade
// rules; the "postrouting" chain hands traffic to it with a goto.
ipMasqChainName = "masq_checks"
)
// The nftables ipmasq implementation is mostly like the iptables implementation, with
// minor updates to fix a bug (adding `ifname`) and to allow future GC support.
//
// We add a rule for each mapping, with a comment containing a hash of its identifiers,
// so that we can later reliably delete the rules we want. (This is important because in
// edge cases, it's possible the plugin might see "ADD container A with IP 192.168.1.3",
// followed by "ADD container B with IP 192.168.1.3" followed by "DEL container A with IP
// 192.168.1.3", and we need to make sure that the DEL causes us to delete the rule for
// container A, and not the rule for container B.)
//
// It would be more nftables-y to have a chain with a single rule doing a lookup against a
// set with an element per mapping, rather than having a chain with a rule per mapping.
// But there's no easy, non-racy way to say "delete the element 192.168.1.3 from the set,
// but only if it was added for container A, not if it was added for container B".
// hashForNetwork computes the hash that identifies a network. It is the
// leading component of every rule comment written for that network, which is
// what allows all of a network's rules to be found by prefix.
func hashForNetwork(network string) string {
	const hashLen = 16
	return utils.MustFormatHashWithPrefix(hashLen, "", network)
}
// hashForInstance computes the hash that identifies the rules belonging to one
// network/ifname/containerID combination. The result is the network hash, a
// "-", and a second hash over "ifname:containerID".
func hashForInstance(network, ifname, containerID string) string {
	networkPart := hashForNetwork(network)
	instancePart := utils.MustFormatHashWithPrefix(16, "", ifname+":"+containerID)
	return networkPart + "-" + instancePart
}
// commentForInstance builds the nftables rule comment for one
// network/ifname/containerID attachment. The comment starts with the
// hashForInstance value and continues with a human-readable description made
// of the network, ifname, and containerID (double quotes removed). If the
// result is longer than knftables.CommentLengthMax bytes, it is cut off at
// that length.
func commentForInstance(network, ifname, containerID string) string {
	// Double quotes are dropped from the human-readable fields.
	unquote := func(s string) string {
		return strings.ReplaceAll(s, `"`, ``)
	}

	full := fmt.Sprintf("%s, net: %s, if: %s, id: %s",
		hashForInstance(network, ifname, containerID),
		unquote(network),
		unquote(ifname),
		unquote(containerID),
	)
	if len(full) <= knftables.CommentLengthMax {
		return full
	}
	return full[:knftables.CommentLengthMax]
}
// setupIPMasqNFTables is the nftables-based implementation of
// SetupIPMasqForNetwork. It opens a knftables handle on the inet-family
// ipMasqTableName table and delegates to setupIPMasqNFTablesWithInterface.
func setupIPMasqNFTables(ipn *net.IPNet, network, ifname, containerID string) error {
	nft, err := knftables.New(knftables.InetFamily, ipMasqTableName)
	if err == nil {
		err = setupIPMasqNFTablesWithInterface(nft, ipn, network, ifname, containerID)
	}
	return err
}
// setupIPMasqNFTablesWithInterface installs the masquerade rule for one
// network/ifname/containerID attachment through the given knftables.Interface.
//
// Everything happens in a single transaction, in this order: the table,
// ipMasqChainName, and a "postrouting" NAT chain are added; "postrouting" is
// flushed and its rules re-added (return for IPv4 and IPv6 multicast
// destinations, then goto ipMasqChainName); rules already carrying this
// instance's hash are deleted; and a rule is added that masquerades traffic
// from ipn.IP to any destination outside ipn's subnet.
func setupIPMasqNFTablesWithInterface(nft knftables.Interface, ipn *net.IPNet, network, ifname, containerID string) error {
	// Rules whose comment already starts with this instance's hash are
	// replaced by the rule added below.
	previous, err := findRules(nft, hashForInstance(network, ifname, containerID))
	if err != nil {
		return err
	}

	const postrouting = "postrouting"

	tx := nft.NewTransaction()

	// Table and masquerade chain first, so they exist before anything
	// refers to them.
	tx.Add(&knftables.Table{
		Comment: knftables.PtrTo("Masquerading for plugins from github.com/containernetworking/plugins"),
	})
	tx.Add(&knftables.Chain{
		Name:    ipMasqChainName,
		Comment: knftables.PtrTo("Masquerade traffic from certain IPs to any (non-multicast) IP outside their subnet"),
	})

	// The postrouting chain is (re)built from scratch on every call. It has
	// to come after ipMasqChainName so that the goto has a valid target.
	tx.Add(&knftables.Chain{
		Name:     postrouting,
		Type:     knftables.PtrTo(knftables.NATType),
		Hook:     knftables.PtrTo(knftables.PostroutingHook),
		Priority: knftables.PtrTo(knftables.SNATPriority),
	})
	tx.Flush(&knftables.Chain{
		Name: postrouting,
	})
	postroutingRules := []string{
		"ip daddr == 224.0.0.0/4 return",
		"ip6 daddr == ff00::/8 return",
		knftables.Concat("goto", ipMasqChainName),
	}
	for _, text := range postroutingRules {
		tx.Add(&knftables.Rule{
			Chain: postrouting,
			Rule:  text,
		})
	}

	// Drop the leftovers found above before adding the fresh rule.
	for _, old := range previous {
		tx.Delete(old)
	}

	// Pick the nftables address-family keyword matching ipn.
	family := "ip6"
	if ipn.IP.To4() != nil {
		family = "ip"
	}

	// The subnet containing ipn, e.g. "192.168.1.4/24" -> "192.168.1.0/24".
	subnet := &net.IPNet{IP: ipn.IP.Mask(ipn.Mask), Mask: ipn.Mask}

	tx.Add(&knftables.Rule{
		Chain: ipMasqChainName,
		Rule: knftables.Concat(
			family, "saddr", "==", ipn.IP,
			family, "daddr", "!=", subnet,
			"masquerade",
		),
		Comment: knftables.PtrTo(commentForInstance(network, ifname, containerID)),
	})

	return nft.Run(context.TODO(), tx)
}
// teardownIPMasqNFTables is the nftables-based implementation of
// TeardownIPMasqForNetwork. It opens a knftables handle on the inet-family
// ipMasqTableName table and delegates to teardownIPMasqNFTablesWithInterface.
func teardownIPMasqNFTables(ipn *net.IPNet, network, ifname, containerID string) error {
	nft, err := knftables.New(knftables.InetFamily, ipMasqTableName)
	if err == nil {
		err = teardownIPMasqNFTablesWithInterface(nft, ipn, network, ifname, containerID)
	}
	return err
}
// teardownIPMasqNFTablesWithInterface deletes, in one transaction, every rule
// in ipMasqChainName whose comment starts with the hash for this
// network/ifname/containerID. The IPNet argument is ignored; rules are
// identified by hash only. If no matching rules exist, nothing is run and nil
// is returned.
func teardownIPMasqNFTablesWithInterface(nft knftables.Interface, _ *net.IPNet, network, ifname, containerID string) error {
	doomed, err := findRules(nft, hashForInstance(network, ifname, containerID))
	if err != nil {
		return err
	}
	if len(doomed) == 0 {
		return nil
	}

	tx := nft.NewTransaction()
	for i := range doomed {
		tx.Delete(doomed[i])
	}
	return nft.Run(context.TODO(), tx)
}
// gcIPMasqNFTables is the nftables-based implementation of
// GCIPMasqForNetwork. It opens a knftables handle on the inet-family
// ipMasqTableName table and delegates to gcIPMasqNFTablesWithInterface.
func gcIPMasqNFTables(network string, attachments []types.GCAttachment) error {
	nft, err := knftables.New(knftables.InetFamily, ipMasqTableName)
	if err == nil {
		err = gcIPMasqNFTablesWithInterface(nft, network, attachments)
	}
	return err
}
// gcIPMasqNFTablesWithInterface removes the rules for network that do not
// correspond to any entry in attachments. A rule is kept only when its comment
// is exactly the commentForInstance value of one of the attachments. If the
// network has no rules at all, nothing is run and nil is returned.
func gcIPMasqNFTablesWithInterface(nft knftables.Interface, network string, attachments []types.GCAttachment) error {
	// Every rule written for this network has a comment starting with the
	// network hash.
	networkRules, err := findRules(nft, hashForNetwork(network))
	if err != nil {
		return err
	}
	if len(networkRules) == 0 {
		return nil
	}

	// The set of comments that still-valid attachments would carry.
	keep := make(map[string]struct{}, len(attachments))
	for i := range attachments {
		att := &attachments[i]
		keep[commentForInstance(network, att.IfName, att.ContainerID)] = struct{}{}
	}

	// Whatever is not in the keep set gets deleted. findRules only returns
	// rules with a non-nil Comment, so the dereference is safe.
	tx := nft.NewTransaction()
	for _, r := range networkRules {
		if _, valid := keep[*r.Comment]; valid {
			continue
		}
		tx.Delete(r)
	}

	return nft.Run(context.TODO(), tx)
}
// findRules lists the rules in ipMasqChainName and returns the ones whose
// comment begins with commentPrefix; rules without a comment never match.
// A listing error for which knftables.IsNotFound reports true (the chain has
// not been created yet) yields a nil slice and no error. Any other listing
// error is returned as-is.
func findRules(nft knftables.Interface, commentPrefix string) ([]*knftables.Rule, error) {
	allRules, err := nft.ListRules(context.TODO(), ipMasqChainName)
	switch {
	case err == nil:
		// Listing succeeded; filter below.
	case knftables.IsNotFound(err):
		// If ipMasqChainName doesn't exist yet, that's fine
		return nil, nil
	default:
		return nil, err
	}

	matches := make([]*knftables.Rule, 0, 1)
	for _, r := range allRules {
		if r.Comment == nil || !strings.HasPrefix(*r.Comment, commentPrefix) {
			continue
		}
		matches = append(matches, r)
	}
	return matches, nil
}