/* Event cache for netfilter. */

/*
 * (C) 2005 Harald Welte <laforge@gnumonks.org>
 * (C) 2005 Patrick McHardy <kaber@trash.net>
 * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/export.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>

static DEFINE_MUTEX(nf_ct_ecache_mutex);

#define ECACHE_RETRY_WAIT (HZ/10)

enum retry_state {
	STATE_CONGESTED,
	STATE_RESTART,
	STATE_DONE,
};

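/* Walk one CPU's dying list and retry delivery of DESTROY events that
 * previously failed (NFCT_ECACHE_DESTROY_FAIL).  At most 16 entries are
 * handled per pass so the pcpu lock is not held for long; the caller
 * restarts on STATE_RESTART and backs off on STATE_CONGESTED.
 */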
static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
{
	struct nf_conn *refs[16];
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
	unsigned int evicted = 0;
	enum retry_state ret = STATE_DONE;

	spin_lock(&pcpu->lock);

	hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
		struct nf_conntrack_ecache *e;

		if (!nf_ct_is_confirmed(ct))
			continue;

		e = nf_ct_ecache_find(ct);
		if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL)
			continue;

		if (nf_conntrack_event(IPCT_DESTROY, ct)) {
			ret = STATE_CONGESTED;
			break;
		}

		e->state = NFCT_ECACHE_DESTROY_SENT;
		refs[evicted] = ct;

		if (++evicted >= ARRAY_SIZE(refs)) {
			ret = STATE_RESTART;
			break;
		}
	}

	spin_unlock(&pcpu->lock);

	/* can't _put while holding lock */
	while (evicted)
		nf_ct_put(refs[--evicted]);

	return ret;
}

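/* Delayed work handler: sweep the dying lists of all possible CPUs.
 * Reschedules itself immediately after a partial sweep (STATE_RESTART)
 * and after ECACHE_RETRY_WAIT if the event path is congested.
 */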
static void ecache_work(struct work_struct *work)
{
	struct netns_ct *ctnet =
		container_of(work, struct netns_ct, ecache_dwork.work);
	int cpu, delay = -1;
	struct ct_pcpu *pcpu;

	local_bh_disable();

	for_each_possible_cpu(cpu) {
		enum retry_state ret;

		pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu);

		ret = ecache_work_evict_list(pcpu);

		switch (ret) {
		case STATE_CONGESTED:
			delay = ECACHE_RETRY_WAIT;
			goto out;
		case STATE_RESTART:
			delay = 0;
			break;
		case STATE_DONE:
			break;
		}
	}

 out:
	local_bh_enable();

	ctnet->ecache_dwork_pending = delay > 0;
	if (delay >= 0)
		schedule_delayed_work(&ctnet->ecache_dwork, delay);
}

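/* Deliver the events in @eventmask for @ct to the registered notifier,
 * folding in any previously missed events.  If delivery of a DESTROY
 * event fails, it is flagged for retransmission by the ecache work;
 * other failed events are accumulated in the missed bitmask.
 */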
int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
				  u32 portid, int report)
{
	int ret = 0;
	struct net *net = nf_ct_net(ct);
	struct nf_ct_event_notifier *notify;
	struct nf_conntrack_ecache *e;

	rcu_read_lock();
	notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
	if (!notify)
		goto out_unlock;

	e = nf_ct_ecache_find(ct);
	if (!e)
		goto out_unlock;

	if (nf_ct_is_confirmed(ct)) {
		struct nf_ct_event item = {
			.ct	= ct,
			.portid	= e->portid ? e->portid : portid,
			.report = report
		};
		/* Is this a resend of a destroy event? If so, skip the
		 * missed events.
		 */
		unsigned long missed = e->portid ? 0 : e->missed;

		if (!((eventmask | missed) & e->ctmask))
			goto out_unlock;

		ret = notify->fcn(eventmask | missed, &item);
		if (unlikely(ret < 0 || missed)) {
			spin_lock_bh(&ct->lock);
			if (ret < 0) {
				/* This is a destroy event that has been
				 * triggered by a process; store the PORTID
				 * to include it in the retransmission.
				 */
				if (eventmask & (1 << IPCT_DESTROY)) {
					if (e->portid == 0 && portid != 0)
						e->portid = portid;
					e->state = NFCT_ECACHE_DESTROY_FAIL;
				} else {
					e->missed |= eventmask;
				}
			} else {
				e->missed &= ~missed;
			}
			spin_unlock_bh(&ct->lock);
		}
	}
out_unlock:
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);

/* Deliver cached events and clear the cache entry - must be called with
 * locally disabled softirqs.
 */
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);
	unsigned long events, missed;
	struct nf_ct_event_notifier *notify;
	struct nf_conntrack_ecache *e;
	struct nf_ct_event item;
	int ret;

	rcu_read_lock();
	notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
	if (notify == NULL)
		goto out_unlock;

	e = nf_ct_ecache_find(ct);
	if (e == NULL)
		goto out_unlock;

	events = xchg(&e->cache, 0);

	if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
		goto out_unlock;

	/* We make a copy of the missed event cache without taking
	 * the lock, thus we may send missed events twice. However,
	 * this does no harm and it happens very rarely.
	 */
	missed = e->missed;

	if (!((events | missed) & e->ctmask))
		goto out_unlock;

	item.ct = ct;
	item.portid = 0;
	item.report = 0;

	ret = notify->fcn(events | missed, &item);

	if (likely(ret == 0 && !missed))
		goto out_unlock;

	spin_lock_bh(&ct->lock);
	if (ret < 0)
		e->missed |= events;
	else
		e->missed &= ~missed;
	spin_unlock_bh(&ct->lock);

out_unlock:
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);

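/* Report an expectation event if the master conntrack's event cache
 * has the event enabled in its expmask.
 */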
void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
			       struct nf_conntrack_expect *exp,
			       u32 portid, int report)
{
	struct net *net = nf_ct_exp_net(exp);
	struct nf_exp_event_notifier *notify;
	struct nf_conntrack_ecache *e;

	rcu_read_lock();
	notify = rcu_dereference(net->ct.nf_expect_event_cb);
	if (!notify)
		goto out_unlock;

	e = nf_ct_ecache_find(exp->master);
	if (!e)
		goto out_unlock;

	if (e->expmask & (1 << event)) {
		struct nf_exp_event item = {
			.exp	= exp,
			.portid	= portid,
			.report = report
		};
		notify->fcn(1 << event, &item);
	}
out_unlock:
	rcu_read_unlock();
}

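/* Only one conntrack event notifier can be registered per netns at a
 * time; a second registration fails with -EBUSY.  The callback pointer
 * is published via RCU so that event delivery can run without locks.
 */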
int nf_conntrack_register_notifier(struct net *net,
				   struct nf_ct_event_notifier *new)
{
	int ret;
	struct nf_ct_event_notifier *notify;

	mutex_lock(&nf_ct_ecache_mutex);
	notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
					   lockdep_is_held(&nf_ct_ecache_mutex));
	if (notify != NULL) {
		ret = -EBUSY;
		goto out_unlock;
	}
	rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
	ret = 0;

out_unlock:
	mutex_unlock(&nf_ct_ecache_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);

void nf_conntrack_unregister_notifier(struct net *net,
				      struct nf_ct_event_notifier *new)
{
	struct nf_ct_event_notifier *notify;

	mutex_lock(&nf_ct_ecache_mutex);
	notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
					   lockdep_is_held(&nf_ct_ecache_mutex));
	BUG_ON(notify != new);
	RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
	mutex_unlock(&nf_ct_ecache_mutex);
	/* synchronize_rcu() is called from ctnetlink_exit. */
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);

int nf_ct_expect_register_notifier(struct net *net,
				   struct nf_exp_event_notifier *new)
{
	int ret;
	struct nf_exp_event_notifier *notify;

	mutex_lock(&nf_ct_ecache_mutex);
	notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
					   lockdep_is_held(&nf_ct_ecache_mutex));
	if (notify != NULL) {
		ret = -EBUSY;
		goto out_unlock;
	}
	rcu_assign_pointer(net->ct.nf_expect_event_cb, new);
	ret = 0;

out_unlock:
	mutex_unlock(&nf_ct_ecache_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);

void nf_ct_expect_unregister_notifier(struct net *net,
				      struct nf_exp_event_notifier *new)
{
	struct nf_exp_event_notifier *notify;

	mutex_lock(&nf_ct_ecache_mutex);
	notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
					   lockdep_is_held(&nf_ct_ecache_mutex));
	BUG_ON(notify != new);
	RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
	mutex_unlock(&nf_ct_ecache_mutex);
	/* synchronize_rcu() is called from ctnetlink_exit. */
}
EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);

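/* Default for the nf_conntrack_events sysctl: event delivery enabled. */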
#define NF_CT_EVENTS_DEFAULT 1
static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;

#ifdef CONFIG_SYSCTL
static struct ctl_table event_sysctl_table[] = {
	{
		.procname	= "nf_conntrack_events",
		.data		= &init_net.ct.sysctl_events,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{}
};
#endif /* CONFIG_SYSCTL */

static const struct nf_ct_ext_type event_extend = {
	.len	= sizeof(struct nf_conntrack_ecache),
	.align	= __alignof__(struct nf_conntrack_ecache),
	.id	= NF_CT_EXT_ECACHE,
};

#ifdef CONFIG_SYSCTL
static int nf_conntrack_event_init_sysctl(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table),
			GFP_KERNEL);
	if (!table)
		goto out;

	table[0].data = &net->ct.sysctl_events;

	/* Don't export sysctls to unprivileged users */
	if (net->user_ns != &init_user_ns)
		table[0].procname = NULL;

	net->ct.event_sysctl_header =
		register_net_sysctl(net, "net/netfilter", table);
	if (!net->ct.event_sysctl_header) {
		pr_err("can't register to sysctl\n");
		goto out_register;
	}
	return 0;

out_register:
	kfree(table);
out:
	return -ENOMEM;
}

static void nf_conntrack_event_fini_sysctl(struct net *net)
{
	struct ctl_table *table;

	table = net->ct.event_sysctl_header->ctl_table_arg;
	unregister_net_sysctl_table(net->ct.event_sysctl_header);
	kfree(table);
}
#else
static int nf_conntrack_event_init_sysctl(struct net *net)
{
	return 0;
}

static void nf_conntrack_event_fini_sysctl(struct net *net)
{
}
#endif /* CONFIG_SYSCTL */

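/* Per-netns setup: seed the sysctl default and initialize the delayed
 * work used to re-deliver failed destroy events.
 */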
int nf_conntrack_ecache_pernet_init(struct net *net)
{
	net->ct.sysctl_events = nf_ct_events;
	INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work);
	return nf_conntrack_event_init_sysctl(net);
}

void nf_conntrack_ecache_pernet_fini(struct net *net)
{
	cancel_delayed_work_sync(&net->ct.ecache_dwork);
	nf_conntrack_event_fini_sysctl(net);
}

int nf_conntrack_ecache_init(void)
{
	int ret = nf_ct_extend_register(&event_extend);

	if (ret < 0)
		pr_err("Unable to register event extension\n");

	BUILD_BUG_ON(__IPCT_MAX >= 16);	/* ctmask, missed use u16 */

	return ret;
}

void nf_conntrack_ecache_fini(void)
{
	nf_ct_extend_unregister(&event_extend);
}