1 /*
2 * ip_vs_app.c: Application module support for IPVS
3 *
4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
12 * is that ip_vs_app module handles the reverse direction (incoming requests
13 * and outgoing responses).
14 *
15 * IP_MASQ_APP application masquerading module
16 *
17 * Author: Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
18 *
19 */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/kernel.h>
26 #include <linux/skbuff.h>
27 #include <linux/in.h>
28 #include <linux/ip.h>
29 #include <linux/netfilter.h>
30 #include <linux/slab.h>
31 #include <net/net_namespace.h>
32 #include <net/protocol.h>
33 #include <net/tcp.h>
34 #include <linux/stat.h>
35 #include <linux/proc_fs.h>
36 #include <linux/seq_file.h>
37 #include <linux/mutex.h>
38
39 #include <net/ip_vs.h>
40
/* Make the app registration entry points available to other modules. */
41 EXPORT_SYMBOL(register_ip_vs_app);
42 EXPORT_SYMBOL(unregister_ip_vs_app);
43 EXPORT_SYMBOL(register_ip_vs_app_inc);
44
/*
 * Taken around app/incarnation registration and unregistration and held
 * between the seq_file start and stop callbacks of the proc listing.
 */
45 static DEFINE_MUTEX(__ip_vs_app_mutex);
46
47 /*
48 * Get an ip_vs_app object
49 */
ip_vs_app_get(struct ip_vs_app * app)50 static inline int ip_vs_app_get(struct ip_vs_app *app)
51 {
52 return try_module_get(app->module);
53 }
54
55
ip_vs_app_put(struct ip_vs_app * app)56 static inline void ip_vs_app_put(struct ip_vs_app *app)
57 {
58 module_put(app->module);
59 }
60
ip_vs_app_inc_destroy(struct ip_vs_app * inc)61 static void ip_vs_app_inc_destroy(struct ip_vs_app *inc)
62 {
63 kfree(inc->timeout_table);
64 kfree(inc);
65 }
66
ip_vs_app_inc_rcu_free(struct rcu_head * head)67 static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
68 {
69 struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head);
70
71 ip_vs_app_inc_destroy(inc);
72 }
73
74 /*
75 * Allocate/initialize app incarnation and register it in proto apps.
76 */
77 static int
ip_vs_app_inc_new(struct netns_ipvs * ipvs,struct ip_vs_app * app,__u16 proto,__u16 port)78 ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
79 __u16 port)
80 {
81 struct ip_vs_protocol *pp;
82 struct ip_vs_app *inc;
83 int ret;
84
85 if (!(pp = ip_vs_proto_get(proto)))
86 return -EPROTONOSUPPORT;
87
88 if (!pp->unregister_app)
89 return -EOPNOTSUPP;
90
91 inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
92 if (!inc)
93 return -ENOMEM;
94 INIT_LIST_HEAD(&inc->p_list);
95 INIT_LIST_HEAD(&inc->incs_list);
96 inc->app = app;
97 inc->port = htons(port);
98 atomic_set(&inc->usecnt, 0);
99
100 if (app->timeouts) {
101 inc->timeout_table =
102 ip_vs_create_timeout_table(app->timeouts,
103 app->timeouts_size);
104 if (!inc->timeout_table) {
105 ret = -ENOMEM;
106 goto out;
107 }
108 }
109
110 ret = pp->register_app(ipvs, inc);
111 if (ret)
112 goto out;
113
114 list_add(&inc->a_list, &app->incs_list);
115 IP_VS_DBG(9, "%s App %s:%u registered\n",
116 pp->name, inc->name, ntohs(inc->port));
117
118 return 0;
119
120 out:
121 ip_vs_app_inc_destroy(inc);
122 return ret;
123 }
124
125
126 /*
127 * Release app incarnation
128 */
129 static void
ip_vs_app_inc_release(struct netns_ipvs * ipvs,struct ip_vs_app * inc)130 ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
131 {
132 struct ip_vs_protocol *pp;
133
134 if (!(pp = ip_vs_proto_get(inc->protocol)))
135 return;
136
137 if (pp->unregister_app)
138 pp->unregister_app(ipvs, inc);
139
140 IP_VS_DBG(9, "%s App %s:%u unregistered\n",
141 pp->name, inc->name, ntohs(inc->port));
142
143 list_del(&inc->a_list);
144
145 call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);
146 }
147
148
149 /*
150 * Get reference to app inc (only called from softirq)
151 *
152 */
ip_vs_app_inc_get(struct ip_vs_app * inc)153 int ip_vs_app_inc_get(struct ip_vs_app *inc)
154 {
155 int result;
156
157 result = ip_vs_app_get(inc->app);
158 if (result)
159 atomic_inc(&inc->usecnt);
160 return result;
161 }
162
163
164 /*
165 * Put the app inc (only called from timer or net softirq)
166 */
ip_vs_app_inc_put(struct ip_vs_app * inc)167 void ip_vs_app_inc_put(struct ip_vs_app *inc)
168 {
169 atomic_dec(&inc->usecnt);
170 ip_vs_app_put(inc->app);
171 }
172
173
174 /*
175 * Register an application incarnation in protocol applications
176 */
177 int
register_ip_vs_app_inc(struct netns_ipvs * ipvs,struct ip_vs_app * app,__u16 proto,__u16 port)178 register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
179 __u16 port)
180 {
181 int result;
182
183 mutex_lock(&__ip_vs_app_mutex);
184
185 result = ip_vs_app_inc_new(ipvs, app, proto, port);
186
187 mutex_unlock(&__ip_vs_app_mutex);
188
189 return result;
190 }
191
192
193 /* Register application for netns */
register_ip_vs_app(struct netns_ipvs * ipvs,struct ip_vs_app * app)194 struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
195 {
196 struct ip_vs_app *a;
197 int err = 0;
198
199 mutex_lock(&__ip_vs_app_mutex);
200
201 list_for_each_entry(a, &ipvs->app_list, a_list) {
202 if (!strcmp(app->name, a->name)) {
203 err = -EEXIST;
204 goto out_unlock;
205 }
206 }
207 a = kmemdup(app, sizeof(*app), GFP_KERNEL);
208 if (!a) {
209 err = -ENOMEM;
210 goto out_unlock;
211 }
212 INIT_LIST_HEAD(&a->incs_list);
213 list_add(&a->a_list, &ipvs->app_list);
214 /* increase the module use count */
215 ip_vs_use_count_inc();
216
217 out_unlock:
218 mutex_unlock(&__ip_vs_app_mutex);
219
220 return err ? ERR_PTR(err) : a;
221 }
222
223
224 /*
225 * ip_vs_app unregistration routine
226 * We are sure there are no app incarnations attached to services
227 * Caller should use synchronize_rcu() or rcu_barrier()
228 */
unregister_ip_vs_app(struct netns_ipvs * ipvs,struct ip_vs_app * app)229 void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
230 {
231 struct ip_vs_app *a, *anxt, *inc, *nxt;
232
233 mutex_lock(&__ip_vs_app_mutex);
234
235 list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
236 if (app && strcmp(app->name, a->name))
237 continue;
238 list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
239 ip_vs_app_inc_release(ipvs, inc);
240 }
241
242 list_del(&a->a_list);
243 kfree(a);
244
245 /* decrease the module use count */
246 ip_vs_use_count_dec();
247 }
248
249 mutex_unlock(&__ip_vs_app_mutex);
250 }
251
252
253 /*
254 * Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
255 */
ip_vs_bind_app(struct ip_vs_conn * cp,struct ip_vs_protocol * pp)256 int ip_vs_bind_app(struct ip_vs_conn *cp,
257 struct ip_vs_protocol *pp)
258 {
259 return pp->app_conn_bind(cp);
260 }
261
262
263 /*
264 * Unbind cp from application incarnation (called by cp destructor)
265 */
ip_vs_unbind_app(struct ip_vs_conn * cp)266 void ip_vs_unbind_app(struct ip_vs_conn *cp)
267 {
268 struct ip_vs_app *inc = cp->app;
269
270 if (!inc)
271 return;
272
273 if (inc->unbind_conn)
274 inc->unbind_conn(inc, cp);
275 if (inc->done_conn)
276 inc->done_conn(inc, cp);
277 ip_vs_app_inc_put(inc);
278 cp->app = NULL;
279 }
280
281
282 /*
283 * Fixes th->seq based on ip_vs_seq info.
284 */
vs_fix_seq(const struct ip_vs_seq * vseq,struct tcphdr * th)285 static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
286 {
287 __u32 seq = ntohl(th->seq);
288
289 /*
290 * Adjust seq with delta-offset for all packets after
291 * the most recent resized pkt seq and with previous_delta offset
292 * for all packets before most recent resized pkt seq.
293 */
294 if (vseq->delta || vseq->previous_delta) {
295 if(after(seq, vseq->init_seq)) {
296 th->seq = htonl(seq + vseq->delta);
297 IP_VS_DBG(9, "%s(): added delta (%d) to seq\n",
298 __func__, vseq->delta);
299 } else {
300 th->seq = htonl(seq + vseq->previous_delta);
301 IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n",
302 __func__, vseq->previous_delta);
303 }
304 }
305 }
306
307
308 /*
309 * Fixes th->ack_seq based on ip_vs_seq info.
310 */
311 static inline void
vs_fix_ack_seq(const struct ip_vs_seq * vseq,struct tcphdr * th)312 vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
313 {
314 __u32 ack_seq = ntohl(th->ack_seq);
315
316 /*
317 * Adjust ack_seq with delta-offset for
318 * the packets AFTER most recent resized pkt has caused a shift
319 * for packets before most recent resized pkt, use previous_delta
320 */
321 if (vseq->delta || vseq->previous_delta) {
322 /* since ack_seq is the number of octet that is expected
323 to receive next, so compare it with init_seq+delta */
324 if(after(ack_seq, vseq->init_seq+vseq->delta)) {
325 th->ack_seq = htonl(ack_seq - vseq->delta);
326 IP_VS_DBG(9, "%s(): subtracted delta "
327 "(%d) from ack_seq\n", __func__, vseq->delta);
328
329 } else {
330 th->ack_seq = htonl(ack_seq - vseq->previous_delta);
331 IP_VS_DBG(9, "%s(): subtracted "
332 "previous_delta (%d) from ack_seq\n",
333 __func__, vseq->previous_delta);
334 }
335 }
336 }
337
338
339 /*
340 * Updates ip_vs_seq if pkt has been resized
341 * Assumes already checked proto==IPPROTO_TCP and diff!=0.
342 */
vs_seq_update(struct ip_vs_conn * cp,struct ip_vs_seq * vseq,unsigned int flag,__u32 seq,int diff)343 static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
344 unsigned int flag, __u32 seq, int diff)
345 {
346 /* spinlock is to keep updating cp->flags atomic */
347 spin_lock_bh(&cp->lock);
348 if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
349 vseq->previous_delta = vseq->delta;
350 vseq->delta += diff;
351 vseq->init_seq = seq;
352 cp->flags |= flag;
353 }
354 spin_unlock_bh(&cp->lock);
355 }
356
app_tcp_pkt_out(struct ip_vs_conn * cp,struct sk_buff * skb,struct ip_vs_app * app,struct ip_vs_iphdr * ipvsh)357 static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
358 struct ip_vs_app *app,
359 struct ip_vs_iphdr *ipvsh)
360 {
361 int diff;
362 const unsigned int tcp_offset = ip_hdrlen(skb);
363 struct tcphdr *th;
364 __u32 seq;
365
366 if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
367 return 0;
368
369 th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
370
371 /*
372 * Remember seq number in case this pkt gets resized
373 */
374 seq = ntohl(th->seq);
375
376 /*
377 * Fix seq stuff if flagged as so.
378 */
379 if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
380 vs_fix_seq(&cp->out_seq, th);
381 if (cp->flags & IP_VS_CONN_F_IN_SEQ)
382 vs_fix_ack_seq(&cp->in_seq, th);
383
384 /*
385 * Call private output hook function
386 */
387 if (app->pkt_out == NULL)
388 return 1;
389
390 if (!app->pkt_out(app, cp, skb, &diff, ipvsh))
391 return 0;
392
393 /*
394 * Update ip_vs seq stuff if len has changed.
395 */
396 if (diff != 0)
397 vs_seq_update(cp, &cp->out_seq,
398 IP_VS_CONN_F_OUT_SEQ, seq, diff);
399
400 return 1;
401 }
402
403 /*
404 * Output pkt hook. Will call bound ip_vs_app specific function
405 * called by ipvs packet handler, assumes previously checked cp!=NULL
406 * returns false if it can't handle packet (oom)
407 */
ip_vs_app_pkt_out(struct ip_vs_conn * cp,struct sk_buff * skb,struct ip_vs_iphdr * ipvsh)408 int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
409 struct ip_vs_iphdr *ipvsh)
410 {
411 struct ip_vs_app *app;
412
413 /*
414 * check if application module is bound to
415 * this ip_vs_conn.
416 */
417 if ((app = cp->app) == NULL)
418 return 1;
419
420 /* TCP is complicated */
421 if (cp->protocol == IPPROTO_TCP)
422 return app_tcp_pkt_out(cp, skb, app, ipvsh);
423
424 /*
425 * Call private output hook function
426 */
427 if (app->pkt_out == NULL)
428 return 1;
429
430 return app->pkt_out(app, cp, skb, NULL, ipvsh);
431 }
432
433
app_tcp_pkt_in(struct ip_vs_conn * cp,struct sk_buff * skb,struct ip_vs_app * app,struct ip_vs_iphdr * ipvsh)434 static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
435 struct ip_vs_app *app,
436 struct ip_vs_iphdr *ipvsh)
437 {
438 int diff;
439 const unsigned int tcp_offset = ip_hdrlen(skb);
440 struct tcphdr *th;
441 __u32 seq;
442
443 if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
444 return 0;
445
446 th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
447
448 /*
449 * Remember seq number in case this pkt gets resized
450 */
451 seq = ntohl(th->seq);
452
453 /*
454 * Fix seq stuff if flagged as so.
455 */
456 if (cp->flags & IP_VS_CONN_F_IN_SEQ)
457 vs_fix_seq(&cp->in_seq, th);
458 if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
459 vs_fix_ack_seq(&cp->out_seq, th);
460
461 /*
462 * Call private input hook function
463 */
464 if (app->pkt_in == NULL)
465 return 1;
466
467 if (!app->pkt_in(app, cp, skb, &diff, ipvsh))
468 return 0;
469
470 /*
471 * Update ip_vs seq stuff if len has changed.
472 */
473 if (diff != 0)
474 vs_seq_update(cp, &cp->in_seq,
475 IP_VS_CONN_F_IN_SEQ, seq, diff);
476
477 return 1;
478 }
479
480 /*
481 * Input pkt hook. Will call bound ip_vs_app specific function
482 * called by ipvs packet handler, assumes previously checked cp!=NULL.
483 * returns false if can't handle packet (oom).
484 */
ip_vs_app_pkt_in(struct ip_vs_conn * cp,struct sk_buff * skb,struct ip_vs_iphdr * ipvsh)485 int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
486 struct ip_vs_iphdr *ipvsh)
487 {
488 struct ip_vs_app *app;
489
490 /*
491 * check if application module is bound to
492 * this ip_vs_conn.
493 */
494 if ((app = cp->app) == NULL)
495 return 1;
496
497 /* TCP is complicated */
498 if (cp->protocol == IPPROTO_TCP)
499 return app_tcp_pkt_in(cp, skb, app, ipvsh);
500
501 /*
502 * Call private input hook function
503 */
504 if (app->pkt_in == NULL)
505 return 1;
506
507 return app->pkt_in(app, cp, skb, NULL, ipvsh);
508 }
509
510
511 #ifdef CONFIG_PROC_FS
512 /*
513 * /proc/net/ip_vs_app entry function
514 */
515
/*
 *	Return the incarnation at zero-based position @pos when walking all
 *	applications of @ipvs and, within each, all of its incarnations.
 *	Returns NULL when fewer incarnations exist.
 */
static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
{
	struct ip_vs_app *app, *inc;
	loff_t remaining = pos;

	list_for_each_entry(app, &ipvs->app_list, a_list) {
		list_for_each_entry(inc, &app->incs_list, a_list) {
			if (remaining == 0)
				return inc;
			remaining--;
		}
	}

	return NULL;
}
529
/*
 *	seq_file start: take __ip_vs_app_mutex (released in the stop
 *	callback) and position the iterator. Position 0 is the header
 *	token, position N maps to incarnation N - 1.
 */
static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));

	mutex_lock(&__ip_vs_app_mutex);

	if (*pos == 0)
		return SEQ_START_TOKEN;

	return ip_vs_app_idx(ipvs, *pos - 1);
}
539
/*
 *	seq_file next: advance *pos and return the incarnation following @v.
 *	After the header token this is the very first incarnation; otherwise
 *	the next one of the same application or, when that list is
 *	exhausted, the first incarnation of the next application that has
 *	any. Returns NULL at the end.
 */
static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));
	struct ip_vs_app *inc, *app;
	struct list_head *e;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_app_idx(ipvs, 0);

	inc = v;
	app = inc->app;

	e = inc->a_list.next;
	if (e != &app->incs_list)
		return list_entry(e, struct ip_vs_app, a_list);

	/* go on to next application */
	for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
		app = list_entry(e, struct ip_vs_app, a_list);
		if (!list_empty(&app->incs_list))
			return list_first_entry(&app->incs_list,
						struct ip_vs_app, a_list);
	}
	return NULL;
}
566
ip_vs_app_seq_stop(struct seq_file * seq,void * v)567 static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
568 {
569 mutex_unlock(&__ip_vs_app_mutex);
570 }
571
ip_vs_app_seq_show(struct seq_file * seq,void * v)572 static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
573 {
574 if (v == SEQ_START_TOKEN)
575 seq_puts(seq, "prot port usecnt name\n");
576 else {
577 const struct ip_vs_app *inc = v;
578
579 seq_printf(seq, "%-3s %-7u %-6d %-17s\n",
580 ip_vs_proto_name(inc->protocol),
581 ntohs(inc->port),
582 atomic_read(&inc->usecnt),
583 inc->name);
584 }
585 return 0;
586 }
587
588 static const struct seq_operations ip_vs_app_seq_ops = {
589 .start = ip_vs_app_seq_start,
590 .next = ip_vs_app_seq_next,
591 .stop = ip_vs_app_seq_stop,
592 .show = ip_vs_app_seq_show,
593 };
594 #endif
595
ip_vs_app_net_init(struct netns_ipvs * ipvs)596 int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
597 {
598 INIT_LIST_HEAD(&ipvs->app_list);
599 proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops,
600 sizeof(struct seq_net_private));
601 return 0;
602 }
603
ip_vs_app_net_cleanup(struct netns_ipvs * ipvs)604 void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
605 {
606 unregister_ip_vs_app(ipvs, NULL /* all */);
607 remove_proc_entry("ip_vs_app", ipvs->net->proc_net);
608 }
609