/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *	Rani Assaf <rani@magic.metawire.com>		:980802: JIFFIES and CPU clock sources are repaired.
 *	Eduardo J. Blanco <ejbs@netlabs.com.uy>		:990222: kmod support
 *	Jamal Hadi Salim <hadi@nortelnetworks.com>	:990601: ingress support
 */
#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/kmod.h>

#include <net/pkt_sched.h>

#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event);
/*

   This file consists of two interrelated parts:

   1. queueing disciplines manager frontend.
   2. traffic classes manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box,
   which is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in the order and at the times
   determined by the algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to perform some
   sanity checks and the part of the work that is common to all
   qdiscs, and to provide rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.


   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it just means that the
   discipline does not want to send anything at this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not
   the real packet queue, but q->q.qlen must nevertheless be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If the packet (this one or another one) was dropped, it returns
   a non-zero error code:
   NET_XMIT_DROP	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.
   NET_XMIT_POLICED	- dropped by the policer.
     Expected action: back off or report an error to real-time applications.

   Auxiliary routines:

   ---requeue

   requeues a previously dequeued packet. It is used for non-standard or
   just buggy devices, which can defer output even when dev->tbusy == 0.

   ---reset

   returns the qdisc to its initial state: purges all buffers and clears
   all timers, counters (except statistics), etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys the resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */
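/*
 * Illustrative sketch (an editor's addition, not part of the original
 * file): roughly how a trivial classless "queue" discipline could fill
 * in the two major routines described above and register itself.  The
 * name "example" and its functions are hypothetical; the field layout
 * follows the 2.4-era struct Qdisc_ops.  Kept under #if 0 so it is
 * never compiled.
 */
#if 0
static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	__skb_queue_tail(&sch->q, skb);		/* sch->q.qlen stays valid */
	sch->stats.packets++;
	sch->stats.bytes += skb->len;
	return 0;				/* 0 == enqueued successfully */
}

static struct sk_buff *example_dequeue(struct Qdisc *sch)
{
	/* NULL only means "nothing to send right now"; the queue is
	   really empty only when sch->q.qlen == 0. */
	return __skb_dequeue(&sch->q);
}

static struct Qdisc_ops example_qdisc_ops = {
	NULL,			/* next: managed by the registration list */
	NULL,			/* cl_ops: no classes, this is a "queue" */
	"example",		/* id */
	0,			/* priv_size */
	example_enqueue,
	example_dequeue,
	/* requeue and the remaining ops are left NULL; register_qdisc()
	   substitutes noop defaults for missing enqueue/requeue/dequeue. */
};

/* A module would then call register_qdisc(&example_qdisc_ops); */
#endif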
/* Protects the list of registered TC modules. It is a pure SMP lock. */
static rwlock_t qdisc_mod_lock = RW_LOCK_UNLOCKED;


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base = NULL;

/* Register/unregister queueing discipline */
int register_qdisc(struct Qdisc_ops *qops)
	struct Qdisc_ops *q, **qp;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next) {
		if (strcmp(qops->id, q->id) == 0) {
			write_unlock(&qdisc_mod_lock);

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->requeue == NULL)
		qops->requeue = noop_qdisc_ops.requeue;
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	write_unlock(&qdisc_mod_lock);

int unregister_qdisc(struct Qdisc_ops *qops)
	struct Qdisc_ops *q, **qp;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)

	write_unlock(&qdisc_mod_lock);
/* We know the handle. Find the qdisc among all qdiscs attached to the device
   (the root qdisc, all its children, children of children, etc.)
 */

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
	for (q = dev->qdisc_list; q; q = q->next) {
		if (q->handle == handle)

struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
	struct Qdisc_class_ops *cops = p->ops->cl_ops;

	cl = cops->get(p, classid);
	leaf = cops->leaf(p, cl);

/* Find queueing discipline by name */

struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
	struct Qdisc_ops *q = NULL;

	read_lock(&qdisc_mod_lock);
	for (q = qdisc_base; q; q = q->next) {
		if (rtattr_strcmp(kind, q->id) == 0)
	read_unlock(&qdisc_mod_lock);
static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
	struct qdisc_rate_table *rtab;

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);

	memcpy(rtab->data, RTA_DATA(tab), 1024);
	rtab->next = qdisc_rtab_list;
	qdisc_rtab_list = rtab;
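/*
 * Illustrative note (an editor's addition, not in the original file):
 * the 1024-byte payload checked above holds 256 precomputed u32
 * transmission times, indexed by packet size shifted by cell_log.
 * A qdisc typically looks up the cost of an skb roughly as
 *
 *	u32 t = rtab->data[skb->len >> rtab->rate.cell_log];
 */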
void qdisc_put_rtab(struct qdisc_rate_table *tab)
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)

	for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
/* Allocate a unique handle from the space managed by the kernel */

u32 qdisc_alloc_handle(struct net_device *dev)
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
	} while (qdisc_lookup(dev, autohandle) && --i > 0);

	return i>0 ? autohandle : 0;
/* Attach toplevel qdisc to device dev */

static struct Qdisc *
dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
	struct Qdisc *oqdisc;

	if (dev->flags & IFF_UP)

	write_lock(&qdisc_tree_lock);
	spin_lock_bh(&dev->queue_lock);
	if (qdisc && qdisc->flags&TCQ_F_INGRES) {
		oqdisc = dev->qdisc_ingress;
		/* Prune old scheduler */
		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
			dev->qdisc_ingress = NULL;

		dev->qdisc_ingress = qdisc;

		oqdisc = dev->qdisc_sleeping;

		/* Prune old scheduler */
		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)

		/* ... and graft new one */
		dev->qdisc_sleeping = qdisc;
		dev->qdisc = &noop_qdisc;

	spin_unlock_bh(&dev->queue_lock);
	write_unlock(&qdisc_tree_lock);

	if (dev->flags & IFF_UP)
/* Graft qdisc "new" to class "classid" of qdisc "parent", or
   to device "dev".

   The old qdisc is not destroyed but returned in *old.
 */

int qdisc_graft(struct net_device *dev, struct Qdisc *parent, u32 classid,
		struct Qdisc *new, struct Qdisc **old)
	struct Qdisc *q = *old;

	if (parent == NULL) {
		if (q && q->flags&TCQ_F_INGRES) {
			*old = dev_graft_qdisc(dev, q);

		*old = dev_graft_qdisc(dev, new);

		struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		unsigned long cl = cops->get(parent, classid);

		err = cops->graft(parent, cl, new, old);
		cops->put(parent, cl);
/*
   Allocate and initialize a new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
	struct rtattr *kind = tca[TCA_KIND-1];
	struct Qdisc *sch = NULL;
	struct Qdisc_ops *ops;

	ops = qdisc_lookup_ops(kind);

	if (ops==NULL && tca[TCA_KIND-1] != NULL) {
		char module_name[4 + IFNAMSIZ + 1];

		if (RTA_PAYLOAD(kind) <= IFNAMSIZ) {
			sprintf(module_name, "sch_%s", (char*)RTA_DATA(kind));
			request_module(module_name);
			ops = qdisc_lookup_ops(kind);

	size = sizeof(*sch) + ops->priv_size;

	sch = kmalloc(size, GFP_KERNEL);

	/* Grrr... Resolve race condition with module unload */

	if (ops != qdisc_lookup_ops(kind))

	memset(sch, 0, size);

	skb_queue_head_init(&sch->q);

	if (handle == TC_H_INGRESS)
		sch->flags |= TCQ_F_INGRES;

	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	atomic_set(&sch->refcnt, 1);
	sch->stats.lock = &dev->queue_lock;

		handle = qdisc_alloc_handle(dev);

	if (handle == TC_H_INGRESS)
		sch->handle = TC_H_MAKE(TC_H_INGRESS, 0);
	else
		sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
		write_lock(&qdisc_tree_lock);
		sch->next = dev->qdisc_list;
		dev->qdisc_list = sch;
		write_unlock(&qdisc_tree_lock);
#ifdef CONFIG_NET_ESTIMATOR
		qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]);
static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
	if (tca[TCA_OPTIONS-1]) {

		if (sch->ops->change == NULL)

		err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);

#ifdef CONFIG_NET_ESTIMATOR
	if (tca[TCA_RATE-1]) {
		qdisc_kill_estimator(&sch->stats);
		qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]);
struct check_loop_arg
	struct qdisc_walker	w;

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;

check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
	struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);

		if (leaf == arg->p || arg->depth > 7)

		return check_loop(leaf, arg->p, arg->depth + 1);
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct net_device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;

	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)

	if (clid != TC_H_ROOT) {
		if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
			if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
			q = qdisc_leaf(p, clid);
		} else { /* ingress */
			q = dev->qdisc_ingress;

		q = dev->qdisc_sleeping;

	if (tcm->tcm_handle && q->handle != tcm->tcm_handle)

		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)

	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))

	if (n->nlmsg_type == RTM_DELQDISC) {

		if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)

			qdisc_notify(skb, n, clid, q, NULL);
			spin_lock_bh(&dev->queue_lock);
			spin_unlock_bh(&dev->queue_lock);

		qdisc_notify(skb, n, clid, NULL, q);
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct net_device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;

	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)

	if (clid != TC_H_ROOT) {
		if (clid != TC_H_INGRESS) {
			if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
			q = qdisc_leaf(p, clid);
		} else { /* ingress */
			q = dev->qdisc_ingress;

		q = dev->qdisc_sleeping;

		/* It may be the default qdisc; ignore it. */
		if (q && q->handle == 0)

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
				if (TC_H_MIN(tcm->tcm_handle))
				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
				if (n->nlmsg_flags&NLM_F_EXCL)
				if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
				    (p && check_loop(q, p, 0)))
				atomic_inc(&q->refcnt);
				/* This magic test requires explanation.
				 *
				 *   We know that some child q is already
				 *   attached to this parent and we have a choice:
				 *   either to change it or to create/graft a new one.
				 *
				 *   1. We are allowed to create/graft only
				 *   if CREATE and REPLACE flags are set.
				 *
				 *   2. If EXCL is set, the requestor wanted to say
				 *   that the qdisc tcm_handle is not expected
				 *   to exist, so we choose create/graft too.
				 *
				 *   3. The last case is when no flags are set.
				 *   Alas, it is a sort of hole in the API; we
				 *   cannot decide what to do unambiguously.
				 *   For now we select create/graft if the
				 *   user gave a KIND which does not match the existing one.
				 */
				if ((n->nlmsg_flags&NLM_F_CREATE) &&
				    (n->nlmsg_flags&NLM_F_REPLACE) &&
				    ((n->nlmsg_flags&NLM_F_EXCL) ||
				      rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
		if (!tcm->tcm_handle)

		q = qdisc_lookup(dev, tcm->tcm_handle);

	/* Change qdisc parameters */

	if (n->nlmsg_flags&NLM_F_EXCL)
	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))

	err = qdisc_change(q, tca);
		qdisc_notify(skb, n, clid, NULL, q);

	if (!(n->nlmsg_flags&NLM_F_CREATE))

	if (clid == TC_H_INGRESS)
		q = qdisc_create(dev, tcm->tcm_parent, tca, &err);
	else
		q = qdisc_create(dev, tcm->tcm_handle, tca, &err);

		struct Qdisc *old_q = NULL;

		err = qdisc_graft(dev, p, clid, q, &old_q);

			spin_lock_bh(&dev->queue_lock);
			spin_unlock_bh(&dev->queue_lock);

		qdisc_notify(skb, n, clid, old_q, q);

			spin_lock_bh(&dev->queue_lock);
			qdisc_destroy(old_q);
			spin_unlock_bh(&dev->queue_lock);
int qdisc_copy_stats(struct sk_buff *skb, struct tc_stats *st)
	spin_lock_bh(st->lock);
	/* Dumps every tc_stats field that precedes the trailing lock pointer. */
	RTA_PUT(skb, TCA_STATS, (char*)&st->lock - (char*)st, st);
	spin_unlock_bh(st->lock);

rtattr_failure:
	spin_unlock_bh(st->lock);
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 pid, u32 seq, unsigned flags, int event)
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;

	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
	nlh->nlmsg_flags = flags;
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
	if (q->ops->dump && q->ops->dump(q, skb) < 0)

	q->stats.qlen = q->q.qlen;
	if (qdisc_copy_stats(skb, &q->stats))

	nlh->nlmsg_len = skb->tail - b;

	skb_trim(skb, b - skb->data);
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			u32 clid, struct Qdisc *old, struct Qdisc *new)
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);

	if (old && old->handle) {
		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)

	if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)

	return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
	struct net_device *dev;

	s_q_idx = q_idx = cb->args[1];
	read_lock(&dev_base_lock);
	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
		read_lock(&qdisc_tree_lock);
		for (q = dev->qdisc_list, q_idx = 0; q;
		     q = q->next, q_idx++) {
			if (tc_fill_qdisc(skb, q, 0, NETLINK_CB(cb->skb).pid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
				read_unlock(&qdisc_tree_lock);

		read_unlock(&qdisc_tree_lock);

	read_unlock(&dev_base_lock);


/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/


static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct net_device *dev;
	struct Qdisc *q = NULL;
	struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 pid = tcm->tcm_parent;
	u32 clid = tcm->tcm_handle;
	u32 qid = TC_H_MAJ(clid);

	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
	/*
	   parent == TC_H_UNSPEC	- unspecified parent.
	   parent == TC_H_ROOT		- class is root, which has no parent.
	   parent == X:0		- parent is root class.
	   parent == X:Y		- parent is a node in hierarchy.
	   parent == 0:Y		- parent is X:Y, where X:0 is qdisc.

	   handle == 0:0		- generate handle from kernel pool.
	   handle == 0:Y		- class is X:Y, where X:0 is qdisc.
	   handle == X:Y		- clear.
	   handle == X:0		- root class.

	   (An illustrative mapping follows this comment.)
	 */
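	/*
	 * Illustrative example (an editor's addition, not in the original
	 * file): a request like "tc class add dev eth0 parent 1: classid
	 * 1:10 ..." arrives here with tcm_parent == 1:0 and
	 * tcm_handle == 1:10, so qid resolves to the qdisc handle 1:0 and
	 * the class being created or changed is 1:10.
	 */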
	/* Step 1. Determine qdisc handle X:0 */

	if (pid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(pid);

		/* If both majors are known, they must be identical. */

			qid = dev->qdisc_sleeping->handle;

		/* Now qid is a genuine qdisc handle consistent with
		   both parent and child.

		   TC_H_MAJ(pid) may still be unspecified; complete it now.
		 */
		pid = TC_H_MAKE(qid, pid);

		qid = dev->qdisc_sleeping->handle;

	/* OK. Locate the qdisc. */
	if ((q = qdisc_lookup(dev, qid)) == NULL)

	/* And check that it supports classes. */
	cops = q->ops->cl_ops;

	/* Now try to get the class. */

	if (pid == TC_H_ROOT)

	clid = TC_H_MAKE(qid, clid);

	cl = cops->get(q, clid);

		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))

	switch (n->nlmsg_type) {

		if (n->nlmsg_flags&NLM_F_EXCL)

		err = cops->delete(q, cl);
			tclass_notify(skb, n, q, cl, RTM_DELTCLASS);

		err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);

	err = cops->change(q, clid, pid, tca, &new_cl);
		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  u32 pid, u32 seq, unsigned flags, int event)
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;

	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
	nlh->nlmsg_flags = flags;
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
	if (q->ops->cl_ops->dump && q->ops->cl_ops->dump(q, cl, skb, tcm) < 0)

	nlh->nlmsg_len = skb->tail - b;

	skb_trim(skb, b - skb->data);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event)
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);

	if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {

	return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
struct qdisc_dump_args
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
	struct net_device *dev;
	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
	struct qdisc_dump_args arg;

	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))

	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)

	read_lock(&qdisc_tree_lock);
	for (q=dev->qdisc_list, t=0; q; q = q->next, t++) {
		if (t < s_t) continue;
		if (!q->ops->cl_ops) continue;
		if (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle)

			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));

		arg.w.fn = qdisc_class_dump;

		arg.w.skip = cb->args[1];

		q->ops->cl_ops->walk(q, &arg.w);
		cb->args[1] = arg.w.count;

	read_unlock(&qdisc_tree_lock);
int psched_us_per_tick = 1;
int psched_tick_per_us = 1;

#ifdef CONFIG_PROC_FS
static int psched_read_proc(char *buffer, char **start, off_t offset,
			    int length, int *eof, void *data)
	len = sprintf(buffer, "%08x %08x %08x %08x\n",
		      psched_tick_per_us, psched_us_per_tick,

	*start = buffer + offset;
#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
int psched_tod_diff(int delta_sec, int bound)
	if (bound <= 1000000 || delta_sec > (0x7FFFFFFF/1000000)-1)

	delta = delta_sec * 1000000;

psched_time_t psched_time_base;

#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
psched_tdiff_t psched_clock_per_hz;
int psched_clock_scale;
#endif

#ifdef PSCHED_WATCHER
PSCHED_WATCHER psched_time_mark;

static void psched_tick(unsigned long);

static struct timer_list psched_timer =
	{ function: psched_tick };

static void psched_tick(unsigned long dummy)
#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
	psched_time_t dummy_stamp;
	PSCHED_GET_TIME(dummy_stamp);
	/* It is OK up to a 4GHz cpu */
	psched_timer.expires = jiffies + 1*HZ;
#else
	unsigned long now = jiffies;
	psched_time_base += ((u64)(now-psched_time_mark))<<PSCHED_JSCALE;
	psched_time_mark = now;
	psched_timer.expires = now + 60*60*HZ;
#endif
	add_timer(&psched_timer);
#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
int __init psched_calibrate_clock(void)
	psched_time_t stamp, stamp1;
	struct timeval tv, tv1;
	psched_tdiff_t delay;

#ifdef PSCHED_WATCHER

	stop = jiffies + HZ/10;
	PSCHED_GET_TIME(stamp);
	do_gettimeofday(&tv);
	while (time_before(jiffies, stop)) {

	PSCHED_GET_TIME(stamp1);
	do_gettimeofday(&tv1);

	delay = PSCHED_TDIFF(stamp1, stamp);
	rdelay = tv1.tv_usec - tv.tv_usec;
	rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;

	psched_tick_per_us = delay;
	while ((delay>>=1) != 0)
		psched_clock_scale++;
	psched_us_per_tick = 1<<psched_clock_scale;
	psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
int __init pktsched_init(void)
	struct rtnetlink_link *link_p;

#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
	if (psched_calibrate_clock() < 0)
#elif PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES
	psched_tick_per_us = HZ<<PSCHED_JSCALE;
	psched_us_per_tick = 1000000;
#ifdef PSCHED_WATCHER

	link_p = rtnetlink_links[PF_UNSPEC];

	/* Set up the rtnetlink links. It is done here to avoid
	   exporting a large number of public symbols.
	 */

	link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
	link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
	link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
	link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
	link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
	link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
	link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
	link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;

#define INIT_QDISC(name) { \
	extern struct Qdisc_ops name##_qdisc_ops; \
	register_qdisc(& name##_qdisc_ops); \
#ifdef CONFIG_NET_SCH_CBQ
#ifdef CONFIG_NET_SCH_HTB
#ifdef CONFIG_NET_SCH_CSZ
#ifdef CONFIG_NET_SCH_HPFQ
#ifdef CONFIG_NET_SCH_HFSC
#ifdef CONFIG_NET_SCH_RED
#ifdef CONFIG_NET_SCH_GRED
#ifdef CONFIG_NET_SCH_INGRESS
	INIT_QDISC(ingress);
#ifdef CONFIG_NET_SCH_DSMARK
#ifdef CONFIG_NET_SCH_SFQ
#ifdef CONFIG_NET_SCH_TBF
#ifdef CONFIG_NET_SCH_TEQL
#ifdef CONFIG_NET_SCH_PRIO
#ifdef CONFIG_NET_SCH_ATM
#ifdef CONFIG_NET_CLS
#ifdef CONFIG_PROC_FS
	create_proc_read_entry("net/psched", 0, 0, psched_read_proc, NULL);