2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/config.h>
14 #include <linux/module.h>
15 #include <asm/uaccess.h>
16 #include <asm/system.h>
17 #include <asm/bitops.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/if_ether.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/etherdevice.h>
32 #include <linux/notifier.h>
34 #include <net/route.h>
35 #include <linux/skbuff.h>
37 #include <net/pkt_sched.h>
40 /* Class-Based Queueing (CBQ) algorithm.
41 =======================================
43 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
44 Management Models for Packet Networks",
45 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
47 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
49 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
52 [4] Sally Floyd and Michael Speer, "Experimental Results
53 for Class-Based Queueing", 1998, not published.
55 -----------------------------------------------------------------------
57 Algorithm skeleton was taken from NS simulator cbq.cc.
58 If someone wants to check this code against the LBL version,
59 he should take into account that ONLY the skeleton was borrowed,
60 the implementation is different. Particularly:
62 --- The WRR algorithm is different. Our version looks more
63 reasonable (I hope) and works when quanta are allowed to be
64 less than MTU, which is always the case when real time classes
65 have small rates. Note that the statement of [3] is
66 incomplete, delay may actually be estimated even if class
67 per-round allotment is less than MTU. Namely, if per-round
68 allotment is W*r_i, and r_1+...+r_k = r < 1
70 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
72 In the worst case we have IntServ estimate with D = W*r+k*MTU
73 and C = MTU*r. The proof (if correct at all) is trivial.
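    (Illustrative numbers, not from the original text: with MTU = 1500 bytes,
    B = 1.25e6 bytes/sec (a 10 Mbit/s link), k = 2 real-time classes with
    r_1 = r_2 = 0.05 (so r = 0.1) and W = 3000 bytes, the per-round allotment
    W*r_i = 150 bytes is well below the MTU, and the bound evaluates to
    delay_i <= (10*300 + 300 + 2*1500)/1.25e6 sec, i.e. about 5 ms,
    assuming B is the link bandwidth in bytes/sec and [x] denotes rounding up.)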
76 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
77 interpret some places, which look like wrong translations
78 from NS. Anyone is advised to find these differences
79 and explain to me why I am wrong 8).
81 --- Linux has no EOI event, so that we cannot estimate true class
82 idle time. Workaround is to consider the next dequeue event
83 as sign that previous packet is finished. This is wrong because of
84 internal device queueing, but on a permanently loaded link it is true.
85 Moreover, combined with clock integrator, this scheme looks
86 very close to an ideal solution. */
88 struct cbq_sched_data;
93 struct cbq_class *next; /* hash table link */
94 struct cbq_class *next_alive; /* next class with backlog in this priority band */
98 unsigned char priority; /* class priority */
99 unsigned char priority2; /* priority to be used after overlimit */
100 unsigned char ewma_log; /* time constant for idle time calculation */
101 unsigned char ovl_strategy;
102 #ifdef CONFIG_NET_CLS_POLICE
103 unsigned char police;
108 /* Link-sharing scheduler parameters */
109 long maxidle; /* Class parameters: see below. */
113 struct qdisc_rate_table *R_tab;
115 /* Overlimit strategy parameters */
116 void (*overlimit)(struct cbq_class *cl);
119 /* General scheduler (WRR) parameters */
121 long quantum; /* Allotment per WRR round */
122 long weight; /* Relative allotment: see below */
124 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
125 struct cbq_class *split; /* Ptr to split node */
126 struct cbq_class *share; /* Ptr to LS parent in the class tree */
127 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
128 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
130 struct cbq_class *sibling; /* Sibling chain */
131 struct cbq_class *children; /* Pointer to children chain */
133 struct Qdisc *q; /* Elementary queueing discipline */
137 unsigned char cpriority; /* Effective priority */
138 unsigned char delayed;
139 unsigned char level; /* level of the class in hierarchy:
140 0 for leaf classes, and maximal
141 level of children + 1 for nodes.
144 psched_time_t last; /* Last end of service */
145 psched_time_t undertime;
147 long deficit; /* Saved deficit for WRR */
148 unsigned long penalized;
149 struct tc_stats stats;
150 struct tc_cbq_xstats xstats;
152 struct tcf_proto *filter_list;
157 struct cbq_class *defaults[TC_PRIO_MAX+1];
160 struct cbq_sched_data
162 struct cbq_class *classes[16]; /* Hash table of all classes */
163 int nclasses[TC_CBQ_MAXPRIO+1];
164 unsigned quanta[TC_CBQ_MAXPRIO+1];
166 struct cbq_class link;
169 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
172 #ifdef CONFIG_NET_CLS_POLICE
173 struct cbq_class *rx_class;
175 struct cbq_class *tx_class;
176 struct cbq_class *tx_borrowed;
178 psched_time_t now; /* Cached timestamp */
179 psched_time_t now_rt; /* Cached real time */
182 struct timer_list delay_timer;
183 struct timer_list wd_timer; /* Watchdog timer,
193 #define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
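/* Illustrative sketch, not part of the original source: L2T() converts a
 * packet length into its transmission time at the class rate by indexing
 * the table that userspace tc precomputed, where each slot covers
 * 2^cell_log bytes of length. An equivalent helper would look like:
 */
#if 0
static inline u32 example_len_to_ticks(struct qdisc_rate_table *rtab,
					unsigned int len)
{
	/* data[] holds per-slot transmission times in scheduler ticks */
	return rtab->data[len >> rtab->rate.cell_log];
}
#endif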
196 static __inline__ unsigned cbq_hash(u32 h)
203 static __inline__ struct cbq_class *
204 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
206 struct cbq_class *cl;
208 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
209 if (cl->classid == classid)
214 #ifdef CONFIG_NET_CLS_POLICE
216 static struct cbq_class *
217 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
219 struct cbq_class *cl, *new;
221 for (cl = this->tparent; cl; cl = cl->tparent)
222 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
230 /* Classify packet. The procedure is pretty complicated, but
231 it allows us to combine link sharing and priority scheduling
234 Namely, you can put link-sharing rules (e.g. route-based) at the root of the CBQ,
235 so that it resolves to split nodes. Then packets are classified
236 by logical priority, or a more specific classifier may be attached
240 static struct cbq_class *
241 cbq_classify(struct sk_buff *skb, struct Qdisc *sch)
243 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
244 struct cbq_class *head = &q->link;
245 struct cbq_class **defmap;
246 struct cbq_class *cl = NULL;
247 u32 prio = skb->priority;
248 struct tcf_result res;
251 * Step 1. If skb->priority points to one of our classes, use it.
253 if (TC_H_MAJ(prio^sch->handle) == 0 &&
254 (cl = cbq_class_lookup(q, prio)) != NULL)
260 defmap = head->defaults;
263 * Step 2+n. Apply classifier.
265 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
268 if ((cl = (void*)res.class) == NULL) {
269 if (TC_H_MAJ(res.classid))
270 cl = cbq_class_lookup(q, res.classid);
271 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
272 cl = defmap[TC_PRIO_BESTEFFORT];
274 if (cl == NULL || cl->level >= head->level)
278 #ifdef CONFIG_NET_CLS_POLICE
280 case TC_POLICE_RECLASSIFY:
281 return cbq_reclassify(skb, cl);
292 * Step 3+n. If classifier selected a link sharing class,
293 * apply the agency-specific classifier.
294 * Repeat this procedure until we hit a leaf node.
303 * Step 4. No success...
305 if (TC_H_MAJ(prio) == 0 &&
306 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
307 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
314 A packet has just been enqueued into an empty class.
315 cbq_activate_class adds it to the tail of active class list
316 of its priority band.
319 static __inline__ void cbq_activate_class(struct cbq_class *cl)
321 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
322 int prio = cl->cpriority;
323 struct cbq_class *cl_tail;
325 cl_tail = q->active[prio];
326 q->active[prio] = cl;
328 if (cl_tail != NULL) {
329 cl->next_alive = cl_tail->next_alive;
330 cl_tail->next_alive = cl;
333 q->activemask |= (1<<prio);
338 Unlink class from active chain.
339 Note that this same procedure is done directly in cbq_dequeue*
340 during the round-robin procedure.
343 static void cbq_deactivate_class(struct cbq_class *this)
345 struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
346 int prio = this->cpriority;
347 struct cbq_class *cl;
348 struct cbq_class *cl_prev = q->active[prio];
351 cl = cl_prev->next_alive;
353 cl_prev->next_alive = cl->next_alive;
354 cl->next_alive = NULL;
356 if (cl == q->active[prio]) {
357 q->active[prio] = cl_prev;
358 if (cl == q->active[prio]) {
359 q->active[prio] = NULL;
360 q->activemask &= ~(1<<prio);
365 cl = cl_prev->next_alive;
368 } while ((cl_prev = cl) != q->active[prio]);
372 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
374 int toplevel = q->toplevel;
376 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
380 PSCHED_GET_TIME(now);
381 incr = PSCHED_TDIFF(now, q->now_rt);
382 PSCHED_TADD2(q->now, incr, now);
385 if (PSCHED_TLESS(cl->undertime, now)) {
386 q->toplevel = cl->level;
389 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
394 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
396 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
397 struct cbq_class *cl = cbq_classify(skb, sch);
399 int ret = NET_XMIT_POLICED;
401 #ifdef CONFIG_NET_CLS_POLICE
405 #ifdef CONFIG_NET_CLS_POLICE
406 cl->q->__parent = sch;
408 if ((ret = cl->q->enqueue(skb, cl->q)) == 0) {
410 sch->stats.packets++;
411 sch->stats.bytes+=len;
412 cbq_mark_toplevel(q, cl);
414 cbq_activate_class(cl);
423 cbq_mark_toplevel(q, cl);
430 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
432 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
433 struct cbq_class *cl;
436 if ((cl = q->tx_class) == NULL) {
443 cbq_mark_toplevel(q, cl);
445 #ifdef CONFIG_NET_CLS_POLICE
447 cl->q->__parent = sch;
449 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
452 cbq_activate_class(cl);
460 /* Overlimit actions */
462 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
464 static void cbq_ovl_classic(struct cbq_class *cl)
466 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
467 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
470 delay += cl->offtime;
473 The class goes to sleep, so that it will have no
474 chance to work off its avgidle. Let's forgive it 8)
476 BTW cbq-2.0 has a bug in this
477 place: apparently they forgot to shift it by cl->ewma_log.
480 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
481 if (cl->avgidle < cl->minidle)
482 cl->avgidle = cl->minidle;
485 PSCHED_TADD2(q->now, delay, cl->undertime);
487 cl->xstats.overactions++;
490 if (q->wd_expires == 0 || q->wd_expires > delay)
491 q->wd_expires = delay;
493 /* Dirty work! We must schedule wakeups based on
494 real available rate, rather than leaf rate,
495 which may be tiny (even zero).
497 if (q->toplevel == TC_CBQ_MAXLEVEL) {
499 psched_tdiff_t base_delay = q->wd_expires;
501 for (b = cl->borrow; b; b = b->borrow) {
502 delay = PSCHED_TDIFF(b->undertime, q->now);
503 if (delay < base_delay) {
510 q->wd_expires = base_delay;
514 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
518 static void cbq_ovl_rclassic(struct cbq_class *cl)
520 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
521 struct cbq_class *this = cl;
524 if (cl->level > q->toplevel) {
528 } while ((cl = cl->borrow) != NULL);
537 /* TC_CBQ_OVL_DELAY: delay until the class goes underlimit again */
537 static void cbq_ovl_delay(struct cbq_class *cl)
539 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
540 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
543 unsigned long sched = jiffies;
545 delay += cl->offtime;
547 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
548 if (cl->avgidle < cl->minidle)
549 cl->avgidle = cl->minidle;
550 PSCHED_TADD2(q->now, delay, cl->undertime);
553 sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
554 cl->penalized = sched;
555 cl->cpriority = TC_CBQ_MAXPRIO;
556 q->pmask |= (1<<TC_CBQ_MAXPRIO);
557 if (del_timer(&q->delay_timer) &&
558 (long)(q->delay_timer.expires - sched) > 0)
559 q->delay_timer.expires = sched;
560 add_timer(&q->delay_timer);
562 cl->xstats.overactions++;
567 if (q->wd_expires == 0 || q->wd_expires > delay)
568 q->wd_expires = delay;
571 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
573 static void cbq_ovl_lowprio(struct cbq_class *cl)
575 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
577 cl->penalized = jiffies + cl->penalty;
579 if (cl->cpriority != cl->priority2) {
580 cl->cpriority = cl->priority2;
581 q->pmask |= (1<<cl->cpriority);
582 cl->xstats.overactions++;
587 /* TC_CBQ_OVL_DROP: penalize class by dropping */
589 static void cbq_ovl_drop(struct cbq_class *cl)
591 if (cl->q->ops->drop)
592 if (cl->q->ops->drop(cl->q))
594 cl->xstats.overactions++;
598 static void cbq_watchdog(unsigned long arg)
600 struct Qdisc *sch = (struct Qdisc*)arg;
602 sch->flags &= ~TCQ_F_THROTTLED;
603 netif_schedule(sch->dev);
606 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
608 struct cbq_class *cl;
609 struct cbq_class *cl_prev = q->active[prio];
610 unsigned long now = jiffies;
611 unsigned long sched = now;
617 cl = cl_prev->next_alive;
618 if ((long)(now - cl->penalized) > 0) {
619 cl_prev->next_alive = cl->next_alive;
620 cl->next_alive = NULL;
621 cl->cpriority = cl->priority;
623 cbq_activate_class(cl);
625 if (cl == q->active[prio]) {
626 q->active[prio] = cl_prev;
627 if (cl == q->active[prio]) {
628 q->active[prio] = NULL;
633 cl = cl_prev->next_alive;
634 } else if ((long)(sched - cl->penalized) > 0)
635 sched = cl->penalized;
636 } while ((cl_prev = cl) != q->active[prio]);
638 return (long)(sched - now);
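/* Note added for clarity: the (long)(a - b) > 0 tests above are the standard
   wrap-safe jiffies comparison (equivalent to time_after()), so penalties
   keep working correctly across jiffies wraparound. */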
641 static void cbq_undelay(unsigned long arg)
643 struct Qdisc *sch = (struct Qdisc*)arg;
644 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
652 int prio = ffz(~pmask);
657 tmp = cbq_undelay_prio(q, prio);
660 if (tmp < delay || delay == 0)
666 q->delay_timer.expires = jiffies + delay;
667 add_timer(&q->delay_timer);
670 sch->flags &= ~TCQ_F_THROTTLED;
671 netif_schedule(sch->dev);
675 #ifdef CONFIG_NET_CLS_POLICE
677 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
680 struct Qdisc *sch = child->__parent;
681 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
682 struct cbq_class *cl = q->rx_class;
686 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
688 cbq_mark_toplevel(q, cl);
691 cl->q->__parent = sch;
693 if (cl->q->enqueue(skb, cl->q) == 0) {
695 sch->stats.packets++;
696 sch->stats.bytes+=len;
698 cbq_activate_class(cl);
711 This is a mission-critical procedure.
713 We "regenerate" the toplevel cutoff if the transmitting class
714 has backlog and is not regulated. This is not part of the
715 original CBQ description, but looks more reasonable.
716 It may well be wrong; this question needs further investigation.
719 static __inline__ void
720 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
721 struct cbq_class *borrowed)
723 if (cl && q->toplevel >= borrowed->level) {
724 if (cl->q->q.qlen > 1) {
726 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
727 q->toplevel = borrowed->level;
730 } while ((borrowed=borrowed->borrow) != NULL);
733 /* It is not necessary now. Uncommenting it
734 will save CPU cycles, but decrease fairness.
736 q->toplevel = TC_CBQ_MAXLEVEL;
742 cbq_update(struct cbq_sched_data *q)
744 struct cbq_class *this = q->tx_class;
745 struct cbq_class *cl = this;
750 for ( ; cl; cl = cl->share) {
751 long avgidle = cl->avgidle;
755 cl->stats.bytes += len;
758 (now - last) is total time between packet right edges.
759 (last_pktlen/rate) is "virtual" busy time, so that
761 idle = (now - last) - last_pktlen/rate
764 idle = PSCHED_TDIFF(q->now, cl->last);
765 if ((unsigned long)idle > 128*1024*1024) {
766 avgidle = cl->maxidle;
768 idle -= L2T(cl, len);
770 /* true_avgidle := (1-W)*true_avgidle + W*idle,
771 where W=2^{-ewma_log}. But cl->avgidle is scaled:
772 cl->avgidle == true_avgidle/W,
775 avgidle += idle - (avgidle>>cl->ewma_log);
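		/* Added sanity check, not in the original source: substituting
		   the scaling above, the new scaled value is
		       avgidle' = true_avgidle'/W
		                = (1-W)*true_avgidle/W + idle
		                = avgidle + idle - W*avgidle,
		   and since W = 2^{-ewma_log}, W*avgidle == avgidle>>ewma_log,
		   which is exactly the update on the previous line. */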
779 /* Overlimit or at-limit */
781 if (avgidle < cl->minidle)
782 avgidle = cl->minidle;
784 cl->avgidle = avgidle;
786 /* Calculate the expected time when this class
787 will be allowed to send.
789 (1-W)*true_avgidle + W*delay = 0, i.e.
790 idle = (1/W - 1)*(-true_avgidle)
792 idle = (1 - W)*(-cl->avgidle);
794 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
798 To maintain the rate allocated to the class,
799 we add to undertime the virtual clock time
800 necessary to complete the transmitted packet.
801 (len/phys_bandwidth has already elapsed
802 by the moment cbq_update is called.)
805 idle -= L2T(&q->link, len);
806 idle += L2T(cl, len);
808 PSCHED_AUDIT_TDIFF(idle);
810 PSCHED_TADD2(q->now, idle, cl->undertime);
814 PSCHED_SET_PASTPERFECT(cl->undertime);
815 if (avgidle > cl->maxidle)
816 cl->avgidle = cl->maxidle;
818 cl->avgidle = avgidle;
823 cbq_update_toplevel(q, this, q->tx_borrowed);
826 static __inline__ struct cbq_class *
827 cbq_under_limit(struct cbq_class *cl)
829 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
830 struct cbq_class *this_cl = cl;
832 if (cl->tparent == NULL)
835 if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
836 !PSCHED_TLESS(q->now, cl->undertime)) {
842 /* This is a very suspicious place. Currently an overlimit
843 action is generated for non-bounded classes
844 only if the link is completely congested.
845 Though this agrees with the ancestor-only paradigm,
846 it looks very stupid. In particular,
847 it means that this chunk of code will either
848 never be called or will result in strong amplification
849 of burstiness. Dangerous, silly, and yet
850 no other solution exists.
852 if ((cl = cl->borrow) == NULL) {
853 this_cl->stats.overlimits++;
854 this_cl->overlimit(this_cl);
857 if (cl->level > q->toplevel)
859 } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
860 PSCHED_TLESS(q->now, cl->undertime));
866 static __inline__ struct sk_buff *
867 cbq_dequeue_prio(struct Qdisc *sch, int prio)
869 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
870 struct cbq_class *cl_tail, *cl_prev, *cl;
874 cl_tail = cl_prev = q->active[prio];
875 cl = cl_prev->next_alive;
882 struct cbq_class *borrow = cl;
885 (borrow = cbq_under_limit(cl)) == NULL)
888 if (cl->deficit <= 0) {
889 /* The class exhausted its allotment for
890 this round. Switch to the next one.
893 cl->deficit += cl->quantum;
897 skb = cl->q->dequeue(cl->q);
899 /* The class did not give us any skb :-(
900 This can occur even if cl->q->q.qlen != 0,
901 e.g. if cl->q == "tbf"
906 cl->deficit -= skb->len;
908 q->tx_borrowed = borrow;
910 #ifndef CBQ_XSTATS_BORROWS_BYTES
911 borrow->xstats.borrows++;
912 cl->xstats.borrows++;
914 borrow->xstats.borrows += skb->len;
915 cl->xstats.borrows += skb->len;
918 q->tx_len = skb->len;
920 if (cl->deficit <= 0) {
921 q->active[prio] = cl;
923 cl->deficit += cl->quantum;
928 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
929 /* Class is empty or penalized.
930 Unlink it from active chain.
932 cl_prev->next_alive = cl->next_alive;
933 cl->next_alive = NULL;
935 /* Did cl_tail point to it? */
940 /* Was it the last class in this band? */
943 q->active[prio] = NULL;
944 q->activemask &= ~(1<<prio);
946 cbq_activate_class(cl);
950 q->active[prio] = cl_tail;
953 cbq_activate_class(cl);
961 } while (cl_prev != cl_tail);
964 q->active[prio] = cl_prev;
969 static __inline__ struct sk_buff *
970 cbq_dequeue_1(struct Qdisc *sch)
972 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
976 activemask = q->activemask&0xFF;
978 int prio = ffz(~activemask);
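		/* ffz() returns the index of the first zero bit, so
		   ffz(~activemask) is the lowest set bit of activemask, i.e.
		   the best (numerically lowest) priority band that still has
		   active classes; e.g. activemask == 0x5 yields prio 0. */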
979 activemask &= ~(1<<prio);
980 skb = cbq_dequeue_prio(sch, prio);
987 static struct sk_buff *
988 cbq_dequeue(struct Qdisc *sch)
991 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
995 PSCHED_GET_TIME(now);
996 incr = PSCHED_TDIFF(now, q->now_rt);
999 psched_tdiff_t incr2;
1000 /* Time integrator. We calculate EOS time
1001 by adding the expected packet transmission time.
1002 If real time is greater, we warp the artificial clock,
1005 cbq_time = max(real_time, work);
1007 incr2 = L2T(&q->link, q->tx_len);
1008 PSCHED_TADD(q->now, incr2);
1010 if ((incr -= incr2) < 0)
1013 PSCHED_TADD(q->now, incr);
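	/* Illustrative numbers, not in the original source: if the previous
	   packet was 1500 bytes on a link configured at 10 Mbit/s, incr2 is
	   about 1.2 ms; if less real time than that has elapsed, the
	   artificial clock runs ahead of real time, otherwise it is warped
	   forward to match it, giving cbq_time = max(real_time, work). */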
1019 skb = cbq_dequeue_1(sch);
1022 sch->flags &= ~TCQ_F_THROTTLED;
1026 /* All the classes are overlimit.
1030 1. Scheduler is empty.
1031 2. Toplevel cutoff inhibited borrowing.
1032 3. Root class is overlimit.
1034 Reset the 2nd and 3rd conditions and retry.
1036 Note that NS and cbq-2.0 are buggy: picking
1037 an arbitrary class is appropriate for ancestor-only
1038 link sharing, but not for the top-level algorithm.
1040 Our version is better but slower, because it requires
1041 two passes; this is unavoidable with top-level sharing.
1044 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1045 PSCHED_IS_PASTPERFECT(q->link.undertime))
1048 q->toplevel = TC_CBQ_MAXLEVEL;
1049 PSCHED_SET_PASTPERFECT(q->link.undertime);
1052 /* No packets in the scheduler, or nobody wants to give them to us :-(
1053 Sigh... start the watchdog timer in the latter case. */
1056 sch->stats.overlimits++;
1057 if (q->wd_expires && !netif_queue_stopped(sch->dev)) {
1058 long delay = PSCHED_US2JIFFIE(q->wd_expires);
1059 del_timer(&q->wd_timer);
1062 q->wd_timer.expires = jiffies + delay;
1063 add_timer(&q->wd_timer);
1064 sch->flags |= TCQ_F_THROTTLED;
1070 /* CBQ class maintenance routines */
1072 static void cbq_adjust_levels(struct cbq_class *this)
1079 struct cbq_class *cl;
1081 if ((cl = this->children) != NULL) {
1083 if (cl->level > level)
1085 } while ((cl = cl->sibling) != this->children);
1087 this->level = level+1;
1088 } while ((this = this->tparent) != NULL);
1091 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1093 struct cbq_class *cl;
1096 if (q->quanta[prio] == 0)
1099 for (h=0; h<16; h++) {
1100 for (cl = q->classes[h]; cl; cl = cl->next) {
1101 /* BUGGGG... Beware! This expression suffers from
1102 arithmetic overflow!
1104 if (cl->priority == prio) {
1105 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1108 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1109 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1110 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
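/* Illustrative sketch only, not in the original source: the quantum
 * computation above can overflow 32-bit arithmetic, as its comment warns.
 * A safer variant would use a 64-bit intermediate; the helper below is
 * hypothetical and assumes the elided divisor is the per-priority weight
 * sum that cbq_addprio()/cbq_rmprio() maintain in q->quanta[prio].
 */
#if 0
#include <asm/div64.h>

static long example_safe_quantum(long weight, long allot, int nclasses,
				 unsigned quanta_sum)
{
	u64 tmp = (u64)weight * allot * nclasses;

	do_div(tmp, quanta_sum);	/* 64-by-32 bit division */
	return (long)tmp;
}
#endif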
1116 static void cbq_sync_defmap(struct cbq_class *cl)
1118 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
1119 struct cbq_class *split = cl->split;
1126 for (i=0; i<=TC_PRIO_MAX; i++) {
1127 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1128 split->defaults[i] = NULL;
1131 for (i=0; i<=TC_PRIO_MAX; i++) {
1132 int level = split->level;
1134 if (split->defaults[i])
1137 for (h=0; h<16; h++) {
1138 struct cbq_class *c;
1140 for (c = q->classes[h]; c; c = c->next) {
1141 if (c->split == split && c->level < level &&
1143 split->defaults[i] = c;
1151 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1153 struct cbq_class *split = NULL;
1156 if ((split = cl->split) == NULL)
1158 splitid = split->classid;
1161 if (split == NULL || split->classid != splitid) {
1162 for (split = cl->tparent; split; split = split->tparent)
1163 if (split->classid == splitid)
1170 if (cl->split != split) {
1172 cbq_sync_defmap(cl);
1174 cl->defmap = def&mask;
1176 cl->defmap = (cl->defmap&~mask)|(def&mask);
1178 cbq_sync_defmap(cl);
1181 static void cbq_unlink_class(struct cbq_class *this)
1183 struct cbq_class *cl, **clp;
1184 struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
1186 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1194 if (this->tparent) {
1203 } while ((cl = *clp) != this->sibling);
1205 if (this->tparent->children == this) {
1206 this->tparent->children = this->sibling;
1207 if (this->sibling == this)
1208 this->tparent->children = NULL;
1211 BUG_TRAP(this->sibling == this);
1215 static void cbq_link_class(struct cbq_class *this)
1217 struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
1218 unsigned h = cbq_hash(this->classid);
1219 struct cbq_class *parent = this->tparent;
1221 this->sibling = this;
1222 this->next = q->classes[h];
1223 q->classes[h] = this;
1228 if (parent->children == NULL) {
1229 parent->children = this;
1231 this->sibling = parent->children->sibling;
1232 parent->children->sibling = this;
1236 static int cbq_drop(struct Qdisc* sch)
1238 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1239 struct cbq_class *cl, *cl_head;
1242 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1243 if ((cl_head = q->active[prio]) == NULL)
1248 if (cl->q->ops->drop && cl->q->ops->drop(cl->q)) {
1252 } while ((cl = cl->next_alive) != cl_head);
1258 cbq_reset(struct Qdisc* sch)
1260 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1261 struct cbq_class *cl;
1268 q->tx_borrowed = NULL;
1269 del_timer(&q->wd_timer);
1270 del_timer(&q->delay_timer);
1271 q->toplevel = TC_CBQ_MAXLEVEL;
1272 PSCHED_GET_TIME(q->now);
1275 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1276 q->active[prio] = NULL;
1278 for (h = 0; h < 16; h++) {
1279 for (cl = q->classes[h]; cl; cl = cl->next) {
1282 cl->next_alive = NULL;
1283 PSCHED_SET_PASTPERFECT(cl->undertime);
1284 cl->avgidle = cl->maxidle;
1285 cl->deficit = cl->quantum;
1286 cl->cpriority = cl->priority;
1293 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1295 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1296 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1297 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1299 if (lss->change&TCF_CBQ_LSS_EWMA)
1300 cl->ewma_log = lss->ewma_log;
1301 if (lss->change&TCF_CBQ_LSS_AVPKT)
1302 cl->avpkt = lss->avpkt;
1303 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1304 cl->minidle = -(long)lss->minidle;
1305 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1306 cl->maxidle = lss->maxidle;
1307 cl->avgidle = lss->maxidle;
1309 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1310 cl->offtime = lss->offtime;
1314 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1316 q->nclasses[cl->priority]--;
1317 q->quanta[cl->priority] -= cl->weight;
1318 cbq_normalize_quanta(q, cl->priority);
1321 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1323 q->nclasses[cl->priority]++;
1324 q->quanta[cl->priority] += cl->weight;
1325 cbq_normalize_quanta(q, cl->priority);
1328 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1330 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
1333 cl->allot = wrr->allot;
1335 cl->weight = wrr->weight;
1336 if (wrr->priority) {
1337 cl->priority = wrr->priority-1;
1338 cl->cpriority = cl->priority;
1339 if (cl->priority >= cl->priority2)
1340 cl->priority2 = TC_CBQ_MAXPRIO-1;
1347 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1349 switch (ovl->strategy) {
1350 case TC_CBQ_OVL_CLASSIC:
1351 cl->overlimit = cbq_ovl_classic;
1353 case TC_CBQ_OVL_DELAY:
1354 cl->overlimit = cbq_ovl_delay;
1356 case TC_CBQ_OVL_LOWPRIO:
1357 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1358 ovl->priority2-1 <= cl->priority)
1360 cl->priority2 = ovl->priority2-1;
1361 cl->overlimit = cbq_ovl_lowprio;
1363 case TC_CBQ_OVL_DROP:
1364 cl->overlimit = cbq_ovl_drop;
1366 case TC_CBQ_OVL_RCLASSIC:
1367 cl->overlimit = cbq_ovl_rclassic;
1372 cl->penalty = (ovl->penalty*HZ)/1000;
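	/* Example, added for clarity: with HZ == 100, a 30 ms penalty from
	   userspace becomes (30*100)/1000 == 3 jiffies (assuming the
	   attribute is in milliseconds, as the reverse conversion in
	   cbq_dump_ovl() suggests). */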
1376 #ifdef CONFIG_NET_CLS_POLICE
1377 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1379 cl->police = p->police;
1381 if (cl->q->handle) {
1382 if (p->police == TC_POLICE_RECLASSIFY)
1383 cl->q->reshape_fail = cbq_reshape_fail;
1385 cl->q->reshape_fail = NULL;
1391 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1393 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1397 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1399 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1400 struct rtattr *tb[TCA_CBQ_MAX];
1401 struct tc_ratespec *r;
1403 if (rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0 ||
1404 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1405 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1408 if (tb[TCA_CBQ_LSSOPT-1] &&
1409 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1412 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1415 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL) {
1421 q->link.sibling = &q->link;
1422 q->link.classid = sch->handle;
1423 q->link.qdisc = sch;
1424 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1425 q->link.q = &noop_qdisc;
1427 q->link.priority = TC_CBQ_MAXPRIO-1;
1428 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1429 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1430 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1431 q->link.overlimit = cbq_ovl_classic;
1432 q->link.allot = psched_mtu(sch->dev);
1433 q->link.quantum = q->link.allot;
1434 q->link.weight = q->link.R_tab->rate.rate;
1436 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1437 q->link.avpkt = q->link.allot/2;
1438 q->link.minidle = -0x7FFFFFFF;
1439 q->link.stats.lock = &sch->dev->queue_lock;
1441 init_timer(&q->wd_timer);
1442 q->wd_timer.data = (unsigned long)sch;
1443 q->wd_timer.function = cbq_watchdog;
1444 init_timer(&q->delay_timer);
1445 q->delay_timer.data = (unsigned long)sch;
1446 q->delay_timer.function = cbq_undelay;
1447 q->toplevel = TC_CBQ_MAXLEVEL;
1448 PSCHED_GET_TIME(q->now);
1451 cbq_link_class(&q->link);
1453 if (tb[TCA_CBQ_LSSOPT-1])
1454 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1456 cbq_addprio(q, &q->link);
1460 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1462 unsigned char *b = skb->tail;
1464 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1468 skb_trim(skb, b - skb->data);
1472 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1474 unsigned char *b = skb->tail;
1475 struct tc_cbq_lssopt opt;
1478 if (cl->borrow == NULL)
1479 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1480 if (cl->share == NULL)
1481 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1482 opt.ewma_log = cl->ewma_log;
1483 opt.level = cl->level;
1484 opt.avpkt = cl->avpkt;
1485 opt.maxidle = cl->maxidle;
1486 opt.minidle = (u32)(-cl->minidle);
1487 opt.offtime = cl->offtime;
1489 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1493 skb_trim(skb, b - skb->data);
1497 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1499 unsigned char *b = skb->tail;
1500 struct tc_cbq_wrropt opt;
1503 opt.allot = cl->allot;
1504 opt.priority = cl->priority+1;
1505 opt.cpriority = cl->cpriority+1;
1506 opt.weight = cl->weight;
1507 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1511 skb_trim(skb, b - skb->data);
1515 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1517 unsigned char *b = skb->tail;
1518 struct tc_cbq_ovl opt;
1520 opt.strategy = cl->ovl_strategy;
1521 opt.priority2 = cl->priority2+1;
1522 opt.penalty = (cl->penalty*1000)/HZ;
1523 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1527 skb_trim(skb, b - skb->data);
1531 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1533 unsigned char *b = skb->tail;
1534 struct tc_cbq_fopt opt;
1536 if (cl->split || cl->defmap) {
1537 opt.split = cl->split ? cl->split->classid : 0;
1538 opt.defmap = cl->defmap;
1540 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1545 skb_trim(skb, b - skb->data);
1549 #ifdef CONFIG_NET_CLS_POLICE
1550 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1552 unsigned char *b = skb->tail;
1553 struct tc_cbq_police opt;
1556 opt.police = cl->police;
1557 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1562 skb_trim(skb, b - skb->data);
1567 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1569 if (cbq_dump_lss(skb, cl) < 0 ||
1570 cbq_dump_rate(skb, cl) < 0 ||
1571 cbq_dump_wrr(skb, cl) < 0 ||
1572 cbq_dump_ovl(skb, cl) < 0 ||
1573 #ifdef CONFIG_NET_CLS_POLICE
1574 cbq_dump_police(skb, cl) < 0 ||
1576 cbq_dump_fopt(skb, cl) < 0)
1581 int cbq_copy_xstats(struct sk_buff *skb, struct tc_cbq_xstats *st)
1583 RTA_PUT(skb, TCA_XSTATS, sizeof(*st), st);
1591 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1593 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1594 unsigned char *b = skb->tail;
1597 rta = (struct rtattr*)b;
1598 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1599 if (cbq_dump_attr(skb, &q->link) < 0)
1600 goto rtattr_failure;
1601 rta->rta_len = skb->tail - b;
1602 spin_lock_bh(&sch->dev->queue_lock);
1603 q->link.xstats.avgidle = q->link.avgidle;
1604 if (cbq_copy_xstats(skb, &q->link.xstats)) {
1605 spin_unlock_bh(&sch->dev->queue_lock);
1606 goto rtattr_failure;
1608 spin_unlock_bh(&sch->dev->queue_lock);
1612 skb_trim(skb, b - skb->data);
1617 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1618 struct sk_buff *skb, struct tcmsg *tcm)
1620 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1621 struct cbq_class *cl = (struct cbq_class*)arg;
1622 unsigned char *b = skb->tail;
1626 tcm->tcm_parent = cl->tparent->classid;
1628 tcm->tcm_parent = TC_H_ROOT;
1629 tcm->tcm_handle = cl->classid;
1630 tcm->tcm_info = cl->q->handle;
1632 rta = (struct rtattr*)b;
1633 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1634 if (cbq_dump_attr(skb, cl) < 0)
1635 goto rtattr_failure;
1636 rta->rta_len = skb->tail - b;
1637 cl->stats.qlen = cl->q->q.qlen;
1638 if (qdisc_copy_stats(skb, &cl->stats))
1639 goto rtattr_failure;
1640 spin_lock_bh(&sch->dev->queue_lock);
1641 cl->xstats.avgidle = cl->avgidle;
1642 cl->xstats.undertime = 0;
1643 if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1644 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1645 q->link.xstats.avgidle = q->link.avgidle;
1646 if (cbq_copy_xstats(skb, &cl->xstats)) {
1647 spin_unlock_bh(&sch->dev->queue_lock);
1648 goto rtattr_failure;
1650 spin_unlock_bh(&sch->dev->queue_lock);
1655 skb_trim(skb, b - skb->data);
1659 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1662 struct cbq_class *cl = (struct cbq_class*)arg;
1666 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
1669 #ifdef CONFIG_NET_CLS_POLICE
1670 if (cl->police == TC_POLICE_RECLASSIFY)
1671 new->reshape_fail = cbq_reshape_fail;
1678 sch_tree_unlock(sch);
1685 static struct Qdisc *
1686 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1688 struct cbq_class *cl = (struct cbq_class*)arg;
1690 return cl ? cl->q : NULL;
1693 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1695 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1696 struct cbq_class *cl = cbq_class_lookup(q, classid);
1700 return (unsigned long)cl;
1705 static void cbq_destroy_filters(struct cbq_class *cl)
1707 struct tcf_proto *tp;
1709 while ((tp = cl->filter_list) != NULL) {
1710 cl->filter_list = tp->next;
1711 tp->ops->destroy(tp);
1715 static void cbq_destroy_class(struct cbq_class *cl)
1717 cbq_destroy_filters(cl);
1718 qdisc_destroy(cl->q);
1719 qdisc_put_rtab(cl->R_tab);
1720 #ifdef CONFIG_NET_ESTIMATOR
1721 qdisc_kill_estimator(&cl->stats);
1727 cbq_destroy(struct Qdisc* sch)
1729 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1730 struct cbq_class *cl;
1733 #ifdef CONFIG_NET_CLS_POLICE
1736 for (h = 0; h < 16; h++) {
1737 for (cl = q->classes[h]; cl; cl = cl->next)
1738 cbq_destroy_filters(cl);
1741 for (h = 0; h < 16; h++) {
1742 struct cbq_class *next;
1744 for (cl = q->classes[h]; cl; cl = next) {
1747 cbq_destroy_class(cl);
1751 qdisc_put_rtab(q->link.R_tab);
1755 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1757 struct cbq_class *cl = (struct cbq_class*)arg;
1759 if (--cl->refcnt == 0) {
1760 #ifdef CONFIG_NET_CLS_POLICE
1761 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1763 spin_lock_bh(&sch->dev->queue_lock);
1764 if (q->rx_class == cl)
1766 spin_unlock_bh(&sch->dev->queue_lock);
1769 cbq_destroy_class(cl);
1774 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1778 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1779 struct cbq_class *cl = (struct cbq_class*)*arg;
1780 struct rtattr *opt = tca[TCA_OPTIONS-1];
1781 struct rtattr *tb[TCA_CBQ_MAX];
1782 struct cbq_class *parent;
1783 struct qdisc_rate_table *rtab = NULL;
1786 rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)))
1789 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1790 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1793 if (tb[TCA_CBQ_FOPT-1] &&
1794 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1797 if (tb[TCA_CBQ_RATE-1] &&
1798 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1801 if (tb[TCA_CBQ_LSSOPT-1] &&
1802 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1805 if (tb[TCA_CBQ_WRROPT-1] &&
1806 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1809 #ifdef CONFIG_NET_CLS_POLICE
1810 if (tb[TCA_CBQ_POLICE-1] &&
1811 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1818 if (cl->tparent && cl->tparent->classid != parentid)
1820 if (!cl->tparent && parentid != TC_H_ROOT)
1824 if (tb[TCA_CBQ_RATE-1]) {
1825 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1830 /* Change class parameters */
1833 if (cl->next_alive != NULL)
1834 cbq_deactivate_class(cl);
1837 rtab = xchg(&cl->R_tab, rtab);
1838 qdisc_put_rtab(rtab);
1841 if (tb[TCA_CBQ_LSSOPT-1])
1842 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1844 if (tb[TCA_CBQ_WRROPT-1]) {
1846 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1849 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1850 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1852 #ifdef CONFIG_NET_CLS_POLICE
1853 if (tb[TCA_CBQ_POLICE-1])
1854 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1857 if (tb[TCA_CBQ_FOPT-1])
1858 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1861 cbq_activate_class(cl);
1863 sch_tree_unlock(sch);
1865 #ifdef CONFIG_NET_ESTIMATOR
1866 if (tca[TCA_RATE-1]) {
1867 qdisc_kill_estimator(&cl->stats);
1868 qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
1874 if (parentid == TC_H_ROOT)
1877 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1878 tb[TCA_CBQ_LSSOPT-1] == NULL)
1881 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1887 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1891 classid = TC_H_MAKE(sch->handle,0x8000);
1893 for (i=0; i<0x8000; i++) {
1894 if (++q->hgenerator >= 0x8000)
1896 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1902 classid = classid|q->hgenerator;
1907 parent = cbq_class_lookup(q, parentid);
1914 cl = kmalloc(sizeof(*cl), GFP_KERNEL);
1917 memset(cl, 0, sizeof(*cl));
1921 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1922 cl->q = &noop_qdisc;
1923 cl->classid = classid;
1924 cl->tparent = parent;
1926 cl->allot = parent->allot;
1927 cl->quantum = cl->allot;
1928 cl->weight = cl->R_tab->rate.rate;
1929 cl->stats.lock = &sch->dev->queue_lock;
1933 cl->borrow = cl->tparent;
1934 if (cl->tparent != &q->link)
1935 cl->share = cl->tparent;
1936 cbq_adjust_levels(parent);
1937 cl->minidle = -0x7FFFFFFF;
1938 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1939 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1940 if (cl->ewma_log==0)
1941 cl->ewma_log = q->link.ewma_log;
1943 cl->maxidle = q->link.maxidle;
1945 cl->avpkt = q->link.avpkt;
1946 cl->overlimit = cbq_ovl_classic;
1947 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1948 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1949 #ifdef CONFIG_NET_CLS_POLICE
1950 if (tb[TCA_CBQ_POLICE-1])
1951 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1953 if (tb[TCA_CBQ_FOPT-1])
1954 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1955 sch_tree_unlock(sch);
1957 #ifdef CONFIG_NET_ESTIMATOR
1958 if (tca[TCA_RATE-1])
1959 qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
1962 *arg = (unsigned long)cl;
1966 qdisc_put_rtab(rtab);
1970 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1972 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1973 struct cbq_class *cl = (struct cbq_class*)arg;
1975 if (cl->filters || cl->children || cl == &q->link)
1981 cbq_deactivate_class(cl);
1983 if (q->tx_borrowed == cl)
1984 q->tx_borrowed = q->tx_class;
1985 if (q->tx_class == cl) {
1987 q->tx_borrowed = NULL;
1989 #ifdef CONFIG_NET_CLS_POLICE
1990 if (q->rx_class == cl)
1994 cbq_unlink_class(cl);
1995 cbq_adjust_levels(cl->tparent);
1997 cbq_sync_defmap(cl);
2000 sch_tree_unlock(sch);
2002 if (--cl->refcnt == 0)
2003 cbq_destroy_class(cl);
2008 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2010 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2011 struct cbq_class *cl = (struct cbq_class *)arg;
2016 return &cl->filter_list;
2019 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2022 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2023 struct cbq_class *p = (struct cbq_class*)parent;
2024 struct cbq_class *cl = cbq_class_lookup(q, classid);
2027 if (p && p->level <= cl->level)
2030 return (unsigned long)cl;
2035 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2037 struct cbq_class *cl = (struct cbq_class*)arg;
2042 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2044 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2050 for (h = 0; h < 16; h++) {
2051 struct cbq_class *cl;
2053 for (cl = q->classes[h]; cl; cl = cl->next) {
2054 if (arg->count < arg->skip) {
2058 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2067 static struct Qdisc_class_ops cbq_class_ops =
2084 struct Qdisc_ops cbq_qdisc_ops =
2089 sizeof(struct cbq_sched_data),
2099 NULL /* cbq_change */,
2105 int init_module(void)
2107 return register_qdisc(&cbq_qdisc_ops);
2110 void cleanup_module(void)
2112 unregister_qdisc(&cbq_qdisc_ops);
2115 MODULE_LICENSE("GPL");