[NET_SCHED]: sch_cbq: use hrtimer based watchdog
1 /*
2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  */
12
13 #include <linux/module.h>
14 #include <asm/uaccess.h>
15 #include <asm/system.h>
16 #include <linux/bitops.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/string.h>
20 #include <linux/mm.h>
21 #include <linux/socket.h>
22 #include <linux/sockios.h>
23 #include <linux/in.h>
24 #include <linux/errno.h>
25 #include <linux/interrupt.h>
26 #include <linux/if_ether.h>
27 #include <linux/inet.h>
28 #include <linux/netdevice.h>
29 #include <linux/etherdevice.h>
30 #include <linux/notifier.h>
31 #include <net/ip.h>
32 #include <net/route.h>
33 #include <linux/skbuff.h>
34 #include <net/sock.h>
35 #include <net/pkt_sched.h>
36
37
38 /*      Class-Based Queueing (CBQ) algorithm.
39         =======================================
40
41         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
42                  Management Models for Packet Networks",
43                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
44
45                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
46
47                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
48                  Parameters", 1996
49
50                  [4] Sally Floyd and Michael Speer, "Experimental Results
51                  for Class-Based Queueing", 1998, not published.
52
53         -----------------------------------------------------------------------
54
55         The algorithm skeleton was taken from the NS simulator's cbq.cc.
56         If someone wants to check this code against the LBL version,
57         he should take into account that ONLY the skeleton was borrowed;
58         the implementation is different. Particularly:
59
60         --- The WRR algorithm is different. Our version looks more
61         reasonable (I hope) and works when quanta are allowed to be
62         less than MTU, which is always the case when real-time classes
63         have small rates. Note that the statement of [3] is
64         incomplete: delay may actually be estimated even if the class
65         per-round allotment is less than MTU. Namely, if the per-round
66         allotment is W*r_i, and r_1+...+r_k = r < 1
67
68         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
69
70         In the worst case we have IntServ estimate with D = W*r+k*MTU
71         and C = MTU*r. The proof (if correct at all) is trivial.
72
73
74         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
75         interpret some places, which look like wrong translations
76         from NS. Anyone is advised to find these differences
77         and explain to me why I am wrong 8).
78
79         --- Linux has no EOI event, so we cannot estimate true class
80         idle time. The workaround is to treat the next dequeue event
81         as a sign that the previous packet has finished. This is wrong
82         because of internal device queueing, but on a permanently loaded
83         link it is true. Moreover, combined with the clock integrator,
84         this scheme looks very close to an ideal solution.  */
85
86 struct cbq_sched_data;
87
88
89 struct cbq_class
90 {
91         struct cbq_class        *next;          /* hash table link */
92         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
93
94 /* Parameters */
95         u32                     classid;
96         unsigned char           priority;       /* class priority */
97         unsigned char           priority2;      /* priority to be used after overlimit */
98         unsigned char           ewma_log;       /* time constant for idle time calculation */
99         unsigned char           ovl_strategy;
100 #ifdef CONFIG_NET_CLS_POLICE
101         unsigned char           police;
102 #endif
103
104         u32                     defmap;
105
106         /* Link-sharing scheduler parameters */
107         long                    maxidle;        /* Class parameters: see below. */
108         long                    offtime;
109         long                    minidle;
110         u32                     avpkt;
111         struct qdisc_rate_table *R_tab;
112
113         /* Overlimit strategy parameters */
114         void                    (*overlimit)(struct cbq_class *cl);
115         long                    penalty;
116
117         /* General scheduler (WRR) parameters */
118         long                    allot;
119         long                    quantum;        /* Allotment per WRR round */
120         long                    weight;         /* Relative allotment: see below */
121
122         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
123         struct cbq_class        *split;         /* Ptr to split node */
124         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
125         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
126         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
127                                                    parent otherwise */
128         struct cbq_class        *sibling;       /* Sibling chain */
129         struct cbq_class        *children;      /* Pointer to children chain */
130
131         struct Qdisc            *q;             /* Elementary queueing discipline */
132
133
134 /* Variables */
135         unsigned char           cpriority;      /* Effective priority */
136         unsigned char           delayed;
137         unsigned char           level;          /* level of the class in hierarchy:
138                                                    0 for leaf classes, and maximal
139                                                    level of children + 1 for nodes.
140                                                  */
141
142         psched_time_t           last;           /* Last end of service */
143         psched_time_t           undertime;
144         long                    avgidle;
145         long                    deficit;        /* Saved deficit for WRR */
146         unsigned long           penalized;
147         struct gnet_stats_basic bstats;
148         struct gnet_stats_queue qstats;
149         struct gnet_stats_rate_est rate_est;
150         spinlock_t              *stats_lock;
151         struct tc_cbq_xstats    xstats;
152
153         struct tcf_proto        *filter_list;
154
155         int                     refcnt;
156         int                     filters;
157
158         struct cbq_class        *defaults[TC_PRIO_MAX+1];
159 };
160
161 struct cbq_sched_data
162 {
163         struct cbq_class        *classes[16];           /* Hash table of all classes */
164         int                     nclasses[TC_CBQ_MAXPRIO+1];
165         unsigned                quanta[TC_CBQ_MAXPRIO+1];
166
167         struct cbq_class        link;
168
169         unsigned                activemask;
170         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
171                                                                    with backlog */
172
173 #ifdef CONFIG_NET_CLS_POLICE
174         struct cbq_class        *rx_class;
175 #endif
176         struct cbq_class        *tx_class;
177         struct cbq_class        *tx_borrowed;
178         int                     tx_len;
179         psched_time_t           now;            /* Cached timestamp */
180         psched_time_t           now_rt;         /* Cached real time */
181         unsigned                pmask;
182
183         struct timer_list       delay_timer;
184         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
185                                                    started when CBQ has
186                                                    backlog, but cannot
187                                                    transmit just now */
188         psched_tdiff_t          wd_expires;
189         int                     toplevel;
190         u32                     hgenerator;
191 };
192
193
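/* L2T ("length to time"): convert a packet length into its transmission
   time at the class' configured rate, using the rate table supplied by
   userspace; the table is indexed by the length cell, len >> cell_log. */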
194 #define L2T(cl,len)     ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
195
196
197 static __inline__ unsigned cbq_hash(u32 h)
198 {
199         h ^= h>>8;
200         h ^= h>>4;
201         return h&0xF;
202 }
203
204 static __inline__ struct cbq_class *
205 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
206 {
207         struct cbq_class *cl;
208
209         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
210                 if (cl->classid == classid)
211                         return cl;
212         return NULL;
213 }
214
215 #ifdef CONFIG_NET_CLS_POLICE
216
217 static struct cbq_class *
218 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
219 {
220         struct cbq_class *cl, *new;
221
222         for (cl = this->tparent; cl; cl = cl->tparent)
223                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
224                         return new;
225
226         return NULL;
227 }
228
229 #endif
230
231 /* Classify packet. The procedure is pretty complicated, but
232    it allows us to combine link sharing and priority scheduling
233    transparently.
234
235    Namely, you can put link sharing rules (f.e. route based) at the root of CBQ,
236    so that it resolves to split nodes. Then packets are classified
237    by logical priority, or a more specific classifier may be attached
238    to the split node.
239  */
240
241 static struct cbq_class *
242 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
243 {
244         struct cbq_sched_data *q = qdisc_priv(sch);
245         struct cbq_class *head = &q->link;
246         struct cbq_class **defmap;
247         struct cbq_class *cl = NULL;
248         u32 prio = skb->priority;
249         struct tcf_result res;
250
251         /*
252          *  Step 1. If skb->priority points to one of our classes, use it.
253          */
254         if (TC_H_MAJ(prio^sch->handle) == 0 &&
255             (cl = cbq_class_lookup(q, prio)) != NULL)
256                 return cl;
257
258         *qerr = NET_XMIT_BYPASS;
259         for (;;) {
260                 int result = 0;
261                 defmap = head->defaults;
262
263                 /*
264                  * Step 2+n. Apply classifier.
265                  */
266                 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
267                         goto fallback;
268
269                 if ((cl = (void*)res.class) == NULL) {
270                         if (TC_H_MAJ(res.classid))
271                                 cl = cbq_class_lookup(q, res.classid);
272                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
273                                 cl = defmap[TC_PRIO_BESTEFFORT];
274
275                         if (cl == NULL || cl->level >= head->level)
276                                 goto fallback;
277                 }
278
279 #ifdef CONFIG_NET_CLS_ACT
280                 switch (result) {
281                 case TC_ACT_QUEUED:
282                 case TC_ACT_STOLEN:
283                         *qerr = NET_XMIT_SUCCESS;
284                 case TC_ACT_SHOT:
285                         return NULL;
286                 }
287 #elif defined(CONFIG_NET_CLS_POLICE)
288                 switch (result) {
289                 case TC_POLICE_RECLASSIFY:
290                         return cbq_reclassify(skb, cl);
291                 case TC_POLICE_SHOT:
292                         return NULL;
293                 default:
294                         break;
295                 }
296 #endif
297                 if (cl->level == 0)
298                         return cl;
299
300                 /*
301                  * Step 3+n. If the classifier selected a link sharing class,
302                  *         apply the agency-specific classifier.
303                  *         Repeat this procedure until we hit a leaf node.
304                  */
305                 head = cl;
306         }
307
308 fallback:
309         cl = head;
310
311         /*
312          * Step 4. No success...
313          */
314         if (TC_H_MAJ(prio) == 0 &&
315             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
316             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
317                 return head;
318
319         return cl;
320 }
321
322 /*
323    A packet has just been enqueued on an empty class.
324    cbq_activate_class adds the class to the tail of the active class list
325    of its priority band.
326  */
327
328 static __inline__ void cbq_activate_class(struct cbq_class *cl)
329 {
330         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
331         int prio = cl->cpriority;
332         struct cbq_class *cl_tail;
333
334         cl_tail = q->active[prio];
335         q->active[prio] = cl;
336
337         if (cl_tail != NULL) {
338                 cl->next_alive = cl_tail->next_alive;
339                 cl_tail->next_alive = cl;
340         } else {
341                 cl->next_alive = cl;
342                 q->activemask |= (1<<prio);
343         }
344 }
345
346 /*
347    Unlink the class from the active chain.
348    Note that the same thing is also done directly in cbq_dequeue*
349    during the round-robin procedure.
350  */
351
352 static void cbq_deactivate_class(struct cbq_class *this)
353 {
354         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
355         int prio = this->cpriority;
356         struct cbq_class *cl;
357         struct cbq_class *cl_prev = q->active[prio];
358
359         do {
360                 cl = cl_prev->next_alive;
361                 if (cl == this) {
362                         cl_prev->next_alive = cl->next_alive;
363                         cl->next_alive = NULL;
364
365                         if (cl == q->active[prio]) {
366                                 q->active[prio] = cl_prev;
367                                 if (cl == q->active[prio]) {
368                                         q->active[prio] = NULL;
369                                         q->activemask &= ~(1<<prio);
370                                         return;
371                                 }
372                         }
373                         return;
374                 }
375         } while ((cl_prev = cl) != q->active[prio]);
376 }
377
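/* Toplevel link-sharing heuristic: q->toplevel caps how high in the class
   tree borrowing may reach. On enqueue to a class whose qdisc is not
   throttled, walk the borrow chain; if the class or one of its ancestors
   is already underlimit (its undertime lies in the past), lower the
   cutoff to that level. */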
378 static void
379 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
380 {
381         int toplevel = q->toplevel;
382
383         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
384                 psched_time_t now;
385                 psched_tdiff_t incr;
386
387                 PSCHED_GET_TIME(now);
388                 incr = PSCHED_TDIFF(now, q->now_rt);
389                 PSCHED_TADD2(q->now, incr, now);
390
391                 do {
392                         if (PSCHED_TLESS(cl->undertime, now)) {
393                                 q->toplevel = cl->level;
394                                 return;
395                         }
396                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
397         }
398 }
399
400 static int
401 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
402 {
403         struct cbq_sched_data *q = qdisc_priv(sch);
404         int len = skb->len;
405         int ret;
406         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
407
408 #ifdef CONFIG_NET_CLS_POLICE
409         q->rx_class = cl;
410 #endif
411         if (cl == NULL) {
412                 if (ret == NET_XMIT_BYPASS)
413                         sch->qstats.drops++;
414                 kfree_skb(skb);
415                 return ret;
416         }
417
418 #ifdef CONFIG_NET_CLS_POLICE
419         cl->q->__parent = sch;
420 #endif
421         if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
422                 sch->q.qlen++;
423                 sch->bstats.packets++;
424                 sch->bstats.bytes+=len;
425                 cbq_mark_toplevel(q, cl);
426                 if (!cl->next_alive)
427                         cbq_activate_class(cl);
428                 return ret;
429         }
430
431         sch->qstats.drops++;
432         cbq_mark_toplevel(q, cl);
433         cl->qstats.drops++;
434         return ret;
435 }
436
437 static int
438 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
439 {
440         struct cbq_sched_data *q = qdisc_priv(sch);
441         struct cbq_class *cl;
442         int ret;
443
444         if ((cl = q->tx_class) == NULL) {
445                 kfree_skb(skb);
446                 sch->qstats.drops++;
447                 return NET_XMIT_CN;
448         }
449         q->tx_class = NULL;
450
451         cbq_mark_toplevel(q, cl);
452
453 #ifdef CONFIG_NET_CLS_POLICE
454         q->rx_class = cl;
455         cl->q->__parent = sch;
456 #endif
457         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
458                 sch->q.qlen++;
459                 sch->qstats.requeues++;
460                 if (!cl->next_alive)
461                         cbq_activate_class(cl);
462                 return 0;
463         }
464         sch->qstats.drops++;
465         cl->qstats.drops++;
466         return ret;
467 }
468
469 /* Overlimit actions */
470
471 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
472
473 static void cbq_ovl_classic(struct cbq_class *cl)
474 {
475         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
476         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
477
478         if (!cl->delayed) {
479                 delay += cl->offtime;
480
481                 /*
482                    The class goes to sleep, so it will have no
483                    chance to work off its avgidle. Let's forgive it 8)
484
485                    BTW cbq-2.0 has a bug in this
486                    place; apparently they forgot to shift it by cl->ewma_log.
487                  */
488                 if (cl->avgidle < 0)
489                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
490                 if (cl->avgidle < cl->minidle)
491                         cl->avgidle = cl->minidle;
492                 if (delay <= 0)
493                         delay = 1;
494                 PSCHED_TADD2(q->now, delay, cl->undertime);
495
496                 cl->xstats.overactions++;
497                 cl->delayed = 1;
498         }
499         if (q->wd_expires == 0 || q->wd_expires > delay)
500                 q->wd_expires = delay;
501
502         /* Dirty work! We must schedule wakeups based on the
503            real available rate, rather than the leaf rate,
504            which may be tiny (even zero).
505          */
506         if (q->toplevel == TC_CBQ_MAXLEVEL) {
507                 struct cbq_class *b;
508                 psched_tdiff_t base_delay = q->wd_expires;
509
510                 for (b = cl->borrow; b; b = b->borrow) {
511                         delay = PSCHED_TDIFF(b->undertime, q->now);
512                         if (delay < base_delay) {
513                                 if (delay <= 0)
514                                         delay = 1;
515                                 base_delay = delay;
516                         }
517                 }
518
519                 q->wd_expires = base_delay;
520         }
521 }
522
523 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime when
524    they go overlimit
525  */
526
527 static void cbq_ovl_rclassic(struct cbq_class *cl)
528 {
529         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
530         struct cbq_class *this = cl;
531
532         do {
533                 if (cl->level > q->toplevel) {
534                         cl = NULL;
535                         break;
536                 }
537         } while ((cl = cl->borrow) != NULL);
538
539         if (cl == NULL)
540                 cl = this;
541         cbq_ovl_classic(cl);
542 }
543
544 /* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit */
545
546 static void cbq_ovl_delay(struct cbq_class *cl)
547 {
548         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
549         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
550
551         if (!cl->delayed) {
552                 unsigned long sched = jiffies;
553
554                 delay += cl->offtime;
555                 if (cl->avgidle < 0)
556                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
557                 if (cl->avgidle < cl->minidle)
558                         cl->avgidle = cl->minidle;
559                 PSCHED_TADD2(q->now, delay, cl->undertime);
560
561                 if (delay > 0) {
562                         sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
563                         cl->penalized = sched;
564                         cl->cpriority = TC_CBQ_MAXPRIO;
565                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
566                         if (del_timer(&q->delay_timer) &&
567                             (long)(q->delay_timer.expires - sched) > 0)
568                                 q->delay_timer.expires = sched;
569                         add_timer(&q->delay_timer);
570                         cl->delayed = 1;
571                         cl->xstats.overactions++;
572                         return;
573                 }
574                 delay = 1;
575         }
576         if (q->wd_expires == 0 || q->wd_expires > delay)
577                 q->wd_expires = delay;
578 }
579
580 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
581
582 static void cbq_ovl_lowprio(struct cbq_class *cl)
583 {
584         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
585
586         cl->penalized = jiffies + cl->penalty;
587
588         if (cl->cpriority != cl->priority2) {
589                 cl->cpriority = cl->priority2;
590                 q->pmask |= (1<<cl->cpriority);
591                 cl->xstats.overactions++;
592         }
593         cbq_ovl_classic(cl);
594 }
595
596 /* TC_CBQ_OVL_DROP: penalize the class by dropping a packet */
597
598 static void cbq_ovl_drop(struct cbq_class *cl)
599 {
600         if (cl->q->ops->drop)
601                 if (cl->q->ops->drop(cl->q))
602                         cl->qdisc->q.qlen--;
603         cl->xstats.overactions++;
604         cbq_ovl_classic(cl);
605 }
606
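/* Scan the active list of a penalized priority band and reactivate, at
   their normal priority, the classes whose OVL_DELAY/OVL_LOWPRIO penalty
   has expired. The return value is used by cbq_undelay() to decide
   whether and when to rearm the delay timer. */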
607 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
608 {
609         struct cbq_class *cl;
610         struct cbq_class *cl_prev = q->active[prio];
611         unsigned long now = jiffies;
612         unsigned long sched = now;
613
614         if (cl_prev == NULL)
615                 return now;
616
617         do {
618                 cl = cl_prev->next_alive;
619                 if ((long)(now - cl->penalized) > 0) {
620                         cl_prev->next_alive = cl->next_alive;
621                         cl->next_alive = NULL;
622                         cl->cpriority = cl->priority;
623                         cl->delayed = 0;
624                         cbq_activate_class(cl);
625
626                         if (cl == q->active[prio]) {
627                                 q->active[prio] = cl_prev;
628                                 if (cl == q->active[prio]) {
629                                         q->active[prio] = NULL;
630                                         return 0;
631                                 }
632                         }
633
634                         cl = cl_prev->next_alive;
635                 } else if ((long)(sched - cl->penalized) > 0)
636                         sched = cl->penalized;
637         } while ((cl_prev = cl) != q->active[prio]);
638
639         return (long)(sched - now);
640 }
641
642 static void cbq_undelay(unsigned long arg)
643 {
644         struct Qdisc *sch = (struct Qdisc*)arg;
645         struct cbq_sched_data *q = qdisc_priv(sch);
646         long delay = 0;
647         unsigned pmask;
648
649         pmask = q->pmask;
650         q->pmask = 0;
651
652         while (pmask) {
653                 int prio = ffz(~pmask);
654                 long tmp;
655
656                 pmask &= ~(1<<prio);
657
658                 tmp = cbq_undelay_prio(q, prio);
659                 if (tmp > 0) {
660                         q->pmask |= 1<<prio;
661                         if (tmp < delay || delay == 0)
662                                 delay = tmp;
663                 }
664         }
665
666         if (delay) {
667                 q->delay_timer.expires = jiffies + delay;
668                 add_timer(&q->delay_timer);
669         }
670
671         sch->flags &= ~TCQ_F_THROTTLED;
672         netif_schedule(sch->dev);
673 }
674
675
676 #ifdef CONFIG_NET_CLS_POLICE
677
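/* reshape_fail hook, installed on a child qdisc when the class' police
   mode is TC_POLICE_RECLASSIFY: when the child would have to drop, try
   to reclassify the packet into a best-effort default class higher up
   the tree (cbq_reclassify) and enqueue it there instead. */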
678 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
679 {
680         int len = skb->len;
681         struct Qdisc *sch = child->__parent;
682         struct cbq_sched_data *q = qdisc_priv(sch);
683         struct cbq_class *cl = q->rx_class;
684
685         q->rx_class = NULL;
686
687         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
688
689                 cbq_mark_toplevel(q, cl);
690
691                 q->rx_class = cl;
692                 cl->q->__parent = sch;
693
694                 if (cl->q->enqueue(skb, cl->q) == 0) {
695                         sch->q.qlen++;
696                         sch->bstats.packets++;
697                         sch->bstats.bytes+=len;
698                         if (!cl->next_alive)
699                                 cbq_activate_class(cl);
700                         return 0;
701                 }
702                 sch->qstats.drops++;
703                 return 0;
704         }
705
706         sch->qstats.drops++;
707         return -1;
708 }
709 #endif
710
711 /*
712    This is a mission-critical procedure.
713
714    We "regenerate" the toplevel cutoff if the transmitting class
715    has backlog and is not regulated. This is not part of the
716    original CBQ description, but it looks more reasonable.
717    Probably it is wrong; this question needs further investigation.
718 */
719
720 static __inline__ void
721 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
722                     struct cbq_class *borrowed)
723 {
724         if (cl && q->toplevel >= borrowed->level) {
725                 if (cl->q->q.qlen > 1) {
726                         do {
727                                 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
728                                         q->toplevel = borrowed->level;
729                                         return;
730                                 }
731                         } while ((borrowed=borrowed->borrow) != NULL);
732                 }
733 #if 0
734         /* It is not necessary now. Uncommenting it
735            will save CPU cycles, but decrease fairness.
736          */
737                 q->toplevel = TC_CBQ_MAXLEVEL;
738 #endif
739         }
740 }
741
742 static void
743 cbq_update(struct cbq_sched_data *q)
744 {
745         struct cbq_class *this = q->tx_class;
746         struct cbq_class *cl = this;
747         int len = q->tx_len;
748
749         q->tx_class = NULL;
750
751         for ( ; cl; cl = cl->share) {
752                 long avgidle = cl->avgidle;
753                 long idle;
754
755                 cl->bstats.packets++;
756                 cl->bstats.bytes += len;
757
758                 /*
759                    (now - last) is the total time between packet right edges.
760                    (last_pktlen/rate) is "virtual" busy time, so that
761
762                          idle = (now - last) - last_pktlen/rate
763                  */
764
765                 idle = PSCHED_TDIFF(q->now, cl->last);
766                 if ((unsigned long)idle > 128*1024*1024) {
767                         avgidle = cl->maxidle;
768                 } else {
769                         idle -= L2T(cl, len);
770
771                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
772                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
773                    cl->avgidle == true_avgidle/W,
774                    hence:
775                  */
776                         avgidle += idle - (avgidle>>cl->ewma_log);
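                /* Substituting cl->avgidle == true_avgidle/W above gives
                   exactly this update:
                        avgidle' = (1-W)*avgidle + idle
                                 = avgidle + idle - (avgidle >> ewma_log)
                 */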
777                 }
778
779                 if (avgidle <= 0) {
780                         /* Overlimit or at-limit */
781
782                         if (avgidle < cl->minidle)
783                                 avgidle = cl->minidle;
784
785                         cl->avgidle = avgidle;
786
787                         /* Calculate the expected time when this class
788                            will be allowed to send.
789                            It will occur when:
790                            (1-W)*true_avgidle + W*delay = 0, i.e.
791                            idle = (1/W - 1)*(-true_avgidle)
792                            or
793                            idle = (1 - W)*(-cl->avgidle);
794                          */
795                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
796
797                         /*
798                            That is not all.
799                            To maintain the rate allocated to the class,
800                            we add to undertime the virtual clock time
801                            necessary to complete the transmitted packet.
802                            (len/phys_bandwidth has already elapsed by
803                            the time cbq_update is called.)
804                          */
805
806                         idle -= L2T(&q->link, len);
807                         idle += L2T(cl, len);
808
809                         PSCHED_AUDIT_TDIFF(idle);
810
811                         PSCHED_TADD2(q->now, idle, cl->undertime);
812                 } else {
813                         /* Underlimit */
814
815                         PSCHED_SET_PASTPERFECT(cl->undertime);
816                         if (avgidle > cl->maxidle)
817                                 cl->avgidle = cl->maxidle;
818                         else
819                                 cl->avgidle = avgidle;
820                 }
821                 cl->last = q->now;
822         }
823
824         cbq_update_toplevel(q, this, q->tx_borrowed);
825 }
826
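/* Decide whether a backlogged class may transmit now: return the class
   itself if it is the root or is underlimit, otherwise the nearest
   underlimit ancestor on its borrow chain. Return NULL if the toplevel
   cutoff forbids borrowing, or if the chain is exhausted (in which case
   the class' overlimit action has been invoked). */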
827 static __inline__ struct cbq_class *
828 cbq_under_limit(struct cbq_class *cl)
829 {
830         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
831         struct cbq_class *this_cl = cl;
832
833         if (cl->tparent == NULL)
834                 return cl;
835
836         if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
837             !PSCHED_TLESS(q->now, cl->undertime)) {
838                 cl->delayed = 0;
839                 return cl;
840         }
841
842         do {
843                 /* It is very suspicious place. Now overlimit
844                    action is generated for not bounded classes
845                    only if link is completely congested.
846                    Though it is in agree with ancestor-only paradigm,
847                    it looks very stupid. Particularly,
848                    it means that this chunk of code will either
849                    never be called or result in strong amplification
850                    of burstiness. Dangerous, silly, and, however,
851                    no another solution exists.
852                  */
853                 if ((cl = cl->borrow) == NULL) {
854                         this_cl->qstats.overlimits++;
855                         this_cl->overlimit(this_cl);
856                         return NULL;
857                 }
858                 if (cl->level > q->toplevel)
859                         return NULL;
860         } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
861                  PSCHED_TLESS(q->now, cl->undertime));
862
863         cl->delayed = 0;
864         return cl;
865 }
866
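/* Weighted round-robin over the ring of active classes in one priority
   band. A class may send only while its per-round deficit is positive;
   the deficit is decreased by the packet length and replenished with
   cl->quantum as the round advances. Overlimit classes transmit only if
   cbq_under_limit() finds an ancestor to borrow from. */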
867 static __inline__ struct sk_buff *
868 cbq_dequeue_prio(struct Qdisc *sch, int prio)
869 {
870         struct cbq_sched_data *q = qdisc_priv(sch);
871         struct cbq_class *cl_tail, *cl_prev, *cl;
872         struct sk_buff *skb;
873         int deficit;
874
875         cl_tail = cl_prev = q->active[prio];
876         cl = cl_prev->next_alive;
877
878         do {
879                 deficit = 0;
880
881                 /* Start round */
882                 do {
883                         struct cbq_class *borrow = cl;
884
885                         if (cl->q->q.qlen &&
886                             (borrow = cbq_under_limit(cl)) == NULL)
887                                 goto skip_class;
888
889                         if (cl->deficit <= 0) {
890                                 /* Class exhausted its allotment per
891                                    this round. Switch to the next one.
892                                  */
893                                 deficit = 1;
894                                 cl->deficit += cl->quantum;
895                                 goto next_class;
896                         }
897
898                         skb = cl->q->dequeue(cl->q);
899
900                         /* Class did not give us any skb :-(
901                            This can occur even if cl->q->q.qlen != 0,
902                            f.e. if cl->q == "tbf"
903                          */
904                         if (skb == NULL)
905                                 goto skip_class;
906
907                         cl->deficit -= skb->len;
908                         q->tx_class = cl;
909                         q->tx_borrowed = borrow;
910                         if (borrow != cl) {
911 #ifndef CBQ_XSTATS_BORROWS_BYTES
912                                 borrow->xstats.borrows++;
913                                 cl->xstats.borrows++;
914 #else
915                                 borrow->xstats.borrows += skb->len;
916                                 cl->xstats.borrows += skb->len;
917 #endif
918                         }
919                         q->tx_len = skb->len;
920
921                         if (cl->deficit <= 0) {
922                                 q->active[prio] = cl;
923                                 cl = cl->next_alive;
924                                 cl->deficit += cl->quantum;
925                         }
926                         return skb;
927
928 skip_class:
929                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
930                                 /* Class is empty or penalized.
931                                    Unlink it from active chain.
932                                  */
933                                 cl_prev->next_alive = cl->next_alive;
934                                 cl->next_alive = NULL;
935
936                                 /* Did cl_tail point to it? */
937                                 if (cl == cl_tail) {
938                                         /* Repair it! */
939                                         cl_tail = cl_prev;
940
941                                         /* Was it the last class in this band? */
942                                         if (cl == cl_tail) {
943                                                 /* Kill the band! */
944                                                 q->active[prio] = NULL;
945                                                 q->activemask &= ~(1<<prio);
946                                                 if (cl->q->q.qlen)
947                                                         cbq_activate_class(cl);
948                                                 return NULL;
949                                         }
950
951                                         q->active[prio] = cl_tail;
952                                 }
953                                 if (cl->q->q.qlen)
954                                         cbq_activate_class(cl);
955
956                                 cl = cl_prev;
957                         }
958
959 next_class:
960                         cl_prev = cl;
961                         cl = cl->next_alive;
962                 } while (cl_prev != cl_tail);
963         } while (deficit);
964
965         q->active[prio] = cl_prev;
966
967         return NULL;
968 }
969
970 static __inline__ struct sk_buff *
971 cbq_dequeue_1(struct Qdisc *sch)
972 {
973         struct cbq_sched_data *q = qdisc_priv(sch);
974         struct sk_buff *skb;
975         unsigned activemask;
976
977         activemask = q->activemask&0xFF;
978         while (activemask) {
979                 int prio = ffz(~activemask);
980                 activemask &= ~(1<<prio);
981                 skb = cbq_dequeue_prio(sch, prio);
982                 if (skb)
983                         return skb;
984         }
985         return NULL;
986 }
987
988 static struct sk_buff *
989 cbq_dequeue(struct Qdisc *sch)
990 {
991         struct sk_buff *skb;
992         struct cbq_sched_data *q = qdisc_priv(sch);
993         psched_time_t now;
994         psched_tdiff_t incr;
995
996         PSCHED_GET_TIME(now);
997         incr = PSCHED_TDIFF(now, q->now_rt);
998
999         if (q->tx_class) {
1000                 psched_tdiff_t incr2;
1001                 /* Time integrator. We calculate the EOS time
1002                    by adding the expected packet transmission time.
1003                    If real time is greater, we warp the artificial clock,
1004                    so that:
1005
1006                    cbq_time = max(real_time, work);
1007                  */
1008                 incr2 = L2T(&q->link, q->tx_len);
1009                 PSCHED_TADD(q->now, incr2);
1010                 cbq_update(q);
1011                 if ((incr -= incr2) < 0)
1012                         incr = 0;
1013         }
1014         PSCHED_TADD(q->now, incr);
1015         q->now_rt = now;
1016
1017         for (;;) {
1018                 q->wd_expires = 0;
1019
1020                 skb = cbq_dequeue_1(sch);
1021                 if (skb) {
1022                         sch->q.qlen--;
1023                         sch->flags &= ~TCQ_F_THROTTLED;
1024                         return skb;
1025                 }
1026
1027                 /* All the classes are overlimit.
1028
1029                    This is possible if:
1030
1031                    1. The scheduler is empty.
1032                    2. The toplevel cutoff inhibited borrowing.
1033                    3. The root class is overlimit.
1034
1035                    Reset the 2nd and 3rd conditions and retry.
1036
1037                    Note that NS and cbq-2.0 are buggy here: peeking
1038                    an arbitrary class is appropriate for ancestor-only
1039                    sharing, but not for the toplevel algorithm.
1040
1041                    Our version is better but slower, because it requires
1042                    two passes; this is unavoidable with top-level sharing.
1043                 */
1044
1045                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1046                     PSCHED_IS_PASTPERFECT(q->link.undertime))
1047                         break;
1048
1049                 q->toplevel = TC_CBQ_MAXLEVEL;
1050                 PSCHED_SET_PASTPERFECT(q->link.undertime);
1051         }
1052
1053         /* No packets in scheduler or nobody wants to give them to us :-(
1054            Sigh... start the watchdog timer in the latter case. */
1055
1056         if (sch->q.qlen) {
1057                 sch->qstats.overlimits++;
1058                 if (q->wd_expires)
1059                         qdisc_watchdog_schedule(&q->watchdog,
1060                                                 q->now + q->wd_expires);
1061         }
1062         return NULL;
1063 }
1064
1065 /* CBQ class maintenance routines */
1066
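/* Recompute the 'level' fields along the tparent chain after the class
   tree changes: each node's level is set to the maximal level of its
   children plus one. */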
1067 static void cbq_adjust_levels(struct cbq_class *this)
1068 {
1069         if (this == NULL)
1070                 return;
1071
1072         do {
1073                 int level = 0;
1074                 struct cbq_class *cl;
1075
1076                 if ((cl = this->children) != NULL) {
1077                         do {
1078                                 if (cl->level > level)
1079                                         level = cl->level;
1080                         } while ((cl = cl->sibling) != this->children);
1081                 }
1082                 this->level = level+1;
1083         } while ((this = this->tparent) != NULL);
1084 }
1085
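/* Recompute the per-round quantum of every class in priority band prio:
   each class gets its allot scaled by its weight relative to the band's
   average weight (q->quanta[prio] is the sum of the weights in the band,
   q->nclasses[prio] the number of classes):

        quantum = allot * weight * nclasses[prio] / quanta[prio]

   Out-of-range results are reported and repaired below. */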
1086 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1087 {
1088         struct cbq_class *cl;
1089         unsigned h;
1090
1091         if (q->quanta[prio] == 0)
1092                 return;
1093
1094         for (h=0; h<16; h++) {
1095                 for (cl = q->classes[h]; cl; cl = cl->next) {
1096                         /* BUGGGG... Beware! This expression suffers from
1097                            arithmetic overflows!
1098                          */
1099                         if (cl->priority == prio) {
1100                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1101                                         q->quanta[prio];
1102                         }
1103                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1104                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1105                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1106                         }
1107                 }
1108         }
1109 }
1110
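/* Rebuild the defaults[] array of the split node this class belongs to:
   drop entries that still point at cl but are no longer covered by
   cl->defmap, then, for each priority left unset, pick the lowest-level
   class under the same split whose defmap covers that priority. */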
1111 static void cbq_sync_defmap(struct cbq_class *cl)
1112 {
1113         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1114         struct cbq_class *split = cl->split;
1115         unsigned h;
1116         int i;
1117
1118         if (split == NULL)
1119                 return;
1120
1121         for (i=0; i<=TC_PRIO_MAX; i++) {
1122                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1123                         split->defaults[i] = NULL;
1124         }
1125
1126         for (i=0; i<=TC_PRIO_MAX; i++) {
1127                 int level = split->level;
1128
1129                 if (split->defaults[i])
1130                         continue;
1131
1132                 for (h=0; h<16; h++) {
1133                         struct cbq_class *c;
1134
1135                         for (c = q->classes[h]; c; c = c->next) {
1136                                 if (c->split == split && c->level < level &&
1137                                     c->defmap&(1<<i)) {
1138                                         split->defaults[i] = c;
1139                                         level = c->level;
1140                                 }
1141                         }
1142                 }
1143         }
1144 }
1145
1146 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1147 {
1148         struct cbq_class *split = NULL;
1149
1150         if (splitid == 0) {
1151                 if ((split = cl->split) == NULL)
1152                         return;
1153                 splitid = split->classid;
1154         }
1155
1156         if (split == NULL || split->classid != splitid) {
1157                 for (split = cl->tparent; split; split = split->tparent)
1158                         if (split->classid == splitid)
1159                                 break;
1160         }
1161
1162         if (split == NULL)
1163                 return;
1164
1165         if (cl->split != split) {
1166                 cl->defmap = 0;
1167                 cbq_sync_defmap(cl);
1168                 cl->split = split;
1169                 cl->defmap = def&mask;
1170         } else
1171                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1172
1173         cbq_sync_defmap(cl);
1174 }
1175
1176 static void cbq_unlink_class(struct cbq_class *this)
1177 {
1178         struct cbq_class *cl, **clp;
1179         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1180
1181         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1182                 if (cl == this) {
1183                         *clp = cl->next;
1184                         cl->next = NULL;
1185                         break;
1186                 }
1187         }
1188
1189         if (this->tparent) {
1190                 clp=&this->sibling;
1191                 cl = *clp;
1192                 do {
1193                         if (cl == this) {
1194                                 *clp = cl->sibling;
1195                                 break;
1196                         }
1197                         clp = &cl->sibling;
1198                 } while ((cl = *clp) != this->sibling);
1199
1200                 if (this->tparent->children == this) {
1201                         this->tparent->children = this->sibling;
1202                         if (this->sibling == this)
1203                                 this->tparent->children = NULL;
1204                 }
1205         } else {
1206                 BUG_TRAP(this->sibling == this);
1207         }
1208 }
1209
1210 static void cbq_link_class(struct cbq_class *this)
1211 {
1212         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1213         unsigned h = cbq_hash(this->classid);
1214         struct cbq_class *parent = this->tparent;
1215
1216         this->sibling = this;
1217         this->next = q->classes[h];
1218         q->classes[h] = this;
1219
1220         if (parent == NULL)
1221                 return;
1222
1223         if (parent->children == NULL) {
1224                 parent->children = this;
1225         } else {
1226                 this->sibling = parent->children->sibling;
1227                 parent->children->sibling = this;
1228         }
1229 }
1230
1231 static unsigned int cbq_drop(struct Qdisc* sch)
1232 {
1233         struct cbq_sched_data *q = qdisc_priv(sch);
1234         struct cbq_class *cl, *cl_head;
1235         int prio;
1236         unsigned int len;
1237
1238         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1239                 if ((cl_head = q->active[prio]) == NULL)
1240                         continue;
1241
1242                 cl = cl_head;
1243                 do {
1244                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1245                                 sch->q.qlen--;
1246                                 if (!cl->q->q.qlen)
1247                                         cbq_deactivate_class(cl);
1248                                 return len;
1249                         }
1250                 } while ((cl = cl->next_alive) != cl_head);
1251         }
1252         return 0;
1253 }
1254
1255 static void
1256 cbq_reset(struct Qdisc* sch)
1257 {
1258         struct cbq_sched_data *q = qdisc_priv(sch);
1259         struct cbq_class *cl;
1260         int prio;
1261         unsigned h;
1262
1263         q->activemask = 0;
1264         q->pmask = 0;
1265         q->tx_class = NULL;
1266         q->tx_borrowed = NULL;
1267         qdisc_watchdog_cancel(&q->watchdog);
1268         del_timer(&q->delay_timer);
1269         q->toplevel = TC_CBQ_MAXLEVEL;
1270         PSCHED_GET_TIME(q->now);
1271         q->now_rt = q->now;
1272
1273         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1274                 q->active[prio] = NULL;
1275
1276         for (h = 0; h < 16; h++) {
1277                 for (cl = q->classes[h]; cl; cl = cl->next) {
1278                         qdisc_reset(cl->q);
1279
1280                         cl->next_alive = NULL;
1281                         PSCHED_SET_PASTPERFECT(cl->undertime);
1282                         cl->avgidle = cl->maxidle;
1283                         cl->deficit = cl->quantum;
1284                         cl->cpriority = cl->priority;
1285                 }
1286         }
1287         sch->q.qlen = 0;
1288 }
1289
1290
1291 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1292 {
1293         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1294                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1295                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1296         }
1297         if (lss->change&TCF_CBQ_LSS_EWMA)
1298                 cl->ewma_log = lss->ewma_log;
1299         if (lss->change&TCF_CBQ_LSS_AVPKT)
1300                 cl->avpkt = lss->avpkt;
1301         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1302                 cl->minidle = -(long)lss->minidle;
1303         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1304                 cl->maxidle = lss->maxidle;
1305                 cl->avgidle = lss->maxidle;
1306         }
1307         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1308                 cl->offtime = lss->offtime;
1309         return 0;
1310 }
1311
1312 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1313 {
1314         q->nclasses[cl->priority]--;
1315         q->quanta[cl->priority] -= cl->weight;
1316         cbq_normalize_quanta(q, cl->priority);
1317 }
1318
1319 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1320 {
1321         q->nclasses[cl->priority]++;
1322         q->quanta[cl->priority] += cl->weight;
1323         cbq_normalize_quanta(q, cl->priority);
1324 }
1325
1326 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1327 {
1328         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1329
1330         if (wrr->allot)
1331                 cl->allot = wrr->allot;
1332         if (wrr->weight)
1333                 cl->weight = wrr->weight;
1334         if (wrr->priority) {
1335                 cl->priority = wrr->priority-1;
1336                 cl->cpriority = cl->priority;
1337                 if (cl->priority >= cl->priority2)
1338                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1339         }
1340
1341         cbq_addprio(q, cl);
1342         return 0;
1343 }
1344
1345 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1346 {
1347         switch (ovl->strategy) {
1348         case TC_CBQ_OVL_CLASSIC:
1349                 cl->overlimit = cbq_ovl_classic;
1350                 break;
1351         case TC_CBQ_OVL_DELAY:
1352                 cl->overlimit = cbq_ovl_delay;
1353                 break;
1354         case TC_CBQ_OVL_LOWPRIO:
1355                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1356                     ovl->priority2-1 <= cl->priority)
1357                         return -EINVAL;
1358                 cl->priority2 = ovl->priority2-1;
1359                 cl->overlimit = cbq_ovl_lowprio;
1360                 break;
1361         case TC_CBQ_OVL_DROP:
1362                 cl->overlimit = cbq_ovl_drop;
1363                 break;
1364         case TC_CBQ_OVL_RCLASSIC:
1365                 cl->overlimit = cbq_ovl_rclassic;
1366                 break;
1367         default:
1368                 return -EINVAL;
1369         }
1370         cl->penalty = (ovl->penalty*HZ)/1000;
1371         return 0;
1372 }
1373
1374 #ifdef CONFIG_NET_CLS_POLICE
1375 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1376 {
1377         cl->police = p->police;
1378
1379         if (cl->q->handle) {
1380                 if (p->police == TC_POLICE_RECLASSIFY)
1381                         cl->q->reshape_fail = cbq_reshape_fail;
1382                 else
1383                         cl->q->reshape_fail = NULL;
1384         }
1385         return 0;
1386 }
1387 #endif
1388
1389 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1390 {
1391         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1392         return 0;
1393 }
1394
1395 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1396 {
1397         struct cbq_sched_data *q = qdisc_priv(sch);
1398         struct rtattr *tb[TCA_CBQ_MAX];
1399         struct tc_ratespec *r;
1400
1401         if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1402             tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1403             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1404                 return -EINVAL;
1405
1406         if (tb[TCA_CBQ_LSSOPT-1] &&
1407             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1408                 return -EINVAL;
1409
1410         r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1411
1412         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1413                 return -EINVAL;
1414
1415         q->link.refcnt = 1;
1416         q->link.sibling = &q->link;
1417         q->link.classid = sch->handle;
1418         q->link.qdisc = sch;
1419         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1420                                             sch->handle)))
1421                 q->link.q = &noop_qdisc;
1422
1423         q->link.priority = TC_CBQ_MAXPRIO-1;
1424         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1425         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1426         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1427         q->link.overlimit = cbq_ovl_classic;
1428         q->link.allot = psched_mtu(sch->dev);
1429         q->link.quantum = q->link.allot;
1430         q->link.weight = q->link.R_tab->rate.rate;
1431
1432         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1433         q->link.avpkt = q->link.allot/2;
1434         q->link.minidle = -0x7FFFFFFF;
1435         q->link.stats_lock = &sch->dev->queue_lock;
1436
1437         qdisc_watchdog_init(&q->watchdog, sch);
1438         init_timer(&q->delay_timer);
1439         q->delay_timer.data = (unsigned long)sch;
1440         q->delay_timer.function = cbq_undelay;
1441         q->toplevel = TC_CBQ_MAXLEVEL;
1442         PSCHED_GET_TIME(q->now);
1443         q->now_rt = q->now;
1444
1445         cbq_link_class(&q->link);
1446
1447         if (tb[TCA_CBQ_LSSOPT-1])
1448                 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1449
1450         cbq_addprio(q, &q->link);
1451         return 0;
1452 }
1453
1454 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1455 {
1456         unsigned char    *b = skb->tail;
1457
1458         RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1459         return skb->len;
1460
1461 rtattr_failure:
1462         skb_trim(skb, b - skb->data);
1463         return -1;
1464 }
1465
1466 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1467 {
1468         unsigned char    *b = skb->tail;
1469         struct tc_cbq_lssopt opt;
1470
1471         opt.flags = 0;
1472         if (cl->borrow == NULL)
1473                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1474         if (cl->share == NULL)
1475                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1476         opt.ewma_log = cl->ewma_log;
1477         opt.level = cl->level;
1478         opt.avpkt = cl->avpkt;
1479         opt.maxidle = cl->maxidle;
1480         opt.minidle = (u32)(-cl->minidle);
1481         opt.offtime = cl->offtime;
1482         opt.change = ~0;
1483         RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1484         return skb->len;
1485
1486 rtattr_failure:
1487         skb_trim(skb, b - skb->data);
1488         return -1;
1489 }
1490
1491 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1492 {
1493         unsigned char    *b = skb->tail;
1494         struct tc_cbq_wrropt opt;
1495
1496         opt.flags = 0;
1497         opt.allot = cl->allot;
1498         opt.priority = cl->priority+1;
1499         opt.cpriority = cl->cpriority+1;
1500         opt.weight = cl->weight;
1501         RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1502         return skb->len;
1503
1504 rtattr_failure:
1505         skb_trim(skb, b - skb->data);
1506         return -1;
1507 }
1508
1509 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1510 {
1511         unsigned char    *b = skb->tail;
1512         struct tc_cbq_ovl opt;
1513
1514         opt.strategy = cl->ovl_strategy;
1515         opt.priority2 = cl->priority2+1;
1516         opt.pad = 0;
1517         opt.penalty = (cl->penalty*1000)/HZ;
1518         RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1519         return skb->len;
1520
1521 rtattr_failure:
1522         skb_trim(skb, b - skb->data);
1523         return -1;
1524 }
1525
1526 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1527 {
1528         unsigned char    *b = skb->tail;
1529         struct tc_cbq_fopt opt;
1530
1531         if (cl->split || cl->defmap) {
1532                 opt.split = cl->split ? cl->split->classid : 0;
1533                 opt.defmap = cl->defmap;
1534                 opt.defchange = ~0;
1535                 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1536         }
1537         return skb->len;
1538
1539 rtattr_failure:
1540         skb_trim(skb, b - skb->data);
1541         return -1;
1542 }
1543
1544 #ifdef CONFIG_NET_CLS_POLICE
1545 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1546 {
1547         unsigned char    *b = skb->tail;
1548         struct tc_cbq_police opt;
1549
1550         if (cl->police) {
1551                 opt.police = cl->police;
1552                 opt.__res1 = 0;
1553                 opt.__res2 = 0;
1554                 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1555         }
1556         return skb->len;
1557
1558 rtattr_failure:
1559         skb_trim(skb, b - skb->data);
1560         return -1;
1561 }
1562 #endif
1563
1564 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1565 {
1566         if (cbq_dump_lss(skb, cl) < 0 ||
1567             cbq_dump_rate(skb, cl) < 0 ||
1568             cbq_dump_wrr(skb, cl) < 0 ||
1569             cbq_dump_ovl(skb, cl) < 0 ||
1570 #ifdef CONFIG_NET_CLS_POLICE
1571             cbq_dump_police(skb, cl) < 0 ||
1572 #endif
1573             cbq_dump_fopt(skb, cl) < 0)
1574                 return -1;
1575         return 0;
1576 }
1577
1578 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1579 {
1580         struct cbq_sched_data *q = qdisc_priv(sch);
1581         unsigned char    *b = skb->tail;
1582         struct rtattr *rta;
1583
1584         rta = (struct rtattr*)b;
1585         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1586         if (cbq_dump_attr(skb, &q->link) < 0)
1587                 goto rtattr_failure;
1588         rta->rta_len = skb->tail - b;
1589         return skb->len;
1590
1591 rtattr_failure:
1592         skb_trim(skb, b - skb->data);
1593         return -1;
1594 }
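/*
 * Illustration only, not part of the original source: the dump routines
 * above all follow the same rtattr pattern -- remember where the container
 * starts, emit a zero-length TCA_OPTIONS header, append nested attributes,
 * then backfill the container length; on overflow the rtattr_failure label
 * trims the skb back to the remembered start.  A minimal sketch of that
 * pattern with a hypothetical u32 attribute (type number 1 is made up):
 */
#if 0
static int example_dump_nested(struct sk_buff *skb)
{
	unsigned char *b = skb->tail;		/* start of the container */
	struct rtattr *rta = (struct rtattr *)b;
	u32 payload = 0;			/* hypothetical nested value */

	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);	/* container, length fixed below */
	RTA_PUT(skb, 1, sizeof(payload), &payload);
	rta->rta_len = skb->tail - b;		/* backfill container length */
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);		/* undo the partial dump */
	return -1;
}
#endif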
1595
1596 static int
1597 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1598 {
1599         struct cbq_sched_data *q = qdisc_priv(sch);
1600
1601         q->link.xstats.avgidle = q->link.avgidle;
1602         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1603 }
1604
1605 static int
1606 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1607                struct sk_buff *skb, struct tcmsg *tcm)
1608 {
1609         struct cbq_class *cl = (struct cbq_class*)arg;
1610         unsigned char    *b = skb->tail;
1611         struct rtattr *rta;
1612
1613         if (cl->tparent)
1614                 tcm->tcm_parent = cl->tparent->classid;
1615         else
1616                 tcm->tcm_parent = TC_H_ROOT;
1617         tcm->tcm_handle = cl->classid;
1618         tcm->tcm_info = cl->q->handle;
1619
1620         rta = (struct rtattr*)b;
1621         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1622         if (cbq_dump_attr(skb, cl) < 0)
1623                 goto rtattr_failure;
1624         rta->rta_len = skb->tail - b;
1625         return skb->len;
1626
1627 rtattr_failure:
1628         skb_trim(skb, b - skb->data);
1629         return -1;
1630 }
1631
1632 static int
1633 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1634         struct gnet_dump *d)
1635 {
1636         struct cbq_sched_data *q = qdisc_priv(sch);
1637         struct cbq_class *cl = (struct cbq_class*)arg;
1638
1639         cl->qstats.qlen = cl->q->q.qlen;
1640         cl->xstats.avgidle = cl->avgidle;
1641         cl->xstats.undertime = 0;
1642
1643         if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1644                 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1645
1646         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1647 #ifdef CONFIG_NET_ESTIMATOR
1648             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1649 #endif
1650             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1651                 return -1;
1652
1653         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1654 }
1655
1656 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1657                      struct Qdisc **old)
1658 {
1659         struct cbq_class *cl = (struct cbq_class*)arg;
1660
1661         if (cl) {
1662                 if (new == NULL) {
1663                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1664                                                      cl->classid)) == NULL)
1665                                 return -ENOBUFS;
1666                 } else {
1667 #ifdef CONFIG_NET_CLS_POLICE
1668                         if (cl->police == TC_POLICE_RECLASSIFY)
1669                                 new->reshape_fail = cbq_reshape_fail;
1670 #endif
1671                 }
1672                 sch_tree_lock(sch);
1673                 *old = xchg(&cl->q, new);
1674                 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1675                 qdisc_reset(*old);
1676                 sch_tree_unlock(sch);
1677
1678                 return 0;
1679         }
1680         return -ENOENT;
1681 }
1682
1683 static struct Qdisc *
1684 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1685 {
1686         struct cbq_class *cl = (struct cbq_class*)arg;
1687
1688         return cl ? cl->q : NULL;
1689 }
1690
1691 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1692 {
1693         struct cbq_class *cl = (struct cbq_class *)arg;
1694
1695         if (cl->q->q.qlen == 0)
1696                 cbq_deactivate_class(cl);
1697 }
1698
1699 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1700 {
1701         struct cbq_sched_data *q = qdisc_priv(sch);
1702         struct cbq_class *cl = cbq_class_lookup(q, classid);
1703
1704         if (cl) {
1705                 cl->refcnt++;
1706                 return (unsigned long)cl;
1707         }
1708         return 0;
1709 }
1710
1711 static void cbq_destroy_filters(struct cbq_class *cl)
1712 {
1713         struct tcf_proto *tp;
1714
1715         while ((tp = cl->filter_list) != NULL) {
1716                 cl->filter_list = tp->next;
1717                 tcf_destroy(tp);
1718         }
1719 }
1720
1721 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1722 {
1723         struct cbq_sched_data *q = qdisc_priv(sch);
1724
1725         BUG_TRAP(!cl->filters);
1726
1727         cbq_destroy_filters(cl);
1728         qdisc_destroy(cl->q);
1729         qdisc_put_rtab(cl->R_tab);
1730 #ifdef CONFIG_NET_ESTIMATOR
1731         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1732 #endif
1733         if (cl != &q->link)
1734                 kfree(cl);
1735 }
1736
1737 static void
1738 cbq_destroy(struct Qdisc* sch)
1739 {
1740         struct cbq_sched_data *q = qdisc_priv(sch);
1741         struct cbq_class *cl;
1742         unsigned h;
1743
1744 #ifdef CONFIG_NET_CLS_POLICE
1745         q->rx_class = NULL;
1746 #endif
1747         /*
1748          * Filters must be destroyed first because we don't destroy the
1749          * classes from root to leaves, so filters may still be bound to
1750          * classes that have already been destroyed. --TGR '04
1751          */
1752         for (h = 0; h < 16; h++)
1753                 for (cl = q->classes[h]; cl; cl = cl->next)
1754                         cbq_destroy_filters(cl);
1755
1756         for (h = 0; h < 16; h++) {
1757                 struct cbq_class *next;
1758
1759                 for (cl = q->classes[h]; cl; cl = next) {
1760                         next = cl->next;
1761                         cbq_destroy_class(sch, cl);
1762                 }
1763         }
1764 }
1765
1766 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1767 {
1768         struct cbq_class *cl = (struct cbq_class*)arg;
1769
1770         if (--cl->refcnt == 0) {
1771 #ifdef CONFIG_NET_CLS_POLICE
1772                 struct cbq_sched_data *q = qdisc_priv(sch);
1773
1774                 spin_lock_bh(&sch->dev->queue_lock);
1775                 if (q->rx_class == cl)
1776                         q->rx_class = NULL;
1777                 spin_unlock_bh(&sch->dev->queue_lock);
1778 #endif
1779
1780                 cbq_destroy_class(sch, cl);
1781         }
1782 }
1783
1784 static int
1785 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1786                  unsigned long *arg)
1787 {
1788         int err;
1789         struct cbq_sched_data *q = qdisc_priv(sch);
1790         struct cbq_class *cl = (struct cbq_class*)*arg;
1791         struct rtattr *opt = tca[TCA_OPTIONS-1];
1792         struct rtattr *tb[TCA_CBQ_MAX];
1793         struct cbq_class *parent;
1794         struct qdisc_rate_table *rtab = NULL;
1795
1796         if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1797                 return -EINVAL;
1798
1799         if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1800             RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1801                 return -EINVAL;
1802
1803         if (tb[TCA_CBQ_FOPT-1] &&
1804             RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1805                 return -EINVAL;
1806
1807         if (tb[TCA_CBQ_RATE-1] &&
1808             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1809                 return -EINVAL;
1810
1811         if (tb[TCA_CBQ_LSSOPT-1] &&
1812             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1813                 return -EINVAL;
1814
1815         if (tb[TCA_CBQ_WRROPT-1] &&
1816             RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1817                 return -EINVAL;
1818
1819 #ifdef CONFIG_NET_CLS_POLICE
1820         if (tb[TCA_CBQ_POLICE-1] &&
1821             RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1822                 return -EINVAL;
1823 #endif
1824
1825         if (cl) {
1826                 /* Check parent */
1827                 if (parentid) {
1828                         if (cl->tparent && cl->tparent->classid != parentid)
1829                                 return -EINVAL;
1830                         if (!cl->tparent && parentid != TC_H_ROOT)
1831                                 return -EINVAL;
1832                 }
1833
1834                 if (tb[TCA_CBQ_RATE-1]) {
1835                         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1836                         if (rtab == NULL)
1837                                 return -EINVAL;
1838                 }
1839
1840                 /* Change class parameters */
1841                 sch_tree_lock(sch);
1842
1843                 if (cl->next_alive != NULL)
1844                         cbq_deactivate_class(cl);
1845
1846                 if (rtab) {
1847                         rtab = xchg(&cl->R_tab, rtab);
1848                         qdisc_put_rtab(rtab);
1849                 }
1850
1851                 if (tb[TCA_CBQ_LSSOPT-1])
1852                         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1853
1854                 if (tb[TCA_CBQ_WRROPT-1]) {
1855                         cbq_rmprio(q, cl);
1856                         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1857                 }
1858
1859                 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1860                         cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1861
1862 #ifdef CONFIG_NET_CLS_POLICE
1863                 if (tb[TCA_CBQ_POLICE-1])
1864                         cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1865 #endif
1866
1867                 if (tb[TCA_CBQ_FOPT-1])
1868                         cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1869
1870                 if (cl->q->q.qlen)
1871                         cbq_activate_class(cl);
1872
1873                 sch_tree_unlock(sch);
1874
1875 #ifdef CONFIG_NET_ESTIMATOR
1876                 if (tca[TCA_RATE-1])
1877                         gen_replace_estimator(&cl->bstats, &cl->rate_est,
1878                                 cl->stats_lock, tca[TCA_RATE-1]);
1879 #endif
1880                 return 0;
1881         }
1882
1883         if (parentid == TC_H_ROOT)
1884                 return -EINVAL;
1885
1886         if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1887             tb[TCA_CBQ_LSSOPT-1] == NULL)
1888                 return -EINVAL;
1889
1890         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1891         if (rtab == NULL)
1892                 return -EINVAL;
1893
1894         if (classid) {
1895                 err = -EINVAL;
1896                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1897                         goto failure;
1898         } else {
1899                 int i;
1900                 classid = TC_H_MAKE(sch->handle,0x8000);
1901
1902                 for (i=0; i<0x8000; i++) {
1903                         if (++q->hgenerator >= 0x8000)
1904                                 q->hgenerator = 1;
1905                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1906                                 break;
1907                 }
1908                 err = -ENOSR;
1909                 if (i >= 0x8000)
1910                         goto failure;
1911                 classid = classid|q->hgenerator;
1912         }
1913
1914         parent = &q->link;
1915         if (parentid) {
1916                 parent = cbq_class_lookup(q, parentid);
1917                 err = -EINVAL;
1918                 if (parent == NULL)
1919                         goto failure;
1920         }
1921
1922         err = -ENOBUFS;
1923         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1924         if (cl == NULL)
1925                 goto failure;
1926         cl->R_tab = rtab;
1927         rtab = NULL;
1928         cl->refcnt = 1;
1929         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1930                 cl->q = &noop_qdisc;
1931         cl->classid = classid;
1932         cl->tparent = parent;
1933         cl->qdisc = sch;
1934         cl->allot = parent->allot;
1935         cl->quantum = cl->allot;
1936         cl->weight = cl->R_tab->rate.rate;
1937         cl->stats_lock = &sch->dev->queue_lock;
1938
1939         sch_tree_lock(sch);
1940         cbq_link_class(cl);
1941         cl->borrow = cl->tparent;
1942         if (cl->tparent != &q->link)
1943                 cl->share = cl->tparent;
1944         cbq_adjust_levels(parent);
1945         cl->minidle = -0x7FFFFFFF;
1946         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1947         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1948         if (cl->ewma_log==0)
1949                 cl->ewma_log = q->link.ewma_log;
1950         if (cl->maxidle==0)
1951                 cl->maxidle = q->link.maxidle;
1952         if (cl->avpkt==0)
1953                 cl->avpkt = q->link.avpkt;
1954         cl->overlimit = cbq_ovl_classic;
1955         if (tb[TCA_CBQ_OVL_STRATEGY-1])
1956                 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1957 #ifdef CONFIG_NET_CLS_POLICE
1958         if (tb[TCA_CBQ_POLICE-1])
1959                 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1960 #endif
1961         if (tb[TCA_CBQ_FOPT-1])
1962                 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1963         sch_tree_unlock(sch);
1964
1965 #ifdef CONFIG_NET_ESTIMATOR
1966         if (tca[TCA_RATE-1])
1967                 gen_new_estimator(&cl->bstats, &cl->rate_est,
1968                         cl->stats_lock, tca[TCA_RATE-1]);
1969 #endif
1970
1971         *arg = (unsigned long)cl;
1972         return 0;
1973
1974 failure:
1975         qdisc_put_rtab(rtab);
1976         return err;
1977 }
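/*
 * Illustration only, not part of the original source: the classid check in
 * cbq_change_class() relies on TC_H_MAJ(classid^sch->handle) being non-zero
 * exactly when the two handles differ in their major (qdisc) part.  A
 * minimal sketch with hypothetical handle values:
 */
#if 0
static int example_same_major(u32 classid, u32 qdisc_handle)
{
	/* e.g. classid 0x00010005 ("1:5") and handle 0x00010000 ("1:")
	 * share major 1, so the XOR leaves the major bits clear. */
	return TC_H_MAJ(classid ^ qdisc_handle) == 0;
}
#endif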
1978
1979 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1980 {
1981         struct cbq_sched_data *q = qdisc_priv(sch);
1982         struct cbq_class *cl = (struct cbq_class*)arg;
1983         unsigned int qlen;
1984
1985         if (cl->filters || cl->children || cl == &q->link)
1986                 return -EBUSY;
1987
1988         sch_tree_lock(sch);
1989
1990         qlen = cl->q->q.qlen;
1991         qdisc_reset(cl->q);
1992         qdisc_tree_decrease_qlen(cl->q, qlen);
1993
1994         if (cl->next_alive)
1995                 cbq_deactivate_class(cl);
1996
1997         if (q->tx_borrowed == cl)
1998                 q->tx_borrowed = q->tx_class;
1999         if (q->tx_class == cl) {
2000                 q->tx_class = NULL;
2001                 q->tx_borrowed = NULL;
2002         }
2003 #ifdef CONFIG_NET_CLS_POLICE
2004         if (q->rx_class == cl)
2005                 q->rx_class = NULL;
2006 #endif
2007
2008         cbq_unlink_class(cl);
2009         cbq_adjust_levels(cl->tparent);
2010         cl->defmap = 0;
2011         cbq_sync_defmap(cl);
2012
2013         cbq_rmprio(q, cl);
2014         sch_tree_unlock(sch);
2015
2016         if (--cl->refcnt == 0)
2017                 cbq_destroy_class(sch, cl);
2018
2019         return 0;
2020 }
2021
2022 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2023 {
2024         struct cbq_sched_data *q = qdisc_priv(sch);
2025         struct cbq_class *cl = (struct cbq_class *)arg;
2026
2027         if (cl == NULL)
2028                 cl = &q->link;
2029
2030         return &cl->filter_list;
2031 }
2032
2033 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2034                                      u32 classid)
2035 {
2036         struct cbq_sched_data *q = qdisc_priv(sch);
2037         struct cbq_class *p = (struct cbq_class*)parent;
2038         struct cbq_class *cl = cbq_class_lookup(q, classid);
2039
2040         if (cl) {
2041                 if (p && p->level <= cl->level)
2042                         return 0;
2043                 cl->filters++;
2044                 return (unsigned long)cl;
2045         }
2046         return 0;
2047 }
2048
2049 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2050 {
2051         struct cbq_class *cl = (struct cbq_class*)arg;
2052
2053         cl->filters--;
2054 }
2055
2056 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2057 {
2058         struct cbq_sched_data *q = qdisc_priv(sch);
2059         unsigned h;
2060
2061         if (arg->stop)
2062                 return;
2063
2064         for (h = 0; h < 16; h++) {
2065                 struct cbq_class *cl;
2066
2067                 for (cl = q->classes[h]; cl; cl = cl->next) {
2068                         if (arg->count < arg->skip) {
2069                                 arg->count++;
2070                                 continue;
2071                         }
2072                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2073                                 arg->stop = 1;
2074                                 return;
2075                         }
2076                         arg->count++;
2077                 }
2078         }
2079 }
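/*
 * Illustration only, not part of the original source: cbq_walk() above
 * drives a standard qdisc_walker -- it skips the first arg->skip classes,
 * calls arg->fn() for every remaining class, and stops the walk as soon as
 * fn returns a negative value.  A minimal sketch of such a callback (the
 * name is hypothetical):
 */
#if 0
static int example_walk_fn(struct Qdisc *sch, unsigned long cl,
			   struct qdisc_walker *w)
{
	/* inspect the class referenced by 'cl' here; return a negative
	 * value to abort the walk, 0 to let cbq_walk() continue */
	return 0;
}
#endif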
2080
2081 static struct Qdisc_class_ops cbq_class_ops = {
2082         .graft          =       cbq_graft,
2083         .leaf           =       cbq_leaf,
2084         .qlen_notify    =       cbq_qlen_notify,
2085         .get            =       cbq_get,
2086         .put            =       cbq_put,
2087         .change         =       cbq_change_class,
2088         .delete         =       cbq_delete,
2089         .walk           =       cbq_walk,
2090         .tcf_chain      =       cbq_find_tcf,
2091         .bind_tcf       =       cbq_bind_filter,
2092         .unbind_tcf     =       cbq_unbind_filter,
2093         .dump           =       cbq_dump_class,
2094         .dump_stats     =       cbq_dump_class_stats,
2095 };
2096
2097 static struct Qdisc_ops cbq_qdisc_ops = {
2098         .next           =       NULL,
2099         .cl_ops         =       &cbq_class_ops,
2100         .id             =       "cbq",
2101         .priv_size      =       sizeof(struct cbq_sched_data),
2102         .enqueue        =       cbq_enqueue,
2103         .dequeue        =       cbq_dequeue,
2104         .requeue        =       cbq_requeue,
2105         .drop           =       cbq_drop,
2106         .init           =       cbq_init,
2107         .reset          =       cbq_reset,
2108         .destroy        =       cbq_destroy,
2109         .change         =       NULL,
2110         .dump           =       cbq_dump,
2111         .dump_stats     =       cbq_dump_stats,
2112         .owner          =       THIS_MODULE,
2113 };
2114
2115 static int __init cbq_module_init(void)
2116 {
2117         return register_qdisc(&cbq_qdisc_ops);
2118 }
2119 static void __exit cbq_module_exit(void)
2120 {
2121         unregister_qdisc(&cbq_qdisc_ops);
2122 }
2123 module_init(cbq_module_init)
2124 module_exit(cbq_module_exit)
2125 MODULE_LICENSE("GPL");