2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/config.h>
14 #include <linux/module.h>
15 #include <asm/uaccess.h>
16 #include <asm/system.h>
17 #include <asm/bitops.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/if_ether.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/etherdevice.h>
32 #include <linux/notifier.h>
34 #include <net/route.h>
35 #include <linux/skbuff.h>
37 #include <net/pkt_sched.h>
40 /* Class-Based Queueing (CBQ) algorithm.
41 =======================================
43 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
44 Management Models for Packet Networks",
45 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
47 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
49 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
52 [4] Sally Floyd and Michael Speer, "Experimental Results
53 for Class-Based Queueing", 1998, not published.
55 -----------------------------------------------------------------------
57 Algorithm skeleton was taken from NS simulator cbq.cc.
58 If someone wants to check this code against the LBL version,
59 he should take into account that ONLY the skeleton was borrowed,
60 the implementation is different. Particularly:
62 --- The WRR algorithm is different. Our version looks more
63 reasonable (I hope) and works when quanta are allowed to be
64 less than MTU, which is always the case when real time classes
65 have small rates. Note that the statement of [3] is
66 incomplete, delay may actually be estimated even if class
67 per-round allotment is less than MTU. Namely, if per-round
68 allotment is W*r_i, and r_1+...+r_k = r < 1
70 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
72 In the worst case we have IntServ estimate with D = W*r+k*MTU
73 and C = MTU*r. The proof (if correct at all) is trivial.
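    (Illustrative numbers, not from the original text: with MTU = 1500 bytes,
    B = 1.25e6 bytes/sec (a 10 Mbit/s link), k = 2 real-time classes with
    r_1 = r_2 = 0.05 (so r = 0.1) and W = 3000 bytes, the per-round allotment
    W*r_i = 150 bytes is well below the MTU, and the bound evaluates to
    delay_i <= (10*300 + 300 + 2*1500)/1.25e6 sec, i.e. about 5 ms,
    assuming B is the link bandwidth in bytes/sec and [x] denotes rounding up.)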
76 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
77 interpret some places, which look like wrong translations
78 from NS. Anyone is advised to find these differences
79 and explain to me why I am wrong 8).
81 --- Linux has no EOI event, so that we cannot estimate true class
82 idle time. Workaround is to consider the next dequeue event
83 as sign that previous packet is finished. This is wrong because of
84 internal device queueing, but on a permanently loaded link it is true.
85 Moreover, combined with clock integrator, this scheme looks
86 very close to an ideal solution. */
88 struct cbq_sched_data;
93 struct cbq_class *next; /* hash table link */
94 struct cbq_class *next_alive; /* next class with backlog in this priority band */
98 unsigned char priority; /* class priority */
99 unsigned char priority2; /* priority to be used after overlimit */
100 unsigned char ewma_log; /* time constant for idle time calculation */
101 unsigned char ovl_strategy;
102 #ifdef CONFIG_NET_CLS_POLICE
103 unsigned char police;
108 /* Link-sharing scheduler parameters */
109 long maxidle; /* Class parameters: see below. */
113 struct qdisc_rate_table *R_tab;
115 /* Overlimit strategy parameters */
116 void (*overlimit)(struct cbq_class *cl);
119 /* General scheduler (WRR) parameters */
121 long quantum; /* Allotment per WRR round */
122 long weight; /* Relative allotment: see below */
124 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
125 struct cbq_class *split; /* Ptr to split node */
126 struct cbq_class *share; /* Ptr to LS parent in the class tree */
127 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
128 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
130 struct cbq_class *sibling; /* Sibling chain */
131 struct cbq_class *children; /* Pointer to children chain */
133 struct Qdisc *q; /* Elementary queueing discipline */
137 unsigned char cpriority; /* Effective priority */
138 unsigned char delayed;
139 unsigned char level; /* level of the class in hierarchy:
140 0 for leaf classes, and maximal
141 level of children + 1 for nodes.
144 psched_time_t last; /* Last end of service */
145 psched_time_t undertime;
147 long deficit; /* Saved deficit for WRR */
148 unsigned long penalized;
149 struct tc_stats stats;
150 struct tc_cbq_xstats xstats;
152 struct tcf_proto *filter_list;
157 struct cbq_class *defaults[TC_PRIO_MAX+1];
160 struct cbq_sched_data
162 struct cbq_class *classes[16]; /* Hash table of all classes */
163 int nclasses[TC_CBQ_MAXPRIO+1];
164 unsigned quanta[TC_CBQ_MAXPRIO+1];
166 struct cbq_class link;
169 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
172 #ifdef CONFIG_NET_CLS_POLICE
173 struct cbq_class *rx_class;
175 struct cbq_class *tx_class;
176 struct cbq_class *tx_borrowed;
178 psched_time_t now; /* Cached timestamp */
179 psched_time_t now_rt; /* Cached real time */
182 struct timer_list delay_timer;
183 struct timer_list wd_timer; /* Watchdog timer,
193 #define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
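/* Illustrative sketch, not part of the original source: L2T() converts a
 * packet length into its transmission time at the class rate by indexing
 * the table that userspace tc precomputed, where each slot covers
 * 2^cell_log bytes of length. An equivalent helper would look like:
 */
#if 0
static inline u32 example_len_to_ticks(struct qdisc_rate_table *rtab,
					unsigned int len)
{
	/* data[] holds per-slot transmission times in scheduler ticks */
	return rtab->data[len >> rtab->rate.cell_log];
}
#endif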
196 static __inline__ unsigned cbq_hash(u32 h)
203 static __inline__ struct cbq_class *
204 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
206 struct cbq_class *cl;
208 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
209 if (cl->classid == classid)
214 #ifdef CONFIG_NET_CLS_POLICE
216 static struct cbq_class *
217 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
219 struct cbq_class *cl, *new;
221 for (cl = this->tparent; cl; cl = cl->tparent)
222 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
230 /* Classify packet. The procedure is pretty complicated, but
231 it allows us to combine link sharing and priority scheduling
234 Namely, you can put link-sharing rules (e.g. route-based) at the root of the CBQ,
235 so that it resolves to split nodes. Then packets are classified
236 by logical priority, or a more specific classifier may be attached
240 static struct cbq_class *
241 cbq_classify(struct sk_buff *skb, struct Qdisc *sch)
243 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
244 struct cbq_class *head = &q->link;
245 struct cbq_class **defmap;
246 struct cbq_class *cl = NULL;
247 u32 prio = skb->priority;
248 struct tcf_result res;
251 * Step 1. If skb->priority points to one of our classes, use it.
253 if (TC_H_MAJ(prio^sch->handle) == 0 &&
254 (cl = cbq_class_lookup(q, prio)) != NULL)
260 defmap = head->defaults;
263 * Step 2+n. Apply classifier.
265 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
268 if ((cl = (void*)res.class) == NULL) {
269 if (TC_H_MAJ(res.classid))
270 cl = cbq_class_lookup(q, res.classid);
271 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
272 cl = defmap[TC_PRIO_BESTEFFORT];
274 if (cl == NULL || cl->level >= head->level)
278 #ifdef CONFIG_NET_CLS_POLICE
280 case TC_POLICE_RECLASSIFY:
281 return cbq_reclassify(skb, cl);
292 * Step 3+n. If classifier selected a link sharing class,
293 * apply the agency-specific classifier.
294 * Repeat this procedure until we hit a leaf node.
303 * Step 4. No success...
305 if (TC_H_MAJ(prio) == 0 &&
306 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
307 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
314 A packet has just been enqueued into an empty class.
315 cbq_activate_class adds it to the tail of active class list
316 of its priority band.
319 static __inline__ void cbq_activate_class(struct cbq_class *cl)
321 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
322 int prio = cl->cpriority;
323 struct cbq_class *cl_tail;
325 cl_tail = q->active[prio];
326 q->active[prio] = cl;
328 if (cl_tail != NULL) {
329 cl->next_alive = cl_tail->next_alive;
330 cl_tail->next_alive = cl;
333 q->activemask |= (1<<prio);
338 Unlink class from active chain.
339 Note that this same procedure is done directly in cbq_dequeue*
340 during the round-robin procedure.
343 static void cbq_deactivate_class(struct cbq_class *this)
345 struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
346 int prio = this->cpriority;
347 struct cbq_class *cl;
348 struct cbq_class *cl_prev = q->active[prio];
351 cl = cl_prev->next_alive;
353 cl_prev->next_alive = cl->next_alive;
354 cl->next_alive = NULL;
356 if (cl == q->active[prio]) {
357 q->active[prio] = cl_prev;
358 if (cl == q->active[prio]) {
359 q->active[prio] = NULL;
360 q->activemask &= ~(1<<prio);
365 cl = cl_prev->next_alive;
368 } while ((cl_prev = cl) != q->active[prio]);
372 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
374 int toplevel = q->toplevel;
376 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
380 PSCHED_GET_TIME(now);
381 incr = PSCHED_TDIFF(now, q->now_rt);
382 PSCHED_TADD2(q->now, incr, now);
385 if (PSCHED_TLESS(cl->undertime, now)) {
386 q->toplevel = cl->level;
389 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
394 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
396 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
397 struct cbq_class *cl = cbq_classify(skb, sch);
399 int ret = NET_XMIT_POLICED;
401 #ifdef CONFIG_NET_CLS_POLICE
405 #ifdef CONFIG_NET_CLS_POLICE
406 cl->q->__parent = sch;
408 if ((ret = cl->q->enqueue(skb, cl->q)) == 0) {
410 sch->stats.packets++;
411 sch->stats.bytes+=len;
412 cbq_mark_toplevel(q, cl);
414 cbq_activate_class(cl);
423 cbq_mark_toplevel(q, cl);
430 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
432 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
433 struct cbq_class *cl;
436 if ((cl = q->tx_class) == NULL) {
443 cbq_mark_toplevel(q, cl);
445 #ifdef CONFIG_NET_CLS_POLICE
447 cl->q->__parent = sch;
449 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
452 cbq_activate_class(cl);
460 /* Overlimit actions */
462 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
464 static void cbq_ovl_classic(struct cbq_class *cl)
466 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
467 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
470 delay += cl->offtime;
473 The class goes to sleep, so that it will have no
474 chance to work off its avgidle. Let's forgive it 8)
476 BTW cbq-2.0 has a bug in this
477 place: apparently they forgot to shift it by cl->ewma_log.
480 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
481 if (cl->avgidle < cl->minidle)
482 cl->avgidle = cl->minidle;
485 PSCHED_TADD2(q->now, delay, cl->undertime);
487 cl->xstats.overactions++;
490 if (q->wd_expires == 0 || q->wd_expires > delay)
491 q->wd_expires = delay;
493 /* Dirty work! We must schedule wakeups based on
494 real available rate, rather than leaf rate,
495 which may be tiny (even zero).
497 if (q->toplevel == TC_CBQ_MAXLEVEL) {
499 psched_tdiff_t base_delay = q->wd_expires;
501 for (b = cl->borrow; b; b = b->borrow) {
502 delay = PSCHED_TDIFF(b->undertime, q->now);
503 if (delay < base_delay) {
510 q->wd_expires = base_delay;
514 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
518 static void cbq_ovl_rclassic(struct cbq_class *cl)
520 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
521 struct cbq_class *this = cl;
524 if (cl->level > q->toplevel) {
528 } while ((cl = cl->borrow) != NULL);
537 /* TC_CBQ_OVL_DELAY: delay until the class goes underlimit again */
537 static void cbq_ovl_delay(struct cbq_class *cl)
539 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
540 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
543 unsigned long sched = jiffies;
545 delay += cl->offtime;
547 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
548 if (cl->avgidle < cl->minidle)
549 cl->avgidle = cl->minidle;
550 PSCHED_TADD2(q->now, delay, cl->undertime);
553 sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
554 cl->penalized = sched;
555 cl->cpriority = TC_CBQ_MAXPRIO;
556 q->pmask |= (1<<TC_CBQ_MAXPRIO);
557 if (del_timer(&q->delay_timer) &&
558 (long)(q->delay_timer.expires - sched) > 0)
559 q->delay_timer.expires = sched;
560 add_timer(&q->delay_timer);
562 cl->xstats.overactions++;
567 if (q->wd_expires == 0 || q->wd_expires > delay)
568 q->wd_expires = delay;
571 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
573 static void cbq_ovl_lowprio(struct cbq_class *cl)
575 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
577 cl->penalized = jiffies + cl->penalty;
579 if (cl->cpriority != cl->priority2) {
580 cl->cpriority = cl->priority2;
581 q->pmask |= (1<<cl->cpriority);
582 cl->xstats.overactions++;
587 /* TC_CBQ_OVL_DROP: penalize class by dropping */
589 static void cbq_ovl_drop(struct cbq_class *cl)
591 if (cl->q->ops->drop)
592 if (cl->q->ops->drop(cl->q))
594 cl->xstats.overactions++;
598 static void cbq_watchdog(unsigned long arg)
600 struct Qdisc *sch = (struct Qdisc*)arg;
602 sch->flags &= ~TCQ_F_THROTTLED;
603 netif_schedule(sch->dev);
606 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
608 struct cbq_class *cl;
609 struct cbq_class *cl_prev = q->active[prio];
610 unsigned long now = jiffies;
611 unsigned long sched = now;
617 cl = cl_prev->next_alive;
618 if ((long)(now - cl->penalized) > 0) {
619 cl_prev->next_alive = cl->next_alive;
620 cl->next_alive = NULL;
621 cl->cpriority = cl->priority;
623 cbq_activate_class(cl);
625 if (cl == q->active[prio]) {
626 q->active[prio] = cl_prev;
627 if (cl == q->active[prio]) {
628 q->active[prio] = NULL;
633 cl = cl_prev->next_alive;
634 } else if ((long)(sched - cl->penalized) > 0)
635 sched = cl->penalized;
636 } while ((cl_prev = cl) != q->active[prio]);
638 return (long)(sched - now);
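/* Note added for clarity: the (long)(a - b) > 0 tests above are the standard
   wrap-safe jiffies comparison (equivalent to time_after()), so penalties
   keep working correctly across jiffies wraparound. */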
641 static void cbq_undelay(unsigned long arg)
643 struct Qdisc *sch = (struct Qdisc*)arg;
644 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
652 int prio = ffz(~pmask);
657 tmp = cbq_undelay_prio(q, prio);
660 if (tmp < delay || delay == 0)
666 q->delay_timer.expires = jiffies + delay;
667 add_timer(&q->delay_timer);
670 sch->flags &= ~TCQ_F_THROTTLED;
671 netif_schedule(sch->dev);
675 #ifdef CONFIG_NET_CLS_POLICE
677 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
680 struct Qdisc *sch = child->__parent;
681 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
682 struct cbq_class *cl = q->rx_class;
686 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
688 cbq_mark_toplevel(q, cl);
691 cl->q->__parent = sch;
693 if (cl->q->enqueue(skb, cl->q) == 0) {
695 sch->stats.packets++;
696 sch->stats.bytes+=len;
698 cbq_activate_class(cl);
711 This is a mission-critical procedure.
713 We "regenerate" the toplevel cutoff if the transmitting class
714 has backlog and is not regulated. This is not part of the
715 original CBQ description, but looks more reasonable.
716 It may well be wrong; this question needs further investigation.
719 static __inline__ void
720 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
721 struct cbq_class *borrowed)
723 if (cl && q->toplevel >= borrowed->level) {
724 if (cl->q->q.qlen > 1) {
726 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
727 q->toplevel = borrowed->level;
730 } while ((borrowed=borrowed->borrow) != NULL);
733 /* It is not necessary now. Uncommenting it
734 will save CPU cycles, but decrease fairness.
736 q->toplevel = TC_CBQ_MAXLEVEL;
742 cbq_update(struct cbq_sched_data *q)
744 struct cbq_class *this = q->tx_class;
745 struct cbq_class *cl = this;
750 for ( ; cl; cl = cl->share) {
751 long avgidle = cl->avgidle;
755 cl->stats.bytes += len;
758 (now - last) is total time between packet right edges.
759 (last_pktlen/rate) is "virtual" busy time, so that
761 idle = (now - last) - last_pktlen/rate
764 idle = PSCHED_TDIFF(q->now, cl->last);
765 if ((unsigned long)idle > 128*1024*1024) {
766 avgidle = cl->maxidle;
768 idle -= L2T(cl, len);
770 /* true_avgidle := (1-W)*true_avgidle + W*idle,
771 where W=2^{-ewma_log}. But cl->avgidle is scaled:
772 cl->avgidle == true_avgidle/W,
775 avgidle += idle - (avgidle>>cl->ewma_log);
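		/* Added sanity check, not in the original source: substituting
		   the scaling above, the new scaled value is
		       avgidle' = true_avgidle'/W
		                = (1-W)*true_avgidle/W + idle
		                = avgidle + idle - W*avgidle,
		   and since W = 2^{-ewma_log}, W*avgidle == avgidle>>ewma_log,
		   which is exactly the update on the previous line. */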
779 /* Overlimit or at-limit */
781 if (avgidle < cl->minidle)
782 avgidle = cl->minidle;
784 cl->avgidle = avgidle;
786 /* Calculate the expected time when this class
787 will be allowed to send.
789 (1-W)*true_avgidle + W*delay = 0, i.e.
790 idle = (1/W - 1)*(-true_avgidle)
792 idle = (1 - W)*(-cl->avgidle);
794 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
798 To maintain the rate allocated to the class,
799 we add to undertime the virtual clock time
800 necessary to complete the transmitted packet.
801 (len/phys_bandwidth has already elapsed
802 by the moment cbq_update is called.)
805 idle -= L2T(&q->link, len);
806 idle += L2T(cl, len);
808 PSCHED_AUDIT_TDIFF(idle);
810 PSCHED_TADD2(q->now, idle, cl->undertime);
814 PSCHED_SET_PASTPERFECT(cl->undertime);
815 if (avgidle > cl->maxidle)
816 cl->avgidle = cl->maxidle;
818 cl->avgidle = avgidle;
823 cbq_update_toplevel(q, this, q->tx_borrowed);
826 static __inline__ struct cbq_class *
827 cbq_under_limit(struct cbq_class *cl)
829 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
830 struct cbq_class *this_cl = cl;
832 if (cl->tparent == NULL)
835 if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
836 !PSCHED_TLESS(q->now, cl->undertime)) {
842 /* This is a very suspicious place. Currently an overlimit
843 action is generated for non-bounded classes
844 only if the link is completely congested.
845 Though this agrees with the ancestor-only paradigm,
846 it looks very stupid. In particular,
847 it means that this chunk of code will either
848 never be called or will result in strong amplification
849 of burstiness. Dangerous, silly, and yet
850 no other solution exists.
852 if ((cl = cl->borrow) == NULL) {
853 this_cl->stats.overlimits++;
854 this_cl->overlimit(this_cl);
857 if (cl->level > q->toplevel)
859 } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
860 PSCHED_TLESS(q->now, cl->undertime));
866 static __inline__ struct sk_buff *
867 cbq_dequeue_prio(struct Qdisc *sch, int prio)
869 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
870 struct cbq_class *cl_tail, *cl_prev, *cl;
874 cl_tail = cl_prev = q->active[prio];
875 cl = cl_prev->next_alive;
882 struct cbq_class *borrow = cl;
885 (borrow = cbq_under_limit(cl)) == NULL)
888 if (cl->deficit <= 0) {
889 /* The class exhausted its allotment for
890 this round. Switch to the next one.
893 cl->deficit += cl->quantum;
897 skb = cl->q->dequeue(cl->q);
899 /* The class did not give us any skb :-(
900 This can occur even if cl->q->q.qlen != 0,
901 e.g. if cl->q == "tbf"
906 cl->deficit -= skb->len;
908 q->tx_borrowed = borrow;
910 #ifndef CBQ_XSTATS_BORROWS_BYTES
911 borrow->xstats.borrows++;
912 cl->xstats.borrows++;
914 borrow->xstats.borrows += skb->len;
915 cl->xstats.borrows += skb->len;
918 q->tx_len = skb->len;
920 if (cl->deficit <= 0) {
921 q->active[prio] = cl;
923 cl->deficit += cl->quantum;
928 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
929 /* Class is empty or penalized.
930 Unlink it from active chain.
932 cl_prev->next_alive = cl->next_alive;
933 cl->next_alive = NULL;
935 /* Did cl_tail point to it? */
940 /* Was it the last class in this band? */
943 q->active[prio] = NULL;
944 q->activemask &= ~(1<<prio);
946 cbq_activate_class(cl);
950 q->active[prio] = cl_tail;
953 cbq_activate_class(cl);
961 } while (cl_prev != cl_tail);
964 q->active[prio] = cl_prev;
969 static __inline__ struct sk_buff *
970 cbq_dequeue_1(struct Qdisc *sch)
972 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
976 activemask = q->activemask&0xFF;
978 int prio = ffz(~activemask);
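		/* ffz() returns the index of the first zero bit, so
		   ffz(~activemask) is the lowest set bit of activemask, i.e.
		   the best (numerically lowest) priority band that still has
		   active classes; e.g. activemask == 0x5 yields prio 0. */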
979 activemask &= ~(1<<prio);
980 skb = cbq_dequeue_prio(sch, prio);
987 static struct sk_buff *
988 cbq_dequeue(struct Qdisc *sch)
991 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
995 PSCHED_GET_TIME(now);
996 incr = PSCHED_TDIFF(now, q->now_rt);
999 psched_tdiff_t incr2;
1000 /* Time integrator. We calculate EOS time
1001 by adding the expected packet transmission time.
1002 If real time is greater, we warp the artificial clock,
1005 cbq_time = max(real_time, work);
1007 incr2 = L2T(&q->link, q->tx_len);
1008 PSCHED_TADD(q->now, incr2);
1010 if ((incr -= incr2) < 0)
1013 PSCHED_TADD(q->now, incr);
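	/* Illustrative numbers, not in the original source: if the previous
	   packet was 1500 bytes on a link configured at 10 Mbit/s, incr2 is
	   about 1.2 ms; if less real time than that has elapsed, the
	   artificial clock runs ahead of real time, otherwise it is warped
	   forward to match it, giving cbq_time = max(real_time, work). */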
1019 skb = cbq_dequeue_1(sch);
1022 sch->flags &= ~TCQ_F_THROTTLED;
1026 /* All the classes are overlimit.
1030 1. Scheduler is empty.
1031 2. Toplevel cutoff inhibited borrowing.
1032 3. Root class is overlimit.
1034 Reset the 2nd and 3rd conditions and retry.
1036 Note that NS and cbq-2.0 are buggy: picking
1037 an arbitrary class is appropriate for ancestor-only
1038 link sharing, but not for the top-level algorithm.
1040 Our version is better but slower, because it requires
1041 two passes; this is unavoidable with top-level sharing.
1044 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1045 PSCHED_IS_PASTPERFECT(q->link.undertime))
1048 q->toplevel = TC_CBQ_MAXLEVEL;
1049 PSCHED_SET_PASTPERFECT(q->link.undertime);
1052 /* No packets in the scheduler, or nobody wants to give them to us :-(
1053 Sigh... start the watchdog timer in the latter case. */
1056 sch->stats.overlimits++;
1057 if (q->wd_expires && !netif_queue_stopped(sch->dev)) {
1058 long delay = PSCHED_US2JIFFIE(q->wd_expires);
1059 del_timer(&q->wd_timer);
1062 q->wd_timer.expires = jiffies + delay;
1063 add_timer(&q->wd_timer);
1064 sch->flags |= TCQ_F_THROTTLED;
1070 /* CBQ class maintenance routines */
1072 static void cbq_adjust_levels(struct cbq_class *this)
1079 struct cbq_class *cl;
1081 if ((cl = this->children) != NULL) {
1083 if (cl->level > level)
1085 } while ((cl = cl->sibling) != this->children);
1087 this->level = level+1;
1088 } while ((this = this->tparent) != NULL);
1091 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1093 struct cbq_class *cl;
1096 if (q->quanta[prio] == 0)
1099 for (h=0; h<16; h++) {
1100 for (cl = q->classes[h]; cl; cl = cl->next) {
1101 /* BUGGGG... Beware! This expression suffers from
1102 arithmetic overflow!
1104 if (cl->priority == prio) {
1105 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1108 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1109 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1110 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
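/* Illustrative sketch only, not in the original source: the quantum
 * computation above can overflow 32-bit arithmetic, as its comment warns.
 * A safer variant would use a 64-bit intermediate; the helper below is
 * hypothetical and assumes the elided divisor is the per-priority weight
 * sum that cbq_addprio()/cbq_rmprio() maintain in q->quanta[prio].
 */
#if 0
#include <asm/div64.h>

static long example_safe_quantum(long weight, long allot, int nclasses,
				 unsigned quanta_sum)
{
	u64 tmp = (u64)weight * allot * nclasses;

	do_div(tmp, quanta_sum);	/* 64-by-32 bit division */
	return (long)tmp;
}
#endif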
1116 static void cbq_sync_defmap(struct cbq_class *cl)
1118 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
1119 struct cbq_class *split = cl->split;
1126 for (i=0; i<=TC_PRIO_MAX; i++) {
1127 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1128 split->defaults[i] = NULL;
1131 for (i=0; i<=TC_PRIO_MAX; i++) {
1132 int level = split->level;
1134 if (split->defaults[i])
1137 for (h=0; h<16; h++) {
1138 struct cbq_class *c;
1140 for (c = q->classes[h]; c; c = c->next) {
1141 if (c->split == split && c->level < level &&
1143 split->defaults[i] = c;
1151 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1153 struct cbq_class *split = NULL;
1156 if ((split = cl->split) == NULL)
1158 splitid = split->classid;
1161 if (split == NULL || split->classid != splitid) {
1162 for (split = cl->tparent; split; split = split->tparent)
1163 if (split->classid == splitid)
1170 if (cl->split != split) {
1172 cbq_sync_defmap(cl);
1174 cl->defmap = def&mask;
1176 cl->defmap = (cl->defmap&~mask)|(def&mask);
1178 cbq_sync_defmap(cl);
1181 static void cbq_unlink_class(struct cbq_class *this)
1183 struct cbq_class *cl, **clp;
1184 struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
1186 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1194 if (this->tparent) {
1203 } while ((cl = *clp) != this->sibling);
1205 if (this->tparent->children == this) {
1206 this->tparent->children = this->sibling;
1207 if (this->sibling == this)
1208 this->tparent->children = NULL;
1211 BUG_TRAP(this->sibling == this);
1215 static void cbq_link_class(struct cbq_class *this)
1217 struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
1218 unsigned h = cbq_hash(this->classid);
1219 struct cbq_class *parent = this->tparent;
1221 this->sibling = this;
1222 this->next = q->classes[h];
1223 q->classes[h] = this;
1228 if (parent->children == NULL) {
1229 parent->children = this;
1231 this->sibling = parent->children->sibling;
1232 parent->children->sibling = this;
1236 static int cbq_drop(struct Qdisc* sch)
1238 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1239 struct cbq_class *cl, *cl_head;
1242 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1243 if ((cl_head = q->active[prio]) == NULL)
1248 if (cl->q->ops->drop && cl->q->ops->drop(cl->q)) {
1252 } while ((cl = cl->next_alive) != cl_head);
1258 cbq_reset(struct Qdisc* sch)
1260 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1261 struct cbq_class *cl;
1268 q->tx_borrowed = NULL;
1269 del_timer(&q->wd_timer);
1270 del_timer(&q->delay_timer);
1271 q->toplevel = TC_CBQ_MAXLEVEL;
1272 PSCHED_GET_TIME(q->now);
1275 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1276 q->active[prio] = NULL;
1278 for (h = 0; h < 16; h++) {
1279 for (cl = q->classes[h]; cl; cl = cl->next) {
1282 cl->next_alive = NULL;
1283 PSCHED_SET_PASTPERFECT(cl->undertime);
1284 cl->avgidle = cl->maxidle;
1285 cl->deficit = cl->quantum;
1286 cl->cpriority = cl->priority;
1293 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1295 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1296 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1297 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1299 if (lss->change&TCF_CBQ_LSS_EWMA)
1300 cl->ewma_log = lss->ewma_log;
1301 if (lss->change&TCF_CBQ_LSS_AVPKT)
1302 cl->avpkt = lss->avpkt;
1303 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1304 cl->minidle = -(long)lss->minidle;
1305 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1306 cl->maxidle = lss->maxidle;
1307 cl->avgidle = lss->maxidle;
1309 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1310 cl->offtime = lss->offtime;
1314 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1316 q->nclasses[cl->priority]--;
1317 q->quanta[cl->priority] -= cl->weight;
1318 cbq_normalize_quanta(q, cl->priority);
1321 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1323 q->nclasses[cl->priority]++;
1324 q->quanta[cl->priority] += cl->weight;
1325 cbq_normalize_quanta(q, cl->priority);
1328 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1330 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
1333 cl->allot = wrr->allot;
1335 cl->weight = wrr->weight;
1336 if (wrr->priority) {
1337 cl->priority = wrr->priority-1;
1338 cl->cpriority = cl->priority;
1339 if (cl->priority >= cl->priority2)
1340 cl->priority2 = TC_CBQ_MAXPRIO-1;
1347 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1349 switch (ovl->strategy) {
1350 case TC_CBQ_OVL_CLASSIC:
1351 cl->overlimit = cbq_ovl_classic;
1353 case TC_CBQ_OVL_DELAY:
1354 cl->overlimit = cbq_ovl_delay;
1356 case TC_CBQ_OVL_LOWPRIO:
1357 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1358 ovl->priority2-1 <= cl->priority)
1360 cl->priority2 = ovl->priority2-1;
1361 cl->overlimit = cbq_ovl_lowprio;
1363 case TC_CBQ_OVL_DROP:
1364 cl->overlimit = cbq_ovl_drop;
1366 case TC_CBQ_OVL_RCLASSIC:
1367 cl->overlimit = cbq_ovl_rclassic;
1372 cl->penalty = (ovl->penalty*HZ)/1000;
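	/* Example, added for clarity: with HZ == 100, a 30 ms penalty from
	   userspace becomes (30*100)/1000 == 3 jiffies (assuming the
	   attribute is in milliseconds, as the reverse conversion in
	   cbq_dump_ovl() suggests). */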
1376 #ifdef CONFIG_NET_CLS_POLICE
1377 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1379 cl->police = p->police;
1381 if (cl->q->handle) {
1382 if (p->police == TC_POLICE_RECLASSIFY)
1383 cl->q->reshape_fail = cbq_reshape_fail;
1385 cl->q->reshape_fail = NULL;
1391 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1393 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1397 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1399 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1400 struct rtattr *tb[TCA_CBQ_MAX];
1401 struct tc_ratespec *r;
1403 if (rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0 ||
1404 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1405 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1408 if (tb[TCA_CBQ_LSSOPT-1] &&
1409 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1412 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1415 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL) {
1421 q->link.sibling = &q->link;
1422 q->link.classid = sch->handle;
1423 q->link.qdisc = sch;
1424 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1425 q->link.q = &noop_qdisc;
1427 q->link.priority = TC_CBQ_MAXPRIO-1;
1428 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1429 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1430 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1431 q->link.overlimit = cbq_ovl_classic;
1432 q->link.allot = psched_mtu(sch->dev);
1433 q->link.quantum = q->link.allot;
1434 q->link.weight = q->link.R_tab->rate.rate;
1436 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1437 q->link.avpkt = q->link.allot/2;
1438 q->link.minidle = -0x7FFFFFFF;
1439 q->link.stats.lock = &sch->dev->queue_lock;
1441 init_timer(&q->wd_timer);
1442 q->wd_timer.data = (unsigned long)sch;
1443 q->wd_timer.function = cbq_watchdog;
1444 init_timer(&q->delay_timer);
1445 q->delay_timer.data = (unsigned long)sch;
1446 q->delay_timer.function = cbq_undelay;
1447 q->toplevel = TC_CBQ_MAXLEVEL;
1448 PSCHED_GET_TIME(q->now);
1451 cbq_link_class(&q->link);
1453 if (tb[TCA_CBQ_LSSOPT-1])
1454 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1456 cbq_addprio(q, &q->link);
1460 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1462 unsigned char *b = skb->tail;
1464 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1468 skb_trim(skb, b - skb->data);
1472 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1474 unsigned char *b = skb->tail;
1475 struct tc_cbq_lssopt opt;
1478 if (cl->borrow == NULL)
1479 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1480 if (cl->share == NULL)
1481 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1482 opt.ewma_log = cl->ewma_log;
1483 opt.level = cl->level;
1484 opt.avpkt = cl->avpkt;
1485 opt.maxidle = cl->maxidle;
1486 opt.minidle = (u32)(-cl->minidle);
1487 opt.offtime = cl->offtime;
1489 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1493 skb_trim(skb, b - skb->data);
1497 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1499 unsigned char *b = skb->tail;
1500 struct tc_cbq_wrropt opt;
1503 opt.allot = cl->allot;
1504 opt.priority = cl->priority+1;
1505 opt.cpriority = cl->cpriority+1;
1506 opt.weight = cl->weight;
1507 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1511 skb_trim(skb, b - skb->data);
1515 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1517 unsigned char *b = skb->tail;
1518 struct tc_cbq_ovl opt;
1520 opt.strategy = cl->ovl_strategy;
1521 opt.priority2 = cl->priority2+1;
1522 opt.penalty = (cl->penalty*1000)/HZ;
1523 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1527 skb_trim(skb, b - skb->data);
1531 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1533 unsigned char *b = skb->tail;
1534 struct tc_cbq_fopt opt;
1536 if (cl->split || cl->defmap) {
1537 opt.split = cl->split ? cl->split->classid : 0;
1538 opt.defmap = cl->defmap;
1540 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1545 skb_trim(skb, b - skb->data);
1549 #ifdef CONFIG_NET_CLS_POLICE
1550 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1552 unsigned char *b = skb->tail;
1553 struct tc_cbq_police opt;
1556 opt.police = cl->police;
1557 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1562 skb_trim(skb, b - skb->data);
1567 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1569 if (cbq_dump_lss(skb, cl) < 0 ||
1570 cbq_dump_rate(skb, cl) < 0 ||
1571 cbq_dump_wrr(skb, cl) < 0 ||
1572 cbq_dump_ovl(skb, cl) < 0 ||
1573 #ifdef CONFIG_NET_CLS_POLICE
1574 cbq_dump_police(skb, cl) < 0 ||
1576 cbq_dump_fopt(skb, cl) < 0)
1581 int cbq_copy_xstats(struct sk_buff *skb, struct tc_cbq_xstats *st)
1583 RTA_PUT(skb, TCA_XSTATS, sizeof(*st), st);
1591 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1593 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1594 unsigned char *b = skb->tail;
1597 rta = (struct rtattr*)b;
1598 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1599 if (cbq_dump_attr(skb, &q->link) < 0)
1600 goto rtattr_failure;
1601 rta->rta_len = skb->tail - b;
1602 spin_lock_bh(&sch->dev->queue_lock);
1603 q->link.xstats.avgidle = q->link.avgidle;
1604 if (cbq_copy_xstats(skb, &q->link.xstats)) {
1605 spin_unlock_bh(&sch->dev->queue_lock);
1606 goto rtattr_failure;
1608 spin_unlock_bh(&sch->dev->queue_lock);
1612 skb_trim(skb, b - skb->data);
1617 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1618 struct sk_buff *skb, struct tcmsg *tcm)
1620 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1621 struct cbq_class *cl = (struct cbq_class*)arg;
1622 unsigned char *b = skb->tail;
1626 tcm->tcm_parent = cl->tparent->classid;
1628 tcm->tcm_parent = TC_H_ROOT;
1629 tcm->tcm_handle = cl->classid;
1630 tcm->tcm_info = cl->q->handle;
1632 rta = (struct rtattr*)b;
1633 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1634 if (cbq_dump_attr(skb, cl) < 0)
1635 goto rtattr_failure;
1636 rta->rta_len = skb->tail - b;
1637 cl->stats.qlen = cl->q->q.qlen;
1638 if (qdisc_copy_stats(skb, &cl->stats))
1639 goto rtattr_failure;
1640 spin_lock_bh(&sch->dev->queue_lock);
1641 cl->xstats.avgidle = cl->avgidle;
1642 cl->xstats.undertime = 0;
1643 if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1644 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1645 q->link.xstats.avgidle = q->link.avgidle;
1646 if (cbq_copy_xstats(skb, &cl->xstats)) {
1647 spin_unlock_bh(&sch->dev->queue_lock);
1648 goto rtattr_failure;
1650 spin_unlock_bh(&sch->dev->queue_lock);
1655 skb_trim(skb, b - skb->data);
1659 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1662 struct cbq_class *cl = (struct cbq_class*)arg;
1666 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
1669 #ifdef CONFIG_NET_CLS_POLICE
1670 if (cl->police == TC_POLICE_RECLASSIFY)
1671 new->reshape_fail = cbq_reshape_fail;
1678 sch_tree_unlock(sch);
1685 static struct Qdisc *
1686 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1688 struct cbq_class *cl = (struct cbq_class*)arg;
1690 return cl ? cl->q : NULL;
1693 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1695 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1696 struct cbq_class *cl = cbq_class_lookup(q, classid);
1700 return (unsigned long)cl;
1705 static void cbq_destroy_filters(struct cbq_class *cl)
1707 struct tcf_proto *tp;
1709 while ((tp = cl->filter_list) != NULL) {
1710 cl->filter_list = tp->next;
1711 tp->ops->destroy(tp);
1715 static void cbq_destroy_class(struct cbq_class *cl)
1717 cbq_destroy_filters(cl);
1718 qdisc_destroy(cl->q);
1719 qdisc_put_rtab(cl->R_tab);
1720 #ifdef CONFIG_NET_ESTIMATOR
1721 qdisc_kill_estimator(&cl->stats);
1727 cbq_destroy(struct Qdisc* sch)
1729 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1730 struct cbq_class *cl;
1733 #ifdef CONFIG_NET_CLS_POLICE
1736 for (h = 0; h < 16; h++) {
1737 for (cl = q->classes[h]; cl; cl = cl->next)
1738 cbq_destroy_filters(cl);
1741 for (h = 0; h < 16; h++) {
1742 struct cbq_class *next;
1744 for (cl = q->classes[h]; cl; cl = next) {
1747 cbq_destroy_class(cl);
1751 qdisc_put_rtab(q->link.R_tab);
1755 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1757 struct cbq_class *cl = (struct cbq_class*)arg;
1759 if (--cl->refcnt == 0) {
1760 #ifdef CONFIG_NET_CLS_POLICE
1761 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1763 spin_lock_bh(&sch->dev->queue_lock);
1764 if (q->rx_class == cl)
1766 spin_unlock_bh(&sch->dev->queue_lock);
1769 cbq_destroy_class(cl);
1774 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1778 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1779 struct cbq_class *cl = (struct cbq_class*)*arg;
1780 struct rtattr *opt = tca[TCA_OPTIONS-1];
1781 struct rtattr *tb[TCA_CBQ_MAX];
1782 struct cbq_class *parent;
1783 struct qdisc_rate_table *rtab = NULL;
1786 rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)))
1789 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1790 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1793 if (tb[TCA_CBQ_FOPT-1] &&
1794 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1797 if (tb[TCA_CBQ_RATE-1] &&
1798 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1801 if (tb[TCA_CBQ_LSSOPT-1] &&
1802 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1805 if (tb[TCA_CBQ_WRROPT-1] &&
1806 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1809 #ifdef CONFIG_NET_CLS_POLICE
1810 if (tb[TCA_CBQ_POLICE-1] &&
1811 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1818 if (cl->tparent && cl->tparent->classid != parentid)
1820 if (!cl->tparent && parentid != TC_H_ROOT)
1824 if (tb[TCA_CBQ_RATE-1]) {
1825 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1830 /* Change class parameters */
1833 if (cl->next_alive != NULL)
1834 cbq_deactivate_class(cl);
1837 rtab = xchg(&cl->R_tab, rtab);
1838 qdisc_put_rtab(rtab);
1841 if (tb[TCA_CBQ_LSSOPT-1])
1842 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1844 if (tb[TCA_CBQ_WRROPT-1]) {
1846 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1849 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1850 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1852 #ifdef CONFIG_NET_CLS_POLICE
1853 if (tb[TCA_CBQ_POLICE-1])
1854 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1857 if (tb[TCA_CBQ_FOPT-1])
1858 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1861 cbq_activate_class(cl);
1863 sch_tree_unlock(sch);
1865 #ifdef CONFIG_NET_ESTIMATOR
1866 if (tca[TCA_RATE-1]) {
1867 qdisc_kill_estimator(&cl->stats);
1868 qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
1874 if (parentid == TC_H_ROOT)
1877 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1878 tb[TCA_CBQ_LSSOPT-1] == NULL)
1881 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1887 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1891 classid = TC_H_MAKE(sch->handle,0x8000);
1893 for (i=0; i<0x8000; i++) {
1894 if (++q->hgenerator >= 0x8000)
1896 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1902 classid = classid|q->hgenerator;
1907 parent = cbq_class_lookup(q, parentid);
1914 cl = kmalloc(sizeof(*cl), GFP_KERNEL);
1917 memset(cl, 0, sizeof(*cl));
1921 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1922 cl->q = &noop_qdisc;
1923 cl->classid = classid;
1924 cl->tparent = parent;
1926 cl->allot = parent->allot;
1927 cl->quantum = cl->allot;
1928 cl->weight = cl->R_tab->rate.rate;
1929 cl->stats.lock = &sch->dev->queue_lock;
1933 cl->borrow = cl->tparent;
1934 if (cl->tparent != &q->link)
1935 cl->share = cl->tparent;
1936 cbq_adjust_levels(parent);
1937 cl->minidle = -0x7FFFFFFF;
1938 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1939 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1940 if (cl->ewma_log==0)
1941 cl->ewma_log = q->link.ewma_log;
1943 cl->maxidle = q->link.maxidle;
1945 cl->avpkt = q->link.avpkt;
1946 cl->overlimit = cbq_ovl_classic;
1947 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1948 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1949 #ifdef CONFIG_NET_CLS_POLICE
1950 if (tb[TCA_CBQ_POLICE-1])
1951 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1953 if (tb[TCA_CBQ_FOPT-1])
1954 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1955 sch_tree_unlock(sch);
1957 #ifdef CONFIG_NET_ESTIMATOR
1958 if (tca[TCA_RATE-1])
1959 qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
1962 *arg = (unsigned long)cl;
1966 qdisc_put_rtab(rtab);
1970 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1972 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1973 struct cbq_class *cl = (struct cbq_class*)arg;
1975 if (cl->filters || cl->children || cl == &q->link)
1981 cbq_deactivate_class(cl);
1983 if (q->tx_borrowed == cl)
1984 q->tx_borrowed = q->tx_class;
1985 if (q->tx_class == cl) {
1987 q->tx_borrowed = NULL;
1989 #ifdef CONFIG_NET_CLS_POLICE
1990 if (q->rx_class == cl)
1994 cbq_unlink_class(cl);
1995 cbq_adjust_levels(cl->tparent);
1997 cbq_sync_defmap(cl);
2000 sch_tree_unlock(sch);
2002 if (--cl->refcnt == 0)
2003 cbq_destroy_class(cl);
2008 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2010 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2011 struct cbq_class *cl = (struct cbq_class *)arg;
2016 return &cl->filter_list;
2019 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2022 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2023 struct cbq_class *p = (struct cbq_class*)parent;
2024 struct cbq_class *cl = cbq_class_lookup(q, classid);
2027 if (p && p->level <= cl->level)
2030 return (unsigned long)cl;
2035 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2037 struct cbq_class *cl = (struct cbq_class*)arg;
2042 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2044 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2050 for (h = 0; h < 16; h++) {
2051 struct cbq_class *cl;
2053 for (cl = q->classes[h]; cl; cl = cl->next) {
2054 if (arg->count < arg->skip) {
2058 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2067 static struct Qdisc_class_ops cbq_class_ops =
2084 struct Qdisc_ops cbq_qdisc_ops =
2089 sizeof(struct cbq_sched_data),
2099 NULL /* cbq_change */,
2105 int init_module(void)
2107 return register_qdisc(&cbq_qdisc_ops);
2110 void cleanup_module(void)
2112 unregister_qdisc(&cbq_qdisc_ops);
2115 MODULE_LICENSE("GPL");