2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
6 * arch/sh5/lib/checksum.S
8 * Copyright (C) 2000, 2001 Paolo Alberelli, Stefano D'Andrea
14 * INET An implementation of the TCP/IP protocol suite for the LINUX
15 * operating system. INET is implemented using the BSD Socket
16 * interface as the means of communication with the user level.
18 * IP/TCP/UDP checksumming routines
20 * Authors: Jorge Cwik, <jorge@laser.satlink.net>
21 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
22 * Tom May, <ftom@netcom.com>
23 * Pentium Pro/II routines:
24 * Alexander Kjeldaas <astor@guardian.no>
25 * Finn Arne Gangstad <finnag@guardian.no>
26 * Lots of code moved from tcp.c and ip.c; see those files
29 * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception
31 * Andi Kleen, add zeroing on error
32 * converted to pure assembler
34 * SuperH version: Copyright (C) 1999 Niibe Yutaka
36 * SH-5 version: Copyright (C) 2000 Paolo Alberelli, Stefano D'Andrea
38 * This program is free software; you can redistribute it and/or
39 * modify it under the terms of the GNU General Public License
40 * as published by the Free Software Foundation; either version
41 * 2 of the License, or (at your option) any later version.
44 #include <asm/errno.h>
45 #include <asm/registers.h>
47 .section .text64, "ax"
49 * unsigned int csum_partial(const unsigned char *buf,
54 * computes a partial checksum, e.g. for TCP/UDP fragments
58 * SH-5 ABI convention:
62 * r4 = sum so far computed checksum (may be zero)
63 * return value must be in:
64 * r2 returned checksum
68 * Experiments with Ethernet and SLIP connections show that buff
69 * is aligned on either a 2-byte or 4-byte boundary. We get at
70 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
71 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
72 * alignment for the unrolled loop.
76 /* This version of _csum_partial is an easy but inefficient
78 * It's maintained only for historical reasons.
/* Simple (NOTDEF) variant: preload the three branch targets it uses. */
82 _ptar .byte_footer, t1 /* t1 = .byte_footer */
83 _ptar .add_short_loop, t2 /* t2 = .add_short_loop (checksum eval loop) */
84 _ptar .exiting, t3 /* t3 = .exiting (add carry...) */
88 /* we assume buf short aligned */
89 /* Short must be checksummed */
/* NOTE(review): r1 is set on a line not visible here - presumably a small
 * constant compared against the remaining length in r3; confirm. */
91 bgt r1, r3, t1 /* Size = 1 or 0 (remainder): go to .byte_footer */
93 ld.uw r2, 0, r7 /* r7 = word to be checksummed */
96 addi r2, 2, r2 /* move buf forward... */
97 addi r3, -2, r3 /* decrement len */
98 blink t2, ZERO /* goto .add_short_loop */
101 /* still one byte to be checksummed ? */
104 ld.ub r2, 0, r7 /* r7 = last byte... */
105 #ifndef __LITTLE_ENDIAN__
115 #else /* NOTDEF ---------------------------------------------------- */
/*
 * Optimised csum_partial body (the #else side of the NOTDEF switch).
 *
 * Register roles visible in this listing:
 *   r2, r3   incoming buffer pointer and length
 *   r4       checksum accumulated so far (every visible add targets it)
 *   r5, r6   working copies of the pointer and the length
 *   r7       scratch: data just loaded from the buffer
 *   r8       constant 32
 *   t0..t4   preloaded branch targets
 *
 * Stages, in order: optional 2-byte head when the buffer is not long
 * aligned, an 8 x ld.l unrolled pass, a single-long footer, a 16-bit
 * footer and a final byte footer.
 *
 * NOTE(review): the labels, the loop control and the instructions that sit
 * between each load and its "add" are not visible here; verify any change
 * against the full file.
 */
119 movi 32, r8 /* r8 = 8 * sizeof(int) = 32 */
120 _ptar .byte_footer, t1 /* t1 = .byte_footer */
121 _ptar .word_footer, t2 /* t2 = .word_footer */
122 _ptar .long_footer, t3 /* t3 = .long_footer */
123 _ptar .exit, t4 /* t4 = exit point */
124 _ptar .long_aligned, t0 /* t0 = .long_aligned */
125 or r2, ZERO, r5 /* r5 = buffer pointer */
126 or r3, ZERO, r6 /* r6 = original length */
/* NOTE(review): r7 is tested below but computed on a line not visible
 * here - presumably an alignment mask of r2; confirm. */
128 beq r7, ZERO, t0 /* buf is long aligned: skip the 2-byte head */
130 /* we assume buf short aligned */
132 beqi r3, 1, t1 /* Size = 1 */
134 /* Short must be checksummed */
135 ld.uw r2, 0, r7 /* r7 = word to be checksummed */
142 add r4, r7, r4 /* Add eventual "carry" */
144 addi r5, 2, r5 /* r5 is now long aligned */
145 beq r6, ZERO, t4 /* Exit if done */
147 or ZERO, ZERO, r7 /* Clean up r7 */
152 /* 8 Longs to be checksummed */
153 ld.l r5, 0, r7 /* r7 = data to be checksummed */
158 add r4, r7, r4 /* Add eventual "carry" */
159 ld.l r5, 4, r7 /* r7 = data to be checksummed */
166 add r4, r7, r4 /* Add eventual "carry" */
167 ld.l r5, 8, r7 /* r7 = data to be checksummed */
172 add r4, r7, r4 /* Add eventual "carry" */
174 ld.l r5, 12, r7 /* r7 = data to be checksummed */
179 add r4, r7, r4 /* Add eventual "carry" */
182 ld.l r5, 16, r7 /* r7 = data to be checksummed */
187 add r4, r7, r4 /* Add eventual "carry" */
189 ld.l r5, 20, r7 /* r7 = data to be checksummed */
194 add r4, r7, r4 /* Add eventual "carry" */
197 ld.l r5, 24, r7 /* r7 = data to be checksummed */
202 add r4, r7, r4 /* Add eventual "carry" */
205 ld.l r5, 28, r7 /* r7 = data to be checksummed */
210 add r4, r7, r4 /* Add eventual "carry" */
221 /* Long to be checksummed */
222 ld.l r5, 0, r7 /* r7 = data to be checksummed */
227 add r4, r7, r4 /* Add eventual "carry" */
238 /* Short to be checksummed */
239 ld.uw r5, 0, r7 /* r7 = data to be checksummed */
244 add r4, r7, r4 /* Add eventual "carry" */
252 /* Byte to be checksummed */
253 ld.ub r5, 0, r7 /* r7 = data to be checksummed */
255 #ifndef __LITTLE_ENDIAN__
263 add r4, r7, r4 /* Add eventual "carry" */
275 * unsigned int csum_partial_copy_generic (const char *src,
284 * Copy from ds while checksumming, otherwise like csum_partial
286 * The macros SRC and DST specify the type of access for the instruction.
287 * thus we can call a custom exception handler for all access types.
289 * FIXME: could someone double-check whether I haven't mixed up some SRC and
290 * DST definitions? It's damn hard to trigger all cases. I hope I got
291 * them all but there's no guarantee.
295 * SH-5 ABI convention:
297 * r2 = const char *src
300 * r5 = int sum so far computed checksum (may be zero)
301 * r6 = int *src_err_ptr
302 * r7 = int *dst_err_ptr
303 * return value must be in:
304 * r2 returned checksum
309 ** This version of _csum_partial_copy_generic is an easy but
310 ** inefficient implementation.
311 ** It's maintained only for historical reasons.
313 .global csum_partial_copy_generic
314 csum_partial_copy_generic:
/*
 * Simple (NOTDEF) variant: copy 16 bits at a time from SRC (r20) to
 * DST (r21) while checksumming, then handle one trailing byte.
 * r4 holds the remaining length and is decremented by 2 per word.
 * NOTE(review): the loop labels and the checksum accumulation itself are
 * not visible in this listing.
 */
315 _ptar .gc_byte_footer, t1 /* t1 = .gc_byte_footer */
316 _ptar .gc_add_short_loop, t2 /* t2 = .gc_add_short_loop (checksum eval loop) */
317 _ptar .gc_exiting, t3 /* t3 = .gc_exiting (add carry...) */
319 or r2, ZERO, r20 /* r20 = source pointer */
320 or r3, ZERO, r21 /* r21 = destination pointer */
324 /* we assume buf short aligned */
325 /* Short must be checksummed */
/* NOTE(review): r1 is set on a line not visible here - presumably a small
 * constant compared against the remaining length in r4; confirm. */
328 bgt r1, r4, t1 /* Size = 1 or 0 (remainder): go to .gc_byte_footer */
331 ld.uw r20, 0, r8 /* r8 = word to be checksummed */
332 st.w r21, 0, r8 /* fill data into DST */
336 addi r20, 2, r20 /* move SRC forward... */
337 addi r21, 2, r21 /* move DST forward... */
338 addi r4, -2, r4 /* decrement len */
339 blink t2, ZERO /* goto .gc_add_short_loop */
342 /* still one byte to be checksummed ? */
347 ld.ub r20, 0, r8 /* r8 = last byte... */
349 #ifndef __LITTLE_ENDIAN__
359 .section .fixup, "ax"
/*
 * Fault handlers for the simple (NOTDEF) csum_partial_copy_generic; the
 * __ex_table entries for that variant point at these two labels.
 * On entry r6 = src_err_ptr and r7 = dst_err_ptr (function arguments).
 * Each handler writes -EFAULT through the matching pointer.
 */
361 _csum_partial_copy_generic_dst_err:
362 movi -(EFAULT), r8 /* r8 = -EFAULT reply */
363 st.l r7, 0, r8 /* *DST_ERR = -EFAULT */
371 _csum_partial_copy_generic_src_err:
372 movi -(EFAULT), r8 /* r8 = -EFAULT reply */
/* Must be a store: a load here would overwrite r8 and leave *SRC_ERR unset. */
373 st.l r6, 0, r8 /* *SRC_ERR = -EFAULT */
376 * Now reset the DST buffer.
377 * r20 points to the next DST byte.
378 * r3 points to the first DST byte.
380 _ptar .quiet_exit, t0
381 _ptar .src_err_loop, t1
396 .section __ex_table, "a"
398 .global asm_checksum_start /* Just a marker */
/*
 * Each entry pairs the address of an instruction that may fault with the
 * fixup handler to run. The .src_err_N addresses go to the source handler
 * and .src_err_N+4 to the destination handler.
 * NOTE(review): the .src_err_N labels are not visible in this listing;
 * presumably each marks a load from SRC whose next instruction (+4) is the
 * matching store to DST - confirm.
 */
400 .long .src_err_1, _csum_partial_copy_generic_src_err
401 .long .src_err_2, _csum_partial_copy_generic_src_err
402 .long .src_err_1+4, _csum_partial_copy_generic_dst_err
403 .long .src_err_2+4, _csum_partial_copy_generic_dst_err
404 .global asm_checksum_end /* Just a marker */
407 #else /* NOTDEF -------------------------------------------------------- */
409 .global csum_partial_copy_generic
410 csum_partial_copy_generic:
/*
 * Optimised copy-and-checksum body (the #else side of the NOTDEF switch).
 *
 * Register roles visible in this listing:
 *   r2, r3, r4   incoming SRC pointer, DST pointer and length
 *   r5           checksum accumulated so far (every visible add targets it)
 *   r24, r20     working SRC and DST pointers
 *   r25          working length
 *   r26          scratch: data just loaded, also stored to DST
 *   r27          constant 32
 *   t0..t4       preloaded branch targets
 *
 * Stages, in order: optional 2-byte head when SRC is not long aligned, an
 * 8 x (ld.l + st.l) unrolled pass, a single-long footer, a 16-bit footer
 * and a final byte footer.
 *
 * NOTE(review): the labels (including the .src_err_N marks used by the
 * exception table), the loop control and some instructions between the
 * numbered lines are not visible here; verify any change against the
 * full file.
 */
412 movi 32, r27 /* r27 = 8 * sizeof(int) = 32 */
413 _ptar .byte_footer_gc, t1 /* t1 = .byte_footer_gc */
414 _ptar .word_footer_gc, t2 /* t2 = .word_footer_gc */
415 _ptar .long_footer_gc, t3 /* t3 = .long_footer_gc */
416 _ptar .exit_gc, t4 /* t4 = exit point */
418 or r2, ZERO, r24 /* r24 = original SRC pointer */
419 or r3, ZERO, r20 /* r20 = original DST pointer */
420 or r4, ZERO, r25 /* r25 = original length */
421 _ptar .long_aligned_gc, t0 /* t0 = .long_aligned_gc */
422 andi r2, 2, r26 /* r26 = bit 1 of SRC: non-zero if SRC is */
423 beq r26, ZERO, t0 /* not long aligned; zero -> .long_aligned_gc */
425 /* It's short aligned */
427 beqi r4, 1, t1 /* Size = 1 */
429 /* Short must be checksummed */
/* NOTE(review): this head load is the sign-extending ld.w, while the
 * 16-bit footer further down uses ld.uw - confirm that this is intended. */
431 ld.w r2, 0, r26 /* r26: data to be checksummed */
432 st.w r3, 0, r26 /* fill data into DST */
435 mshflo.l r5, r26, r26
437 add r5, r26, r5 /* Add eventual "carry" */
440 addi r24, 2, r24 /* r24 is now long aligned */
441 addi r20, 2, r20 /* r20 advanced by the same 2 bytes */
442 beq r25, ZERO, t4 /* Exit if done */
444 or ZERO, ZERO, r26 /* Clean up r26 */
449 /* 8 Longs to be checksummed */
451 ld.l r24, 0, r26 /* r26: data to be checksummed */
452 st.l r20, 0, r26 /* fill data into DST */
455 mshflo.l r5, r26, r26
457 add r5, r26, r5 /* Add eventual "carry" */
460 ld.l r24, 4, r26 /* r26: data to be checksummed */
461 st.l r20, 4, r26 /* fill data into DST */
464 mshflo.l r5, r26, r26
466 add r5, r26, r5 /* Add eventual "carry" */
469 ld.l r24, 8, r26 /* r26: data to be checksummed */
470 st.l r20, 8, r26 /* fill data into DST */
473 mshflo.l r5, r26, r26
475 add r5, r26, r5 /* Add eventual "carry" */
478 ld.l r24, 12, r26 /* r26: data to be checksummed */
479 st.l r20, 12, r26 /* fill data into DST */
482 mshflo.l r5, r26, r26
484 add r5, r26, r5 /* Add eventual "carry" */
487 ld.l r24, 16, r26 /* r26: data to be checksummed */
488 st.l r20, 16, r26 /* fill data into DST */
491 mshflo.l r5, r26, r26
493 add r5, r26, r5 /* Add eventual "carry" */
496 ld.l r24, 20, r26 /* r26: data to be checksummed */
497 st.l r20, 20, r26 /* fill data into DST */
/* NOTE(review): the other seven longs of this pass show a
 * "mshflo.l r5, r26, r26" between the store and the add; none is visible
 * for offset 20 - confirm against the full file. */
502 add r5, r26, r5 /* Add eventual "carry" */
505 ld.l r24, 24, r26 /* r26: data to be checksummed */
506 st.l r20, 24, r26 /* fill data into DST */
509 mshflo.l r5, r26, r26
511 add r5, r26, r5 /* Add eventual "carry" */
514 ld.l r24, 28, r26 /* r26: data to be checksummed */
515 st.l r20, 28, r26 /* fill data into DST */
518 mshflo.l r5, r26, r26
520 add r5, r26, r5 /* Add eventual "carry" */
531 /* Long to be checksummed */
533 ld.l r24, 0, r26 /* r26: data to be checksummed */
534 st.l r20, 0, r26 /* fill data into DST */
537 mshflo.l r5, r26, r26
539 add r5, r26, r5 /* Add eventual "carry" */
550 /* Short to be checksummed */
552 ld.uw r24, 0, r26 /* r26: data to be checksummed */
553 st.w r20, 0, r26 /* fill data into DST */
556 mshflo.l r5, r26, r26
558 add r5, r26, r5 /* Add eventual "carry" */
566 /* Byte to be checksummed */
/* NOTE(review): the same ld.ub appears twice in a row in this listing;
 * the second looks redundant unless a label sits between them - confirm. */
567 ld.ub r24, 0, r26 /* r26: data to be checksummed */
569 ld.ub r24, 0, r26 /* r26: data to be checksummed */
570 st.b r20, 0, r26 /* fill data into DST */
572 #ifndef __LITTLE_ENDIAN__
578 mshflo.l r5, r26, r26
580 add r5, r26, r5 /* Add eventual "carry" */
588 .section .fixup, "ax"
/*
 * Fault handlers for the optimised csum_partial_copy_generic; the
 * __ex_table entries for that variant point at these two labels.
 * Both load -EFAULT into r21.
 * NOTE(review): the stores of r21 through the error pointers (r6 / r7) are
 * not visible in this listing - confirm they are present in the full file.
 */
590 _csum_partial_copy_generic_dst_err:
591 movi -(EFAULT), r21 /* r21 = -EFAULT reply */
600 _csum_partial_copy_generic_src_err:
601 movi -(EFAULT), r21 /* r21 = -EFAULT reply */
605 * Now reset the DST buffer.
606 * r20 points to the next DST byte.
607 * r3 points to the first DST byte.
609 _ptar .quiet_exit, t0
610 _ptar .src_err_loop, t1
625 .section __ex_table, "a"
627 .global asm_checksum_start /* Just a marker */
/*
 * Each entry pairs the address of an instruction that may fault with the
 * fixup handler to run. The twelve .src_err_N addresses go to the source
 * handler and the twelve .src_err_N+4 addresses to the destination handler.
 * NOTE(review): the .src_err_N labels are not visible in this listing;
 * presumably each marks a load from SRC whose next instruction (+4) is the
 * matching store to DST - confirm.
 */
629 .long .src_err_1, _csum_partial_copy_generic_src_err
630 .long .src_err_2, _csum_partial_copy_generic_src_err
631 .long .src_err_3, _csum_partial_copy_generic_src_err
632 .long .src_err_4, _csum_partial_copy_generic_src_err
633 .long .src_err_5, _csum_partial_copy_generic_src_err
634 .long .src_err_6, _csum_partial_copy_generic_src_err
635 .long .src_err_7, _csum_partial_copy_generic_src_err
636 .long .src_err_8, _csum_partial_copy_generic_src_err
637 .long .src_err_9, _csum_partial_copy_generic_src_err
638 .long .src_err_10, _csum_partial_copy_generic_src_err
639 .long .src_err_11, _csum_partial_copy_generic_src_err
640 .long .src_err_12, _csum_partial_copy_generic_src_err
641 .long .src_err_1+4, _csum_partial_copy_generic_dst_err
642 .long .src_err_2+4, _csum_partial_copy_generic_dst_err
643 .long .src_err_3+4, _csum_partial_copy_generic_dst_err
644 .long .src_err_4+4, _csum_partial_copy_generic_dst_err
645 .long .src_err_5+4, _csum_partial_copy_generic_dst_err
646 .long .src_err_6+4, _csum_partial_copy_generic_dst_err
647 .long .src_err_7+4, _csum_partial_copy_generic_dst_err
648 .long .src_err_8+4, _csum_partial_copy_generic_dst_err
649 .long .src_err_9+4, _csum_partial_copy_generic_dst_err
650 .long .src_err_10+4, _csum_partial_copy_generic_dst_err
651 .long .src_err_11+4, _csum_partial_copy_generic_dst_err
652 .long .src_err_12+4, _csum_partial_copy_generic_dst_err
654 .global asm_checksum_end /* Just a marker */