2 * include/asm-ia64/xor.h
4 * Optimized RAID-5 checksumming functions for IA-64.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2, or (at your option)
11 * You should have received a copy of the GNU General Public License
12 * (for example /usr/src/linux/COPYING); if not, write to the Free
13 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/*
 * C-visible prototypes for the IA-64 XOR routines.  Every routine takes one
 * unsigned long followed by pointers to unsigned long; in the declarations
 * that are fully visible here (2, 4 and 5) the numeric suffix equals the
 * number of pointer parameters.  The continuation line of xor_ia64_3 is
 * missing from this excerpt.
 * NOTE(review): the parameters are unnamed; presumably the first is a length
 * and the pointers are the buffers to combine - confirm against the callers.
 */
17 extern void xor_ia64_2(unsigned long, unsigned long *, unsigned long *);
18 extern void xor_ia64_3(unsigned long, unsigned long *, unsigned long *,
20 extern void xor_ia64_4(unsigned long, unsigned long *, unsigned long *,
21 unsigned long *, unsigned long *);
22 extern void xor_ia64_5(unsigned long, unsigned long *, unsigned long *,
23 unsigned long *, unsigned long *, unsigned long *);
28 // Assume L2 memory latency of 6 cycles.
// Two-source XOR loop (presumably the body of xor_ia64_2; the label, pointer
// setup and loop branch are not visible in this excerpt).
// alloc saves ar.pfs in r31 and sizes the register frame: 3 inputs, 0 locals,
// 13 outputs, 16 rotating registers (7 + 7 + 2 as named by .rotr below).
36 alloc r31 = ar.pfs, 3, 0, 13, 16
// Name the rotating registers: seven-deep queues s1/s2 for loaded words and a
// two-deep queue d for results.  Assuming the loop branch (not shown) rotates
// registers every iteration, a word loaded into sN[0] is read back as sN[6]
// six iterations later, matching the assumed 6-cycle load latency.
61 .rotr s1[6+1], s2[6+1], d[2]
// Stage 0: load one 8-byte word from each source address and post-increment
// the address register by 8.  .nta is the non-temporal locality hint.
64 (p[0]) ld8.nta s1[0] = [r16], 8
65 (p[0]) ld8.nta s2[0] = [r17], 8
// Stage 6: combine the two words loaded six rotations earlier.
66 (p[6]) xor d[0] = s1[6], s2[6]
// Stage 7: the result written as d[0] one rotation ago is now d[1]; store it
// through r8 and advance r8 by 8.
69 (p[6+1]) st8.nta [r8] = d[1], 8
// Three-source XOR loop (presumably the body of xor_ia64_3; the label, pointer
// setup and loop branch are not visible in this excerpt).
// alloc: 4 inputs, 0 locals, 20 outputs, 24 rotating registers.  The .rotr
// below names 3 x 7 + 2 = 23 of them; alloc takes the rotating count in
// multiples of 8.
89 alloc r31 = ar.pfs, 4, 0, 20, 24
// Seven-deep rotating queues for each source word, two-deep queue for results.
115 .rotr s1[6+1], s2[6+1], s3[6+1], d[2]
// Stage 0: load 8 bytes from the first two sources, post-incrementing by 8.
118 (p[0]) ld8.nta s1[0] = [r16], 8
119 (p[0]) ld8.nta s2[0] = [r17], 8
// Stage 6: start the result with s1 ^ s2.
120 (p[6]) xor d[0] = s1[6], s2[6]
// Stage 0: load 8 bytes from the third source.
124 (p[0]) ld8.nta s3[0] = [r18], 8
// Stage 7: store the finished result of the previous rotation (d[1]); this is
// a different register from the d[0] still being accumulated around it.
125 (p[6+1]) st8.nta [r8] = d[1], 8
// Stage 6: fold the third source into the partial result.
// NOTE(review): this reads the d[0] written above, so an instruction-group
// stop presumably separates the two xors - the stops are not visible here.
126 (p[6]) xor d[0] = d[0], s3[6]
// Four-source XOR loop (presumably the body of xor_ia64_4; the label, pointer
// setup and loop branch are not visible in this excerpt).
// alloc: 5 inputs, 0 locals, 27 outputs, 32 rotating registers.  The .rotr
// below names 4 x 7 + 2 = 30 of them; alloc takes the rotating count in
// multiples of 8.
147 alloc r31 = ar.pfs, 5, 0, 27, 32
// Seven-deep rotating queues for each source word, two-deep queue for results.
176 .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
// Stage 0: load 8 bytes from the first two sources, post-incrementing by 8.
179 (p[0]) ld8.nta s1[0] = [r16], 8
180 (p[0]) ld8.nta s2[0] = [r17], 8
// Stage 6: first partial result, s1 ^ s2.
181 (p[6]) xor d[0] = s1[6], s2[6]
// Stage 0: load 8 bytes from the third and fourth sources.
184 (p[0]) ld8.nta s3[0] = [r18], 8
185 (p[0]) ld8.nta s4[0] = [r19], 8
// Stage 6: second partial result, s3 ^ s4, held in r20 (a static register,
// not part of the rotating set) as scratch.
186 (p[6]) xor r20 = s3[6], s4[6]
// Stage 7: store the finished result of the previous rotation (d[1]).
190 (p[6+1]) st8.nta [r8] = d[1], 8
// Stage 6: merge the two partial results into d[0].
// NOTE(review): this depends on the d[0] and r20 written above, so an
// instruction-group stop presumably precedes it - stops are not visible here.
191 (p[6]) xor d[0] = d[0], r20
// Five-source XOR loop (presumably the body of xor_ia64_5; the label, pointer
// setup and loop branch are not visible in this excerpt).
// alloc: 6 inputs, 0 locals, 34 outputs, 40 rotating registers.  The .rotr
// below names 5 x 7 + 2 = 37 of them; alloc takes the rotating count in
// multiples of 8.
210 alloc r31 = ar.pfs, 6, 0, 34, 40
// Seven-deep rotating queues for each source word, two-deep queue for results.
240 .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
// Stage 0: load 8 bytes from the first two sources, post-incrementing by 8.
243 (p[0]) ld8.nta s1[0] = [r16], 8
244 (p[0]) ld8.nta s2[0] = [r17], 8
// Stage 6: first partial result, s1 ^ s2.
245 (p[6]) xor d[0] = s1[6], s2[6]
// Stage 0: load 8 bytes from the third and fourth sources.
248 (p[0]) ld8.nta s3[0] = [r18], 8
249 (p[0]) ld8.nta s4[0] = [r19], 8
// Stage 6: second partial result, s3 ^ s4, held in scratch register r21
// (r20 is taken here: it is the address register for the fifth source).
250 (p[6]) xor r21 = s3[6], s4[6]
// Stage 0: load 8 bytes from the fifth source through r20.
254 (p[0]) ld8.nta s5[0] = [r20], 8
// Stage 7: store the finished result of the previous rotation (d[1]).
255 (p[6+1]) st8.nta [r8] = d[1], 8
// Stage 6: merge the two partial results.
256 (p[6]) xor d[0] = d[0], r21
// Stage 6: fold in the fifth source to complete the word.
// NOTE(review): the chained writes to d[0] imply instruction-group stops
// between these xors - the stops are not visible in this excerpt.
260 (p[6]) xor d[0] = d[0], s5[6]
/*
 * File-local xor_block_template instance for IA-64.  Its initializer fields
 * fall outside this excerpt; presumably it bundles the xor_ia64_2..5
 * routines - confirm against the full definition.
 */
275 static struct xor_block_template xor_block_ia64 = {
/*
 * XOR_TRY_TEMPLATES expands to a single xor_speed() call on xor_block_ia64.
 * NOTE(review): presumably expanded by the generic RAID XOR calibration code
 * to benchmark this implementation - confirm at the use site.
 */
283 #define XOR_TRY_TEMPLATES xor_speed(&xor_block_ia64)