/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle / word
   (one cycle / byte, respectively) by forcing double source 1 alignment,
   unrolling by a factor of two, and speculatively loading the second
   word / byte of source 1; however, that would increase the overhead for
   loop setup / finish, and strcmp might often terminate early.  */

#include <linux/linkage.h>

ENTRY_CFI(strcmp)
	or	r2,r0,r1
	bmsk_s	r2,r2,1		; low two bits of either pointer set?
	brne	r2,0,.Lcharloop	; not word-aligned -> byte-wise loop
	mov_s	r12,0x01010101
	ror	r5,r12		; r5 = 0x80808080
.Lwordloop:
	ld.ab	r2,[r0,4]
	ld.ab	r3,[r1,4]
	nop_s
	sub	r4,r2,r12
	bic	r4,r4,r2
	and	r4,r4,r5	; r4 = (r2 - 0x01010101) & ~r2 & 0x80808080
	brne	r4,0,.Lfound0	; nonzero iff r2 contains a zero byte
	breq	r2,r3,.Lwordloop
#ifdef __LITTLE_ENDIAN__
	xor	r0,r2,r3	; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31

	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3
	mov.hi	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry-propagation from a less significant zero byte.
	   We can compensate for this by checking that bit0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31
#endif /* ENDIAN */
	.balign	4
.Lcharloop:
	ldb.ab	r2,[r0,1]
	ldb.ab	r3,[r1,1]
	nop_s
	breq	r2,0,.Lcmpend	; end of first string
	breq	r2,r3,.Lcharloop
.Lcmpend:
	j_s.d	[blink]
	sub	r0,r2,r3	; return byte difference (delay slot)
END_CFI(strcmp)
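
/* Reference sketch, not part of the build: a C rendering of the two bit
   tricks used above, assuming a 32-bit little-endian host (mirroring the
   __LITTLE_ENDIAN__ paths).  The names zero_flags() and first_byte_cmp()
   and the sample strings are made up for this illustration only.

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	// Nonzero iff some byte of w is 0x00; a flag appears in the 0x80
	// position of each zero byte.  A 0x01 byte above a real zero byte can
	// be flagged as well, because the subtraction borrows upward; that is
	// the mis-detection the big-endian .Lfound0 path compensates for.
	static uint32_t zero_flags(uint32_t w)
	{
		return (w - 0x01010101u) & ~w & 0x80808080u;
	}

	// For two unequal words, order them by their first (lowest-address)
	// differing byte, as memcmp would.  The mask construction is the
	// sub/xor pair commented "mask for least significant difference byte".
	static int first_byte_cmp(uint32_t a, uint32_t b)
	{
		uint32_t diff = a ^ b;
		uint32_t bit  = diff & -diff;	// least significant difference bit
		uint32_t mask = 0x80808080u ^ (0x80808080u - bit);
		return (a & mask) < (b & mask) ? -1 : 1;
	}

	int main(void)
	{
		uint32_t w, x;

		memcpy(&w, "ab\0d", 4);		// word containing a NUL byte
		printf("%08x\n", (unsigned)zero_flags(w));	// nonzero mask
		memcpy(&w, "abcd", 4);		// no NUL byte
		printf("%08x\n", (unsigned)zero_flags(w));	// 00000000

		memcpy(&x, "abzd", 4);
		printf("%d\n", first_byte_cmp(w, x));	// -1, since 'c' < 'z'
		return 0;
	}

   Extracted into its own .c file, this compiles and runs; it is meant only
   to document the word-at-a-time logic, not to replace it.  */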