You've already forked linux-packaging-mono
							
							
		
			
	
	
		
			265 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
		
		
			
		
	
	
			265 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
|   | /*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===// | ||
|  |  * | ||
|  |  *                     The LLVM Compiler Infrastructure | ||
|  |  * | ||
|  |  * This file is dual licensed under the MIT and the University of Illinois Open | ||
|  |  * Source Licenses. See LICENSE.TXT for details. | ||
|  |  * | ||
|  |  *===----------------------------------------------------------------------===// | ||
|  |  * | ||
|  |  * This file implements the __udivsi3 (32-bit unsigned integer divide) | ||
|  |  * function for the ARM 32-bit architecture. | ||
|  |  * | ||
|  |  *===----------------------------------------------------------------------===*/ | ||
|  | 
 | ||
|  | #include "../assembly.h" | ||
|  | 
 | ||
|  | 	.syntax unified
 | ||
|  | 	.text | ||
|  | 
 | ||
|  | DEFINE_CODE_STATE | ||
|  | 
 | ||
|  | 	.p2align 2
 | ||
|  | DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3) /* Presumably exports __aeabi_uidiv as an alias of __udivsi3; the macro comes from assembly.h. */ | ||
|  | 
 | ||
|  | @ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
 | ||
|  | @   Calculate and return the quotient of the (unsigned) division; on entry r0 = dividend, r1 = divisor, and the quotient is returned in r0.
 | ||
|  | 
 | ||
|  | DEFINE_COMPILERRT_FUNCTION(__udivsi3) | ||
|  | #if __ARM_ARCH_EXT_IDIV__ | ||
|  | 	tst     r1, r1 /* Z is set iff the divisor is zero. */ | ||
|  | 	beq     LOCAL_LABEL(divby0) /* Zero divisor: skip the udiv instruction. */ | ||
|  | 	udiv	r0, r0, r1 /* r0 = dividend / divisor using the hardware divider. */ | ||
|  | 	bx  	lr /* Return the quotient in r0. */ | ||
|  | 
 | ||
|  | LOCAL_LABEL(divby0): /* Division by zero (hardware-divide build). */ | ||
|  | 	mov     r0, #0 /* r0 = 0 is the value returned or handed to the handler. */ | ||
|  | #  ifdef __ARM_EABI__ | ||
|  | 	b       __aeabi_idiv0 /* Tail-branch to the handler, which is defined outside this file; lr still points at our caller. */ | ||
|  | #  else | ||
|  | 	JMP(lr) /* No EABI handler: return 0. */ | ||
|  | #  endif | ||
|  | 
 | ||
|  | #else /* ! __ARM_ARCH_EXT_IDIV__: software division, early exits first */ | ||
|  | 	cmp	r1, #1 /* Carry is clear iff r1 == 0; Z is set iff r1 == 1. */ | ||
|  | 	bcc	LOCAL_LABEL(divby0) /* Divisor is zero. */ | ||
|  | #if defined(USE_THUMB_1) | ||
|  | 	bne LOCAL_LABEL(num_neq_denom) /* Taken when the divisor is not 1 (despite the label name). */ | ||
|  | 	JMP(lr) /* Divisor is 1: the quotient is the dividend already in r0. */ | ||
|  | LOCAL_LABEL(num_neq_denom): /* Reached when the divisor is at least 2. */ | ||
|  | #else | ||
|  | 	IT(eq) | ||
|  | 	JMPc(lr, eq) /* Divisor is 1: return with r0 unchanged. */ | ||
|  | #endif | ||
|  | 	cmp	r0, r1 /* Carry is clear iff dividend < divisor. */ | ||
|  | #if defined(USE_THUMB_1) | ||
|  | 	bhs LOCAL_LABEL(num_ge_denom) /* dividend >= divisor: go on to the long division. */ | ||
|  | 	movs r0, #0 /* dividend < divisor: the quotient is 0. */ | ||
|  | 	JMP(lr) | ||
|  | LOCAL_LABEL(num_ge_denom): /* From here on r0 >= r1 >= 2. */ | ||
|  | #else | ||
|  | 	ITT(cc) | ||
|  | 	movcc	r0, #0 /* dividend < divisor: the quotient is 0. */ | ||
|  | 	JMPc(lr, cc) /* Return early in that case; otherwise fall through with r0 >= r1 >= 2. */ | ||
|  | #endif | ||
|  | 
 | ||
|  | 	/* | ||
|  | 	 * Implement division using the binary long division algorithm. | ||
|  | 	 * | ||
|  | 	 * r0 is the numerator, r1 the denominator. The early exits have | ||
|  | 	 * already handled r1 == 0, r1 == 1 and r0 < r1, so r0 >= r1 >= 2 here. | ||
|  | 	 * | ||
|  | 	 * The code before the jump computes the shift I, so that | ||
|  | 	 * r0 and (r1 << I) have the highest bit set in the same position. | ||
|  | 	 * At the time of the jump, the target is .Ldiv0block - SIZE * I, | ||
|  | 	 * where SIZE is the fixed size in bytes of one block. | ||
|  | 	 * SIZE is 12 in ARM mode, 14 in Thumb-2 mode and 10 in Thumb-1 mode. | ||
|  | 	 * The target is held in ip, except in the Thumb-1 path, which builds | ||
|  | 	 * it in r0 and jumps through r2. | ||
|  | 	 * | ||
|  | 	 * block(shift) implements the test-and-update-quotient core. | ||
|  | 	 * It assumes (r1 << shift) can be computed without overflow and | ||
|  | 	 * that r0 < 2 * (r1 << shift). The quotient is accumulated in r3. | ||
|  | 	 */ | ||
|  | 
 | ||
|  | #  if defined(__ARM_FEATURE_CLZ) | ||
|  | 	clz	ip, r0 /* ip = number of leading zero bits in the numerator. */ | ||
|  | 	clz	r3, r1 /* r3 = number of leading zero bits in the denominator. */ | ||
|  | 	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ | ||
|  | 	sub	r3, r3, ip /* r3 = I, the shift that lines up the top bits. */ | ||
|  | #    if defined(USE_THUMB_2) | ||
|  | 	adr	ip, LOCAL_LABEL(div0block) + 1 /* + 1 sets the Thumb bit for the bx below. */ | ||
|  | 	sub	ip, ip, r3, lsl #1 /* Thumb-2 only: ip -= 2 * I (blocks are 14 bytes, not 12). */ | ||
|  | #    else | ||
|  | 	adr	ip, LOCAL_LABEL(div0block) /* ip = address of block(0). */ | ||
|  | #    endif | ||
|  | 	sub	ip, ip, r3, lsl #2 /* ip -= 4 * I. */ | ||
|  | 	sub	ip, ip, r3, lsl #3 /* ip -= 8 * I; in total 12 * I (ARM) or 14 * I (Thumb-2). */ | ||
|  | 	mov	r3, #0 /* Start with a zero quotient. */ | ||
|  | 	bx	ip /* Enter the ladder at block(I). */ | ||
|  | #  else /* No CLZ Feature: find the entry block with a 16/8/4/2/1 binary search */ | ||
|  | #    if defined(USE_THUMB_2) | ||
|  | #    error THUMB mode requires CLZ or UDIV | ||
|  | #    endif | ||
|  | #    if defined(USE_THUMB_1) | ||
|  | #      define BLOCK_SIZE 10 | ||
|  | #    else | ||
|  | #      define BLOCK_SIZE 12 | ||
|  | #    endif | ||
|  | 
 | ||
|  | 	mov	r2, r0 /* r2 = working copy of the numerator; it is shifted right as the search narrows. */ | ||
|  | #    if defined(USE_THUMB_1) | ||
|  | 	mov ip, r0 /* Thumb-1: park the numerator in ip, because r0 is reused for the jump target. */ | ||
|  | 	adr r0, LOCAL_LABEL(div0block) /* r0 = address of block(0). */ | ||
|  | 	adds r0, #1 /* Set the Thumb bit for the final register jump. */ | ||
|  | #    else | ||
|  | 	adr	ip, LOCAL_LABEL(div0block) /* ip = jump target, initially block(0). */ | ||
|  | #    endif | ||
|  | 	lsrs	r3, r2, #16 /* r3 = r2 >> 16. */ | ||
|  | 	cmp	r3, r1 /* Is the shifted numerator still >= the denominator? */ | ||
|  | #    if defined(USE_THUMB_1) | ||
|  | 	blo LOCAL_LABEL(skip_16) | ||
|  | 	movs r2, r3 /* Yes: keep the shifted value ... */ | ||
|  | 	subs r0, r0, #(16 * BLOCK_SIZE) /* ... and enter the ladder 16 blocks earlier. */ | ||
|  | LOCAL_LABEL(skip_16): | ||
|  | #    else | ||
|  | 	movhs	r2, r3 /* If so, keep the shifted value ... */ | ||
|  | 	subhs	ip, ip, #(16 * BLOCK_SIZE) /* ... and enter the ladder 16 blocks earlier. */ | ||
|  | #    endif | ||
|  | 
 | ||
|  | 	lsrs	r3, r2, #8 /* Same test for a further shift of 8. */ | ||
|  | 	cmp	r3, r1 | ||
|  | #    if defined(USE_THUMB_1) | ||
|  | 	blo LOCAL_LABEL(skip_8) | ||
|  | 	movs r2, r3 | ||
|  | 	subs r0, r0, #(8 * BLOCK_SIZE) | ||
|  | LOCAL_LABEL(skip_8): | ||
|  | #    else | ||
|  | 	movhs	r2, r3 | ||
|  | 	subhs	ip, ip, #(8 * BLOCK_SIZE) | ||
|  | #    endif | ||
|  | 
 | ||
|  | 	lsrs	r3, r2, #4 /* Same test for a further shift of 4. */ | ||
|  | 	cmp	r3, r1 | ||
|  | #    if defined(USE_THUMB_1) | ||
|  | 	blo LOCAL_LABEL(skip_4) | ||
|  | 	movs r2, r3 | ||
|  | 	subs r0, r0, #(4 * BLOCK_SIZE) | ||
|  | LOCAL_LABEL(skip_4): | ||
|  | #    else | ||
|  | 	movhs	r2, r3 | ||
|  | 	subhs	ip, #(4 * BLOCK_SIZE) | ||
|  | #    endif | ||
|  | 
 | ||
|  | 	lsrs	r3, r2, #2 /* Same test for a further shift of 2. */ | ||
|  | 	cmp	r3, r1 | ||
|  | #    if defined(USE_THUMB_1) | ||
|  | 	blo LOCAL_LABEL(skip_2) | ||
|  | 	movs r2, r3 | ||
|  | 	subs r0, r0, #(2 * BLOCK_SIZE) | ||
|  | LOCAL_LABEL(skip_2): | ||
|  | #    else | ||
|  | 	movhs	r2, r3 | ||
|  | 	subhs	ip, ip, #(2 * BLOCK_SIZE) | ||
|  | #    endif | ||
|  | 
 | ||
|  | 	/* Last step (shift by 1): only the jump target is adjusted, the shifted value is not kept. */ | ||
|  | #    if defined(USE_THUMB_1) | ||
|  | 	lsrs r3, r2, #1 | ||
|  | 	cmp r3, r1 | ||
|  | 	blo LOCAL_LABEL(skip_1) | ||
|  | 	subs r0, r0, #(1 * BLOCK_SIZE) | ||
|  | LOCAL_LABEL(skip_1): | ||
|  | 	movs r2, r0 /* r2 = final jump target; block() overwrites r2 as soon as it runs. */ | ||
|  | 	mov r0, ip /* Restore the numerator into r0. */ | ||
|  | 	movs r3, #0 /* Start with a zero quotient. */ | ||
|  | 	JMP (r2) /* Enter the ladder at the selected block. */ | ||
|  | 
 | ||
|  | #    else | ||
|  | 	cmp	r1, r2, lsr #1 /* Compare the denominator with (r2 >> 1). */ | ||
|  | 	subls	ip, ip, #(1 * BLOCK_SIZE) /* If r1 <= (r2 >> 1), enter one block earlier. */ | ||
|  | 
 | ||
|  | 	movs	r3, #0 /* Start with a zero quotient. */ | ||
|  | 
 | ||
|  | 	JMP(ip) /* Enter the ladder at the selected block. */ | ||
|  | #    endif | ||
|  | #  endif /* __ARM_FEATURE_CLZ */ | ||
|  | 
 | ||
|  | 
 | ||
|  | #define	IMM	# | ||
|  | 	/* IMM expands to '#', which block() uses to write immediate operands | ||
|  | 	 * (presumably because a bare '#' inside a function-like macro would be | ||
|  | 	 * taken as the preprocessor stringize operator). | ||
|  | 	 * Due to the range limit of branches in Thumb-1, the divby0 handler | ||
|  | 	 * has to be placed here, close to the code that branches to it. */ | ||
|  | LOCAL_LABEL(divby0): /* Division by zero (software-divide build). */ | ||
|  | 	movs	r0, #0 /* r0 = 0 is the value returned or handed to the handler. */ | ||
|  | #      if defined(__ARM_EABI__) | ||
|  | 	push {r7, lr} /* Save lr, which bl overwrites; r7 presumably only keeps the stack 8-byte aligned. */ | ||
|  | 	bl	__aeabi_idiv0 // Called with bl because, due to the relocation limit, b cannot be used. | ||
|  | 	pop  {r7, pc} /* Return to our caller with whatever the handler left in r0. */ | ||
|  | #      else | ||
|  | 	JMP(lr) /* No EABI handler: return 0. */ | ||
|  | #      endif | ||
|  | 
 | ||
|  | 
 | ||
|  | #if defined(USE_THUMB_1) | ||
|  | #define block(shift)                                                           \ | ||
|  | 	lsls r2, r1, IMM shift;                                                      \
 | ||
|  | 	cmp r0, r2;                                                                  \
 | ||
|  | 	blo LOCAL_LABEL(block_skip_##shift);                                         \ | ||
|  | 	subs r0, r0, r2;                                                             \
 | ||
|  | 	LOCAL_LABEL(block_skip_##shift) :;                                           \ | ||
|  | 	adcs r3, r3 /* Thumb-1 step: with r2 = r1 << shift, subtract r2 from r0 if r0 >= r2, then r3 = (r3 << 1) | Carry. Carry is set iff r0 >= r2 held at the cmp. */ | ||
|  | 
 | ||
|  | 	/* TODO: if the current location counter is not word aligned, we don't | ||
|  | 		 need the .p2align and nop */ | ||
|  | 	/* Label div0block must be word-aligned. First align block 31, then pad. */ | ||
|  | 	.p2align 2
 | ||
|  | 	nop /* 2 bytes of padding: blocks 31..1 take 31 * 10 = 310 bytes, so 2 + 310 leaves div0block word-aligned */ | ||
|  | 
 | ||
|  | #else | ||
|  | #define block(shift)                                                           \ | ||
|  | 	cmp	r0, r1, lsl IMM shift;                                         \
 | ||
|  | 	ITT(hs);                                                               \
 | ||
|  | 	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \
 | ||
|  | 	WIDE(subhs)	r0, r0, r1, lsl IMM shift /* ARM and Thumb-2 step: if r0 >= (r1 << shift) then r3 += (1 << shift) and r0 -= (r1 << shift). */ | ||
|  | #endif | ||
|  | 
 | ||
|  | 	block(31) /* Unrolled ladder: the dispatch code jumps to one of these blocks and execution falls through to block(0). */ | ||
|  | 	block(30) | ||
|  | 	block(29) | ||
|  | 	block(28) | ||
|  | 	block(27) | ||
|  | 	block(26) | ||
|  | 	block(25) | ||
|  | 	block(24) | ||
|  | 	block(23) | ||
|  | 	block(22) | ||
|  | 	block(21) | ||
|  | 	block(20) | ||
|  | 	block(19) | ||
|  | 	block(18) | ||
|  | 	block(17) | ||
|  | 	block(16) | ||
|  | 	block(15) | ||
|  | 	block(14) | ||
|  | 	block(13) | ||
|  | 	block(12) | ||
|  | 	block(11) | ||
|  | 	block(10) | ||
|  | 	block(9) | ||
|  | 	block(8) | ||
|  | 	block(7) | ||
|  | 	block(6) | ||
|  | 	block(5) | ||
|  | 	block(4) | ||
|  | 	block(3) | ||
|  | 	block(2) | ||
|  | 	block(1) | ||
|  | LOCAL_LABEL(div0block): /* Base address for the computed jump; block(I) starts I block sizes before this label. */ | ||
|  | 	block(0) | ||
|  | 
 | ||
|  | 	mov	r0, r3 /* Move the accumulated quotient into the return register. */ | ||
|  | 	JMP(lr) /* Return to the caller. */ | ||
|  | #endif /* __ARM_ARCH_EXT_IDIV__ */ | ||
|  | 
 | ||
|  | END_COMPILERRT_FUNCTION(__udivsi3) | ||
|  | 
 | ||
|  | NO_EXEC_STACK_DIRECTIVE | ||
|  | 
 |