sdivsi3.S 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. /* SPDX-License-Identifier: GPL-2.0 */
  /*
   * Signed 32-bit division helper with three exported entry points.
   * r4 is multiplied by an approximate reciprocal that is built from r5,
   * so r4 acts as the dividend and r5 as the divisor; the quotient is
   * left in r0.  The reciprocal starts from a lookup in __div_table and
   * is refined in two steps (the "11 bit" and "22 bit" inverses noted
   * in the comments below).  The code is straight-line: there are no
   * branches other than the final return.
   */
  2. .global __sdivsi3
  3. .global __sdivsi3_1
  4. .global __sdivsi3_2
  5. .section .text..SHmedia32,"ax"
  6. .align 2
  7. /* inputs: r4,r5 */
  8. /* clobbered: r1,r18,r19,r20,r21,r25,tr0 */
  9. /* result in r0 */
  /* __sdivsi3 and __sdivsi3_1 are two names for the same address. */
  10. __sdivsi3:
  11. __sdivsi3_1:
  /* Get the address of __div_table into r20, going through tr0. */
  12. ptb __div_table,tr0
  13. gettr tr0,r20
  /*
   * Entry point that skips the table-address setup above.  Nothing on
   * this path writes r20 before it is used as the load base, so a caller
   * entering here must already have r20 set up as the two instructions
   * above would leave it.
   */
  14. __sdivsi3_2:
  /* r1 = shift count used to normalize the divisor (kept for later). */
  15. nsb r5, r1
  16. shlld r5, r1, r25 /* normalize; [-2 ..1, 1..2) in s2.62 */
  17. shari r25, 58, r21 /* extract 5(6) bit index (s2.4 with hole -1..1) */
  18. /* bubble */
  /*
   * r21 is a signed index: the byte load below is relative to r20, and a
   * negative divisor selects entries placed before the __div_table label.
   */
  19. ldx.ub r20, r21, r19 /* u0.8 */
  20. shari r25, 32, r25 /* normalize to s2.30 */
  /* Double the index so it addresses the 16-bit constants. */
  21. shlli r21, 1, r21
  22. muls.l r25, r19, r19 /* s2.38 */
  23. ldx.w r20, r21, r21 /* s2.14 */
  /*
   * Copy r18 into tr0 for the final blink; this must happen before r18
   * is overwritten as a scratch register further down.
   * NOTE(review): presumably r18 holds the caller's return address --
   * confirm against the calling convention.
   */
  24. ptabs r18, tr0
  25. shari r19, 24, r19 /* truncate to s2.14 */
  26. sub r21, r19, r19 /* some 11 bit inverse in s1.14 */
  27. muls.l r19, r19, r21 /* u0.28 */
  /*
   * r1 = 92 - r1: turn the normalization count into the shift count used
   * by the final shard.  r63 serves as the zero operand here
   * (presumably the always-zero register -- confirm).
   */
  28. sub r63, r1, r1
  29. addi r1, 92, r1
  30. muls.l r25, r21, r18 /* s2.58 */
  31. shlli r19, 45, r19 /* multiply by two and convert to s2.58 */
  32. /* bubble */
  33. sub r19, r18, r18
  34. shari r18, 28, r18 /* some 22 bit inverse in s1.30 */
  35. muls.l r18, r25, r0 /* s2.60 */
  /* r25 = refined inverse * dividend: the first quotient estimate. */
  36. muls.l r18, r4, r25 /* s32.30 */
  37. /* bubble */
  38. shari r0, 16, r19 /* s-16.44 */
  39. muls.l r19, r18, r19 /* s-16.74 */
  /* r0 = r25 >> 63 (arithmetic): 0 or -1 according to the sign of r25. */
  40. shari r25, 63, r0
  41. shari r4, 14, r18 /* s19.-14 */
  42. shari r19, 30, r19 /* s-16.44 */
  43. muls.l r19, r18, r19 /* s15.30 */
  /*
   * r21 still holds the u0.28 square computed earlier; it is combined
   * with the sign mask in r0 and added to the estimate before the shift.
   */
  44. xor r21, r0, r21 /* You could also use the constant 1 << 27. */
  45. add r21, r25, r21
  /* Subtract the correction term computed in r19. */
  46. sub r21, r19, r21
  /* Shift right by the count prepared in r1 to undo the scaling. */
  47. shard r21, r1, r21
  /* Result: r0 = r21 - r0, i.e. the sign mask is subtracted out. */
  48. sub r21, r0, r0
  /* Return through tr0; the link value goes to r63. */
  49. blink tr0, r63
  50. /* This table has been generated by divtab.c .
  51. Defects for bias -330:
  52. Max defect: 6.081536e-07 at -1.000000e+00
  53. Min defect: 2.849516e-08 at 1.030651e+00
  54. Max 2nd step defect: 9.606539e-12 at -1.000000e+00
  55. Min 2nd step defect: 0.000000e+00 at 0.000000e+00
  56. Defect at 1: 1.238659e-07
  57. Defect at -2: 1.061708e-07 */
  /*
   * Layout, as byte offsets relative to the __div_table label (the label
   * sits in the middle so that the division routine can index it with a
   * signed value).  With 2-byte .word entries the pieces add up to the
   * 128 bytes given in .size:
   *
   *   -64 .. -33   negative division constants (16 x .word)
   *   -32 .. -17   negative division factors   (16 x .byte)
   *   -16 ..  -1   .skip 16 (unused gap)
   *     0 ..  15   .skip 16 (unused gap)
   *    16 ..  31   positive division factors   (16 x .byte)
   *    32 ..  63   positive division constants (16 x .word)
   *
   * The routine loads a factor with a byte index and a constant with
   * twice that index, so factor i and constant i pair up at offsets i
   * and 2*i.  The two gaps cover the index values around zero, matching
   * the "hole -1..1" noted where the routine extracts the index.
   *
   * NOTE(review): .size says 128, but the symbol labels the midpoint of
   * the data, so only 64 of those bytes follow the label -- confirm this
   * is intended for tools that consume symbol sizes.
   */
  58. .balign 2
  59. .type __div_table,@object
  60. .size __div_table,128
  61. /* negative division constants */
  62. .word -16638
  63. .word -17135
  64. .word -17737
  65. .word -18433
  66. .word -19103
  67. .word -19751
  68. .word -20583
  69. .word -21383
  70. .word -22343
  71. .word -23353
  72. .word -24407
  73. .word -25582
  74. .word -26863
  75. .word -28382
  76. .word -29965
  77. .word -31800
  78. /* negative division factors */
  79. .byte 66
  80. .byte 70
  81. .byte 75
  82. .byte 81
  83. .byte 87
  84. .byte 93
  85. .byte 101
  86. .byte 109
  87. .byte 119
  88. .byte 130
  89. .byte 142
  90. .byte 156
  91. .byte 172
  92. .byte 192
  93. .byte 214
  94. .byte 241
  /* Gap below the label: offsets -16 .. -1. */
  95. .skip 16
  96. .global __div_table
  97. __div_table:
  /* Gap above the label: offsets 0 .. 15. */
  98. .skip 16
  99. /* positive division factors */
  /* Same values as the negative factors, in reverse order. */
  100. .byte 241
  101. .byte 214
  102. .byte 192
  103. .byte 172
  104. .byte 156
  105. .byte 142
  106. .byte 130
  107. .byte 119
  108. .byte 109
  109. .byte 101
  110. .byte 93
  111. .byte 87
  112. .byte 81
  113. .byte 75
  114. .byte 70
  115. .byte 66
  116. /* positive division constants */
  /* Each is one more than the magnitude of the mirrored negative constant. */
  117. .word 31801
  118. .word 29966
  119. .word 28383
  120. .word 26864
  121. .word 25583
  122. .word 24408
  123. .word 23354
  124. .word 22344
  125. .word 21384
  126. .word 20584
  127. .word 19752
  128. .word 19104
  129. .word 18434
  130. .word 17738
  131. .word 17136
  132. .word 16639