xor_template.c

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 *
 * Template for XOR operations, instantiated in xor_simd.c.
 *
 * Expected preprocessor definitions:
 *
 * - LINE_WIDTH
 * - XOR_FUNC_NAME(nr)
 * - LD_INOUT_LINE(buf)
 * - LD_AND_XOR_LINE(buf)
 * - ST_LINE(buf)
 */
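
/*
 * Illustrative sketch only: the function name, register choices and line
 * width below are hypothetical; the real, SIMD-specific definitions live
 * in xor_simd.c. An including file could instantiate this template along
 * these lines (one 16-byte LSX vector per "line"):
 *
 *	#define LINE_WIDTH		16
 *	#define XOR_FUNC_NAME(nr)	xor_lsx_##nr
 *
 *	#define LD_INOUT_LINE(buf)	"vld $vr0, %[" #buf "], 0\n\t"
 *	#define LD_AND_XOR_LINE(buf)	"vld $vr1, %[" #buf "], 0\n\t" \
 *					"vxor.v $vr0, $vr0, $vr1\n\t"
 *	#define ST_LINE(buf)		"vst $vr0, %[" #buf "], 0\n\t"
 *
 *	#include "xor_template.c"
 *
 * Vector registers can be used as scratch without appearing in the
 * clobber list because kernel code is normally built without SIMD code
 * generation, so the compiler never allocates them.
 */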

void XOR_FUNC_NAME(2)(unsigned long bytes,
                      unsigned long * __restrict v1,
                      const unsigned long * __restrict v2)
{
        unsigned long lines = bytes / LINE_WIDTH;
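
        /*
         * The do-while below runs at least once and there is no tail
         * handling, so bytes is expected to be a nonzero multiple of
         * LINE_WIDTH.
         */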
        do {
                __asm__ __volatile__ (
                        LD_INOUT_LINE(v1)
                        LD_AND_XOR_LINE(v2)
                        ST_LINE(v1)
                : : [v1] "r"(v1), [v2] "r"(v2) : "memory"
                );

                v1 += LINE_WIDTH / sizeof(unsigned long);
                v2 += LINE_WIDTH / sizeof(unsigned long);
        } while (--lines > 0);
}

void XOR_FUNC_NAME(3)(unsigned long bytes,
                      unsigned long * __restrict v1,
                      const unsigned long * __restrict v2,
                      const unsigned long * __restrict v3)
{
        unsigned long lines = bytes / LINE_WIDTH;

        do {
                __asm__ __volatile__ (
                        LD_INOUT_LINE(v1)
                        LD_AND_XOR_LINE(v2)
                        LD_AND_XOR_LINE(v3)
                        ST_LINE(v1)
                : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
                );

                v1 += LINE_WIDTH / sizeof(unsigned long);
                v2 += LINE_WIDTH / sizeof(unsigned long);
                v3 += LINE_WIDTH / sizeof(unsigned long);
        } while (--lines > 0);
}

void XOR_FUNC_NAME(4)(unsigned long bytes,
                      unsigned long * __restrict v1,
                      const unsigned long * __restrict v2,
                      const unsigned long * __restrict v3,
                      const unsigned long * __restrict v4)
{
        unsigned long lines = bytes / LINE_WIDTH;

        do {
                __asm__ __volatile__ (
                        LD_INOUT_LINE(v1)
                        LD_AND_XOR_LINE(v2)
                        LD_AND_XOR_LINE(v3)
                        LD_AND_XOR_LINE(v4)
                        ST_LINE(v1)
                : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
                : "memory"
                );

                v1 += LINE_WIDTH / sizeof(unsigned long);
                v2 += LINE_WIDTH / sizeof(unsigned long);
                v3 += LINE_WIDTH / sizeof(unsigned long);
                v4 += LINE_WIDTH / sizeof(unsigned long);
        } while (--lines > 0);
}

void XOR_FUNC_NAME(5)(unsigned long bytes,
                      unsigned long * __restrict v1,
                      const unsigned long * __restrict v2,
                      const unsigned long * __restrict v3,
                      const unsigned long * __restrict v4,
                      const unsigned long * __restrict v5)
{
        unsigned long lines = bytes / LINE_WIDTH;

        do {
                __asm__ __volatile__ (
                        LD_INOUT_LINE(v1)
                        LD_AND_XOR_LINE(v2)
                        LD_AND_XOR_LINE(v3)
                        LD_AND_XOR_LINE(v4)
                        LD_AND_XOR_LINE(v5)
                        ST_LINE(v1)
                : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
                  [v5] "r"(v5) : "memory"
                );

                v1 += LINE_WIDTH / sizeof(unsigned long);
                v2 += LINE_WIDTH / sizeof(unsigned long);
                v3 += LINE_WIDTH / sizeof(unsigned long);
                v4 += LINE_WIDTH / sizeof(unsigned long);
                v5 += LINE_WIDTH / sizeof(unsigned long);
        } while (--lines > 0);
}
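
/*
 * For context: functions generated from this template are meant to be
 * plugged into the kernel's generic XOR framework through struct
 * xor_block_template (include/linux/raid/xor.h). A hypothetical hookup,
 * reusing the xor_lsx_* names from the sketch near the top of this file,
 * could look like:
 *
 *	static struct xor_block_template xor_block_lsx = {
 *		.name = "lsx",
 *		.do_2 = xor_lsx_2,
 *		.do_3 = xor_lsx_3,
 *		.do_4 = xor_lsx_4,
 *		.do_5 = xor_lsx_5,
 *	};
 *
 * The actual registration for these functions lives elsewhere in the
 * LoongArch XOR code, not in this template.
 */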