xor.c 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * xor.c : Multiple Devices driver for Linux
  4. *
  5. * Copyright (C) 1996, 1997, 1998, 1999, 2000,
  6. * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson.
  7. *
  8. * Dispatch optimized RAID-5 checksumming functions.
  9. */
  10. #define BH_TRACE 0
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/raid/xor.h>
#include <linux/jiffies.h>
#include <linux/math64.h>
#include <linux/preempt.h>
#include <asm/xor.h>
  17. #ifndef XOR_SELECT_TEMPLATE
  18. #define XOR_SELECT_TEMPLATE(x) (x)
  19. #endif
  20. /* The xor routines to use. */
  21. static struct xor_block_template *active_template;
  22. void
  23. xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs)
  24. {
  25. unsigned long *p1, *p2, *p3, *p4;
  26. p1 = (unsigned long *) srcs[0];
  27. if (src_count == 1) {
  28. active_template->do_2(bytes, dest, p1);
  29. return;
  30. }
  31. p2 = (unsigned long *) srcs[1];
  32. if (src_count == 2) {
  33. active_template->do_3(bytes, dest, p1, p2);
  34. return;
  35. }
  36. p3 = (unsigned long *) srcs[2];
  37. if (src_count == 3) {
  38. active_template->do_4(bytes, dest, p1, p2, p3);
  39. return;
  40. }
  41. p4 = (unsigned long *) srcs[3];
  42. active_template->do_5(bytes, dest, p1, p2, p3, p4);
  43. }
  44. EXPORT_SYMBOL(xor_blocks);
  45. /* Set of all registered templates. */
  46. static struct xor_block_template *__initdata template_list;
  47. #ifndef MODULE
  48. static void __init do_xor_register(struct xor_block_template *tmpl)
  49. {
  50. tmpl->next = template_list;
  51. template_list = tmpl;
  52. }
  53. static int __init register_xor_blocks(void)
  54. {
  55. active_template = XOR_SELECT_TEMPLATE(NULL);
  56. if (!active_template) {
  57. #define xor_speed do_xor_register
  58. // register all the templates and pick the first as the default
  59. XOR_TRY_TEMPLATES;
  60. #undef xor_speed
  61. active_template = template_list;
  62. }
  63. return 0;
  64. }
  65. #endif
  66. #define BENCH_SIZE 4096
  67. #define REPS 800U
  68. static void __init
  69. do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
  70. {
  71. int speed;
  72. unsigned long reps;
  73. ktime_t min, start, t0;
  74. tmpl->next = template_list;
  75. template_list = tmpl;
  76. preempt_disable();
  77. reps = 0;
  78. t0 = ktime_get();
  79. /* delay start until time has advanced */
  80. while ((start = ktime_get()) == t0)
  81. cpu_relax();
  82. do {
  83. mb(); /* prevent loop optimization */
  84. tmpl->do_2(BENCH_SIZE, b1, b2);
  85. mb();
  86. } while (reps++ < REPS || (t0 = ktime_get()) == start);
  87. min = ktime_sub(t0, start);
  88. preempt_enable();
  89. // bytes/ns == GB/s, multiply by 1000 to get MB/s [not MiB/s]
  90. speed = (1000 * reps * BENCH_SIZE) / (unsigned int)ktime_to_ns(min);
  91. tmpl->speed = speed;
  92. pr_info(" %-16s: %5d MB/sec\n", tmpl->name, speed);
  93. }
  94. static int __init
  95. calibrate_xor_blocks(void)
  96. {
  97. void *b1, *b2;
  98. struct xor_block_template *f, *fastest;
  99. fastest = XOR_SELECT_TEMPLATE(NULL);
  100. if (fastest) {
  101. printk(KERN_INFO "xor: automatically using best "
  102. "checksumming function %-10s\n",
  103. fastest->name);
  104. goto out;
  105. }
  106. b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
  107. if (!b1) {
  108. printk(KERN_WARNING "xor: Yikes! No memory available.\n");
  109. return -ENOMEM;
  110. }
  111. b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;
  112. /*
  113. * If this arch/cpu has a short-circuited selection, don't loop through
  114. * all the possible functions, just test the best one
  115. */
  116. #define xor_speed(templ) do_xor_speed((templ), b1, b2)
  117. printk(KERN_INFO "xor: measuring software checksum speed\n");
  118. template_list = NULL;
  119. XOR_TRY_TEMPLATES;
  120. fastest = template_list;
  121. for (f = fastest; f; f = f->next)
  122. if (f->speed > fastest->speed)
  123. fastest = f;
  124. pr_info("xor: using function: %s (%d MB/sec)\n",
  125. fastest->name, fastest->speed);
  126. #undef xor_speed
  127. free_pages((unsigned long)b1, 2);
  128. out:
  129. active_template = fastest;
  130. return 0;
  131. }
  132. static __exit void xor_exit(void) { }
  133. MODULE_DESCRIPTION("RAID-5 checksumming functions");
  134. MODULE_LICENSE("GPL");
  135. #ifndef MODULE
  136. /* when built-in xor.o must initialize before drivers/md/md.o */
  137. core_initcall(register_xor_blocks);
  138. #endif
  139. module_init(calibrate_xor_blocks);
  140. module_exit(xor_exit);