LLVM OpenMP* Runtime Library
kmp_atomic.cpp
1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp_atomic.h"
14 #include "kmp.h" // TRUE, asm routines prototypes
15 
16 typedef unsigned char uchar;
17 typedef unsigned short ushort;
18 
561 /*
562  * Global vars
563  */
564 
565 #ifndef KMP_GOMP_COMPAT
566 int __kmp_atomic_mode = 1; // Intel perf
567 #else
568 int __kmp_atomic_mode = 2; // GOMP compatibility
569 #endif /* KMP_GOMP_COMPAT */
570 
571 KMP_ALIGN(128)
572 
573 // Control access to all user coded atomics in Gnu compat mode
574 kmp_atomic_lock_t __kmp_atomic_lock;
575 // Control access to all user coded atomics for 1-byte fixed data types
576 kmp_atomic_lock_t __kmp_atomic_lock_1i;
577 // Control access to all user coded atomics for 2-byte fixed data types
578 kmp_atomic_lock_t __kmp_atomic_lock_2i;
579 // Control access to all user coded atomics for 4-byte fixed data types
580 kmp_atomic_lock_t __kmp_atomic_lock_4i;
581 // Control access to all user coded atomics for kmp_real32 data type
582 kmp_atomic_lock_t __kmp_atomic_lock_4r;
583 // Control access to all user coded atomics for 8-byte fixed data types
584 kmp_atomic_lock_t __kmp_atomic_lock_8i;
585 // Control access to all user coded atomics for kmp_real64 data type
586 kmp_atomic_lock_t __kmp_atomic_lock_8r;
588 // Control access to all user coded atomics for float complex data type
588 kmp_atomic_lock_t __kmp_atomic_lock_8c;
589 // Control access to all user coded atomics for long double data type
590 kmp_atomic_lock_t __kmp_atomic_lock_10r;
591 // Control access to all user coded atomics for _Quad data type
592 kmp_atomic_lock_t __kmp_atomic_lock_16r;
593 // Control access to all user coded atomics for double complex data type
594 kmp_atomic_lock_t __kmp_atomic_lock_16c;
595 // Control access to all user coded atomics for long double complex type
596 kmp_atomic_lock_t __kmp_atomic_lock_20c;
597 // Control access to all user coded atomics for _Quad complex data type
598 kmp_atomic_lock_t __kmp_atomic_lock_32c;
599 
600 /* 2007-03-02:
601  Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
602  on *_32 and *_32e. This is just a temporary workaround for the problem. It
603  seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
604  in assembler language. */
605 #define KMP_ATOMIC_VOLATILE volatile
606 
607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608 
609 static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610  lhs.q += rhs.q;
611 }
612 static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613  lhs.q -= rhs.q;
614 }
615 static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616  lhs.q *= rhs.q;
617 }
618 static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619  lhs.q /= rhs.q;
620 }
621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622  return lhs.q < rhs.q;
623 }
624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625  return lhs.q > rhs.q;
626 }
627 
628 static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629  lhs.q += rhs.q;
630 }
631 static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632  lhs.q -= rhs.q;
633 }
634 static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635  lhs.q *= rhs.q;
636 }
637 static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638  lhs.q /= rhs.q;
639 }
640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641  return lhs.q < rhs.q;
642 }
643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644  return lhs.q > rhs.q;
645 }
646 
647 static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
648  lhs.q += rhs.q;
649 }
650 static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
651  lhs.q -= rhs.q;
652 }
653 static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
654  lhs.q *= rhs.q;
655 }
656 static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
657  lhs.q /= rhs.q;
658 }
659 
660 static inline void operator+=(kmp_cmplx128_a16_t &lhs,
661  kmp_cmplx128_a16_t &rhs) {
662  lhs.q += rhs.q;
663 }
664 static inline void operator-=(kmp_cmplx128_a16_t &lhs,
665  kmp_cmplx128_a16_t &rhs) {
666  lhs.q -= rhs.q;
667 }
668 static inline void operator*=(kmp_cmplx128_a16_t &lhs,
669  kmp_cmplx128_a16_t &rhs) {
670  lhs.q *= rhs.q;
671 }
672 static inline void operator/=(kmp_cmplx128_a16_t &lhs,
673  kmp_cmplx128_a16_t &rhs) {
674  lhs.q /= rhs.q;
675 }
676 
677 #endif
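// These overloads let the generic macros below apply "lhs OP= rhs" and
// "lhs OP rhs" directly to the aligned wrapper types. A minimal usage sketch
// (illustrative only, under the KMP_ARCH_X86 && KMP_HAVE_QUAD guard above):
//
//   Quad_a16_t x, y;
//   x += y;          // calls the operator+= above, updating x.q
//   if (x < y) { }   // calls the operator< above, comparing x.q and y.q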
678 
679 // ATOMIC implementation routines -----------------------------------------
680 // One routine for each operation and operand type.
681 // All routine declarations look like
682 // void __kmpc_atomic_TYPE_ID_OP_ID( ident_t*, int, TYPE *lhs, TYPE rhs );
683 
684 #define KMP_CHECK_GTID \
685  if (gtid == KMP_GTID_UNKNOWN) { \
686  gtid = __kmp_entry_gtid(); \
687  } // check and get gtid when needed
688 
689 // Beginning of a definition (provides name, parameters, debug trace)
690 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
691 // fixed-size integers)
692 // OP_ID - operation identifier (add, sub, mul, ...)
693 // TYPE - operands' type
694 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
695  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
696  TYPE *lhs, TYPE rhs) { \
697  KMP_DEBUG_ASSERT(__kmp_init_serial); \
698  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
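// Illustration (a sketch, not part of the original source): with the macro
// above, ATOMIC_BEGIN(fixed4, add, kmp_int32, void) opens the routine roughly as
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//
// The operation body and the closing brace are supplied by the macro that
// uses ATOMIC_BEGIN (e.g. ATOMIC_FIXED_ADD or ATOMIC_CMPXCHG below).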
699 
700 // ------------------------------------------------------------------------
701 // Lock variables used for critical sections for various size operands
702 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
703 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
704 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
705 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
706 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
707 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
708 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
709 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
710 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
711 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
712 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
713 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
714 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
715 
716 // ------------------------------------------------------------------------
717 // Operation on *lhs, rhs bound by critical section
718 // OP - operator (it's supposed to contain an assignment)
719 // LCK_ID - lock identifier
720 // Note: gtid is not checked here, as it should always be valid;
721 // 1- and 2-byte routines expect a valid gtid, other sizes check it before this macro
722 #define OP_CRITICAL(OP, LCK_ID) \
723  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
724  \
725  (*lhs) OP(rhs); \
726  \
727  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
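// Illustration (approximate expansion): OP_CRITICAL(+=, 10r), as used by
// __kmpc_atomic_float10_add further below, becomes
//
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//   (*lhs) += (rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//
// i.e. the LCK_ID token selects one of the lock variables declared above.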
728 
729 // ------------------------------------------------------------------------
730 // For GNU compatibility, we may need to use a critical section,
731 // even though it is not required by the ISA.
732 //
733 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
734 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
735 // critical section. On Intel(R) 64, all atomic operations are done with fetch
736 // and add or compare and exchange. Therefore, the FLAG parameter to this
737 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extension which
738 // require a critical section, where we predict that they will be implemented
739 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
740 //
741 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
742 // the FLAG parameter should always be 1. If we know that we will be using
743 // a critical section, then we want to make certain that we use the generic
744 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
745 // locks that are specialized based upon the size or type of the data.
746 //
747 // If FLAG is 0, then we are relying on dead code elimination by the build
748 // compiler to get rid of the useless block of code, and save a needless
749 // branch at runtime.
750 
751 #ifdef KMP_GOMP_COMPAT
752 #define OP_GOMP_CRITICAL(OP, FLAG) \
753  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
754  KMP_CHECK_GTID; \
755  OP_CRITICAL(OP, 0); \
756  return; \
757  }
758 #else
759 #define OP_GOMP_CRITICAL(OP, FLAG)
760 #endif /* KMP_GOMP_COMPAT */
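// Illustration (a sketch of the expansion when KMP_GOMP_COMPAT is defined):
// OP_GOMP_CRITICAL(+=, 1) guards the update roughly as
//
//   if ((1) && (__kmp_atomic_mode == 2)) {
//     if (gtid == KMP_GTID_UNKNOWN) {
//       gtid = __kmp_entry_gtid();                        // KMP_CHECK_GTID
//     }
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); // generic lock 0
//     (*lhs) += (rhs);
//     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
//     return;
//   }
//
// With FLAG == 0 the whole block is dead code, and without KMP_GOMP_COMPAT
// the macro expands to nothing.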
761 
762 #if KMP_MIC
763 #define KMP_DO_PAUSE _mm_delay_32(1)
764 #else
765 #define KMP_DO_PAUSE KMP_CPU_PAUSE()
766 #endif /* KMP_MIC */
767 
768 // ------------------------------------------------------------------------
769 // Operation on *lhs, rhs using "compare_and_store" routine
770 // TYPE - operands' type
771 // BITS - size in bits, used to distinguish low level calls
772 // OP - operator
773 #define OP_CMPXCHG(TYPE, BITS, OP) \
774  { \
775  TYPE old_value, new_value; \
776  old_value = *(TYPE volatile *)lhs; \
777  new_value = old_value OP rhs; \
778  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
779  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
780  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
781  KMP_DO_PAUSE; \
782  \
783  old_value = *(TYPE volatile *)lhs; \
784  new_value = old_value OP rhs; \
785  } \
786  }
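// Illustration (approximate expansion): OP_CMPXCHG(kmp_real32, 32, +), as
// instantiated by __kmpc_atomic_float4_add below, retries a 32-bit
// compare-and-store over the float's bit pattern:
//
//   {
//     kmp_real32 old_value, new_value;
//     old_value = *(kmp_real32 volatile *)lhs;
//     new_value = old_value + rhs;
//     while (!KMP_COMPARE_AND_STORE_ACQ32(
//         (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//         *VOLATILE_CAST(kmp_int32 *) & new_value)) {
//       KMP_DO_PAUSE;
//       old_value = *(kmp_real32 volatile *)lhs;   // re-read and recompute
//       new_value = old_value + rhs;
//     }
//   }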
787 
788 #if USE_CMPXCHG_FIX
789 // 2007-06-25:
790 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
791 // and win_32e are affected (I verified the asm). The compiler ignores the
792 // volatile qualifier of the temp_val in the OP_CMPXCHG macro. This is a
793 // compiler problem; the related tracker is C76005, targeted to 11.0. I
794 // verified the asm of the workaround.
795 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
796  { \
797  struct _sss { \
798  TYPE cmp; \
799  kmp_int##BITS *vvv; \
800  }; \
801  struct _sss old_value, new_value; \
802  old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
803  new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
804  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
805  new_value.cmp = old_value.cmp OP rhs; \
806  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
807  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
808  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
809  KMP_DO_PAUSE; \
810  \
811  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
812  new_value.cmp = old_value.cmp OP rhs; \
813  } \
814  }
815 // end of the first part of the workaround for C78287
816 #endif // USE_CMPXCHG_FIX
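// Illustration: for the affected complex(kind=4) entries (e.g.
// __kmpc_atomic_cmplx4_add further below) the macro is instantiated as
// OP_CMPXCHG_WORKAROUND(kmp_cmplx32, 64, +): the 8-byte complex value is read,
// recomputed, and compare-and-stored only through the kmp_int64 * member "vvv",
// which sidesteps the ignored-volatile compiler issue described above while
// still doing a single 64-bit KMP_COMPARE_AND_STORE_ACQ64 per attempt.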
817 
818 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
819 
820 // ------------------------------------------------------------------------
821 // X86 or X86_64: no alignment problems ====================================
822 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
823  GOMP_FLAG) \
824  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
825  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
826  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
827  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
828  }
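// Illustration (a sketch, not part of the original source): on X86/X86_64,
// ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3, 0) expands roughly to
//
//   void __kmpc_atomic_fixed4_sub(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_sub: T#%d\n", gtid));
//     // OP_GOMP_CRITICAL(-=, 0) compiles away because GOMP_FLAG is 0
//     KMP_TEST_THEN_ADD32(lhs, -rhs); // subtraction as an atomic add of -rhs
//   }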
829 // -------------------------------------------------------------------------
830 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
831  GOMP_FLAG) \
832  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
833  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
834  OP_CMPXCHG(TYPE, BITS, OP) \
835  }
836 #if USE_CMPXCHG_FIX
837 // -------------------------------------------------------------------------
838 // workaround for C78287 (complex(kind=4) data type)
839 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
840  MASK, GOMP_FLAG) \
841  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
842  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
843  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
844  }
845 // end of the second part of the workaround for C78287
846 #endif
847 
848 #else
849 // -------------------------------------------------------------------------
850 // Code for other architectures that don't handle unaligned accesses.
851 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
852  GOMP_FLAG) \
853  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
854  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
855  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
856  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
857  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
858  } else { \
859  KMP_CHECK_GTID; \
860  OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
861  } \
862  }
863 // -------------------------------------------------------------------------
864 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
865  GOMP_FLAG) \
866  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
867  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
868  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
869  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
870  } else { \
871  KMP_CHECK_GTID; \
872  OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
873  } \
874  }
875 #if USE_CMPXCHG_FIX
876 // -------------------------------------------------------------------------
877 // workaround for C78287 (complex(kind=4) data type)
878 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
879  MASK, GOMP_FLAG) \
880  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
881  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
882  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
883  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
884  } else { \
885  KMP_CHECK_GTID; \
886  OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
887  } \
888  }
889 // end of the second part of the workaround for C78287
890 #endif // USE_CMPXCHG_FIX
891 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
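// Note on MASK in the non-X86 variants above: the hex digit encodes the
// alignment requirement, e.g. MASK 3 produces the test
// ((kmp_uintptr_t)lhs & 0x3) for 4-byte alignment and MASK 7 produces
// ((kmp_uintptr_t)lhs & 0x7) for 8-byte alignment; an unaligned lhs falls back
// to the critical-section path.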
892 
893 // Routines for ATOMIC 4-byte operands addition and subtraction
894 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
895  0) // __kmpc_atomic_fixed4_add
896 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
897  0) // __kmpc_atomic_fixed4_sub
898 
899 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
900  KMP_ARCH_X86) // __kmpc_atomic_float4_add
901 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
902  KMP_ARCH_X86) // __kmpc_atomic_float4_sub
903 
904 // Routines for ATOMIC 8-byte operands addition and subtraction
905 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
906  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
907 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
908  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
909 
910 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
911  KMP_ARCH_X86) // __kmpc_atomic_float8_add
912 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
913  KMP_ARCH_X86) // __kmpc_atomic_float8_sub
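// A sketch of typical compiler codegen (an assumption for illustration, not
// shown in this file): an update such as
//
//   // int x, n;
//   // #pragma omp atomic
//   // x += n;
//
// would be lowered to a call of one of the entry points generated above, e.g.
//
//   __kmpc_atomic_fixed4_add(&loc, __kmpc_global_thread_num(&loc), &x, n);
//
// where "loc" is the ident_t source-location descriptor emitted by the
// compiler.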
914 
915 // ------------------------------------------------------------------------
916 // Entries definition for integer operands
917 // TYPE_ID - operands type and size (fixed4, float4)
918 // OP_ID - operation identifier (add, sub, mul, ...)
919 // TYPE - operand type
920 // BITS - size in bits, used to distinguish low level calls
921 // OP - operator (used in critical section)
922 // LCK_ID - lock identifier, used to possibly distinguish lock variable
923 // MASK - used for alignment check
924 
925 // TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
926 // ------------------------------------------------------------------------
927 // Routines for ATOMIC integer operands, other operators
928 // ------------------------------------------------------------------------
929 // TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
930 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
931  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
932 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
933  0) // __kmpc_atomic_fixed1_andb
934 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
935  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
936 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
937  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
938 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
939  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
940 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
941  0) // __kmpc_atomic_fixed1_orb
942 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
943  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
944 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
945  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
946 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
947  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
948 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
949  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
950 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
951  0) // __kmpc_atomic_fixed1_xor
952 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
953  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
954 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
955  0) // __kmpc_atomic_fixed2_andb
956 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
957  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
958 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
959  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
960 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
961  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
962 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
963  0) // __kmpc_atomic_fixed2_orb
964 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
965  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
966 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
967  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
968 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
969  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
970 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
971  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
972 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
973  0) // __kmpc_atomic_fixed2_xor
974 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
975  0) // __kmpc_atomic_fixed4_andb
976 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
977  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
978 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
979  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
980 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
981  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
982 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
983  0) // __kmpc_atomic_fixed4_orb
984 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
985  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
986 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
987  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
988 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
989  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
990 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
991  0) // __kmpc_atomic_fixed4_xor
992 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
993  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
994 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
995  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
996 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
997  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
998 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
999  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1000 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1001  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1002 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1003  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1004 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1005  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1006 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1007  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1008 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1009  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1010 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1011  KMP_ARCH_X86) // __kmpc_atomic_float4_div
1012 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1013  KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1014 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1015  KMP_ARCH_X86) // __kmpc_atomic_float8_div
1016 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1017  KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1018 // TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
1019 
1020 /* ------------------------------------------------------------------------ */
1021 /* Routines for C/C++ Reduction operators && and || */
1022 
1023 // ------------------------------------------------------------------------
1024 // Need separate macros for && and || because they have no compound-assignment form
1025 // TODO: eliminate the ATOMIC_CRIT_{L,EQV} macros, as they are not used
1026 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1027  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1028  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1029  OP_CRITICAL(= *lhs OP, LCK_ID) \
1030  }
1031 
1032 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1033 
1034 // ------------------------------------------------------------------------
1035 // X86 or X86_64: no alignment problems ===================================
1036 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1037  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1038  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1039  OP_CMPXCHG(TYPE, BITS, OP) \
1040  }
1041 
1042 #else
1043 // ------------------------------------------------------------------------
1044 // Code for other architectures that don't handle unaligned accesses.
1045 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1046  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1047  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1048  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1049  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1050  } else { \
1051  KMP_CHECK_GTID; \
1052  OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
1053  } \
1054  }
1055 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1056 
1057 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1058  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1059 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1060  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1061 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1062  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1063 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1064  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1065 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1066  0) // __kmpc_atomic_fixed4_andl
1067 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1068  0) // __kmpc_atomic_fixed4_orl
1069 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1070  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1071 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1072  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1073 
1074 /* ------------------------------------------------------------------------- */
1075 /* Routines for Fortran operators that have no C counterpart: */
1076 /* MAX, MIN, .EQV., .NEQV. */
1077 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
1078 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
1079 
1080 // -------------------------------------------------------------------------
1081 // MIN and MAX need separate macros
1082 // OP - operator used to check whether any action is still needed
1083 #define MIN_MAX_CRITSECT(OP, LCK_ID) \
1084  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1085  \
1086  if (*lhs OP rhs) { /* still need actions? */ \
1087  *lhs = rhs; \
1088  } \
1089  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1090 
1091 // -------------------------------------------------------------------------
1092 #ifdef KMP_GOMP_COMPAT
1093 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
1094  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1095  KMP_CHECK_GTID; \
1096  MIN_MAX_CRITSECT(OP, 0); \
1097  return; \
1098  }
1099 #else
1100 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1101 #endif /* KMP_GOMP_COMPAT */
1102 
1103 // -------------------------------------------------------------------------
1104 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1105  { \
1106  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1107  TYPE old_value; \
1108  temp_val = *lhs; \
1109  old_value = temp_val; \
1110  while (old_value OP rhs && /* still need actions? */ \
1111  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
1112  (kmp_int##BITS *)lhs, \
1113  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1114  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
1115  KMP_CPU_PAUSE(); \
1116  temp_val = *lhs; \
1117  old_value = temp_val; \
1118  } \
1119  }
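// Illustration (approximate expansion): __kmpc_atomic_fixed4_max instantiates
// MIN_MAX_CMPXCHG(kmp_int32, 32, <), so OP is "<" and "old_value < rhs" means
// the stored maximum still has to be raised to rhs:
//
//   kmp_int32 volatile temp_val;
//   kmp_int32 old_value;
//   temp_val = *lhs;
//   old_value = temp_val;
//   while (old_value < rhs && /* still need actions? */
//          !KMP_COMPARE_AND_STORE_ACQ32(
//              (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//              *VOLATILE_CAST(kmp_int32 *) & rhs)) { // store rhs as new max
//     KMP_CPU_PAUSE();
//     temp_val = *lhs;
//     old_value = temp_val;
//   }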
1120 
1121 // -------------------------------------------------------------------------
1122 // 1-byte, 2-byte operands - use critical section
1123 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1124  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1125  if (*lhs OP rhs) { /* need actions? */ \
1126  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1127  MIN_MAX_CRITSECT(OP, LCK_ID) \
1128  } \
1129  }
1130 
1131 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1132 
1133 // -------------------------------------------------------------------------
1134 // X86 or X86_64: no alignment problems ====================================
1135 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1136  GOMP_FLAG) \
1137  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1138  if (*lhs OP rhs) { \
1139  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1140  MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1141  } \
1142  }
1143 
1144 #else
1145 // -------------------------------------------------------------------------
1146 // Code for other architectures that don't handle unaligned accesses.
1147 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1148  GOMP_FLAG) \
1149  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1150  if (*lhs OP rhs) { \
1151  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1152  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1153  MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1154  } else { \
1155  KMP_CHECK_GTID; \
1156  MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
1157  } \
1158  } \
1159  }
1160 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1161 
1162 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1163  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1164 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1165  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1166 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1167  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1168 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1169  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1170 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1171  0) // __kmpc_atomic_fixed4_max
1172 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1173  0) // __kmpc_atomic_fixed4_min
1174 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1175  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1176 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1177  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1178 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1179  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1180 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1181  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1182 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1183  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1184 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1185  KMP_ARCH_X86) // __kmpc_atomic_float8_min
1186 #if KMP_HAVE_QUAD
1187 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1188  1) // __kmpc_atomic_float16_max
1189 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1190  1) // __kmpc_atomic_float16_min
1191 #if (KMP_ARCH_X86)
1192 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1193  1) // __kmpc_atomic_float16_max_a16
1194 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1195  1) // __kmpc_atomic_float16_min_a16
1196 #endif
1197 #endif
1198 // ------------------------------------------------------------------------
1199 // Need separate macros for .EQV. because of the need for a complement (~)
1200 // OP is ignored for critical sections; ^= ~ is used instead
1201 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1202  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1203  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
1204  OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */ \
1205  }
1206 
1207 // ------------------------------------------------------------------------
1208 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1209 // ------------------------------------------------------------------------
1210 // X86 or X86_64: no alignment problems ===================================
1211 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1212  GOMP_FLAG) \
1213  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1214  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
1215  OP_CMPXCHG(TYPE, BITS, OP) \
1216  }
1217 // ------------------------------------------------------------------------
1218 #else
1219 // ------------------------------------------------------------------------
1220 // Code for other architectures that don't handle unaligned accesses.
1221 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1222  GOMP_FLAG) \
1223  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1224  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) \
1225  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1226  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1227  } else { \
1228  KMP_CHECK_GTID; \
1229  OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */ \
1230  } \
1231  }
1232 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1233 
1234 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1235  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1236 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1237  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1238 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1239  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1240 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1241  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1242 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1243  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1244 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1245  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1246 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1247  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1248 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1249  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
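// Illustration: the .NEQV. entries above reuse plain XOR, while the .EQV.
// entries pass the operator token "^~", so OP_CMPXCHG computes
// old_value ^ ~rhs. Semantically (each update is atomic):
//
//   __kmpc_atomic_fixed4_neqv:  *lhs = *lhs ^ rhs;
//   __kmpc_atomic_fixed4_eqv:   *lhs = *lhs ^ ~rhs;  // bitwise equivalence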
1250 
1251 // ------------------------------------------------------------------------
1252 // Routines for Extended types: long double, _Quad, complex flavours (use
1253 // critical section)
1254 // TYPE_ID, OP_ID, TYPE - detailed above
1255 // OP - operator
1256 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1257 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1258  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1259  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
1260  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
1261  }
1262 
1263 /* ------------------------------------------------------------------------- */
1264 // routines for long double type
1265 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1266  1) // __kmpc_atomic_float10_add
1267 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1268  1) // __kmpc_atomic_float10_sub
1269 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1270  1) // __kmpc_atomic_float10_mul
1271 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1272  1) // __kmpc_atomic_float10_div
1273 #if KMP_HAVE_QUAD
1274 // routines for _Quad type
1275 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1276  1) // __kmpc_atomic_float16_add
1277 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1278  1) // __kmpc_atomic_float16_sub
1279 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1280  1) // __kmpc_atomic_float16_mul
1281 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1282  1) // __kmpc_atomic_float16_div
1283 #if (KMP_ARCH_X86)
1284 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1285  1) // __kmpc_atomic_float16_add_a16
1286 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1287  1) // __kmpc_atomic_float16_sub_a16
1288 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1289  1) // __kmpc_atomic_float16_mul_a16
1290 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1291  1) // __kmpc_atomic_float16_div_a16
1292 #endif
1293 #endif
1294 // routines for complex types
1295 
1296 #if USE_CMPXCHG_FIX
1297 // workaround for C78287 (complex(kind=4) data type)
1298 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1299  1) // __kmpc_atomic_cmplx4_add
1300 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1301  1) // __kmpc_atomic_cmplx4_sub
1302 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1303  1) // __kmpc_atomic_cmplx4_mul
1304 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1305  1) // __kmpc_atomic_cmplx4_div
1306 // end of the workaround for C78287
1307 #else
1308 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1309 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1310 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1311 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1312 #endif // USE_CMPXCHG_FIX
1313 
1314 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1315 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1316 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1317 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1318 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1319  1) // __kmpc_atomic_cmplx10_add
1320 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1321  1) // __kmpc_atomic_cmplx10_sub
1322 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1323  1) // __kmpc_atomic_cmplx10_mul
1324 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1325  1) // __kmpc_atomic_cmplx10_div
1326 #if KMP_HAVE_QUAD
1327 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1328  1) // __kmpc_atomic_cmplx16_add
1329 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1330  1) // __kmpc_atomic_cmplx16_sub
1331 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1332  1) // __kmpc_atomic_cmplx16_mul
1333 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1334  1) // __kmpc_atomic_cmplx16_div
1335 #if (KMP_ARCH_X86)
1336 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1337  1) // __kmpc_atomic_cmplx16_add_a16
1338 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1339  1) // __kmpc_atomic_cmplx16_sub_a16
1340 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1341  1) // __kmpc_atomic_cmplx16_mul_a16
1342 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1343  1) // __kmpc_atomic_cmplx16_div_a16
1344 #endif
1345 #endif
1346 
1347 #if OMP_40_ENABLED
1348 
1349 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1350 // Supported only on IA-32 architecture and Intel(R) 64
1351 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1352 
1353 // ------------------------------------------------------------------------
1354 // Operation on *lhs, rhs bound by critical section
1355 // OP - operator (it's supposed to contain an assignment)
1356 // LCK_ID - lock identifier
1357 // Note: gtid is not checked here, as it should always be valid;
1358 // 1- and 2-byte routines expect a valid gtid, other sizes check it before this macro
1359 #define OP_CRITICAL_REV(OP, LCK_ID) \
1360  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1361  \
1362  (*lhs) = (rhs)OP(*lhs); \
1363  \
1364  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1365 
1366 #ifdef KMP_GOMP_COMPAT
1367 #define OP_GOMP_CRITICAL_REV(OP, FLAG) \
1368  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1369  KMP_CHECK_GTID; \
1370  OP_CRITICAL_REV(OP, 0); \
1371  return; \
1372  }
1373 #else
1374 #define OP_GOMP_CRITICAL_REV(OP, FLAG)
1375 #endif /* KMP_GOMP_COMPAT */
1376 
1377 // Beginning of a definition (provides name, parameters, debug trace)
1378 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
1379 // fixed-size integers)
1380 // OP_ID - operation identifier (add, sub, mul, ...)
1381 // TYPE - operands' type
1382 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1383  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
1384  TYPE *lhs, TYPE rhs) { \
1385  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1386  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1387 
1388 // ------------------------------------------------------------------------
1389 // Operation on *lhs, rhs using "compare_and_store" routine
1390 // TYPE - operands' type
1391 // BITS - size in bits, used to distinguish low level calls
1392 // OP - operator
1393 // Note: temp_val introduced in order to force the compiler to read
1394 // *lhs only once (w/o it the compiler reads *lhs twice)
1395 #define OP_CMPXCHG_REV(TYPE, BITS, OP) \
1396  { \
1397  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1398  TYPE old_value, new_value; \
1399  temp_val = *lhs; \
1400  old_value = temp_val; \
1401  new_value = rhs OP old_value; \
1402  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
1403  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1404  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
1405  KMP_DO_PAUSE; \
1406  \
1407  temp_val = *lhs; \
1408  old_value = temp_val; \
1409  new_value = rhs OP old_value; \
1410  } \
1411  }
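// Illustration: the only difference from OP_CMPXCHG is the operand order in
// "new_value = rhs OP old_value", so e.g. __kmpc_atomic_fixed4_sub_rev,
// generated below, atomically performs
//
//   *lhs = rhs - (*lhs);
//
// whereas the forward __kmpc_atomic_fixed4_sub performs *lhs = (*lhs) - rhs.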
1412 
1413 // -------------------------------------------------------------------------
1414 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
1415  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1416  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
1417  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1418  }
1419 
1420 // ------------------------------------------------------------------------
1421 // Entries definition for integer operands
1422 // TYPE_ID - operands type and size (fixed4, float4)
1423 // OP_ID - operation identifier (add, sub, mul, ...)
1424 // TYPE - operand type
1425 // BITS - size in bits, used to distinguish low level calls
1426 // OP - operator (used in critical section)
1427 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1428 
1429 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1430 // ------------------------------------------------------------------------
1431 // Routines for ATOMIC integer operands, other operators
1432 // ------------------------------------------------------------------------
1433 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1434 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1435  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1436 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1437  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1438 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1439  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1440 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1441  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1442 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1443  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1444 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1445  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1446 
1447 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1448  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1449 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1450  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1451 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1452  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1453 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1454  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1455 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1456  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1457 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1458  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1459 
1460 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1461  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1462 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1463  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1464 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1465  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1466 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1467  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1468 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1469  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1470 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1471  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1472 
1473 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1474  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1475 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1476  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1477 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1478  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1479 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1480  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1481 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1482  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1483 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1484  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1485 
1486 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1487  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1488 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1489  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1490 
1491 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1492  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1493 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1494  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1495 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1496 
1497 // ------------------------------------------------------------------------
1498 // Routines for Extended types: long double, _Quad, complex flavours (use
1499 // critical section)
1500 // TYPE_ID, OP_ID, TYPE - detailed above
1501 // OP - operator
1502 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1503 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1504  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1505  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
1506  OP_CRITICAL_REV(OP, LCK_ID) \
1507  }
1508 
1509 /* ------------------------------------------------------------------------- */
1510 // routines for long double type
1511 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1512  1) // __kmpc_atomic_float10_sub_rev
1513 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1514  1) // __kmpc_atomic_float10_div_rev
1515 #if KMP_HAVE_QUAD
1516 // routines for _Quad type
1517 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1518  1) // __kmpc_atomic_float16_sub_rev
1519 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1520  1) // __kmpc_atomic_float16_div_rev
1521 #if (KMP_ARCH_X86)
1522 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1523  1) // __kmpc_atomic_float16_sub_a16_rev
1524 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1525  1) // __kmpc_atomic_float16_div_a16_rev
1526 #endif
1527 #endif
1528 
1529 // routines for complex types
1530 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1531  1) // __kmpc_atomic_cmplx4_sub_rev
1532 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1533  1) // __kmpc_atomic_cmplx4_div_rev
1534 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1535  1) // __kmpc_atomic_cmplx8_sub_rev
1536 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1537  1) // __kmpc_atomic_cmplx8_div_rev
1538 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1539  1) // __kmpc_atomic_cmplx10_sub_rev
1540 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1541  1) // __kmpc_atomic_cmplx10_div_rev
1542 #if KMP_HAVE_QUAD
1543 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1544  1) // __kmpc_atomic_cmplx16_sub_rev
1545 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1546  1) // __kmpc_atomic_cmplx16_div_rev
1547 #if (KMP_ARCH_X86)
1548 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1549  1) // __kmpc_atomic_cmplx16_sub_a16_rev
1550 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1551  1) // __kmpc_atomic_cmplx16_div_a16_rev
1552 #endif
1553 #endif
1554 
1555 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1556 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1557 
1558 #endif // OMP_40_ENABLED
1559 
1560 /* ------------------------------------------------------------------------ */
1561 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1562 /* Note: in order to reduce the total number of type combinations, */
1563 /* it is assumed that the compiler converts RHS to the longest floating */
1564 /* type, that is _Quad, before calling any of these routines. */
1565 /* The conversion to _Quad is done by the compiler during the calculation, */
1566 /* and the conversion back to TYPE happens before the assignment, like: */
1567 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1568 /* A performance penalty is expected because of software emulation. */
1569 /* ------------------------------------------------------------------------ */
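// Illustration (a sketch of the intended semantics, following the comment
// above): with rhs already widened to _Quad by the compiler, a call such as
// __kmpc_atomic_fixed2_add_fp(id_ref, gtid, lhs, rhs) atomically performs
//
//   *lhs = (short)((_Quad)(*lhs) + rhs);   // rhs has type _Quad
//
// The widening and the narrowing back to short happen inside the
// compare-and-store retry loop generated by ATOMIC_CMPXCHG_MIX below.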
1570 
1571 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1572  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
1573  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
1574  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1575  KA_TRACE(100, \
1576  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
1577  gtid));
1578 
1579 // -------------------------------------------------------------------------
1580 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
1581  GOMP_FLAG) \
1582  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1583  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
1584  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
1585  }
1586 
1587 // -------------------------------------------------------------------------
1588 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1589 // -------------------------------------------------------------------------
1590 // X86 or X86_64: no alignment problems ====================================
1591 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1592  LCK_ID, MASK, GOMP_FLAG) \
1593  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1594  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1595  OP_CMPXCHG(TYPE, BITS, OP) \
1596  }
1597 // -------------------------------------------------------------------------
1598 #else
1599 // ------------------------------------------------------------------------
1600 // Code for other architectures that don't handle unaligned accesses.
1601 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1602  LCK_ID, MASK, GOMP_FLAG) \
1603  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1604  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1605  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1606  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1607  } else { \
1608  KMP_CHECK_GTID; \
1609  OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
1610  } \
1611  }
1612 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1613 
1614 // -------------------------------------------------------------------------
1615 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1616 // -------------------------------------------------------------------------
1617 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
1618  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
1619  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1620  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
1621  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1622  }
1623 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
1624  LCK_ID, GOMP_FLAG) \
1625  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1626  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
1627  OP_CRITICAL_REV(OP, LCK_ID) \
1628  }
1629 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1630 
1631 // RHS=float8
1632 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1633  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1634 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1635  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1636 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1637  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1638 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1639  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1640 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1641  0) // __kmpc_atomic_fixed4_mul_float8
1642 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1643  0) // __kmpc_atomic_fixed4_div_float8
1644 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1645  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1646 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1647  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1648 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1649  KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1650 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1651  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1652 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1653  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1654 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1655  KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1656 
1657 // RHS=float16 (deprecated, to be removed once we are sure the compiler does
1658 // not use these routines)
1659 #if KMP_HAVE_QUAD
1660 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1661  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1662 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1663  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1664 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1665  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1666 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1667  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1668 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1669  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1670 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1671  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1672 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1673  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1674 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1675  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1676 
1677 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1678  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1679 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1680  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1681 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1682  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1683 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1684  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1685 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1686  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1687 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1688  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1689 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1690  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1691 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1692  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1693 
1694 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1695  0) // __kmpc_atomic_fixed4_add_fp
1696 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1697  0) // __kmpc_atomic_fixed4u_add_fp
1698 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1699  0) // __kmpc_atomic_fixed4_sub_fp
1700 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1701  0) // __kmpc_atomic_fixed4u_sub_fp
1702 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1703  0) // __kmpc_atomic_fixed4_mul_fp
1704 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1705  0) // __kmpc_atomic_fixed4u_mul_fp
1706 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1707  0) // __kmpc_atomic_fixed4_div_fp
1708 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1709  0) // __kmpc_atomic_fixed4u_div_fp
1710 
1711 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1712  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1713 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1714  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1715 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1716  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1717 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1718  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1719 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1720  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1721 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1722  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1723 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1724  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1725 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1726  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1727 
1728 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1729  KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1730 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1731  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1732 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1733  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1734 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1735  KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1736 
1737 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1738  KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1739 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1740  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1741 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1742  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1743 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1744  KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1745 
1746 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1747  1) // __kmpc_atomic_float10_add_fp
1748 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1749  1) // __kmpc_atomic_float10_sub_fp
1750 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1751  1) // __kmpc_atomic_float10_mul_fp
1752 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1753  1) // __kmpc_atomic_float10_div_fp
1754 
1755 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1756 // Reverse operations
1757 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1758  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1759 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1760  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1761 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1762  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1763 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1764  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1765 
1766 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1767  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1768 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1769  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1770 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1771  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1772 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1773  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1774 
1775 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1776  0) // __kmpc_atomic_fixed4_sub_rev_fp
1777 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1778  0) // __kmpc_atomic_fixed4u_sub_rev_fp
1779 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1780  0) // __kmpc_atomic_fixed4_div_rev_fp
1781 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1782  0) // __kmpc_atomic_fixed4u_div_rev_fp
1783 
1784 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1785  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1786 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1787  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1788 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1789  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1790 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1791  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1792 
1793 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1794  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1795 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1796  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1797 
1798 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1799  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1800 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1801  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1802 
1803 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1804  1) // __kmpc_atomic_float10_sub_rev_fp
1805 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1806  1) // __kmpc_atomic_float10_div_rev_fp
1807 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1808 
1809 #endif
1810 
1811 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1812 // ------------------------------------------------------------------------
1813 // X86 or X86_64: no alignment problems ====================================
1814 #if USE_CMPXCHG_FIX
1815 // workaround for C78287 (complex(kind=4) data type)
1816 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1817  LCK_ID, MASK, GOMP_FLAG) \
1818  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1819  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1820  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1821  }
1822 // end of the second part of the workaround for C78287
1823 #else
1824 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1825  LCK_ID, MASK, GOMP_FLAG) \
1826  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1827  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1828  OP_CMPXCHG(TYPE, BITS, OP) \
1829  }
1830 #endif // USE_CMPXCHG_FIX
1831 #else
1832 // ------------------------------------------------------------------------
1833 // Code for other architectures that don't handle unaligned accesses.
1834 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1835  LCK_ID, MASK, GOMP_FLAG) \
1836  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1837  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1838  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1839  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1840  } else { \
1841  KMP_CHECK_GTID; \
1842  OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
1843  } \
1844  }
1845 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
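// Note on the MASK parameter in the non-x86 variant above: it is pasted into a
// hex literal, so MASK = 7 turns the test into (!((kmp_uintptr_t)lhs & 0x7)),
// i.e. "is lhs 8-byte aligned?". A rough sketch of the resulting control flow
// (illustration only, assuming OP_CMPXCHG and OP_CRITICAL expand as defined
// earlier in this file):
//
//   if (((kmp_uintptr_t)lhs & 0x7) == 0) {
//     OP_CMPXCHG(kmp_cmplx32, 64, +)    /* aligned: lock-free CAS on the  */
//                                       /* 64-bit image of the value      */
//   } else {
//     KMP_CHECK_GTID;
//     OP_CRITICAL(+=, 8c)               /* unaligned: use the 8c lock     */
//   }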
1846 
1847 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1848  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1849 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1850  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1851 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1852  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1853 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1854  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1855 
1856 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1857 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1858 
1859 // ------------------------------------------------------------------------
1860 // Atomic READ routines
1861 
1862 // ------------------------------------------------------------------------
1863 // Beginning of a definition (provides name, parameters, debug trace)

1864 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1865 // fixed)
1866 // OP_ID - operation identifier (add, sub, mul, ...)
1867 // TYPE - operands' type
1868 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1869  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1870  TYPE *loc) { \
1871  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1872  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1873 
1874 // ------------------------------------------------------------------------
1875 // Operation on *lhs, rhs using "compare_and_store_ret" routine
1876 // TYPE - operands' type
1877 // BITS - size in bits, used to distinguish low level calls
1878 // OP - operator
1879 // Note: temp_val introduced in order to force the compiler to read
1880 // *lhs only once (w/o it the compiler reads *lhs twice)
1881 // TODO: check if it is still necessary
1882 // Return old value regardless of the result of "compare & swap" operation
1883 #define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1884  { \
1885  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1886  union f_i_union { \
1887  TYPE f_val; \
1888  kmp_int##BITS i_val; \
1889  }; \
1890  union f_i_union old_value; \
1891  temp_val = *loc; \
1892  old_value.f_val = temp_val; \
1893  old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1894  (kmp_int##BITS *)loc, \
1895  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1896  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1897  new_value = old_value.f_val; \
1898  return new_value; \
1899  }
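// Sketch of OP_CMPXCHG_READ(kmp_real64, 64, +) with the arguments substituted
// by hand (illustration only; the VOLATILE_CAST wrappers around old_value.i_val
// are elided for readability, and new_value is declared by the enclosing
// ATOMIC_* reader defined below). The compare-and-store uses identical "old"
// and "new" values, so memory is left unchanged while the returned value is an
// atomic 64-bit snapshot of *loc, reinterpreted back to floating point via the
// union.
//
//   kmp_real64 volatile temp_val;
//   union f_i_union {
//     kmp_real64 f_val;
//     kmp_int64 i_val;
//   };
//   union f_i_union old_value;
//   temp_val = *loc;                 // provisional (possibly torn) read
//   old_value.f_val = temp_val;
//   old_value.i_val = KMP_COMPARE_AND_STORE_RET64(
//       (kmp_int64 *)loc, old_value.i_val, old_value.i_val);
//   new_value = old_value.f_val;
//   return new_value;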
1900 
1901 // -------------------------------------------------------------------------
1902 // Operation on *lhs, rhs bound by critical section
1903 // OP - operator (it's supposed to contain an assignment)
1904 // LCK_ID - lock identifier
1905 // Note: don't check gtid as it should always be valid
1906 // 1, 2-byte - expect valid parameter, other - check before this macro
1907 #define OP_CRITICAL_READ(OP, LCK_ID) \
1908  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1909  \
1910  new_value = (*loc); \
1911  \
1912  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1913 
1914 // -------------------------------------------------------------------------
1915 #ifdef KMP_GOMP_COMPAT
1916 #define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1917  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1918  KMP_CHECK_GTID; \
1919  OP_CRITICAL_READ(OP, 0); \
1920  return new_value; \
1921  }
1922 #else
1923 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1924 #endif /* KMP_GOMP_COMPAT */
1925 
1926 // -------------------------------------------------------------------------
1927 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1928  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1929  TYPE new_value; \
1930  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1931  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1932  return new_value; \
1933  }
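// For instance, ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) used further
// below expands to roughly the following (sketch only; the debug assert and
// trace lines are omitted, and the GOMP critical path is dead because
// GOMP_FLAG == 0):
//
//   kmp_int32 __kmpc_atomic_fixed4_rd(ident_t *id_ref, int gtid,
//                                     kmp_int32 *loc) {
//     kmp_int32 new_value;
//     new_value = KMP_TEST_THEN_ADD32(loc, +0); // atomic fetch-and-add of zero
//     return new_value;
//   }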
1934 // -------------------------------------------------------------------------
1935 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1936  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1937  TYPE new_value; \
1938  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1939  OP_CMPXCHG_READ(TYPE, BITS, OP) \
1940  }
1941 // ------------------------------------------------------------------------
1942 // Routines for Extended types: long double, _Quad, complex flavours (use
1943 // critical section)
1944 // TYPE_ID, OP_ID, TYPE - detailed above
1945 // OP - operator
1946 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1947 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1948  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1949  TYPE new_value; \
1950  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
1951  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
1952  return new_value; \
1953  }
1954 
1955 // ------------------------------------------------------------------------
1956 // Fix for cmplx4 read (CQ220361) on Windows* OS. A regular routine with a
1957 // return value doesn't work there, so the read value is returned through an
1958 // additional output parameter instead.
1959 #if (KMP_OS_WINDOWS)
1960 
1961 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
1962  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1963  \
1964  (*out) = (*loc); \
1965  \
1966  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1967 // ------------------------------------------------------------------------
1968 #ifdef KMP_GOMP_COMPAT
1969 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
1970  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1971  KMP_CHECK_GTID; \
1972  OP_CRITICAL_READ_WRK(OP, 0); \
1973  }
1974 #else
1975 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
1976 #endif /* KMP_GOMP_COMPAT */
1977 // ------------------------------------------------------------------------
1978 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
1979  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
1980  TYPE *loc) { \
1981  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1982  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1983 
1984 // ------------------------------------------------------------------------
1985 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1986  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
1987  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
1988  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
1989  }
1990 
1991 #endif // KMP_OS_WINDOWS
1992 
1993 // ------------------------------------------------------------------------
1994 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1995 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
1996 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
1997  KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
1998 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
1999  KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2000 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2001  KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2002 
2003 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2004 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2005  KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2006 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2007  KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2008 
2009 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2010  1) // __kmpc_atomic_float10_rd
2011 #if KMP_HAVE_QUAD
2012 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2013  1) // __kmpc_atomic_float16_rd
2014 #endif // KMP_HAVE_QUAD
2015 
2016 // Fix for CQ220361 on Windows* OS
2017 #if (KMP_OS_WINDOWS)
2018 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2019  1) // __kmpc_atomic_cmplx4_rd
2020 #else
2021 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2022  1) // __kmpc_atomic_cmplx4_rd
2023 #endif
2024 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2025  1) // __kmpc_atomic_cmplx8_rd
2026 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2027  1) // __kmpc_atomic_cmplx10_rd
2028 #if KMP_HAVE_QUAD
2029 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2030  1) // __kmpc_atomic_cmplx16_rd
2031 #if (KMP_ARCH_X86)
2032 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2033  1) // __kmpc_atomic_float16_a16_rd
2034 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2035  1) // __kmpc_atomic_cmplx16_a16_rd
2036 #endif
2037 #endif
2038 
2039 // ------------------------------------------------------------------------
2040 // Atomic WRITE routines
2041 
2042 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2043  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2044  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2045  KMP_XCHG_FIXED##BITS(lhs, rhs); \
2046  }
2047 // ------------------------------------------------------------------------
2048 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2049  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2050  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2051  KMP_XCHG_REAL##BITS(lhs, rhs); \
2052  }
2053 
2054 // ------------------------------------------------------------------------
2055 // Operation on *lhs, rhs using "compare_and_store" routine
2056 // TYPE - operands' type
2057 // BITS - size in bits, used to distinguish low level calls
2058 // OP - operator
2059 // Note: temp_val introduced in order to force the compiler to read
2060 // *lhs only once (w/o it the compiler reads *lhs twice)
2061 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2062  { \
2063  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2064  TYPE old_value, new_value; \
2065  temp_val = *lhs; \
2066  old_value = temp_val; \
2067  new_value = rhs; \
2068  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2069  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2070  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2071  KMP_CPU_PAUSE(); \
2072  \
2073  temp_val = *lhs; \
2074  old_value = temp_val; \
2075  new_value = rhs; \
2076  } \
2077  }
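// On targets where the operand is wider than the native atomic exchange (e.g.
// 8-byte data on IA-32, selected via ATOMIC_CMPXCHG_WR below), the write is
// emulated with a CAS retry loop. Sketch for kmp_int64 with the arguments
// substituted by hand (illustration only):
//
//   kmp_int64 old_value = *lhs;
//   kmp_int64 new_value = rhs;
//   while (!KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)lhs,
//                                       *(volatile kmp_int64 *)&old_value,
//                                       *(volatile kmp_int64 *)&new_value)) {
//     KMP_CPU_PAUSE();     // another thread changed *lhs; re-sample and retry
//     old_value = *lhs;
//     new_value = rhs;
//   }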
2078 
2079 // -------------------------------------------------------------------------
2080 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2081  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2082  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2083  OP_CMPXCHG_WR(TYPE, BITS, OP) \
2084  }
2085 
2086 // ------------------------------------------------------------------------
2087 // Routines for Extended types: long double, _Quad, complex flavours (use
2088 // critical section)
2089 // TYPE_ID, OP_ID, TYPE - detailed above
2090 // OP - operator
2091 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2092 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2093  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2094  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2095  OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2096  }
2097 // -------------------------------------------------------------------------
2098 
2099 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2100  KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2101 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2102  KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2103 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2104  KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2105 #if (KMP_ARCH_X86)
2106 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2107  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2108 #else
2109 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2110  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2111 #endif
2112 
2113 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2114  KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2115 #if (KMP_ARCH_X86)
2116 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2117  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2118 #else
2119 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2120  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2121 #endif
2122 
2123 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2124  1) // __kmpc_atomic_float10_wr
2125 #if KMP_HAVE_QUAD
2126 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2127  1) // __kmpc_atomic_float16_wr
2128 #endif
2129 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2130 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2131  1) // __kmpc_atomic_cmplx8_wr
2132 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2133  1) // __kmpc_atomic_cmplx10_wr
2134 #if KMP_HAVE_QUAD
2135 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2136  1) // __kmpc_atomic_cmplx16_wr
2137 #if (KMP_ARCH_X86)
2138 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2139  1) // __kmpc_atomic_float16_a16_wr
2140 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2141  1) // __kmpc_atomic_cmplx16_a16_wr
2142 #endif
2143 #endif
2144 
2145 // ------------------------------------------------------------------------
2146 // Atomic CAPTURE routines
2147 
2148 // Beginning of a definition (provides name, parameters, debug trace)
2149 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2150 // fixed)
2151 // OP_ID - operation identifier (add, sub, mul, ...)
2152 // TYPE - operands' type
2153 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2154  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2155  TYPE *lhs, TYPE rhs, int flag) { \
2156  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2157  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2158 
2159 // -------------------------------------------------------------------------
2160 // Operation on *lhs, rhs bound by critical section
2161 // OP - operator (it's supposed to contain an assignment)
2162 // LCK_ID - lock identifier
2163 // Note: don't check gtid as it should always be valid
2164 // 1, 2-byte - expect valid parameter, other - check before this macro
2165 #define OP_CRITICAL_CPT(OP, LCK_ID) \
2166  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2167  \
2168  if (flag) { \
2169  (*lhs) OP rhs; \
2170  new_value = (*lhs); \
2171  } else { \
2172  new_value = (*lhs); \
2173  (*lhs) OP rhs; \
2174  } \
2175  \
2176  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2177  return new_value;
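// The flag argument selects which value is captured: non-zero captures the
// value after the update (v = x binop= expr), zero captures the value before it
// (v = x; x binop= expr). Sketch of OP_CRITICAL_CPT(+=, 10r) with the arguments
// substituted by hand (illustration only; ATOMIC_LOCK10r is the token-pasted
// lock name):
//
//   __kmp_acquire_atomic_lock(&ATOMIC_LOCK10r, gtid);
//   if (flag) {              // capture-after:  { x += rhs; v = x; }
//     (*lhs) += rhs;
//     new_value = (*lhs);
//   } else {                 // capture-before: { v = x; x += rhs; }
//     new_value = (*lhs);
//     (*lhs) += rhs;
//   }
//   __kmp_release_atomic_lock(&ATOMIC_LOCK10r, gtid);
//   return new_value;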
2178 
2179 // ------------------------------------------------------------------------
2180 #ifdef KMP_GOMP_COMPAT
2181 #define OP_GOMP_CRITICAL_CPT(OP, FLAG) \
2182  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2183  KMP_CHECK_GTID; \
2184  OP_CRITICAL_CPT(OP## =, 0); \
2185  }
2186 #else
2187 #define OP_GOMP_CRITICAL_CPT(OP, FLAG)
2188 #endif /* KMP_GOMP_COMPAT */
2189 
2190 // ------------------------------------------------------------------------
2191 // Operation on *lhs, rhs using "compare_and_store" routine
2192 // TYPE - operands' type
2193 // BITS - size in bits, used to distinguish low level calls
2194 // OP - operator
2195 // Note: temp_val introduced in order to force the compiler to read
2196 // *lhs only once (w/o it the compiler reads *lhs twice)
2197 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2198  { \
2199  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2200  TYPE old_value, new_value; \
2201  temp_val = *lhs; \
2202  old_value = temp_val; \
2203  new_value = old_value OP rhs; \
2204  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2205  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2206  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2207  KMP_CPU_PAUSE(); \
2208  \
2209  temp_val = *lhs; \
2210  old_value = temp_val; \
2211  new_value = old_value OP rhs; \
2212  } \
2213  if (flag) { \
2214  return new_value; \
2215  } else \
2216  return old_value; \
2217  }
2218 
2219 // -------------------------------------------------------------------------
2220 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2221  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2222  TYPE new_value; \
2223  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2224  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2225  }
2226 
2227 // -------------------------------------------------------------------------
2228 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2229  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2230  TYPE old_value, new_value; \
2231  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2232  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2233  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2234  if (flag) { \
2235  return old_value OP rhs; \
2236  } else \
2237  return old_value; \
2238  }
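// Sketch of ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
// KMP_ARCH_X86) as instantiated just below (illustration only; GOMP, assert and
// trace lines omitted): the hardware fetch-and-add returns the old value, and
// the new value is reconstructed arithmetically when flag is set.
//
//   kmp_int64 __kmpc_atomic_fixed8_sub_cpt(ident_t *id_ref, int gtid,
//                                          kmp_int64 *lhs, kmp_int64 rhs,
//                                          int flag) {
//     kmp_int64 old_value;
//     old_value = KMP_TEST_THEN_ADD64(lhs, -rhs); // '-' used as a sign
//     if (flag)
//       return old_value - rhs;                   // value after the update
//     else
//       return old_value;                         // value before the update
//   }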
2239 // -------------------------------------------------------------------------
2240 
2241 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2242  0) // __kmpc_atomic_fixed4_add_cpt
2243 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2244  0) // __kmpc_atomic_fixed4_sub_cpt
2245 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2246  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2247 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2248  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2249 
2250 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2251  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2252 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2253  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2254 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2255  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2256 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2257  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2258 
2259 // ------------------------------------------------------------------------
2260 // Entries definition for integer operands
2261 // TYPE_ID - operands type and size (fixed4, float4)
2262 // OP_ID - operation identifier (add, sub, mul, ...)
2263 // TYPE - operand type
2264 // BITS - size in bits, used to distinguish low level calls
2265 // OP - operator (used in critical section)
2266 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
2267 // ------------------------------------------------------------------------
2268 // Routines for ATOMIC integer operands, other operators
2269 // ------------------------------------------------------------------------
2270 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2271 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2272  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2273 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2274  0) // __kmpc_atomic_fixed1_andb_cpt
2275 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2276  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2277 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2278  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2279 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2280  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2281 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2282  0) // __kmpc_atomic_fixed1_orb_cpt
2283 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2284  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2285 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2286  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2287 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2288  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2289 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2290  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2291 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2292  0) // __kmpc_atomic_fixed1_xor_cpt
2293 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2294  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2295 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2296  0) // __kmpc_atomic_fixed2_andb_cpt
2297 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2298  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2299 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2300  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2301 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2302  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2303 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2304  0) // __kmpc_atomic_fixed2_orb_cpt
2305 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2306  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2307 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2308  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2309 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2310  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2311 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2312  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2313 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2314  0) // __kmpc_atomic_fixed2_xor_cpt
2315 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2316  0) // __kmpc_atomic_fixed4_andb_cpt
2317 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2318  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2319 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2320  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2321 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2322  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2323 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2324  0) // __kmpc_atomic_fixed4_orb_cpt
2325 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2326  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2327 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2328  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2329 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2330  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2331 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2332  0) // __kmpc_atomic_fixed4_xor_cpt
2333 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2334  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2335 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2336  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2337 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2338  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2339 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2340  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2341 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2342  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2343 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2344  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2345 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2346  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2347 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2348  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2349 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2350  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2351 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2352  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2353 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2354  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2355 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2356  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2357 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2358  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2359 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2360 
2361 // CAPTURE routines for mixed types RHS=float16
2362 #if KMP_HAVE_QUAD
2363 
2364 // Beginning of a definition (provides name, parameters, debug trace)
2365 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2366 // fixed)
2367 // OP_ID - operation identifier (add, sub, mul, ...)
2368 // TYPE - operands' type
2369 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2370  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2371  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2372  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2373  KA_TRACE(100, \
2374  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2375  gtid));
2376 
2377 // -------------------------------------------------------------------------
2378 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2379  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2380  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2381  TYPE new_value; \
2382  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2383  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2384  }
2385 
2386 // -------------------------------------------------------------------------
2387 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2388  LCK_ID, GOMP_FLAG) \
2389  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2390  TYPE new_value; \
2391  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \
2392  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \
2393  }
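// In these mixed-type entries the left-hand side keeps its own type while the
// right-hand side is a _Quad; inside OP_CMPXCHG_CPT the expression
// old_value OP rhs is evaluated in _Quad precision and converted back to TYPE
// on assignment. For example, the first invocation below produces (sketch of
// the generated signature only):
//
//   char __kmpc_atomic_fixed1_add_cpt_fp(ident_t *id_ref, int gtid,
//                                        char *lhs, _Quad rhs, int flag);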
2394 
2395 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2396  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2397 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2398  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2399 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2400  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2401 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2402  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2403 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2404  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2405 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2406  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2407 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2408  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2409 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2410  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2411 
2412 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2413  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2414 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2415  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2416 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2417  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2418 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2419  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2420 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2421  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2422 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2423  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2424 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2425  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2426 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2427  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2428 
2429 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2430  0) // __kmpc_atomic_fixed4_add_cpt_fp
2431 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2432  0) // __kmpc_atomic_fixed4u_add_cpt_fp
2433 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2434  0) // __kmpc_atomic_fixed4_sub_cpt_fp
2435 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2436  0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2437 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2438  0) // __kmpc_atomic_fixed4_mul_cpt_fp
2439 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2440  0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2441 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2442  0) // __kmpc_atomic_fixed4_div_cpt_fp
2443 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2444  0) // __kmpc_atomic_fixed4u_div_cpt_fp
2445 
2446 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2447  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2448 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2449  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2450 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2451  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2452 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2453  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2454 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2455  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2456 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2457  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2458 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2459  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2460 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2461  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2462 
2463 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2464  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2465 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2466  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2467 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2468  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2469 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2470  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2471 
2472 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2473  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2474 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2475  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2476 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2477  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2478 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2479  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2480 
2481 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2482  1) // __kmpc_atomic_float10_add_cpt_fp
2483 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2484  1) // __kmpc_atomic_float10_sub_cpt_fp
2485 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2486  1) // __kmpc_atomic_float10_mul_cpt_fp
2487 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2488  1) // __kmpc_atomic_float10_div_cpt_fp
2489 
2490 #endif // KMP_HAVE_QUAD
2491 
2492 // ------------------------------------------------------------------------
2493 // Routines for C/C++ Reduction operators && and ||
2494 
2495 // -------------------------------------------------------------------------
2496 // Operation on *lhs, rhs bound by critical section
2497 // OP - operator (it's supposed to contain an assignment)
2498 // LCK_ID - lock identifier
2499 // Note: don't check gtid as it should always be valid
2500 // 1, 2-byte - expect valid parameter, other - check before this macro
2501 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2502  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2503  \
2504  if (flag) { \
2505  new_value OP rhs; \
2506  } else \
2507  new_value = (*lhs); \
2508  \
2509  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2510 
2511 // ------------------------------------------------------------------------
2512 #ifdef KMP_GOMP_COMPAT
2513 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2514  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2515  KMP_CHECK_GTID; \
2516  OP_CRITICAL_L_CPT(OP, 0); \
2517  return new_value; \
2518  }
2519 #else
2520 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2521 #endif /* KMP_GOMP_COMPAT */
2522 
2523 // ------------------------------------------------------------------------
2524 // Need separate macros for &&, || because there is no combined assignment
2525 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2526  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2527  TYPE new_value; \
2528  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2529  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2530  }
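// C has no compound assignment for && and ||, so the GOMP critical path is
// handed the fragment "= *lhs OP" while the lock-free path reuses
// OP_CMPXCHG_CPT with the bare operator. Sketch of the core of the fixed4
// andl_cpt entry below, with the arguments substituted by hand (illustration
// only):
//
//   kmp_int32 old_value, new_value;
//   old_value = *lhs;
//   new_value = old_value && rhs;                 // logical result, 0 or 1
//   while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
//                                       *(volatile kmp_int32 *)&old_value,
//                                       *(volatile kmp_int32 *)&new_value)) {
//     KMP_CPU_PAUSE();
//     old_value = *lhs;
//     new_value = old_value && rhs;
//   }
//   return flag ? new_value : old_value;          // captured after / before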
2531 
2532 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2533  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2534 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2535  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2536 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2537  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2538 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2539  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2540 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2541  0) // __kmpc_atomic_fixed4_andl_cpt
2542 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2543  0) // __kmpc_atomic_fixed4_orl_cpt
2544 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2545  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2546 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2547  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2548 
2549 // -------------------------------------------------------------------------
2550 // Routines for Fortran operators that have no counterpart in C:
2551 // MAX, MIN, .EQV., .NEQV.
2552 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2553 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2554 
2555 // -------------------------------------------------------------------------
2556 // MIN and MAX need separate macros
2557 // OP - comparison operator used to check whether any action is needed
2558 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2559  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2560  \
2561  if (*lhs OP rhs) { /* still need actions? */ \
2562  old_value = *lhs; \
2563  *lhs = rhs; \
2564  if (flag) \
2565  new_value = rhs; \
2566  else \
2567  new_value = old_value; \
2568  } \
2569  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2570  return new_value;
2571 
2572 // -------------------------------------------------------------------------
2573 #ifdef KMP_GOMP_COMPAT
2574 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
2575  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2576  KMP_CHECK_GTID; \
2577  MIN_MAX_CRITSECT_CPT(OP, 0); \
2578  }
2579 #else
2580 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2581 #endif /* KMP_GOMP_COMPAT */
2582 
2583 // -------------------------------------------------------------------------
2584 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2585  { \
2586  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2587  /*TYPE old_value; */ \
2588  temp_val = *lhs; \
2589  old_value = temp_val; \
2590  while (old_value OP rhs && /* still need actions? */ \
2591  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
2592  (kmp_int##BITS *)lhs, \
2593  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2594  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
2595  KMP_CPU_PAUSE(); \
2596  temp_val = *lhs; \
2597  old_value = temp_val; \
2598  } \
2599  if (flag) \
2600  return rhs; \
2601  else \
2602  return old_value; \
2603  }
2604 
2605 // -------------------------------------------------------------------------
2606 // 1-byte, 2-byte operands - use critical section
2607 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2608  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2609  TYPE new_value, old_value; \
2610  if (*lhs OP rhs) { /* need actions? */ \
2611  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2612  MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2613  } \
2614  return *lhs; \
2615  }
2616 
2617 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2618  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2619  TYPE new_value, old_value; \
2620  if (*lhs OP rhs) { \
2621  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2622  MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2623  } \
2624  return *lhs; \
2625  }
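// Sketch of MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <, 0) as
// instantiated below (illustration only; the GOMP path is dead because
// GOMP_FLAG == 0): the routine only touches memory when rhs would actually
// change the current extremum.
//
//   kmp_int32 __kmpc_atomic_fixed4_max_cpt(ident_t *id_ref, int gtid,
//                                          kmp_int32 *lhs, kmp_int32 rhs,
//                                          int flag) {
//     kmp_int32 old_value;
//     if (*lhs < rhs) {                           // is rhs a new maximum?
//       old_value = *lhs;
//       while (old_value < rhs &&                 // still a new maximum?
//              !KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
//                                           *(volatile kmp_int32 *)&old_value,
//                                           *(volatile kmp_int32 *)&rhs)) {
//         KMP_CPU_PAUSE();
//         old_value = *lhs;
//       }
//       return flag ? rhs : old_value;
//     }
//     return *lhs;                                // no update was needed
//   }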
2626 
2627 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2628  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2629 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2630  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2631 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2632  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2633 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2634  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2635 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2636  0) // __kmpc_atomic_fixed4_max_cpt
2637 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2638  0) // __kmpc_atomic_fixed4_min_cpt
2639 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2640  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2641 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2642  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2643 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2644  KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2645 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2646  KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2647 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2648  KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2649 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2650  KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2651 #if KMP_HAVE_QUAD
2652 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2653  1) // __kmpc_atomic_float16_max_cpt
2654 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2655  1) // __kmpc_atomic_float16_min_cpt
2656 #if (KMP_ARCH_X86)
2657 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2658  1) // __kmpc_atomic_float16_max_a16_cpt
2659 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2660  1) // __kmpc_atomic_float16_min_a16_cpt
2661 #endif
2662 #endif
2663 
2664 // ------------------------------------------------------------------------
2665 #ifdef KMP_GOMP_COMPAT
2666 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2667  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2668  KMP_CHECK_GTID; \
2669  OP_CRITICAL_CPT(OP, 0); \
2670  }
2671 #else
2672 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2673 #endif /* KMP_GOMP_COMPAT */
2674 // ------------------------------------------------------------------------
2675 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2676  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2677  TYPE new_value; \
2678  OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */ \
2679  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2680  }
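// .NEQV. maps directly onto bitwise XOR, while .EQV. (XNOR) relies on the
// identity ~(a ^ b) == a ^ ~b: the token "^~" pasted into OP_CMPXCHG_CPT yields
// new_value = old_value ^ ~rhs, and the GOMP critical path receives "^= ~" so
// that it becomes (*lhs) ^= ~rhs. One-line illustration (values chosen freely):
//
//   kmp_int8 a = 0x5A, b = 0x0F;
//   kmp_int8 eqv = a ^ ~b;   // equals (kmp_int8)~(a ^ b), i.e. 0xAA here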
2681 
2682 // ------------------------------------------------------------------------
2683 
2684 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2685  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2686 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2687  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2688 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2689  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2690 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2691  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2692 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2693  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2694 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2695  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2696 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2697  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2698 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2699  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2700 
2701 // ------------------------------------------------------------------------
2702 // Routines for Extended types: long double, _Quad, complex flavours (use
2703 // critical section)
2704 // TYPE_ID, OP_ID, TYPE - detailed above
2705 // OP - operator
2706 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2707 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2708  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2709  TYPE new_value; \
2710  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \
2711  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \
2712  }
2713 
2714 // ------------------------------------------------------------------------
2715 // Workaround for cmplx4. Regular routines with a return value don't work
2716 // on Win_32e, so the captured value is returned through an additional parameter.
2717 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
2718  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2719  \
2720  if (flag) { \
2721  (*lhs) OP rhs; \
2722  (*out) = (*lhs); \
2723  } else { \
2724  (*out) = (*lhs); \
2725  (*lhs) OP rhs; \
2726  } \
2727  \
2728  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2729  return;
2730 // ------------------------------------------------------------------------
2731 
2732 #ifdef KMP_GOMP_COMPAT
2733 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
2734  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2735  KMP_CHECK_GTID; \
2736  OP_CRITICAL_CPT_WRK(OP## =, 0); \
2737  }
2738 #else
2739 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2740 #endif /* KMP_GOMP_COMPAT */
2741 // ------------------------------------------------------------------------
2742 
2743 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2744  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2745  TYPE rhs, TYPE *out, int flag) { \
2746  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2747  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2748 // ------------------------------------------------------------------------
2749 
2750 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2751  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2752  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
2753  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
2754  }
2755 // The end of workaround for cmplx4
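// Signature produced for the cmplx4 capture entries defined further below
// (sketch only): the routine returns void and writes the captured value through
// the extra out parameter, with flag selecting before/after as usual.
//
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag);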
2756 
2757 /* ------------------------------------------------------------------------- */
2758 // routines for long double type
2759 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2760  1) // __kmpc_atomic_float10_add_cpt
2761 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2762  1) // __kmpc_atomic_float10_sub_cpt
2763 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2764  1) // __kmpc_atomic_float10_mul_cpt
2765 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2766  1) // __kmpc_atomic_float10_div_cpt
2767 #if KMP_HAVE_QUAD
2768 // routines for _Quad type
2769 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2770  1) // __kmpc_atomic_float16_add_cpt
2771 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2772  1) // __kmpc_atomic_float16_sub_cpt
2773 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2774  1) // __kmpc_atomic_float16_mul_cpt
2775 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2776  1) // __kmpc_atomic_float16_div_cpt
2777 #if (KMP_ARCH_X86)
2778 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2779  1) // __kmpc_atomic_float16_add_a16_cpt
2780 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2781  1) // __kmpc_atomic_float16_sub_a16_cpt
2782 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2783  1) // __kmpc_atomic_float16_mul_a16_cpt
2784 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2785  1) // __kmpc_atomic_float16_div_a16_cpt
2786 #endif
2787 #endif
2788 
2789 // routines for complex types
2790 
2791 // cmplx4 routines to return void
2792 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2793  1) // __kmpc_atomic_cmplx4_add_cpt
2794 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2795  1) // __kmpc_atomic_cmplx4_sub_cpt
2796 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2797  1) // __kmpc_atomic_cmplx4_mul_cpt
2798 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2799  1) // __kmpc_atomic_cmplx4_div_cpt
2800 
2801 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2802  1) // __kmpc_atomic_cmplx8_add_cpt
2803 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2804  1) // __kmpc_atomic_cmplx8_sub_cpt
2805 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2806  1) // __kmpc_atomic_cmplx8_mul_cpt
2807 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2808  1) // __kmpc_atomic_cmplx8_div_cpt
2809 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2810  1) // __kmpc_atomic_cmplx10_add_cpt
2811 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2812  1) // __kmpc_atomic_cmplx10_sub_cpt
2813 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2814  1) // __kmpc_atomic_cmplx10_mul_cpt
2815 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2816  1) // __kmpc_atomic_cmplx10_div_cpt
2817 #if KMP_HAVE_QUAD
2818 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2819  1) // __kmpc_atomic_cmplx16_add_cpt
2820 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2821  1) // __kmpc_atomic_cmplx16_sub_cpt
2822 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2823  1) // __kmpc_atomic_cmplx16_mul_cpt
2824 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2825  1) // __kmpc_atomic_cmplx16_div_cpt
2826 #if (KMP_ARCH_X86)
2827 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2828  1) // __kmpc_atomic_cmplx16_add_a16_cpt
2829 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2830  1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2831 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2832  1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2833 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2834  1) // __kmpc_atomic_cmplx16_div_a16_cpt
2835 #endif
2836 #endif
2837 
2838 #if OMP_40_ENABLED
2839 
2840 // OpenMP 4.0 capture forms for non-commutative operations:
2841 // v = x = expr binop x;  { v = x; x = expr binop x; }  { x = expr binop x; v = x; }
2842 // Supported only on IA-32 architecture and Intel(R) 64
2843 
2844 // -------------------------------------------------------------------------
2845 // Operation on *lhs, rhs bound by critical section
2846 // OP - operator (it's supposed to contain an assignment)
2847 // LCK_ID - lock identifier
2848 // Note: don't check gtid as it should always be valid
2849 // 1, 2-byte - expect valid parameter, other - check before this macro
2850 #define OP_CRITICAL_CPT_REV(OP, LCK_ID) \
2851  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2852  \
2853  if (flag) { \
2854  /*temp_val = (*lhs);*/ \
2855  (*lhs) = (rhs)OP(*lhs); \
2856  new_value = (*lhs); \
2857  } else { \
2858  new_value = (*lhs); \
2859  (*lhs) = (rhs)OP(*lhs); \
2860  } \
2861  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2862  return new_value;
2863 
2864 // ------------------------------------------------------------------------
2865 #ifdef KMP_GOMP_COMPAT
2866 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) \
2867  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2868  KMP_CHECK_GTID; \
2869  OP_CRITICAL_CPT_REV(OP, 0); \
2870  }
2871 #else
2872 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)
2873 #endif /* KMP_GOMP_COMPAT */
2874 
2875 // ------------------------------------------------------------------------
2876 // Operation on *lhs, rhs using "compare_and_store" routine
2877 // TYPE - operands' type
2878 // BITS - size in bits, used to distinguish low level calls
2879 // OP - operator
2880 // Note: temp_val introduced in order to force the compiler to read
2881 // *lhs only once (w/o it the compiler reads *lhs twice)
2882 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2883  { \
2884  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2885  TYPE old_value, new_value; \
2886  temp_val = *lhs; \
2887  old_value = temp_val; \
2888  new_value = rhs OP old_value; \
2889  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2890  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2891  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2892  KMP_CPU_PAUSE(); \
2893  \
2894  temp_val = *lhs; \
2895  old_value = temp_val; \
2896  new_value = rhs OP old_value; \
2897  } \
2898  if (flag) { \
2899  return new_value; \
2900  } else \
2901  return old_value; \
2902  }
2903 
2904 // -------------------------------------------------------------------------
2905 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2906  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2907  TYPE new_value; \
2908  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
2909  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2910  }
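// The reverse forms compute rhs OP old_value instead of old_value OP rhs
// (i.e. x = expr binop x), which matters for the non-commutative operators
// below. Sketch of the core of the fixed4 sub_cpt_rev entry, with the arguments
// substituted by hand (illustration only):
//
//   kmp_int32 old_value, new_value;
//   old_value = *lhs;
//   new_value = rhs - old_value;                  // reversed operand order
//   while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
//                                       *(volatile kmp_int32 *)&old_value,
//                                       *(volatile kmp_int32 *)&new_value)) {
//     KMP_CPU_PAUSE();
//     old_value = *lhs;
//     new_value = rhs - old_value;
//   }
//   return flag ? new_value : old_value;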
2911 
2912 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2913  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2914 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2915  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2916 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2917  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2918 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2919  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2920 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2921  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2922 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2923  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2924 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
2925  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
2926 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
2927  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
2928 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
2929  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
2930 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
2931  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
2932 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
2933  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
2934 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
2935  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
2936 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
2937  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
2938 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
2939  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
2940 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
2941  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
2942 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
2943  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
2944 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
2945  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
2946 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
2947  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
2948 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
2949  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
2950 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
2951  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
2952 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
2953  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
2954 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
2955  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
2956 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
2957  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
2958 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
2959  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
2960 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
2961  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
2962 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
2963  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
2964 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
2965  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
2966 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
2967  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
2968 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2969 
2970 // ------------------------------------------------------------------------
2971 // Routines for Extended types: long double, _Quad, complex flavours (use
2972 // critical section)
2973 // TYPE_ID, OP_ID, TYPE - detailed above
2974 // OP - operator
2975 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2976 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2977  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2978  TYPE new_value; \
2979  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
2980  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
2981  OP_CRITICAL_CPT_REV(OP, LCK_ID) \
2982  }
2983 
2984 /* ------------------------------------------------------------------------- */
2985 // routines for long double type
2986 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
2987  1) // __kmpc_atomic_float10_sub_cpt_rev
2988 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
2989  1) // __kmpc_atomic_float10_div_cpt_rev
2990 #if KMP_HAVE_QUAD
2991 // routines for _Quad type
2992 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
2993  1) // __kmpc_atomic_float16_sub_cpt_rev
2994 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
2995  1) // __kmpc_atomic_float16_div_cpt_rev
2996 #if (KMP_ARCH_X86)
2997 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
2998  1) // __kmpc_atomic_float16_sub_a16_cpt_rev
2999 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3000  1) // __kmpc_atomic_float16_div_a16_cpt_rev
3001 #endif
3002 #endif
3003 
3004 // routines for complex types
3005 
3006 // ------------------------------------------------------------------------
3007 // Workaround for cmplx4. Regular routines with return value don't work
3008 // on Win_32e. Let's return captured values through the additional parameter.
3009 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3010  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3011  \
3012  if (flag) { \
3013  (*lhs) = (rhs)OP(*lhs); \
3014  (*out) = (*lhs); \
3015  } else { \
3016  (*out) = (*lhs); \
3017  (*lhs) = (rhs)OP(*lhs); \
3018  } \
3019  \
3020  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3021  return;
3022 // ------------------------------------------------------------------------
3023 
3024 #ifdef KMP_GOMP_COMPAT
3025 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
3026  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3027  KMP_CHECK_GTID; \
3028  OP_CRITICAL_CPT_REV_WRK(OP, 0); \
3029  }
3030 #else
3031 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3032 #endif /* KMP_GOMP_COMPAT */
3033 // ------------------------------------------------------------------------
3034 
3035 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
3036  GOMP_FLAG) \
3037  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
3038  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
3039  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3040  }
3041 // The end of workaround for cmplx4
3042 
3043 // !!! TODO: check if we need to return void for cmplx4 routines
3044 // cmplx4 routines to return void
3045 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3046  1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3047 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3048  1) // __kmpc_atomic_cmplx4_div_cpt_rev
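// A hedged sketch of what the two cmplx4 instantiations above generate. The
// prototype comes from ATOMIC_BEGIN_WRK (defined earlier, not shown here), so
// the exact shape is assumed; the point is that the captured value is written
// through the extra out parameter instead of being returned:
//
//   void __kmpc_atomic_cmplx4_sub_cpt_rev(ident_t *id_ref, int gtid,
//                                         kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                         kmp_cmplx32 *out, int flag);
//
// With flag != 0, *out receives the updated *lhs; with flag == 0 it receives
// the value before the update, as OP_CRITICAL_CPT_REV_WRK above spells out.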
3049 
3050 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3051  1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3052 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3053  1) // __kmpc_atomic_cmplx8_div_cpt_rev
3054 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3055  1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3056 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3057  1) // __kmpc_atomic_cmplx10_div_cpt_rev
3058 #if KMP_HAVE_QUAD
3059 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3060  1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3061 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3062  1) // __kmpc_atomic_cmplx16_div_cpt_rev
3063 #if (KMP_ARCH_X86)
3064 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3065  1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3066 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3067  1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3068 #endif
3069 #endif
3070 
3071 // Capture reverse for mixed type: RHS=float16
3072 #if KMP_HAVE_QUAD
3073 
3074 // Beginning of a definition (provides name, parameters, debug trace)
3075 // TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
3076 // fixed)
3077 // OP_ID - operation identifier (add, sub, mul, ...)
3078 // TYPE - operands' type
3079 // -------------------------------------------------------------------------
3080 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
3081  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
3082  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3083  TYPE new_value; \
3084  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
3085  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
3086  }
3087 
3088 // -------------------------------------------------------------------------
3089 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3090  LCK_ID, GOMP_FLAG) \
3091  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3092  TYPE new_value; \
3093  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */ \
3094  OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */ \
3095  }
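// A hedged sketch of the mixed-type routines generated below. The prototype
// comes from ATOMIC_BEGIN_CPT_MIX (defined earlier, not shown here), so the
// shape is assumed: the left-hand operand keeps its own type while the
// right-hand side arrives as a _Quad, e.g.
//
//   kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
//                                                 kmp_int32 *lhs, _Quad rhs,
//                                                 int flag);
//
// which atomically performs the reversed update *lhs = rhs - *lhs (with the
// arithmetic presumably carried out in the wider _Quad type) and returns the
// captured value selected by flag.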
3096 
3097 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3098  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3099 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3100  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3101 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3102  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3103 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3104  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3105 
3106 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3107  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3108 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3109  1,
3110  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3111 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3112  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3113 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3114  1,
3115  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3116 
3117 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3118  3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3119 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3120  4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3121 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3122  3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3123 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3124  4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3125 
3126 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3127  7,
3128  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3129 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3130  8i, 7,
3131  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3132 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3133  7,
3134  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3135 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3136  8i, 7,
3137  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3138 
3139 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3140  4r, 3,
3141  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3142 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3143  4r, 3,
3144  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3145 
3146 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3147  8r, 7,
3148  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3149 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3150  8r, 7,
3151  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3152 
3153 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3154  10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3155 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3156  10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3157 
3158 #endif // KMP_HAVE_QUAD
3159 
3160 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3161 
3162 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3163  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3164  TYPE rhs) { \
3165  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3166  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3167 
3168 #define CRITICAL_SWP(LCK_ID) \
3169  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3170  \
3171  old_value = (*lhs); \
3172  (*lhs) = rhs; \
3173  \
3174  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3175  return old_value;
3176 
3177 // ------------------------------------------------------------------------
3178 #ifdef KMP_GOMP_COMPAT
3179 #define GOMP_CRITICAL_SWP(FLAG) \
3180  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3181  KMP_CHECK_GTID; \
3182  CRITICAL_SWP(0); \
3183  }
3184 #else
3185 #define GOMP_CRITICAL_SWP(FLAG)
3186 #endif /* KMP_GOMP_COMPAT */
3187 
3188 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3189  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3190  TYPE old_value; \
3191  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3192  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
3193  return old_value; \
3194  }
3195 // ------------------------------------------------------------------------
3196 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3197  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3198  TYPE old_value; \
3199  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3200  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
3201  return old_value; \
3202  }
3203 
3204 // ------------------------------------------------------------------------
3205 #define CMPXCHG_SWP(TYPE, BITS) \
3206  { \
3207  TYPE KMP_ATOMIC_VOLATILE temp_val; \
3208  TYPE old_value, new_value; \
3209  temp_val = *lhs; \
3210  old_value = temp_val; \
3211  new_value = rhs; \
3212  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
3213  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
3214  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
3215  KMP_CPU_PAUSE(); \
3216  \
3217  temp_val = *lhs; \
3218  old_value = temp_val; \
3219  new_value = rhs; \
3220  } \
3221  return old_value; \
3222  }
3223 
3224 // -------------------------------------------------------------------------
3225 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3226  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3227  TYPE old_value; \
3228  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3229  CMPXCHG_SWP(TYPE, BITS) \
3230  }
3231 
3232 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3233 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3234 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3235 
3236 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3237  KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3238 
3239 #if (KMP_ARCH_X86)
3240 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3241  KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3242 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3243  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3244 #else
3245 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3246 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3247  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3248 #endif
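// A hedged usage sketch (illustrative, not taken from this file): for a
// 4-byte integer x a compiler could lower
//
//   #pragma omp atomic capture
//   { v = x; x = expr; }
//
// into a call to the routine generated above, roughly
//
//   v = __kmpc_atomic_fixed4_swp(&loc, gtid, &x, expr);
//
// where loc is the ident_t describing the call site and gtid is the calling
// thread's global thread id.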
3249 
3250 // ------------------------------------------------------------------------
3251 // Routines for Extended types: long double, _Quad, complex flavours (use
3252 // critical section)
3253 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3254  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3255  TYPE old_value; \
3256  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3257  CRITICAL_SWP(LCK_ID) \
3258  }
3259 
3260 // ------------------------------------------------------------------------
3261 // !!! TODO: check if we need to return void for cmplx4 routines
3262 // Workaround for cmplx4. Regular routines with return value don't work
3263 // on Win_32e. Let's return captured values through the additional parameter.
3264 
3265 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3266  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3267  TYPE rhs, TYPE *out) { \
3268  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3269  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3270 
3271 #define CRITICAL_SWP_WRK(LCK_ID) \
3272  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3273  \
3274  tmp = (*lhs); \
3275  (*lhs) = (rhs); \
3276  (*out) = tmp; \
3277  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3278  return;
3279 // ------------------------------------------------------------------------
3280 
3281 #ifdef KMP_GOMP_COMPAT
3282 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
3283  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3284  KMP_CHECK_GTID; \
3285  CRITICAL_SWP_WRK(0); \
3286  }
3287 #else
3288 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3289 #endif /* KMP_GOMP_COMPAT */
3290 // ------------------------------------------------------------------------
3291 
3292 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3293  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3294  TYPE tmp; \
3295  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
3296  CRITICAL_SWP_WRK(LCK_ID) \
3297  }
3298 // The end of workaround for cmplx4
3299 
3300 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3301 #if KMP_HAVE_QUAD
3302 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3303 #endif
3304 // cmplx4 routine to return void
3305 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
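// A hedged sketch of the routine generated above: per ATOMIC_BEGIN_SWP_WRK,
// the swapped-out value is delivered through the out parameter rather than as
// a return value,
//
//   void __kmpc_atomic_cmplx4_swp(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
//                                 kmp_cmplx32 rhs, kmp_cmplx32 *out);
//
// i.e. under the 8c lock: *out = *lhs; *lhs = rhs.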
3306 
3307 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
3308 // __kmpc_atomic_cmplx4_swp
3309 
3310 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3311 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3312 #if KMP_HAVE_QUAD
3313 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3314 #if (KMP_ARCH_X86)
3315 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3316  1) // __kmpc_atomic_float16_a16_swp
3317 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3318  1) // __kmpc_atomic_cmplx16_a16_swp
3319 #endif
3320 #endif
3321 
3322 // End of OpenMP 4.0 Capture
3323 
3324 #endif // OMP_40_ENABLED
3325 
3326 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3327 
3328 #undef OP_CRITICAL
3329 
3330 /* ------------------------------------------------------------------------ */
3331 /* Generic atomic routines */
3332 
3333 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3334  void (*f)(void *, void *, void *)) {
3335  KMP_DEBUG_ASSERT(__kmp_init_serial);
3336 
3337  if (
3338 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3339  FALSE /* must use lock */
3340 #else
3341  TRUE
3342 #endif
3343  ) {
3344  kmp_int8 old_value, new_value;
3345 
3346  old_value = *(kmp_int8 *)lhs;
3347  (*f)(&new_value, &old_value, rhs);
3348 
3349  /* TODO: Should this be acquire or release? */
3350  while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3351  *(kmp_int8 *)&new_value)) {
3352  KMP_CPU_PAUSE();
3353 
3354  old_value = *(kmp_int8 *)lhs;
3355  (*f)(&new_value, &old_value, rhs);
3356  }
3357 
3358  return;
3359  } else {
3360 // All 1-byte data is of integer data type.
3361 
3362 #ifdef KMP_GOMP_COMPAT
3363  if (__kmp_atomic_mode == 2) {
3364  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3365  } else
3366 #endif /* KMP_GOMP_COMPAT */
3367  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3368 
3369  (*f)(lhs, lhs, rhs);
3370 
3371 #ifdef KMP_GOMP_COMPAT
3372  if (__kmp_atomic_mode == 2) {
3373  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3374  } else
3375 #endif /* KMP_GOMP_COMPAT */
3376  __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3377  }
3378 }
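// A hedged usage sketch (illustrative, not taken from this file): the callback
// f computes *out = *a OP *b, so a caller that needs an atomic char addition
// could, for example, pass
//
//   static void add_char(void *out, void *a, void *b) {
//     *(char *)out = *(char *)a + *(char *)b;
//   }
//   ...
//   __kmpc_atomic_1(&loc, gtid, &c, &d, add_char); // c, d are chars; loc is
//                                                  // the call-site ident_t
//
// On the lock-free path above, f is first applied to a snapshot of *lhs and
// the result is installed with a compare-and-store retry loop; on the locked
// path it is applied to *lhs in place.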
3379 
3380 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3381  void (*f)(void *, void *, void *)) {
3382  if (
3383 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3384  FALSE /* must use lock */
3385 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3386  TRUE /* no alignment problems */
3387 #else
3388  !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3389 #endif
3390  ) {
3391  kmp_int16 old_value, new_value;
3392 
3393  old_value = *(kmp_int16 *)lhs;
3394  (*f)(&new_value, &old_value, rhs);
3395 
3396  /* TODO: Should this be acquire or release? */
3397  while (!KMP_COMPARE_AND_STORE_ACQ16(
3398  (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3399  KMP_CPU_PAUSE();
3400 
3401  old_value = *(kmp_int16 *)lhs;
3402  (*f)(&new_value, &old_value, rhs);
3403  }
3404 
3405  return;
3406  } else {
3407 // All 2-byte data is of integer data type.
3408 
3409 #ifdef KMP_GOMP_COMPAT
3410  if (__kmp_atomic_mode == 2) {
3411  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3412  } else
3413 #endif /* KMP_GOMP_COMPAT */
3414  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3415 
3416  (*f)(lhs, lhs, rhs);
3417 
3418 #ifdef KMP_GOMP_COMPAT
3419  if (__kmp_atomic_mode == 2) {
3420  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3421  } else
3422 #endif /* KMP_GOMP_COMPAT */
3423  __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3424  }
3425 }
3426 
3427 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3428  void (*f)(void *, void *, void *)) {
3429  KMP_DEBUG_ASSERT(__kmp_init_serial);
3430 
3431  if (
3432 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3433 // Gomp compatibility is broken if this routine is called for floats.
3434 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3435  TRUE /* no alignment problems */
3436 #else
3437  !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3438 #endif
3439  ) {
3440  kmp_int32 old_value, new_value;
3441 
3442  old_value = *(kmp_int32 *)lhs;
3443  (*f)(&new_value, &old_value, rhs);
3444 
3445  /* TODO: Should this be acquire or release? */
3446  while (!KMP_COMPARE_AND_STORE_ACQ32(
3447  (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3448  KMP_CPU_PAUSE();
3449 
3450  old_value = *(kmp_int32 *)lhs;
3451  (*f)(&new_value, &old_value, rhs);
3452  }
3453 
3454  return;
3455  } else {
3456 // Use __kmp_atomic_lock_4i for all 4-byte data,
3457 // even if it isn't of integer data type.
3458 
3459 #ifdef KMP_GOMP_COMPAT
3460  if (__kmp_atomic_mode == 2) {
3461  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3462  } else
3463 #endif /* KMP_GOMP_COMPAT */
3464  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3465 
3466  (*f)(lhs, lhs, rhs);
3467 
3468 #ifdef KMP_GOMP_COMPAT
3469  if (__kmp_atomic_mode == 2) {
3470  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3471  } else
3472 #endif /* KMP_GOMP_COMPAT */
3473  __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3474  }
3475 }
3476 
3477 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3478  void (*f)(void *, void *, void *)) {
3479  KMP_DEBUG_ASSERT(__kmp_init_serial);
3480  if (
3481 
3482 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3483  FALSE /* must use lock */
3484 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3485  TRUE /* no alignment problems */
3486 #else
3487  !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3488 #endif
3489  ) {
3490  kmp_int64 old_value, new_value;
3491 
3492  old_value = *(kmp_int64 *)lhs;
3493  (*f)(&new_value, &old_value, rhs);
3494  /* TODO: Should this be acquire or release? */
3495  while (!KMP_COMPARE_AND_STORE_ACQ64(
3496  (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3497  KMP_CPU_PAUSE();
3498 
3499  old_value = *(kmp_int64 *)lhs;
3500  (*f)(&new_value, &old_value, rhs);
3501  }
3502 
3503  return;
3504  } else {
3505 // Use __kmp_atomic_lock_8i for all 8-byte data,
3506 // even if it isn't of integer data type.
3507 
3508 #ifdef KMP_GOMP_COMPAT
3509  if (__kmp_atomic_mode == 2) {
3510  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3511  } else
3512 #endif /* KMP_GOMP_COMPAT */
3513  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3514 
3515  (*f)(lhs, lhs, rhs);
3516 
3517 #ifdef KMP_GOMP_COMPAT
3518  if (__kmp_atomic_mode == 2) {
3519  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3520  } else
3521 #endif /* KMP_GOMP_COMPAT */
3522  __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3523  }
3524 }
3525 
3526 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3527  void (*f)(void *, void *, void *)) {
3528  KMP_DEBUG_ASSERT(__kmp_init_serial);
3529 
3530 #ifdef KMP_GOMP_COMPAT
3531  if (__kmp_atomic_mode == 2) {
3532  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3533  } else
3534 #endif /* KMP_GOMP_COMPAT */
3535  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3536 
3537  (*f)(lhs, lhs, rhs);
3538 
3539 #ifdef KMP_GOMP_COMPAT
3540  if (__kmp_atomic_mode == 2) {
3541  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3542  } else
3543 #endif /* KMP_GOMP_COMPAT */
3544  __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3545 }
3546 
3547 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3548  void (*f)(void *, void *, void *)) {
3549  KMP_DEBUG_ASSERT(__kmp_init_serial);
3550 
3551 #ifdef KMP_GOMP_COMPAT
3552  if (__kmp_atomic_mode == 2) {
3553  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3554  } else
3555 #endif /* KMP_GOMP_COMPAT */
3556  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3557 
3558  (*f)(lhs, lhs, rhs);
3559 
3560 #ifdef KMP_GOMP_COMPAT
3561  if (__kmp_atomic_mode == 2) {
3562  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3563  } else
3564 #endif /* KMP_GOMP_COMPAT */
3565  __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3566 }
3567 
3568 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3569  void (*f)(void *, void *, void *)) {
3570  KMP_DEBUG_ASSERT(__kmp_init_serial);
3571 
3572 #ifdef KMP_GOMP_COMPAT
3573  if (__kmp_atomic_mode == 2) {
3574  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3575  } else
3576 #endif /* KMP_GOMP_COMPAT */
3577  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3578 
3579  (*f)(lhs, lhs, rhs);
3580 
3581 #ifdef KMP_GOMP_COMPAT
3582  if (__kmp_atomic_mode == 2) {
3583  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3584  } else
3585 #endif /* KMP_GOMP_COMPAT */
3586  __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3587 }
3588 
3589 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3590  void (*f)(void *, void *, void *)) {
3591  KMP_DEBUG_ASSERT(__kmp_init_serial);
3592 
3593 #ifdef KMP_GOMP_COMPAT
3594  if (__kmp_atomic_mode == 2) {
3595  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3596  } else
3597 #endif /* KMP_GOMP_COMPAT */
3598  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3599 
3600  (*f)(lhs, lhs, rhs);
3601 
3602 #ifdef KMP_GOMP_COMPAT
3603  if (__kmp_atomic_mode == 2) {
3604  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3605  } else
3606 #endif /* KMP_GOMP_COMPAT */
3607  __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3608 }
3609 
3610 // AC: same two routines as GOMP_atomic_start/end, but will be called by our
3611 // compiler; duplicated to avoid using third-party names in pure Intel code
3612 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3613 void __kmpc_atomic_start(void) {
3614  int gtid = __kmp_entry_gtid();
3615  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3616  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3617 }
3618 
3619 void __kmpc_atomic_end(void) {
3620  int gtid = __kmp_get_gtid();
3621  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3622  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3623 }
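// A hedged usage sketch (illustrative, not taken from this file): a compiler
// that cannot map an atomic construct onto one of the typed routines above can
// serialize the update through the global atomic lock instead, e.g.
//
//   __kmpc_atomic_start();
//   x = f(x); // arbitrary user update
//   __kmpc_atomic_end();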
3624 
3629 // end of file