1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp_atomic.h"
14 #include "kmp.h" // TRUE, asm routines prototypes
15 
16 typedef unsigned char uchar;
17 typedef unsigned short ushort;
18 
561 /*
562  * Global vars
563  */
564 
565 #ifndef KMP_GOMP_COMPAT
566 int __kmp_atomic_mode = 1; // Intel perf
567 #else
568 int __kmp_atomic_mode = 2; // GOMP compatibility
569 #endif /* KMP_GOMP_COMPAT */
570 
571 KMP_ALIGN(128)
572 
573 // Control access to all user coded atomics in Gnu compat mode
574 kmp_atomic_lock_t __kmp_atomic_lock;
575 // Control access to all user coded atomics for 1-byte fixed data types
576 kmp_atomic_lock_t __kmp_atomic_lock_1i;
577 // Control access to all user coded atomics for 2-byte fixed data types
578 kmp_atomic_lock_t __kmp_atomic_lock_2i;
579 // Control access to all user coded atomics for 4-byte fixed data types
580 kmp_atomic_lock_t __kmp_atomic_lock_4i;
581 // Control access to all user coded atomics for kmp_real32 data type
582 kmp_atomic_lock_t __kmp_atomic_lock_4r;
583 // Control access to all user coded atomics for 8-byte fixed data types
584 kmp_atomic_lock_t __kmp_atomic_lock_8i;
585 // Control access to all user coded atomics for kmp_real64 data type
586 kmp_atomic_lock_t __kmp_atomic_lock_8r;
587 // Control access to all user coded atomics for float complex data type
588 kmp_atomic_lock_t __kmp_atomic_lock_8c;
589 // Control access to all user coded atomics for long double data type
590 kmp_atomic_lock_t __kmp_atomic_lock_10r;
591 // Control access to all user coded atomics for _Quad data type
592 kmp_atomic_lock_t __kmp_atomic_lock_16r;
593 // Control access to all user coded atomics for double complex data type
594 kmp_atomic_lock_t __kmp_atomic_lock_16c;
595 // Control access to all user coded atomics for long double complex type
596 kmp_atomic_lock_t __kmp_atomic_lock_20c;
597 // Control access to all user coded atomics for _Quad complex data type
598 kmp_atomic_lock_t __kmp_atomic_lock_32c;
599 
600 /* 2007-03-02:
601  Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
602  on *_32 and *_32e. This is just a temporary workaround for the problem. It
603  seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
604  in assembler language. */
605 #define KMP_ATOMIC_VOLATILE volatile
606 
607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608 
609 static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610  return lhs.q + rhs.q;
611 }
612 static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613  return lhs.q - rhs.q;
614 }
615 static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616  return lhs.q * rhs.q;
617 }
618 static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619  return lhs.q / rhs.q;
620 }
621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622  return lhs.q < rhs.q;
623 }
624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625  return lhs.q > rhs.q;
626 }
627 
628 static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629  return lhs.q + rhs.q;
630 }
631 static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632  return lhs.q - rhs.q;
633 }
634 static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635  return lhs.q * rhs.q;
636 }
637 static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638  return lhs.q / rhs.q;
639 }
640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641  return lhs.q < rhs.q;
642 }
643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644  return lhs.q > rhs.q;
645 }
646 
647 static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
648  kmp_cmplx128_a4_t &rhs) {
649  return lhs.q + rhs.q;
650 }
651 static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
652  kmp_cmplx128_a4_t &rhs) {
653  return lhs.q - rhs.q;
654 }
655 static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
656  kmp_cmplx128_a4_t &rhs) {
657  return lhs.q * rhs.q;
658 }
659 static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
660  kmp_cmplx128_a4_t &rhs) {
661  return lhs.q / rhs.q;
662 }
663 
664 static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
665  kmp_cmplx128_a16_t &rhs) {
666  return lhs.q + rhs.q;
667 }
668 static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
669  kmp_cmplx128_a16_t &rhs) {
670  return lhs.q - rhs.q;
671 }
672 static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
673  kmp_cmplx128_a16_t &rhs) {
674  return lhs.q * rhs.q;
675 }
676 static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
677  kmp_cmplx128_a16_t &rhs) {
678  return lhs.q / rhs.q;
679 }
680 
681 #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
682 
683 // ATOMIC implementation routines -----------------------------------------
684 // One routine for each operation and operand type.
685 // All routine declarations look like
686 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
687 
688 #define KMP_CHECK_GTID \
689  if (gtid == KMP_GTID_UNKNOWN) { \
690  gtid = __kmp_entry_gtid(); \
691  } // check and get gtid when needed
692 
693 // Beginning of a definition (provides name, parameters, debug trace)
694 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
695 // fixed types)
696 // OP_ID - operation identifier (add, sub, mul, ...)
697 // TYPE - operands' type
698 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
699  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
700  TYPE *lhs, TYPE rhs) { \
701  KMP_DEBUG_ASSERT(__kmp_init_serial); \
702  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
703 
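// For illustration, ATOMIC_BEGIN(fixed4, add, kmp_int32, void) expands
// (roughly, modulo whitespace) to the opening of a routine like this; the
// body and the closing brace are supplied by the macros that follow:
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));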
704 // ------------------------------------------------------------------------
705 // Lock variables used for critical sections, one per operand size/type
706 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
707 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
708 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
709 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
710 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
711 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
712 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
713 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
714 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
715 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
716 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
717 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
718 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
719 
720 // ------------------------------------------------------------------------
721 // Operation on *lhs, rhs bound by critical section
722 // OP - operator (it's supposed to contain an assignment)
723 // LCK_ID - lock identifier
724 // Note: gtid is not checked here because it should already be valid:
725 // 1- and 2-byte operands expect a valid gtid; other sizes check it beforehand
726 #define OP_CRITICAL(OP, LCK_ID) \
727  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
728  \
729  (*lhs) OP(rhs); \
730  \
731  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
732 
733 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
734  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
735  (*lhs) = (TYPE)((*lhs)OP rhs); \
736  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
737 
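// For example, OP_UPDATE_CRITICAL(kmp_real32, +, 4r) expands (roughly) to a
// read-modify-write serialized on the per-type lock:
//
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4r, gtid);
//   (*lhs) = (kmp_real32)((*lhs) + rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_4r, gtid);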
738 // ------------------------------------------------------------------------
739 // For GNU compatibility, we may need to use a critical section,
740 // even though it is not required by the ISA.
741 //
742 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
743 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
744 // critical section. On Intel(R) 64, all atomic operations are done with fetch
745 // and add or compare and exchange. Therefore, the FLAG parameter to this
746 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions that
747 // require a critical section, which we predict will be implemented in the
748 // Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
749 //
750 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
751 // the FLAG parameter should always be 1. If we know that we will be using
752 // a critical section, then we want to make certain that we use the generic
753 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
754 // locks that are specialized based upon the size or type of the data.
755 //
756 // If FLAG is 0, then we are relying on dead code elimination by the build
757 // compiler to get rid of the useless block of code, and save a needless
758 // branch at runtime.
759 
760 #ifdef KMP_GOMP_COMPAT
761 #define OP_GOMP_CRITICAL(OP, FLAG) \
762  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
763  KMP_CHECK_GTID; \
764  OP_CRITICAL(OP, 0); \
765  return; \
766  }
767 
768 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \
769  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
770  KMP_CHECK_GTID; \
771  OP_UPDATE_CRITICAL(TYPE, OP, 0); \
772  return; \
773  }
774 #else
775 #define OP_GOMP_CRITICAL(OP, FLAG)
776 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
777 #endif /* KMP_GOMP_COMPAT */
778 
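// In a generated routine, OP_UPDATE_GOMP_CRITICAL(TYPE, OP, 1) therefore acts
// as an early out (assuming KMP_GOMP_COMPAT is defined): if libgomp
// compatibility mode is active, the update is done under the single global
// lock and the routine returns; otherwise execution falls through to the
// lock-free (or per-type lock) path that follows. Roughly:
//
//   if (__kmp_atomic_mode == 2) { // GOMP compatibility mode
//     KMP_CHECK_GTID;
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
//     (*lhs) = (TYPE)((*lhs)OP rhs);
//     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
//     return;
//   }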
779 #if KMP_MIC
780 #define KMP_DO_PAUSE _mm_delay_32(1)
781 #else
782 #define KMP_DO_PAUSE
783 #endif /* KMP_MIC */
784 
785 // ------------------------------------------------------------------------
786 // Operation on *lhs, rhs using "compare_and_store" routine
787 // TYPE - operands' type
788 // BITS - size in bits, used to distinguish low level calls
789 // OP - operator
790 #define OP_CMPXCHG(TYPE, BITS, OP) \
791  { \
792  TYPE old_value, new_value; \
793  old_value = *(TYPE volatile *)lhs; \
794  new_value = (TYPE)(old_value OP rhs); \
795  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
796  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
797  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
798  KMP_DO_PAUSE; \
799  \
800  old_value = *(TYPE volatile *)lhs; \
801  new_value = (TYPE)(old_value OP rhs); \
802  } \
803  }
804 
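// OP_CMPXCHG is the classic compare-and-swap retry loop. As a stand-alone
// sketch of the same technique (not the runtime's implementation) written
// with C++11 atomics:
//
//   #include <atomic>
//   template <typename T, typename BinOp>
//   void cas_update(std::atomic<T> &x, T rhs, BinOp op) {
//     T old_value = x.load();
//     T new_value = op(old_value, rhs);
//     // compare_exchange_weak refreshes old_value on failure, so the new
//     // value is recomputed from the freshly observed contents of x
//     while (!x.compare_exchange_weak(old_value, new_value))
//       new_value = op(old_value, rhs);
//   }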
805 #if USE_CMPXCHG_FIX
806 // 2007-06-25:
807 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
808 // and win_32e are affected (I verified the asm). Compiler ignores the volatile
809 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
810 // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
811 // the workaround.
812 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
813  { \
814  struct _sss { \
815  TYPE cmp; \
816  kmp_int##BITS *vvv; \
817  }; \
818  struct _sss old_value, new_value; \
819  old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
820  new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
821  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
822  new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
823  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
824  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
825  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
826  KMP_DO_PAUSE; \
827  \
828  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
829  new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
830  } \
831  }
832 // end of the first part of the workaround for C78287
833 #endif // USE_CMPXCHG_FIX
834 
835 #if KMP_OS_WINDOWS && KMP_ARCH_AARCH64
836 // Undo explicit type casts to get MSVC ARM64 to build. Uses the
837 // OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG.
838 #undef OP_CMPXCHG
839 #define OP_CMPXCHG(TYPE, BITS, OP) \
840  { \
841  struct _sss { \
842  TYPE cmp; \
843  kmp_int##BITS *vvv; \
844  }; \
845  struct _sss old_value, new_value; \
846  old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
847  new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
848  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
849  new_value.cmp = old_value.cmp OP rhs; \
850  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
851  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
852  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
853  KMP_DO_PAUSE; \
854  \
855  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
856  new_value.cmp = old_value.cmp OP rhs; \
857  } \
858  }
859 
860 #undef OP_UPDATE_CRITICAL
861 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
862  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
863  (*lhs) = (*lhs)OP rhs; \
864  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
865 
866 #endif // KMP_OS_WINDOWS && KMP_ARCH_AARCH64
867 
868 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
869 
870 // ------------------------------------------------------------------------
871 // X86 or X86_64: no alignment problems ====================================
872 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
873  GOMP_FLAG) \
874  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
875  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
876  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
877  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
878  }
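// For example, the ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, 0)
// invocation further below produces (roughly):
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//     KMP_TEST_THEN_ADD32(lhs, +rhs); // one locked fetch-and-add
//   }
//
// i.e. on x86 the fixed 4- and 8-byte add/sub cases reduce to a single
// fetch-and-add, with OP supplying the sign for subtraction.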
879 // -------------------------------------------------------------------------
880 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
881  GOMP_FLAG) \
882  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
883  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
884  OP_CMPXCHG(TYPE, BITS, OP) \
885  }
886 #if USE_CMPXCHG_FIX
887 // -------------------------------------------------------------------------
888 // workaround for C78287 (complex(kind=4) data type)
889 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
890  MASK, GOMP_FLAG) \
891  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
892  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
893  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
894  }
895 // end of the second part of the workaround for C78287
896 #endif // USE_CMPXCHG_FIX
897 
898 #else
899 // -------------------------------------------------------------------------
900 // Code for other architectures that don't handle unaligned accesses.
901 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
902  GOMP_FLAG) \
903  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
904  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
905  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
906  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
907  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
908  } else { \
909  KMP_CHECK_GTID; \
910  OP_UPDATE_CRITICAL(TYPE, OP, \
911  LCK_ID) /* unaligned address - use critical */ \
912  } \
913  }
914 // -------------------------------------------------------------------------
915 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
916  GOMP_FLAG) \
917  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
918  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
919  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
920  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
921  } else { \
922  KMP_CHECK_GTID; \
923  OP_UPDATE_CRITICAL(TYPE, OP, \
924  LCK_ID) /* unaligned address - use critical */ \
925  } \
926  }
927 #if USE_CMPXCHG_FIX
928 // -------------------------------------------------------------------------
929 // workaround for C78287 (complex(kind=4) data type)
930 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
931  MASK, GOMP_FLAG) \
932  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
933  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
934  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
935  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
936  } else { \
937  KMP_CHECK_GTID; \
938  OP_UPDATE_CRITICAL(TYPE, OP, \
939  LCK_ID) /* unaligned address - use critical */ \
940  } \
941  }
942 // end of the second part of the workaround for C78287
943 #endif // USE_CMPXCHG_FIX
944 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
945 
946 // Routines for ATOMIC 4-byte operands addition and subtraction
947 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
948  0) // __kmpc_atomic_fixed4_add
949 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
950  0) // __kmpc_atomic_fixed4_sub
951 
952 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
953  KMP_ARCH_X86) // __kmpc_atomic_float4_add
954 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
955  KMP_ARCH_X86) // __kmpc_atomic_float4_sub
956 
957 // Routines for ATOMIC 8-byte operands addition and subtraction
958 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
959  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
960 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
961  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
962 
963 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
964  KMP_ARCH_X86) // __kmpc_atomic_float8_add
965 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
966  KMP_ARCH_X86) // __kmpc_atomic_float8_sub
967 
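// Usage sketch (the location/thread arguments shown are illustrative): for a
// 4-byte float the compiler conceptually lowers
//
//   #pragma omp atomic
//   x += y; // float x
//
// into a call of the matching entry point, e.g.
//
//   __kmpc_atomic_float4_add(&loc, __kmpc_global_thread_num(&loc), &x, y);
//
// where loc is the ident_t describing the source location of the construct.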
968 // ------------------------------------------------------------------------
969 // Entry definitions for integer operands
970 // TYPE_ID - operands type and size (fixed4, float4)
971 // OP_ID - operation identifier (add, sub, mul, ...)
972 // TYPE - operand type
973 // BITS - size in bits, used to distinguish low level calls
974 // OP - operator (used in critical section)
975 // LCK_ID - lock identifier, used to possibly distinguish lock variable
976 // MASK - used for alignment check
977 
978 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
979 // ------------------------------------------------------------------------
980 // Routines for ATOMIC integer operands, other operators
981 // ------------------------------------------------------------------------
982 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
983 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
984  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
985 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
986  0) // __kmpc_atomic_fixed1_andb
987 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
988  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
989 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
990  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
991 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
992  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
993 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
994  0) // __kmpc_atomic_fixed1_orb
995 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
996  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
997 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
998  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
999 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
1000  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
1001 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
1002  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
1003 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
1004  0) // __kmpc_atomic_fixed1_xor
1005 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
1006  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
1007 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
1008  0) // __kmpc_atomic_fixed2_andb
1009 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
1010  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
1011 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
1012  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
1013 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
1014  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
1015 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
1016  0) // __kmpc_atomic_fixed2_orb
1017 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
1018  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
1019 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
1020  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
1021 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
1022  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
1023 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
1024  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
1025 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
1026  0) // __kmpc_atomic_fixed2_xor
1027 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
1028  0) // __kmpc_atomic_fixed4_andb
1029 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
1030  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
1031 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
1032  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
1033 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
1034  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
1035 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
1036  0) // __kmpc_atomic_fixed4_orb
1037 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
1038  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
1039 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
1040  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
1041 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
1042  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
1043 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
1044  0) // __kmpc_atomic_fixed4_xor
1045 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
1046  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
1047 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
1048  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
1049 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
1050  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
1051 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1052  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1053 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1054  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1055 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1056  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1057 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1058  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1059 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1060  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1061 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1062  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1063 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1064  KMP_ARCH_X86) // __kmpc_atomic_float4_div
1065 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1066  KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1067 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1068  KMP_ARCH_X86) // __kmpc_atomic_float8_div
1069 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1070  KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1071 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
1072 
1073 /* ------------------------------------------------------------------------ */
1074 /* Routines for C/C++ Reduction operators && and || */
1075 
1076 // ------------------------------------------------------------------------
1077 // Need separate macros for && and || because they have no compound-assignment form
1078 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1079 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1080  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1081  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1082  OP_CRITICAL(= *lhs OP, LCK_ID) \
1083  }
1084 
1085 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1086 
1087 // ------------------------------------------------------------------------
1088 // X86 or X86_64: no alignment problems ===================================
1089 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1090  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1091  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1092  OP_CMPXCHG(TYPE, BITS, OP) \
1093  }
1094 
1095 #else
1096 // ------------------------------------------------------------------------
1097 // Code for other architectures that don't handle unaligned accesses.
1098 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1099  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1100  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1101  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1102  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1103  } else { \
1104  KMP_CHECK_GTID; \
1105  OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
1106  } \
1107  }
1108 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1109 
1110 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1111  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1112 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1113  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1114 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1115  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1116 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1117  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1118 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1119  0) // __kmpc_atomic_fixed4_andl
1120 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1121  0) // __kmpc_atomic_fixed4_orl
1122 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1123  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1124 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1125  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1126 
1127 /* ------------------------------------------------------------------------- */
1128 /* Routines for Fortran operators that have no counterpart in C: */
1129 /* MAX, MIN, .EQV., .NEQV. */
1130 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
1131 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
1132 
1133 // -------------------------------------------------------------------------
1134 // MIN and MAX need separate macros
1135 // OP - comparison operator used to check whether any action is needed
1136 #define MIN_MAX_CRITSECT(OP, LCK_ID) \
1137  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1138  \
1139  if (*lhs OP rhs) { /* still need actions? */ \
1140  *lhs = rhs; \
1141  } \
1142  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1143 
1144 // -------------------------------------------------------------------------
1145 #ifdef KMP_GOMP_COMPAT
1146 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
1147  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1148  KMP_CHECK_GTID; \
1149  MIN_MAX_CRITSECT(OP, 0); \
1150  return; \
1151  }
1152 #else
1153 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1154 #endif /* KMP_GOMP_COMPAT */
1155 
1156 // -------------------------------------------------------------------------
1157 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1158  { \
1159  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1160  TYPE old_value; \
1161  temp_val = *lhs; \
1162  old_value = temp_val; \
1163  while (old_value OP rhs && /* still need actions? */ \
1164  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
1165  (kmp_int##BITS *)lhs, \
1166  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1167  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
1168  temp_val = *lhs; \
1169  old_value = temp_val; \
1170  } \
1171  }
1172 
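// For max (OP is <) the loop above retries only while old_value OP rhs still
// holds, i.e. while the stored value is smaller than rhs; if another thread
// installs a value >= rhs first, nothing is written and the loop exits. A
// stand-alone sketch of the same idea with C++11 atomics:
//
//   #include <atomic>
//   template <typename T> void atomic_max(std::atomic<T> &x, T rhs) {
//     T old_value = x.load();
//     while (old_value < rhs && // still need the update?
//            !x.compare_exchange_weak(old_value, rhs)) {
//       // old_value was refreshed on failure; the condition is re-checked
//     }
//   }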
1173 // -------------------------------------------------------------------------
1174 // 1-byte, 2-byte operands - use critical section
1175 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1176  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1177  if (*lhs OP rhs) { /* need actions? */ \
1178  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1179  MIN_MAX_CRITSECT(OP, LCK_ID) \
1180  } \
1181  }
1182 
1183 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1184 
1185 // -------------------------------------------------------------------------
1186 // X86 or X86_64: no alignment problems ====================================
1187 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1188  GOMP_FLAG) \
1189  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1190  if (*lhs OP rhs) { \
1191  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1192  MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1193  } \
1194  }
1195 
1196 #else
1197 // -------------------------------------------------------------------------
1198 // Code for other architectures that don't handle unaligned accesses.
1199 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1200  GOMP_FLAG) \
1201  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1202  if (*lhs OP rhs) { \
1203  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1204  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1205  MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1206  } else { \
1207  KMP_CHECK_GTID; \
1208  MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
1209  } \
1210  } \
1211  }
1212 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1213 
1214 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1215  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1216 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1217  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1218 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1219  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1220 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1221  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1222 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1223  0) // __kmpc_atomic_fixed4_max
1224 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1225  0) // __kmpc_atomic_fixed4_min
1226 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1227  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1228 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1229  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1230 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1231  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1232 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1233  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1234 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1235  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1236 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1237  KMP_ARCH_X86) // __kmpc_atomic_float8_min
1238 MIN_MAX_CRITICAL(float10, max, long double, <, 10r,
1239  1) // __kmpc_atomic_float10_max
1240 MIN_MAX_CRITICAL(float10, min, long double, >, 10r,
1241  1) // __kmpc_atomic_float10_min
1242 #if KMP_HAVE_QUAD
1243 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1244  1) // __kmpc_atomic_float16_max
1245 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1246  1) // __kmpc_atomic_float16_min
1247 #if (KMP_ARCH_X86)
1248 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1249  1) // __kmpc_atomic_float16_max_a16
1250 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1251  1) // __kmpc_atomic_float16_min_a16
1252 #endif // (KMP_ARCH_X86)
1253 #endif // KMP_HAVE_QUAD
1254 // ------------------------------------------------------------------------
1255 // Need separate macros for .EQV. because of the need for complement (~)
1256 // OP is ignored for critical sections; ^=~ is used instead
1257 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1258  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1259  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1260  OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \
1261  }
1262 
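// The .EQV. macros rely on the identity a EQV b == ~(a ^ b) == a ^ (~b),
// which is why the operator passed in is "^~" (or "^= (TYPE) ~" for the
// critical/GOMP paths): the generated update is *lhs = *lhs ^ ~rhs. A quick
// stand-alone check of the identity:
//
//   #include <cassert>
//   #include <cstdint>
//   int main() {
//     uint8_t a = 0xA5, b = 0x3C;
//     assert((uint8_t)~(a ^ b) == (uint8_t)(a ^ (uint8_t)~b));
//     return 0;
//   }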
1263 // ------------------------------------------------------------------------
1264 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1265 // ------------------------------------------------------------------------
1266 // X86 or X86_64: no alignment problems ===================================
1267 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1268  GOMP_FLAG) \
1269  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1270  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1271  OP_CMPXCHG(TYPE, BITS, OP) \
1272  }
1273 // ------------------------------------------------------------------------
1274 #else
1275 // ------------------------------------------------------------------------
1276 // Code for other architectures that don't handle unaligned accesses.
1277 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1278  GOMP_FLAG) \
1279  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1280  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \
1281  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1282  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1283  } else { \
1284  KMP_CHECK_GTID; \
1285  OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \
1286  } \
1287  }
1288 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1289 
1290 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1291  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1292 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1293  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1294 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1295  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1296 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1297  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1298 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1299  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1300 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1301  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1302 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1303  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1304 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1305  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1306 
1307 // ------------------------------------------------------------------------
1308 // Routines for Extended types: long double, _Quad, complex flavours (use
1309 // critical section)
1310 // TYPE_ID, OP_ID, TYPE - detailed above
1311 // OP - operator
1312 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1313 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1314  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1315  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1316  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1317  }
1318 
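// For example, ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) below
// produces (roughly, assuming KMP_GOMP_COMPAT is defined; otherwise the first
// block disappears):
//
//   void __kmpc_atomic_cmplx8_add(ident_t *id_ref, int gtid,
//                                 kmp_cmplx64 *lhs, kmp_cmplx64 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_cmplx8_add: T#%d\n", gtid));
//     if (__kmp_atomic_mode == 2) { // GOMP compat: global lock, then return
//       KMP_CHECK_GTID;
//       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
//       (*lhs) = (kmp_cmplx64)((*lhs) + rhs);
//       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
//       return;
//     }
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
//     (*lhs) = (kmp_cmplx64)((*lhs) + rhs);
//     __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
//   }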
1319 /* ------------------------------------------------------------------------- */
1320 // routines for long double type
1321 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1322  1) // __kmpc_atomic_float10_add
1323 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1324  1) // __kmpc_atomic_float10_sub
1325 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1326  1) // __kmpc_atomic_float10_mul
1327 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1328  1) // __kmpc_atomic_float10_div
1329 #if KMP_HAVE_QUAD
1330 // routines for _Quad type
1331 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1332  1) // __kmpc_atomic_float16_add
1333 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1334  1) // __kmpc_atomic_float16_sub
1335 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1336  1) // __kmpc_atomic_float16_mul
1337 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1338  1) // __kmpc_atomic_float16_div
1339 #if (KMP_ARCH_X86)
1340 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1341  1) // __kmpc_atomic_float16_add_a16
1342 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1343  1) // __kmpc_atomic_float16_sub_a16
1344 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1345  1) // __kmpc_atomic_float16_mul_a16
1346 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1347  1) // __kmpc_atomic_float16_div_a16
1348 #endif // (KMP_ARCH_X86)
1349 #endif // KMP_HAVE_QUAD
1350 // routines for complex types
1351 
1352 #if USE_CMPXCHG_FIX
1353 // workaround for C78287 (complex(kind=4) data type)
1354 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1355  1) // __kmpc_atomic_cmplx4_add
1356 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1357  1) // __kmpc_atomic_cmplx4_sub
1358 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1359  1) // __kmpc_atomic_cmplx4_mul
1360 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1361  1) // __kmpc_atomic_cmplx4_div
1362 // end of the workaround for C78287
1363 #else
1364 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1365 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1366 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1367 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1368 #endif // USE_CMPXCHG_FIX
1369 
1370 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1371 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1372 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1373 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1374 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1375  1) // __kmpc_atomic_cmplx10_add
1376 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1377  1) // __kmpc_atomic_cmplx10_sub
1378 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1379  1) // __kmpc_atomic_cmplx10_mul
1380 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1381  1) // __kmpc_atomic_cmplx10_div
1382 #if KMP_HAVE_QUAD
1383 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1384  1) // __kmpc_atomic_cmplx16_add
1385 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1386  1) // __kmpc_atomic_cmplx16_sub
1387 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1388  1) // __kmpc_atomic_cmplx16_mul
1389 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1390  1) // __kmpc_atomic_cmplx16_div
1391 #if (KMP_ARCH_X86)
1392 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1393  1) // __kmpc_atomic_cmplx16_add_a16
1394 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1395  1) // __kmpc_atomic_cmplx16_sub_a16
1396 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1397  1) // __kmpc_atomic_cmplx16_mul_a16
1398 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1399  1) // __kmpc_atomic_cmplx16_div_a16
1400 #endif // (KMP_ARCH_X86)
1401 #endif // KMP_HAVE_QUAD
1402 
1403 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1404 // Supported only on IA-32 architecture and Intel(R) 64
1405 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1406 
1407 // ------------------------------------------------------------------------
1408 // Operation on *lhs, rhs bound by critical section
1409 // OP - operator (it's supposed to contain an assignment)
1410 // LCK_ID - lock identifier
1411 // Note: gtid is not checked here because it should already be valid:
1412 // 1- and 2-byte operands expect a valid gtid; other sizes check it beforehand
1413 #define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1414  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1415  \
1416  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
1417  \
1418  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1419 
1420 #ifdef KMP_GOMP_COMPAT
1421 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \
1422  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1423  KMP_CHECK_GTID; \
1424  OP_CRITICAL_REV(TYPE, OP, 0); \
1425  return; \
1426  }
1427 
1428 #else
1429 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
1430 #endif /* KMP_GOMP_COMPAT */
1431 
1432 // Beginning of a definition (provides name, parameters, debug trace)
1433 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
1434 // fixed types)
1435 // OP_ID - operation identifier (add, sub, mul, ...)
1436 // TYPE - operands' type
1437 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1438  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
1439  TYPE *lhs, TYPE rhs) { \
1440  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1441  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1442 
1443 // ------------------------------------------------------------------------
1444 // Operation on *lhs, rhs using "compare_and_store" routine
1445 // TYPE - operands' type
1446 // BITS - size in bits, used to distinguish low level calls
1447 // OP - operator
1448 // Note: temp_val introduced in order to force the compiler to read
1449 // *lhs only once (w/o it the compiler reads *lhs twice)
1450 #define OP_CMPXCHG_REV(TYPE, BITS, OP) \
1451  { \
1452  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1453  TYPE old_value, new_value; \
1454  temp_val = *lhs; \
1455  old_value = temp_val; \
1456  new_value = (TYPE)(rhs OP old_value); \
1457  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
1458  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1459  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
1460  KMP_DO_PAUSE; \
1461  \
1462  temp_val = *lhs; \
1463  old_value = temp_val; \
1464  new_value = (TYPE)(rhs OP old_value); \
1465  } \
1466  }
1467 
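// The _rev entry points implement x = expr binop x (operands reversed), so
// new_value is computed as rhs OP old_value rather than old_value OP rhs.
// Conceptually, for
//
//   #pragma omp atomic
//   x = y / x; // double x
//
// the compiler calls __kmpc_atomic_float8_div_rev(&loc, gtid, &x, y), and the
// loop above installs (kmp_real64)(rhs / old_value).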
1468 // -------------------------------------------------------------------------
1469 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
1470  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1471  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1472  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1473  }
1474 
1475 // ------------------------------------------------------------------------
1476 // Entry definitions for integer operands
1477 // TYPE_ID - operands type and size (fixed4, float4)
1478 // OP_ID - operation identifier (add, sub, mul, ...)
1479 // TYPE - operand type
1480 // BITS - size in bits, used to distinguish low level calls
1481 // OP - operator (used in critical section)
1482 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1483 
1484 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1485 // ------------------------------------------------------------------------
1486 // Routines for ATOMIC integer operands, other operators
1487 // ------------------------------------------------------------------------
1488 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1489 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1490  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1491 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1492  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1493 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1494  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1495 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1496  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1497 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1498  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1499 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1500  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1501 
1502 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1503  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1504 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1505  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1506 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1507  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1508 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1509  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1510 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1511  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1512 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1513  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1514 
1515 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1516  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1517 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1518  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1519 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1520  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1521 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1522  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1523 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1524  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1525 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1526  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1527 
1528 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1529  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1530 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1531  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1532 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1533  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1534 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1535  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1536 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1537  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1538 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1539  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1540 
1541 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1542  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1543 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1544  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1545 
1546 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1547  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1548 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1549  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1550 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1551 
1552 // ------------------------------------------------------------------------
1553 // Routines for Extended types: long double, _Quad, complex flavours (use
1554 // critical section)
1555 // TYPE_ID, OP_ID, TYPE - detailed above
1556 // OP - operator
1557 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1558 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1559  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1560  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1561  OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1562  }
1563 
1564 /* ------------------------------------------------------------------------- */
1565 // routines for long double type
1566 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1567  1) // __kmpc_atomic_float10_sub_rev
1568 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1569  1) // __kmpc_atomic_float10_div_rev
1570 #if KMP_HAVE_QUAD
1571 // routines for _Quad type
1572 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1573  1) // __kmpc_atomic_float16_sub_rev
1574 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1575  1) // __kmpc_atomic_float16_div_rev
1576 #if (KMP_ARCH_X86)
1577 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1578  1) // __kmpc_atomic_float16_sub_a16_rev
1579 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1580  1) // __kmpc_atomic_float16_div_a16_rev
1581 #endif // KMP_ARCH_X86
1582 #endif // KMP_HAVE_QUAD
1583 
1584 // routines for complex types
1585 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1586  1) // __kmpc_atomic_cmplx4_sub_rev
1587 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1588  1) // __kmpc_atomic_cmplx4_div_rev
1589 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1590  1) // __kmpc_atomic_cmplx8_sub_rev
1591 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1592  1) // __kmpc_atomic_cmplx8_div_rev
1593 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1594  1) // __kmpc_atomic_cmplx10_sub_rev
1595 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1596  1) // __kmpc_atomic_cmplx10_div_rev
1597 #if KMP_HAVE_QUAD
1598 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1599  1) // __kmpc_atomic_cmplx16_sub_rev
1600 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1601  1) // __kmpc_atomic_cmplx16_div_rev
1602 #if (KMP_ARCH_X86)
1603 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1604  1) // __kmpc_atomic_cmplx16_sub_a16_rev
1605 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1606  1) // __kmpc_atomic_cmplx16_div_a16_rev
1607 #endif // KMP_ARCH_X86
1608 #endif // KMP_HAVE_QUAD
1609 
1610 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1611 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1612 
1613 /* ------------------------------------------------------------------------ */
1614 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1615 /* Note: in order to reduce the total number of type combinations, */
1616 /* it is assumed that the compiler converts RHS to the longest floating */
1617 /* type, that is _Quad, before calling any of these routines. */
1618 /* The conversion to _Quad is done by the compiler during the calculation, */
1619 /* and the conversion back to TYPE happens before the assignment, like: */
1620 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1621 /* A performance penalty is expected because _Quad arithmetic is emulated in SW */
1622 /* ------------------------------------------------------------------------ */
1623 
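// For example, given
//
//   float x; _Quad q;
//   #pragma omp atomic
//   x += q;
//
// the compiler is expected to call __kmpc_atomic_float4_add_fp(&loc, gtid,
// &x, q); inside that routine the update is performed as
// *lhs = (kmp_real32)((_Quad)(*lhs) + rhs), matching the pattern above.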
1624 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1625  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
1626  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
1627  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1628  KA_TRACE(100, \
1629  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
1630  gtid));
1631 
1632 // -------------------------------------------------------------------------
1633 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
1634  GOMP_FLAG) \
1635  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1636  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1637  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1638  }
1639 
1640 // -------------------------------------------------------------------------
1641 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1642 // -------------------------------------------------------------------------
1643 // X86 or X86_64: no alignment problems ====================================
1644 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1645  LCK_ID, MASK, GOMP_FLAG) \
1646  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1647  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1648  OP_CMPXCHG(TYPE, BITS, OP) \
1649  }
1650 // -------------------------------------------------------------------------
1651 #else
1652 // ------------------------------------------------------------------------
1653 // Code for other architectures that don't handle unaligned accesses.
1654 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1655  LCK_ID, MASK, GOMP_FLAG) \
1656  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1657  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1658  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1659  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1660  } else { \
1661  KMP_CHECK_GTID; \
1662  OP_UPDATE_CRITICAL(TYPE, OP, \
1663  LCK_ID) /* unaligned address - use critical */ \
1664  } \
1665  }
1666 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1667 
1668 // -------------------------------------------------------------------------
1669 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1670 // -------------------------------------------------------------------------
1671 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
1672  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
1673  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1674  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1675  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1676  }
1677 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
1678  LCK_ID, GOMP_FLAG) \
1679  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1680  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1681  OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1682  }
1683 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1684 
1685 // RHS=float8
1686 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1687  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1688 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1689  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1690 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1691  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1692 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1693  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1694 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1695  0) // __kmpc_atomic_fixed4_mul_float8
1696 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1697  0) // __kmpc_atomic_fixed4_div_float8
1698 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1699  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1700 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1701  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1702 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1703  KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1704 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1705  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1706 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1707  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1708 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1709  KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1710 
1711 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1712 // use them)
1713 #if KMP_HAVE_QUAD
1714 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1715  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1716 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1717  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1718 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1719  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1720 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1721  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1722 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1723  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1724 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1725  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1726 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1727  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1728 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1729  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1730 
1731 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1732  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1733 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1734  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1735 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1736  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1737 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1738  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1739 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1740  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1741 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1742  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1743 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1744  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1745 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1746  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1747 
1748 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1749  0) // __kmpc_atomic_fixed4_add_fp
1750 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1751  0) // __kmpc_atomic_fixed4u_add_fp
1752 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1753  0) // __kmpc_atomic_fixed4_sub_fp
1754 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1755  0) // __kmpc_atomic_fixed4u_sub_fp
1756 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1757  0) // __kmpc_atomic_fixed4_mul_fp
1758 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1759  0) // __kmpc_atomic_fixed4u_mul_fp
1760 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1761  0) // __kmpc_atomic_fixed4_div_fp
1762 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1763  0) // __kmpc_atomic_fixed4u_div_fp
1764 
1765 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1766  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1767 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1768  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1769 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1770  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1771 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1772  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1773 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1774  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1775 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1776  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1777 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1778  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1779 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1780  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1781 
1782 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1783  KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1784 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1785  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1786 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1787  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1788 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1789  KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1790 
1791 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1792  KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1793 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1794  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1795 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1796  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1797 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1798  KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1799 
1800 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1801  1) // __kmpc_atomic_float10_add_fp
1802 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1803  1) // __kmpc_atomic_float10_sub_fp
1804 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1805  1) // __kmpc_atomic_float10_mul_fp
1806 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1807  1) // __kmpc_atomic_float10_div_fp
1808 
1809 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1810 // Reverse operations
1811 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1812  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1813 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1814  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1815 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1816  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1817 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1818  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1819 
1820 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1821  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1822 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1823  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1824 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1825  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1826 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1827  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1828 
1829 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1830  0) // __kmpc_atomic_fixed4_sub_rev_fp
1831 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1832  0) // __kmpc_atomic_fixed4u_sub_rev_fp
1833 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1834  0) // __kmpc_atomic_fixed4_div_rev_fp
1835 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1836  0) // __kmpc_atomic_fixed4u_div_rev_fp
1837 
1838 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1839  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1840 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1841  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1842 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1843  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1844 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1845  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1846 
1847 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1848  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1849 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1850  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1851 
1852 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1853  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1854 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1855  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1856 
1857 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1858  1) // __kmpc_atomic_float10_sub_rev_fp
1859 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1860  1) // __kmpc_atomic_float10_div_rev_fp
1861 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1862 
1863 #endif // KMP_HAVE_QUAD
1864 
1865 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1866 // ------------------------------------------------------------------------
1867 // X86 or X86_64: no alignment problems ====================================
1868 #if USE_CMPXCHG_FIX
1869 // workaround for C78287 (complex(kind=4) data type)
1870 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1871  LCK_ID, MASK, GOMP_FLAG) \
1872  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1873  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1874  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1875  }
1876 // end of the second part of the workaround for C78287
1877 #else
1878 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1879  LCK_ID, MASK, GOMP_FLAG) \
1880  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1881  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1882  OP_CMPXCHG(TYPE, BITS, OP) \
1883  }
1884 #endif // USE_CMPXCHG_FIX
1885 #else
1886 // ------------------------------------------------------------------------
1887 // Code for other architectures that don't handle unaligned accesses.
1888 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1889  LCK_ID, MASK, GOMP_FLAG) \
1890  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1891  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1892  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1893  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1894  } else { \
1895  KMP_CHECK_GTID; \
1896  OP_UPDATE_CRITICAL(TYPE, OP, \
1897  LCK_ID) /* unaligned address - use critical */ \
1898  } \
1899  }
1900 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1901 
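// Illustrative sketch (comments only, not compiled): on targets without the
// "no alignment problems" guarantee, the cmplx4 entries below roughly do
//
//   if (((kmp_uintptr_t)lhs & 0x7) == 0) {
//     // 8-byte aligned: update the whole complex via a 64-bit
//     // compare-and-store loop (OP_CMPXCHG)
//   } else {
//     // unaligned: fall back to the critical section for LCK_ID (8c),
//     // presumably __kmp_acquire/release_atomic_lock around *lhs = *lhs OP rhs
//   }
//
// MASK is the alignment mask (7 here) and LCK_ID names the fallback lock.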
1902 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1903  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1904 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1905  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1906 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1907  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1908 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1909  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1910 
1911 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1912 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1913 
1914 // ------------------------------------------------------------------------
1915 // Atomic READ routines
1916 
1917 // ------------------------------------------------------------------------
1918 // Beginning of a definition (provides name, parameters, debug trace)
1919 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1920 // fixed)
1921 // OP_ID - operation identifier (add, sub, mul, ...)
1922 // TYPE - operands' type
1923 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1924  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1925  TYPE *loc) { \
1926  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1927  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1928 
1929 // ------------------------------------------------------------------------
1930 // Atomic read of *loc using "compare_and_store_ret" routine
1931 // TYPE - operands' type
1932 // BITS - size in bits, used to distinguish low level calls
1933 // OP - operator
1934 // Note: temp_val introduced in order to force the compiler to read
1935 // *loc only once (w/o it the compiler reads *loc twice)
1936 // TODO: check if it is still necessary
1937 // Return old value regardless of the result of the "compare & swap" operation
1938 #define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1939  { \
1940  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1941  union f_i_union { \
1942  TYPE f_val; \
1943  kmp_int##BITS i_val; \
1944  }; \
1945  union f_i_union old_value; \
1946  temp_val = *loc; \
1947  old_value.f_val = temp_val; \
1948  old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1949  (kmp_int##BITS *)loc, \
1950  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1951  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1952  new_value = old_value.f_val; \
1953  return new_value; \
1954  }
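// Illustrative sketch (comments only, not compiled): instantiated for
// __kmpc_atomic_float8_rd below, the macro above reads a kmp_real64
// atomically roughly as
//
//   union { kmp_real64 f_val; kmp_int64 i_val; } old_value;
//   old_value.f_val = *loc;
//   // CAS *loc against its own snapshot; the returned integer image is the
//   // value actually in memory, obtained in one atomic operation.
//   old_value.i_val = KMP_COMPARE_AND_STORE_RET64(
//       (kmp_int64 *)loc, old_value.i_val, old_value.i_val);
//   return old_value.f_val;
//
// (GOMP compatibility branch omitted for brevity.)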
1955 
1956 // -------------------------------------------------------------------------
1957 // Read of *loc bound by critical section
1958 // OP - operator (it's supposed to contain an assignment)
1959 // LCK_ID - lock identifier
1960 // Note: don't check gtid as it should always be valid
1961 // 1-, 2-byte operands - valid gtid expected; others - checked before this macro
1962 #define OP_CRITICAL_READ(OP, LCK_ID) \
1963  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1964  \
1965  new_value = (*loc); \
1966  \
1967  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1968 
1969 // -------------------------------------------------------------------------
1970 #ifdef KMP_GOMP_COMPAT
1971 #define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1972  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1973  KMP_CHECK_GTID; \
1974  OP_CRITICAL_READ(OP, 0); \
1975  return new_value; \
1976  }
1977 #else
1978 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1979 #endif /* KMP_GOMP_COMPAT */
1980 
1981 // -------------------------------------------------------------------------
1982 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1983  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1984  TYPE new_value; \
1985  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1986  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1987  return new_value; \
1988  }
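// Illustrative sketch (comments only, not compiled): e.g.
// __kmpc_atomic_fixed4_rd defined below reduces roughly to
//
//   kmp_int32 __kmpc_atomic_fixed4_rd(ident_t *id_ref, int gtid,
//                                     kmp_int32 *loc) {
//     return KMP_TEST_THEN_ADD32(loc, +0); // atomic fetch-and-add of zero
//   }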
1989 // -------------------------------------------------------------------------
1990 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1991  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1992  TYPE new_value; \
1993  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1994  OP_CMPXCHG_READ(TYPE, BITS, OP) \
1995  }
1996 // ------------------------------------------------------------------------
1997 // Routines for Extended types: long double, _Quad, complex flavours (use
1998 // critical section)
1999 // TYPE_ID, OP_ID, TYPE - detailed above
2000 // OP - operator
2001 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2002 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2003  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
2004  TYPE new_value; \
2005  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
2006  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
2007  return new_value; \
2008  }
2009 
2010 // ------------------------------------------------------------------------
2011 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
2012 // value doesn't work.
2013 // Let's return the read value through the additional parameter.
2014 #if (KMP_OS_WINDOWS)
2015 
2016 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
2017  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2018  \
2019  (*out) = (*loc); \
2020  \
2021  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2022 // ------------------------------------------------------------------------
2023 #ifdef KMP_GOMP_COMPAT
2024 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
2025  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2026  KMP_CHECK_GTID; \
2027  OP_CRITICAL_READ_WRK(OP, 0); \
2028  }
2029 #else
2030 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
2031 #endif /* KMP_GOMP_COMPAT */
2032 // ------------------------------------------------------------------------
2033 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2034  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
2035  TYPE *loc) { \
2036  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2037  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2038 
2039 // ------------------------------------------------------------------------
2040 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2041  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2042  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
2043  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
2044  }
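// Illustrative sketch (comments only, not compiled): on Windows the cmplx4
// read entry generated below therefore returns void and passes the value
// back through an extra 'out' parameter, roughly
//
//   void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid,
//                                kmp_cmplx32 *loc) {
//     __kmp_acquire_atomic_lock(&ATOMIC_LOCK8c, gtid);
//     (*out) = (*loc);
//     __kmp_release_atomic_lock(&ATOMIC_LOCK8c, gtid);
//   }
//
// (GOMP compatibility branch omitted for brevity.)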
2045 
2046 #endif // KMP_OS_WINDOWS
2047 
2048 // ------------------------------------------------------------------------
2049 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2050 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2051 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2052  KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2053 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2054  KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2055 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2056  KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2057 
2058 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2059 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2060  KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2061 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2062  KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2063 
2064 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2065  1) // __kmpc_atomic_float10_rd
2066 #if KMP_HAVE_QUAD
2067 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2068  1) // __kmpc_atomic_float16_rd
2069 #endif // KMP_HAVE_QUAD
2070 
2071 // Fix for CQ220361 on Windows* OS
2072 #if (KMP_OS_WINDOWS)
2073 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2074  1) // __kmpc_atomic_cmplx4_rd
2075 #else
2076 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2077  1) // __kmpc_atomic_cmplx4_rd
2078 #endif // (KMP_OS_WINDOWS)
2079 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2080  1) // __kmpc_atomic_cmplx8_rd
2081 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2082  1) // __kmpc_atomic_cmplx10_rd
2083 #if KMP_HAVE_QUAD
2084 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2085  1) // __kmpc_atomic_cmplx16_rd
2086 #if (KMP_ARCH_X86)
2087 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2088  1) // __kmpc_atomic_float16_a16_rd
2089 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2090  1) // __kmpc_atomic_cmplx16_a16_rd
2091 #endif // (KMP_ARCH_X86)
2092 #endif // KMP_HAVE_QUAD
2093 
2094 // ------------------------------------------------------------------------
2095 // Atomic WRITE routines
2096 
2097 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2098  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2099  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2100  KMP_XCHG_FIXED##BITS(lhs, rhs); \
2101  }
2102 // ------------------------------------------------------------------------
2103 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2104  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2105  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2106  KMP_XCHG_REAL##BITS(lhs, rhs); \
2107  }
2108 
2109 // ------------------------------------------------------------------------
2110 // Operation on *lhs, rhs using "compare_and_store" routine
2111 // TYPE - operands' type
2112 // BITS - size in bits, used to distinguish low level calls
2113 // OP - operator
2114 // Note: temp_val introduced in order to force the compiler to read
2115 // *lhs only once (w/o it the compiler reads *lhs twice)
2116 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2117  { \
2118  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2119  TYPE old_value, new_value; \
2120  temp_val = *lhs; \
2121  old_value = temp_val; \
2122  new_value = rhs; \
2123  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2124  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2125  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2126  temp_val = *lhs; \
2127  old_value = temp_val; \
2128  new_value = rhs; \
2129  } \
2130  }
2131 
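// Illustrative sketch (comments only, not compiled): the loop above provides
// an atomic store where a single XCHG is not used (e.g. 8-byte writes when
// KMP_ARCH_X86, i.e. 32-bit x86 - see the fixed8/float8 entries below); it
// keeps re-reading *lhs until the compare-and-store of rhs over the observed
// old value succeeds:
//
//   kmp_real64 old_value = *lhs, new_value = rhs;
//   while (!KMP_COMPARE_AND_STORE_ACQ64(
//       (kmp_int64 *)lhs, *VOLATILE_CAST(kmp_int64 *) & old_value,
//       *VOLATILE_CAST(kmp_int64 *) & new_value)) {
//     old_value = *lhs; // lost the race - retry with the fresh value
//   }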
2132 // -------------------------------------------------------------------------
2133 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2134  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2135  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2136  OP_CMPXCHG_WR(TYPE, BITS, OP) \
2137  }
2138 
2139 // ------------------------------------------------------------------------
2140 // Routines for Extended types: long double, _Quad, complex flavours (use
2141 // critical section)
2142 // TYPE_ID, OP_ID, TYPE - detailed above
2143 // OP - operator
2144 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2145 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2146  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2147  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2148  OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2149  }
2150 // -------------------------------------------------------------------------
2151 
2152 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2153  KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2154 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2155  KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2156 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2157  KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2158 #if (KMP_ARCH_X86)
2159 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2160  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2161 #else
2162 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2163  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2164 #endif // (KMP_ARCH_X86)
2165 
2166 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2167  KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2168 #if (KMP_ARCH_X86)
2169 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2170  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2171 #else
2172 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2173  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2174 #endif // (KMP_ARCH_X86)
2175 
2176 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2177  1) // __kmpc_atomic_float10_wr
2178 #if KMP_HAVE_QUAD
2179 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2180  1) // __kmpc_atomic_float16_wr
2181 #endif // KMP_HAVE_QUAD
2182 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2183 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2184  1) // __kmpc_atomic_cmplx8_wr
2185 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2186  1) // __kmpc_atomic_cmplx10_wr
2187 #if KMP_HAVE_QUAD
2188 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2189  1) // __kmpc_atomic_cmplx16_wr
2190 #if (KMP_ARCH_X86)
2191 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2192  1) // __kmpc_atomic_float16_a16_wr
2193 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2194  1) // __kmpc_atomic_cmplx16_a16_wr
2195 #endif // (KMP_ARCH_X86)
2196 #endif // KMP_HAVE_QUAD
2197 
2198 // ------------------------------------------------------------------------
2199 // Atomic CAPTURE routines
2200 
2201 // Beginning of a definition (provides name, parameters, debug trace)
2202 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2203 // fixed)
2204 // OP_ID - operation identifier (add, sub, mul, ...)
2205 // TYPE - operands' type
2206 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2207  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2208  TYPE *lhs, TYPE rhs, int flag) { \
2209  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2210  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2211 
2212 // -------------------------------------------------------------------------
2213 // Operation on *lhs, rhs bound by critical section
2214 // OP - operator (it's supposed to contain an assignment)
2215 // LCK_ID - lock identifier
2216 // Note: don't check gtid as it should always be valid
2217 // 1-, 2-byte operands - valid gtid expected; others - checked before this macro
2218 #define OP_CRITICAL_CPT(OP, LCK_ID) \
2219  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2220  \
2221  if (flag) { \
2222  (*lhs) OP rhs; \
2223  new_value = (*lhs); \
2224  } else { \
2225  new_value = (*lhs); \
2226  (*lhs) OP rhs; \
2227  } \
2228  \
2229  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2230  return new_value;
2231 
2232 #define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
2233  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2234  \
2235  if (flag) { \
2236  (*lhs) = (TYPE)((*lhs)OP rhs); \
2237  new_value = (*lhs); \
2238  } else { \
2239  new_value = (*lhs); \
2240  (*lhs) = (TYPE)((*lhs)OP rhs); \
2241  } \
2242  \
2243  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2244  return new_value;
2245 
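// Illustrative note: 'flag' selects which value the _cpt entry returns,
// matching the two OpenMP capture forms, roughly
//
//   flag != 0:  { x = x op expr; v = x; }  -> return the new value
//   flag == 0:  { v = x; x = x op expr; }  -> return the old value
//
// so a capture of the updated value presumably passes flag == 1, and a
// capture of the original value passes flag == 0.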
2246 // ------------------------------------------------------------------------
2247 #ifdef KMP_GOMP_COMPAT
2248 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
2249  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2250  KMP_CHECK_GTID; \
2251  OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
2252  }
2253 #else
2254 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
2255 #endif /* KMP_GOMP_COMPAT */
2256 
2257 // ------------------------------------------------------------------------
2258 // Operation on *lhs, rhs using "compare_and_store" routine
2259 // TYPE - operands' type
2260 // BITS - size in bits, used to distinguish low level calls
2261 // OP - operator
2262 // Note: temp_val introduced in order to force the compiler to read
2263 // *lhs only once (w/o it the compiler reads *lhs twice)
2264 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2265  { \
2266  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2267  TYPE old_value, new_value; \
2268  temp_val = *lhs; \
2269  old_value = temp_val; \
2270  new_value = (TYPE)(old_value OP rhs); \
2271  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2272  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2273  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2274  temp_val = *lhs; \
2275  old_value = temp_val; \
2276  new_value = (TYPE)(old_value OP rhs); \
2277  } \
2278  if (flag) { \
2279  return new_value; \
2280  } else \
2281  return old_value; \
2282  }
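// Illustrative sketch (comments only, not compiled): instantiated as
// __kmpc_atomic_float8_add_cpt further below, the loop above behaves roughly
// as
//
//   kmp_real64 old_value, new_value;
//   do {
//     old_value = *lhs;
//     new_value = old_value + rhs;
//   } while (!KMP_COMPARE_AND_STORE_ACQ64(
//       (kmp_int64 *)lhs, *VOLATILE_CAST(kmp_int64 *) & old_value,
//       *VOLATILE_CAST(kmp_int64 *) & new_value));
//   return flag ? new_value : old_value;
//
// (GOMP compatibility branch omitted for brevity.)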
2283 
2284 // -------------------------------------------------------------------------
2285 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2286  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2287  TYPE new_value; \
2288  (void)new_value; \
2289  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2290  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2291  }
2292 
2293 // -------------------------------------------------------------------------
2294 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2295  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2296  TYPE old_value, new_value; \
2297  (void)new_value; \
2298  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2299  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2300  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2301  if (flag) { \
2302  return old_value OP rhs; \
2303  } else \
2304  return old_value; \
2305  }
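// Illustrative sketch (comments only, not compiled): for add/sub the capture
// is built on one fetch-and-add, e.g. __kmpc_atomic_fixed4_sub_cpt below is
// roughly
//
//   kmp_int32 old_value = KMP_TEST_THEN_ADD32(lhs, -rhs); // atomic x -= rhs
//   return flag ? old_value - rhs : old_value;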
2306 // -------------------------------------------------------------------------
2307 
2308 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2309  0) // __kmpc_atomic_fixed4_add_cpt
2310 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2311  0) // __kmpc_atomic_fixed4_sub_cpt
2312 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2313  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2314 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2315  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2316 
2317 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2318  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2319 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2320  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2321 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2322  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2323 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2324  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2325 
2326 // ------------------------------------------------------------------------
2327 // Entries definition for integer operands
2328 // TYPE_ID - operands type and size (fixed4, float4)
2329 // OP_ID - operation identifier (add, sub, mul, ...)
2330 // TYPE - operand type
2331 // BITS - size in bits, used to distinguish low level calls
2332 // OP - operator (used in critical section)
2333 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
2334 // ------------------------------------------------------------------------
2335 // Routines for ATOMIC integer and real operands, other operators
2336 // ------------------------------------------------------------------------
2337 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2338 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2339  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2340 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2341  0) // __kmpc_atomic_fixed1_andb_cpt
2342 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2343  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2344 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2345  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2346 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2347  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2348 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2349  0) // __kmpc_atomic_fixed1_orb_cpt
2350 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2351  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2352 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2353  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2354 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2355  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2356 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2357  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2358 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2359  0) // __kmpc_atomic_fixed1_xor_cpt
2360 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2361  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2362 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2363  0) // __kmpc_atomic_fixed2_andb_cpt
2364 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2365  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2366 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2367  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2368 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2369  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2370 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2371  0) // __kmpc_atomic_fixed2_orb_cpt
2372 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2373  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2374 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2375  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2376 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2377  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2378 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2379  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2380 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2381  0) // __kmpc_atomic_fixed2_xor_cpt
2382 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2383  0) // __kmpc_atomic_fixed4_andb_cpt
2384 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2385  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2386 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2387  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2388 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2389  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2390 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2391  0) // __kmpc_atomic_fixed4_orb_cpt
2392 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2393  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2394 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2395  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2396 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2397  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2398 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2399  0) // __kmpc_atomic_fixed4_xor_cpt
2400 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2401  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2402 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2403  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2404 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2405  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2406 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2407  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2408 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2409  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2410 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2411  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2412 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2413  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2414 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2415  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2416 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2417  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2418 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2419  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2420 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2421  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2422 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2423  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2424 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2425  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2426 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2427 
2428 // CAPTURE routines for mixed types RHS=float16
2429 #if KMP_HAVE_QUAD
2430 
2431 // Beginning of a definition (provides name, parameters, debug trace)
2432 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2433 // fixed)
2434 // OP_ID - operation identifier (add, sub, mul, ...)
2435 // TYPE - operands' type
2436 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2437  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2438  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2439  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2440  KA_TRACE(100, \
2441  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2442  gtid));
2443 
2444 // -------------------------------------------------------------------------
2445 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2446  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2447  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2448  TYPE new_value; \
2449  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2450  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2451  }
2452 
2453 // -------------------------------------------------------------------------
2454 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2455  LCK_ID, GOMP_FLAG) \
2456  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2457  TYPE new_value; \
2458  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2459  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2460  }
2461 
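// Illustrative note: these entries keep the target type but take the
// right-hand side as _Quad; e.g. the instantiation below generates roughly
//
//   kmp_int32 __kmpc_atomic_fixed4_add_cpt_fp(ident_t *id_ref, int gtid,
//                                             kmp_int32 *lhs, _Quad rhs,
//                                             int flag);
//
// where 'old_value + rhs' is evaluated in _Quad and the result is cast back
// to kmp_int32 before being stored.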
2462 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2463  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2464 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2465  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2466 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2467  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2468 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2469  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2470 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2471  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2472 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2473  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2474 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2475  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2476 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2477  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2478 
2479 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2480  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2481 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2482  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2483 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2484  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2485 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2486  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2487 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2488  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2489 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2490  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2491 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2492  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2493 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2494  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2495 
2496 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2497  0) // __kmpc_atomic_fixed4_add_cpt_fp
2498 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2499  0) // __kmpc_atomic_fixed4u_add_cpt_fp
2500 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2501  0) // __kmpc_atomic_fixed4_sub_cpt_fp
2502 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2503  0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2504 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2505  0) // __kmpc_atomic_fixed4_mul_cpt_fp
2506 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2507  0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2508 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2509  0) // __kmpc_atomic_fixed4_div_cpt_fp
2510 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2511  0) // __kmpc_atomic_fixed4u_div_cpt_fp
2512 
2513 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2514  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2515 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2516  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2517 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2518  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2519 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2520  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2521 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2522  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2523 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2524  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2525 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2526  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2527 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2528  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2529 
2530 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2531  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2532 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2533  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2534 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2535  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2536 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2537  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2538 
2539 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2540  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2541 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2542  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2543 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2544  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2545 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2546  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2547 
2548 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2549  1) // __kmpc_atomic_float10_add_cpt_fp
2550 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2551  1) // __kmpc_atomic_float10_sub_cpt_fp
2552 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2553  1) // __kmpc_atomic_float10_mul_cpt_fp
2554 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2555  1) // __kmpc_atomic_float10_div_cpt_fp
2556 
2557 #endif // KMP_HAVE_QUAD
2558 
2559 // ------------------------------------------------------------------------
2560 // Routines for C/C++ Reduction operators && and ||
2561 
2562 // -------------------------------------------------------------------------
2563 // Operation on *lhs, rhs bound by critical section
2564 // OP - operator (it's supposed to contain an assignment)
2565 // LCK_ID - lock identifier
2566 // Note: don't check gtid as it should always be valid
2567 // 1-, 2-byte operands - valid gtid expected; others - checked before this macro
2568 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2569  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2570  \
2571  if (flag) { \
2572  new_value OP rhs; \
2573  (*lhs) = new_value; \
2574  } else { \
2575  new_value = (*lhs); \
2576  (*lhs) OP rhs; \
2577  } \
2578  \
2579  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2580 
2581 // ------------------------------------------------------------------------
2582 #ifdef KMP_GOMP_COMPAT
2583 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2584  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2585  KMP_CHECK_GTID; \
2586  OP_CRITICAL_L_CPT(OP, 0); \
2587  return new_value; \
2588  }
2589 #else
2590 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2591 #endif /* KMP_GOMP_COMPAT */
2592 
2593 // ------------------------------------------------------------------------
2594 // Need separate macros for &&, || because there is no combined assignment
2595 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2596  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2597  TYPE new_value; \
2598  (void)new_value; \
2599  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2600  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2601  }
2602 
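// Illustrative note: because C has no '&&=' / '||=' operators, the operator
// handed to the GOMP critical path above is '= *lhs &&' (resp. '= *lhs ||'),
// so with flag != 0 it expands roughly to
//
//   new_value = *lhs && rhs;
//   (*lhs) = new_value;
//
// while the lock-free path recomputes 'old_value && rhs' inside the
// compare-and-store loop of OP_CMPXCHG_CPT.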
2603 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2604  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2605 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2606  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2607 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2608  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2609 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2610  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2611 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2612  0) // __kmpc_atomic_fixed4_andl_cpt
2613 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2614  0) // __kmpc_atomic_fixed4_orl_cpt
2615 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2616  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2617 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2618  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2619 
2620 // -------------------------------------------------------------------------
2621 // Routines for Fortran operators that have no direct counterpart in C:
2622 // MAX, MIN, .EQV., .NEQV.
2623 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2624 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2625 
2626 // -------------------------------------------------------------------------
2627 // MIN and MAX need separate macros
2628 // OP - comparison operator used to check whether any update is needed
2629 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2630  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2631  \
2632  if (*lhs OP rhs) { /* still need actions? */ \
2633  old_value = *lhs; \
2634  *lhs = rhs; \
2635  if (flag) \
2636  new_value = rhs; \
2637  else \
2638  new_value = old_value; \
2639  } else { \
2640  new_value = *lhs; \
2641  } \
2642  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2643  return new_value;
2644 
2645 // -------------------------------------------------------------------------
2646 #ifdef KMP_GOMP_COMPAT
2647 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
2648  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2649  KMP_CHECK_GTID; \
2650  MIN_MAX_CRITSECT_CPT(OP, 0); \
2651  }
2652 #else
2653 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2654 #endif /* KMP_GOMP_COMPAT */
2655 
2656 // -------------------------------------------------------------------------
2657 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2658  { \
2659  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2660  /*TYPE old_value; */ \
2661  temp_val = *lhs; \
2662  old_value = temp_val; \
2663  while (old_value OP rhs && /* still need actions? */ \
2664  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
2665  (kmp_int##BITS *)lhs, \
2666  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2667  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
2668  temp_val = *lhs; \
2669  old_value = temp_val; \
2670  } \
2671  if (flag) \
2672  return rhs; \
2673  else \
2674  return old_value; \
2675  }
2676 
2677 // -------------------------------------------------------------------------
2678 // 1-byte, 2-byte operands - use critical section
2679 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2680  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2681  TYPE new_value, old_value; \
2682  if (*lhs OP rhs) { /* need actions? */ \
2683  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2684  MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2685  } \
2686  return *lhs; \
2687  }
2688 
2689 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2690  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2691  TYPE new_value, old_value; \
2692  (void)new_value; \
2693  if (*lhs OP rhs) { \
2694  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2695  MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2696  } \
2697  return *lhs; \
2698  }
2699 
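// Illustrative sketch (comments only, not compiled): for max the comparison
// operator is '<', so __kmpc_atomic_fixed4_max_cpt below behaves roughly as
//
//   if (*lhs < rhs) { // an update may be needed
//     kmp_int32 old_value = *lhs;
//     while (old_value < rhs &&
//            !KMP_COMPARE_AND_STORE_ACQ32(
//                (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//                *VOLATILE_CAST(kmp_int32 *) & rhs)) {
//       old_value = *lhs; // another thread updated *lhs - re-check
//     }
//     return flag ? rhs : old_value;
//   }
//   return *lhs;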
2700 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2701  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2702 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2703  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2704 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2705  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2706 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2707  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2708 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2709  0) // __kmpc_atomic_fixed4_max_cpt
2710 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2711  0) // __kmpc_atomic_fixed4_min_cpt
2712 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2713  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2714 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2715  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2716 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2717  KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2718 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2719  KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2720 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2721  KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2722 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2723  KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2724 MIN_MAX_CRITICAL_CPT(float10, max_cpt, long double, <, 10r,
2725  1) // __kmpc_atomic_float10_max_cpt
2726 MIN_MAX_CRITICAL_CPT(float10, min_cpt, long double, >, 10r,
2727  1) // __kmpc_atomic_float10_min_cpt
2728 #if KMP_HAVE_QUAD
2729 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2730  1) // __kmpc_atomic_float16_max_cpt
2731 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2732  1) // __kmpc_atomic_float16_min_cpt
2733 #if (KMP_ARCH_X86)
2734 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2735  1) // __kmpc_atomic_float16_max_a16_cpt
2736 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2737  1) // __kmpc_atomic_float16_min_a16_cpt
2738 #endif // (KMP_ARCH_X86)
2739 #endif // KMP_HAVE_QUAD
2740 
2741 // ------------------------------------------------------------------------
2742 #ifdef KMP_GOMP_COMPAT
2743 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2744  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2745  KMP_CHECK_GTID; \
2746  OP_CRITICAL_CPT(OP, 0); \
2747  }
2748 #else
2749 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2750 #endif /* KMP_GOMP_COMPAT */
2751 // ------------------------------------------------------------------------
2752 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2753  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2754  TYPE new_value; \
2755  (void)new_value; \
2756  OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
2757  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2758  }
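// Illustrative note: Fortran .EQV. is the complement of exclusive or and is
// expressed by the two-token operator '^~', so the update computed in
// OP_CMPXCHG_CPT is roughly
//
//   new_value = (TYPE)(old_value ^ ~rhs); // x .EQV. rhs
//
// while .NEQV. maps onto the plain '^' (neqv_cpt) entries below.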
2759 
2760 // ------------------------------------------------------------------------
2761 
2762 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2763  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2764 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2765  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2766 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2767  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2768 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2769  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2770 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2771  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2772 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2773  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2774 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2775  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2776 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2777  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2778 
2779 // ------------------------------------------------------------------------
2780 // Routines for Extended types: long double, _Quad, complex flavours (use
2781 // critical section)
2782 // TYPE_ID, OP_ID, TYPE - detailed above
2783 // OP - operator
2784 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2785 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2786  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2787  TYPE new_value; \
2788  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2789  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2790  }
2791 
2792 // ------------------------------------------------------------------------
2793 // Workaround for cmplx4. Regular routines with return value don't work
2794 // on Win_32e. Let's return captured values through the additional parameter.
2795 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
2796  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2797  \
2798  if (flag) { \
2799  (*lhs) OP rhs; \
2800  (*out) = (*lhs); \
2801  } else { \
2802  (*out) = (*lhs); \
2803  (*lhs) OP rhs; \
2804  } \
2805  \
2806  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2807  return;
2808 // ------------------------------------------------------------------------
2809 
2810 #ifdef KMP_GOMP_COMPAT
2811 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
2812  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2813  KMP_CHECK_GTID; \
2814  OP_CRITICAL_CPT_WRK(OP## =, 0); \
2815  }
2816 #else
2817 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2818 #endif /* KMP_GOMP_COMPAT */
2819 // ------------------------------------------------------------------------
2820 
2821 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2822  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2823  TYPE rhs, TYPE *out, int flag) { \
2824  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2825  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2826 // ------------------------------------------------------------------------
2827 
2828 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2829  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2830  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
2831  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
2832  }
2833 // The end of workaround for cmplx4
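// Illustrative sketch (comments only, not compiled): the cmplx4 capture
// entries generated from the workaround above return void and write the
// captured value through 'out', roughly
//
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag) {
//     __kmp_acquire_atomic_lock(&ATOMIC_LOCK8c, gtid);
//     if (flag) { (*lhs) += rhs; (*out) = (*lhs); }
//     else      { (*out) = (*lhs); (*lhs) += rhs; }
//     __kmp_release_atomic_lock(&ATOMIC_LOCK8c, gtid);
//   }
//
// (GOMP compatibility branch omitted for brevity.)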
2834 
2835 /* ------------------------------------------------------------------------- */
2836 // routines for long double type
2837 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2838  1) // __kmpc_atomic_float10_add_cpt
2839 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2840  1) // __kmpc_atomic_float10_sub_cpt
2841 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2842  1) // __kmpc_atomic_float10_mul_cpt
2843 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2844  1) // __kmpc_atomic_float10_div_cpt
2845 #if KMP_HAVE_QUAD
2846 // routines for _Quad type
2847 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2848  1) // __kmpc_atomic_float16_add_cpt
2849 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2850  1) // __kmpc_atomic_float16_sub_cpt
2851 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2852  1) // __kmpc_atomic_float16_mul_cpt
2853 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2854  1) // __kmpc_atomic_float16_div_cpt
2855 #if (KMP_ARCH_X86)
2856 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2857  1) // __kmpc_atomic_float16_add_a16_cpt
2858 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2859  1) // __kmpc_atomic_float16_sub_a16_cpt
2860 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2861  1) // __kmpc_atomic_float16_mul_a16_cpt
2862 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2863  1) // __kmpc_atomic_float16_div_a16_cpt
2864 #endif // (KMP_ARCH_X86)
2865 #endif // KMP_HAVE_QUAD
2866 
2867 // routines for complex types
2868 
2869 // cmplx4 routines to return void
2870 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2871  1) // __kmpc_atomic_cmplx4_add_cpt
2872 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2873  1) // __kmpc_atomic_cmplx4_sub_cpt
2874 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2875  1) // __kmpc_atomic_cmplx4_mul_cpt
2876 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2877  1) // __kmpc_atomic_cmplx4_div_cpt
2878 
2879 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2880  1) // __kmpc_atomic_cmplx8_add_cpt
2881 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2882  1) // __kmpc_atomic_cmplx8_sub_cpt
2883 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2884  1) // __kmpc_atomic_cmplx8_mul_cpt
2885 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2886  1) // __kmpc_atomic_cmplx8_div_cpt
2887 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2888  1) // __kmpc_atomic_cmplx10_add_cpt
2889 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2890  1) // __kmpc_atomic_cmplx10_sub_cpt
2891 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2892  1) // __kmpc_atomic_cmplx10_mul_cpt
2893 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2894  1) // __kmpc_atomic_cmplx10_div_cpt
2895 #if KMP_HAVE_QUAD
2896 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2897  1) // __kmpc_atomic_cmplx16_add_cpt
2898 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2899  1) // __kmpc_atomic_cmplx16_sub_cpt
2900 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2901  1) // __kmpc_atomic_cmplx16_mul_cpt
2902 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2903  1) // __kmpc_atomic_cmplx16_div_cpt
2904 #if (KMP_ARCH_X86)
2905 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2906  1) // __kmpc_atomic_cmplx16_add_a16_cpt
2907 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2908  1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2909 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2910  1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2911 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2912  1) // __kmpc_atomic_cmplx16_div_a16_cpt
2913 #endif // (KMP_ARCH_X86)
2914 #endif // KMP_HAVE_QUAD
2915 
2916 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr
2917 // binop x; v = x; } for non-commutative operations.
2918 // Supported only on IA-32 architecture and Intel(R) 64
2919 
2920 // -------------------------------------------------------------------------
2921 // Operation on *lhs, rhs bound by critical section
2922 // OP - operator (it's supposed to contain an assignment)
2923 // LCK_ID - lock identifier
2924 // Note: don't check gtid as it should always be valid
2925 // 1-, 2-byte operands - valid gtid expected; others - checked before this macro
2926 #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
2927  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2928  \
2929  if (flag) { \
2930  /*temp_val = (*lhs);*/ \
2931  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2932  new_value = (*lhs); \
2933  } else { \
2934  new_value = (*lhs); \
2935  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2936  } \
2937  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2938  return new_value;
2939 
2940 // ------------------------------------------------------------------------
2941 #ifdef KMP_GOMP_COMPAT
2942 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \
2943  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2944  KMP_CHECK_GTID; \
2945  OP_CRITICAL_CPT_REV(TYPE, OP, 0); \
2946  }
2947 #else
2948 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
2949 #endif /* KMP_GOMP_COMPAT */
2950 
2951 // ------------------------------------------------------------------------
2952 // Operation on *lhs, rhs using "compare_and_store" routine
2953 // TYPE - operands' type
2954 // BITS - size in bits, used to distinguish low level calls
2955 // OP - operator
2956 // Note: temp_val introduced in order to force the compiler to read
2957 // *lhs only once (w/o it the compiler reads *lhs twice)
2958 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2959  { \
2960  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2961  TYPE old_value, new_value; \
2962  temp_val = *lhs; \
2963  old_value = temp_val; \
2964  new_value = (TYPE)(rhs OP old_value); \
2965  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2966  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2967  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2968  temp_val = *lhs; \
2969  old_value = temp_val; \
2970  new_value = (TYPE)(rhs OP old_value); \
2971  } \
2972  if (flag) { \
2973  return new_value; \
2974  } else \
2975  return old_value; \
2976  }
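// Illustrative sketch (comments only, not compiled): the only difference from
// OP_CMPXCHG_CPT is the operand order (rhs OP old_value); e.g.
// __kmpc_atomic_float8_sub_cpt_rev below computes roughly
//
//   kmp_real64 old_value, new_value;
//   do {
//     old_value = *lhs;
//     new_value = rhs - old_value; // x = expr - x
//   } while (!KMP_COMPARE_AND_STORE_ACQ64(
//       (kmp_int64 *)lhs, *VOLATILE_CAST(kmp_int64 *) & old_value,
//       *VOLATILE_CAST(kmp_int64 *) & new_value));
//   return flag ? new_value : old_value;
//
// (GOMP compatibility branch omitted for brevity.)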
2977 
2978 // -------------------------------------------------------------------------
2979 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2980  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2981  TYPE new_value; \
2982  (void)new_value; \
2983  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
2984  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2985  }
2986 
2987 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2988  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2989 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2990  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2991 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2992  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2993 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2994  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2995 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2996  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2997 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2998  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2999 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
3000  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
3001 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
3002  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
3003 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
3004  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
3005 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
3006  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
3007 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
3008  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
3009 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
3010  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
3011 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
3012  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
3013 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
3014  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
3015 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
3016  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
3017 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
3018  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
3019 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
3020  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
3021 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
3022  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
3023 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
3024  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
3025 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
3026  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
3027 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
3028  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
3029 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
3030  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
3031 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
3032  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
3033 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
3034  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
3035 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
3036  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
3037 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
3038  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
3039 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
3040  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
3041 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
3042  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
3043 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
3044 
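// Hedged usage sketch (comments only; assumes the ATOMIC_BEGIN_CPT prototype
// TYPE __kmpc_atomic_<TYPE_ID>_<OP_ID>(ident_t *, int gtid, TYPE *lhs,
// TYPE rhs, int flag) defined earlier in this file): a compiler could lower
//   #pragma omp atomic capture
//   { x = 10 - x; v = x; }
// for kmp_int32 x roughly as
//   v = __kmpc_atomic_fixed4_sub_cpt_rev(&loc, gtid, &x, 10, 1 /* capture new value */);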
3045 // ------------------------------------------------------------------------
3046 // Routines for Extended types: long double, _Quad, complex flavours (use
3047 // critical section)
3048 // TYPE_ID, OP_ID, TYPE - detailed above
3049 // OP - operator
3050 // LCK_ID - lock identifier, used to possibly distinguish lock variable
3051 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
3052  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
3053  TYPE new_value; \
3054  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
3055  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3056  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
3057  }
3058 
3059 /* ------------------------------------------------------------------------- */
3060 // routines for long double type
3061 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
3062  1) // __kmpc_atomic_float10_sub_cpt_rev
3063 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
3064  1) // __kmpc_atomic_float10_div_cpt_rev
3065 #if KMP_HAVE_QUAD
3066 // routines for _Quad type
3067 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
3068  1) // __kmpc_atomic_float16_sub_cpt_rev
3069 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
3070  1) // __kmpc_atomic_float16_div_cpt_rev
3071 #if (KMP_ARCH_X86)
3072 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3073  1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3074 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3075  1) // __kmpc_atomic_float16_div_a16_cpt_rev
3076 #endif // (KMP_ARCH_X86)
3077 #endif // KMP_HAVE_QUAD
3078 
3079 // routines for complex types
3080 
3081 // ------------------------------------------------------------------------
3082 // Workaround for cmplx4. Regular routines with return value don't work
3083 // on Win_32e. Let's return captured values through the additional parameter.
3084 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3085  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3086  \
3087  if (flag) { \
3088  (*lhs) = (rhs)OP(*lhs); \
3089  (*out) = (*lhs); \
3090  } else { \
3091  (*out) = (*lhs); \
3092  (*lhs) = (rhs)OP(*lhs); \
3093  } \
3094  \
3095  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3096  return;
3097 // ------------------------------------------------------------------------
3098 
3099 #ifdef KMP_GOMP_COMPAT
3100 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
3101  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3102  KMP_CHECK_GTID; \
3103  OP_CRITICAL_CPT_REV_WRK(OP, 0); \
3104  }
3105 #else
3106 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3107 #endif /* KMP_GOMP_COMPAT */
3108 // ------------------------------------------------------------------------
3109 
3110 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
3111  GOMP_FLAG) \
3112  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
3113  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
3114  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3115  }
3116 // The end of workaround for cmplx4
3117 
3118 // !!! TODO: check if we need to return void for cmplx4 routines
3119 // cmplx4 routines to return void
3120 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3121  1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3122 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3123  1) // __kmpc_atomic_cmplx4_div_cpt_rev
3124 
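// Illustrative note (sketch): the two cmplx4 entries above return void; the
// captured value comes back through the extra TYPE *out parameter declared by
// ATOMIC_BEGIN_WRK (defined earlier in this file), with the same flag-selected
// ordering as OP_CRITICAL_CPT_REV_WRK: flag != 0 captures the updated *lhs,
// flag == 0 captures the value *lhs held before the update.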
3125 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3126  1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3127 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3128  1) // __kmpc_atomic_cmplx8_div_cpt_rev
3129 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3130  1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3131 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3132  1) // __kmpc_atomic_cmplx10_div_cpt_rev
3133 #if KMP_HAVE_QUAD
3134 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3135  1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3136 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3137  1) // __kmpc_atomic_cmplx16_div_cpt_rev
3138 #if (KMP_ARCH_X86)
3139 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3140  1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3141 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3142  1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3143 #endif // (KMP_ARCH_X86)
3144 #endif // KMP_HAVE_QUAD
3145 
3146 // Capture reverse for mixed type: RHS=float16
3147 #if KMP_HAVE_QUAD
3148 
3149 // Beginning of a definition (provides name, parameters, debug trace)
3150 // TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
3151 // fixed)
3152 // OP_ID - operation identifier (add, sub, mul, ...)
3153 // TYPE - operands' type
3154 // -------------------------------------------------------------------------
3155 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
3156  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
3157  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3158  TYPE new_value; \
3159  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3160  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
3161  }
3162 
3163 // -------------------------------------------------------------------------
3164 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3165  LCK_ID, GOMP_FLAG) \
3166  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3167  TYPE new_value; \
3168  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
3169  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
3170  }
3171 
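// Hedged note: in the _MIX entries below the update operand rhs is presumably
// declared with the RHS type (_Quad) by ATOMIC_BEGIN_CPT_MIX (not shown here),
// so "rhs OP *lhs" is evaluated in _Quad precision and then converted back to
// the location's TYPE by the (TYPE) cast inside OP_CMPXCHG_CPT_REV /
// OP_CRITICAL_CPT_REV.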
3172 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3173  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3174 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3175  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3176 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3177  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3178 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3179  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3180 
3181 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3182  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3183 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3184  1,
3185  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3186 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3187  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3188 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3189  1,
3190  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3191 
3192 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3193  3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3194 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3195  4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3196 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3197  3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3198 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3199  4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3200 
3201 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3202  7,
3203  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3204 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3205  8i, 7,
3206  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3207 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3208  7,
3209  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3210 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3211  8i, 7,
3212  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3213 
3214 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3215  4r, 3,
3216  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3217 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3218  4r, 3,
3219  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3220 
3221 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3222  8r, 7,
3223  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3224 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3225  8r, 7,
3226  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3227 
3228 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3229  10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3230 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3231  10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3232 
3233 #endif // KMP_HAVE_QUAD
3234 
3235 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3236 
3237 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3238  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3239  TYPE rhs) { \
3240  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3241  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3242 
3243 #define CRITICAL_SWP(LCK_ID) \
3244  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3245  \
3246  old_value = (*lhs); \
3247  (*lhs) = rhs; \
3248  \
3249  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3250  return old_value;
3251 
3252 // ------------------------------------------------------------------------
3253 #ifdef KMP_GOMP_COMPAT
3254 #define GOMP_CRITICAL_SWP(FLAG) \
3255  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3256  KMP_CHECK_GTID; \
3257  CRITICAL_SWP(0); \
3258  }
3259 #else
3260 #define GOMP_CRITICAL_SWP(FLAG)
3261 #endif /* KMP_GOMP_COMPAT */
3262 
3263 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3264  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3265  TYPE old_value; \
3266  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3267  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
3268  return old_value; \
3269  }
3270 // ------------------------------------------------------------------------
3271 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3272  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3273  TYPE old_value; \
3274  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3275  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
3276  return old_value; \
3277  }
3278 
3279 // ------------------------------------------------------------------------
3280 #define CMPXCHG_SWP(TYPE, BITS) \
3281  { \
3282  TYPE KMP_ATOMIC_VOLATILE temp_val; \
3283  TYPE old_value, new_value; \
3284  temp_val = *lhs; \
3285  old_value = temp_val; \
3286  new_value = rhs; \
3287  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
3288  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
3289  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
3290  temp_val = *lhs; \
3291  old_value = temp_val; \
3292  new_value = rhs; \
3293  } \
3294  return old_value; \
3295  }
3296 
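// Illustrative note: CMPXCHG_SWP is the compare-and-store fallback for sizes
// where a plain atomic exchange cannot be used (see the 8-byte case on 32-bit
// x86 below); it rereads *lhs and retries until rhs is stored successfully,
// then returns the value that was replaced.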
3297 // -------------------------------------------------------------------------
3298 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3299  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3300  TYPE old_value; \
3301  (void)old_value; \
3302  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3303  CMPXCHG_SWP(TYPE, BITS) \
3304  }
3305 
3306 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3307 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3308 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3309 
3310 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3311  KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3312 
3313 #if (KMP_ARCH_X86)
3314 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3315  KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3316 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3317  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3318 #else
3319 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3320 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3321  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3322 #endif // (KMP_ARCH_X86)
3323 
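#if 0 // Illustrative sketch only (never compiled): lowering of the OpenMP
      // capture-write form  { v = x; x = expr; }  for a kmp_int32 through the
      // swap entry generated above (signature comes from ATOMIC_BEGIN_SWP).
static kmp_int32 example_capture_write(ident_t *loc, int gtid, kmp_int32 *x,
                                        kmp_int32 expr) {
  return __kmpc_atomic_fixed4_swp(loc, gtid, x, expr); // returns old value of x
}
#endif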
3324 // ------------------------------------------------------------------------
3325 // Routines for Extended types: long double, _Quad, complex flavours (use
3326 // critical section)
3327 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3328  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3329  TYPE old_value; \
3330  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3331  CRITICAL_SWP(LCK_ID) \
3332  }
3333 
3334 // ------------------------------------------------------------------------
3335 // !!! TODO: check if we need to return void for cmplx4 routines
3336 // Workaround for cmplx4. Regular routines with return value don't work
3337 // on Win_32e. Let's return captured values through the additional parameter.
3338 
3339 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3340  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3341  TYPE rhs, TYPE *out) { \
3342  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3343  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3344 
3345 #define CRITICAL_SWP_WRK(LCK_ID) \
3346  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3347  \
3348  tmp = (*lhs); \
3349  (*lhs) = (rhs); \
3350  (*out) = tmp; \
3351  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3352  return;
3353 // ------------------------------------------------------------------------
3354 
3355 #ifdef KMP_GOMP_COMPAT
3356 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
3357  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3358  KMP_CHECK_GTID; \
3359  CRITICAL_SWP_WRK(0); \
3360  }
3361 #else
3362 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3363 #endif /* KMP_GOMP_COMPAT */
3364 // ------------------------------------------------------------------------
3365 
3366 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3367  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3368  TYPE tmp; \
3369  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
3370  CRITICAL_SWP_WRK(LCK_ID) \
3371  }
3372 // The end of workaround for cmplx4
3373 
3374 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3375 #if KMP_HAVE_QUAD
3376 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3377 #endif // KMP_HAVE_QUAD
3378 // cmplx4 routine to return void
3379 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
3380 
3381 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
3382 // __kmpc_atomic_cmplx4_swp
3383 
3384 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3385 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3386 #if KMP_HAVE_QUAD
3387 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3388 #if (KMP_ARCH_X86)
3389 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3390  1) // __kmpc_atomic_float16_a16_swp
3391 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3392  1) // __kmpc_atomic_cmplx16_a16_swp
3393 #endif // (KMP_ARCH_X86)
3394 #endif // KMP_HAVE_QUAD
3395 
3396 // End of OpenMP 4.0 Capture
3397 
3398 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3399 
3400 #undef OP_CRITICAL
3401 
3402 /* ------------------------------------------------------------------------ */
3403 /* Generic atomic routines */
3404 
3405 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3406  void (*f)(void *, void *, void *)) {
3407  KMP_DEBUG_ASSERT(__kmp_init_serial);
3408 
3409  if (
3410 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3411  FALSE /* must use lock */
3412 #else
3413  TRUE
3414 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3415  ) {
3416  kmp_int8 old_value, new_value;
3417 
3418  old_value = *(kmp_int8 *)lhs;
3419  (*f)(&new_value, &old_value, rhs);
3420 
3421  /* TODO: Should this be acquire or release? */
3422  while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3423  *(kmp_int8 *)&new_value)) {
3424  KMP_CPU_PAUSE();
3425 
3426  old_value = *(kmp_int8 *)lhs;
3427  (*f)(&new_value, &old_value, rhs);
3428  }
3429 
3430  return;
3431  } else {
3432  // All 1-byte data is of integer data type.
3433 
3434 #ifdef KMP_GOMP_COMPAT
3435  if (__kmp_atomic_mode == 2) {
3436  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3437  } else
3438 #endif /* KMP_GOMP_COMPAT */
3439  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3440 
3441  (*f)(lhs, lhs, rhs);
3442 
3443 #ifdef KMP_GOMP_COMPAT
3444  if (__kmp_atomic_mode == 2) {
3445  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3446  } else
3447 #endif /* KMP_GOMP_COMPAT */
3448  __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3449  }
3450 }
3451 
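#if 0 // Illustrative sketch only (never compiled): how a compiler could use
      // the generic 1-byte entry above; the callback computes out = in1 + in2,
      // matching the (*f)(&new_value, &old_value, rhs) convention used here.
static void example_add_i8(void *out, void *in1, void *in2) {
  *(kmp_int8 *)out = (kmp_int8)(*(kmp_int8 *)in1 + *(kmp_int8 *)in2);
}
static void example_atomic_add_i8(ident_t *loc, int gtid, kmp_int8 *x,
                                  kmp_int8 inc) {
  __kmpc_atomic_1(loc, gtid, x, &inc, example_add_i8);
}
#endif
// The 2-, 4- and 8-byte entries below follow the same callback convention.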
3452 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3453  void (*f)(void *, void *, void *)) {
3454  if (
3455 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3456  FALSE /* must use lock */
3457 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3458  TRUE /* no alignment problems */
3459 #else
3460  !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3461 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3462  ) {
3463  kmp_int16 old_value, new_value;
3464 
3465  old_value = *(kmp_int16 *)lhs;
3466  (*f)(&new_value, &old_value, rhs);
3467 
3468  /* TODO: Should this be acquire or release? */
3469  while (!KMP_COMPARE_AND_STORE_ACQ16(
3470  (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3471  KMP_CPU_PAUSE();
3472 
3473  old_value = *(kmp_int16 *)lhs;
3474  (*f)(&new_value, &old_value, rhs);
3475  }
3476 
3477  return;
3478  } else {
3479  // All 2-byte data is of integer data type.
3480 
3481 #ifdef KMP_GOMP_COMPAT
3482  if (__kmp_atomic_mode == 2) {
3483  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3484  } else
3485 #endif /* KMP_GOMP_COMPAT */
3486  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3487 
3488  (*f)(lhs, lhs, rhs);
3489 
3490 #ifdef KMP_GOMP_COMPAT
3491  if (__kmp_atomic_mode == 2) {
3492  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3493  } else
3494 #endif /* KMP_GOMP_COMPAT */
3495  __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3496  }
3497 }
3498 
3499 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3500  void (*f)(void *, void *, void *)) {
3501  KMP_DEBUG_ASSERT(__kmp_init_serial);
3502 
3503  if (
3504 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3505 // Gomp compatibility is broken if this routine is called for floats.
3506 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3507  TRUE /* no alignment problems */
3508 #else
3509  !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3510 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3511  ) {
3512  kmp_int32 old_value, new_value;
3513 
3514  old_value = *(kmp_int32 *)lhs;
3515  (*f)(&new_value, &old_value, rhs);
3516 
3517  /* TODO: Should this be acquire or release? */
3518  while (!KMP_COMPARE_AND_STORE_ACQ32(
3519  (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3520  KMP_CPU_PAUSE();
3521 
3522  old_value = *(kmp_int32 *)lhs;
3523  (*f)(&new_value, &old_value, rhs);
3524  }
3525 
3526  return;
3527  } else {
3528  // Use __kmp_atomic_lock_4i for all 4-byte data,
3529  // even if it isn't of integer data type.
3530 
3531 #ifdef KMP_GOMP_COMPAT
3532  if (__kmp_atomic_mode == 2) {
3533  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3534  } else
3535 #endif /* KMP_GOMP_COMPAT */
3536  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3537 
3538  (*f)(lhs, lhs, rhs);
3539 
3540 #ifdef KMP_GOMP_COMPAT
3541  if (__kmp_atomic_mode == 2) {
3542  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3543  } else
3544 #endif /* KMP_GOMP_COMPAT */
3545  __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3546  }
3547 }
3548 
3549 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3550  void (*f)(void *, void *, void *)) {
3551  KMP_DEBUG_ASSERT(__kmp_init_serial);
3552  if (
3553 
3554 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3555  FALSE /* must use lock */
3556 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3557  TRUE /* no alignment problems */
3558 #else
3559  !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3560 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3561  ) {
3562  kmp_int64 old_value, new_value;
3563 
3564  old_value = *(kmp_int64 *)lhs;
3565  (*f)(&new_value, &old_value, rhs);
3566  /* TODO: Should this be acquire or release? */
3567  while (!KMP_COMPARE_AND_STORE_ACQ64(
3568  (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3569  KMP_CPU_PAUSE();
3570 
3571  old_value = *(kmp_int64 *)lhs;
3572  (*f)(&new_value, &old_value, rhs);
3573  }
3574 
3575  return;
3576  } else {
3577  // Use __kmp_atomic_lock_8i for all 8-byte data,
3578  // even if it isn't of integer data type.
3579 
3580 #ifdef KMP_GOMP_COMPAT
3581  if (__kmp_atomic_mode == 2) {
3582  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3583  } else
3584 #endif /* KMP_GOMP_COMPAT */
3585  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3586 
3587  (*f)(lhs, lhs, rhs);
3588 
3589 #ifdef KMP_GOMP_COMPAT
3590  if (__kmp_atomic_mode == 2) {
3591  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3592  } else
3593 #endif /* KMP_GOMP_COMPAT */
3594  __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3595  }
3596 }
3597 
3598 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3599  void (*f)(void *, void *, void *)) {
3600  KMP_DEBUG_ASSERT(__kmp_init_serial);
3601 
3602 #ifdef KMP_GOMP_COMPAT
3603  if (__kmp_atomic_mode == 2) {
3604  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3605  } else
3606 #endif /* KMP_GOMP_COMPAT */
3607  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3608 
3609  (*f)(lhs, lhs, rhs);
3610 
3611 #ifdef KMP_GOMP_COMPAT
3612  if (__kmp_atomic_mode == 2) {
3613  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3614  } else
3615 #endif /* KMP_GOMP_COMPAT */
3616  __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3617 }
3618 
3619 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3620  void (*f)(void *, void *, void *)) {
3621  KMP_DEBUG_ASSERT(__kmp_init_serial);
3622 
3623 #ifdef KMP_GOMP_COMPAT
3624  if (__kmp_atomic_mode == 2) {
3625  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3626  } else
3627 #endif /* KMP_GOMP_COMPAT */
3628  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3629 
3630  (*f)(lhs, lhs, rhs);
3631 
3632 #ifdef KMP_GOMP_COMPAT
3633  if (__kmp_atomic_mode == 2) {
3634  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3635  } else
3636 #endif /* KMP_GOMP_COMPAT */
3637  __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3638 }
3639 
3640 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3641  void (*f)(void *, void *, void *)) {
3642  KMP_DEBUG_ASSERT(__kmp_init_serial);
3643 
3644 #ifdef KMP_GOMP_COMPAT
3645  if (__kmp_atomic_mode == 2) {
3646  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3647  } else
3648 #endif /* KMP_GOMP_COMPAT */
3649  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3650 
3651  (*f)(lhs, lhs, rhs);
3652 
3653 #ifdef KMP_GOMP_COMPAT
3654  if (__kmp_atomic_mode == 2) {
3655  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3656  } else
3657 #endif /* KMP_GOMP_COMPAT */
3658  __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3659 }
3660 
3661 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3662  void (*f)(void *, void *, void *)) {
3663  KMP_DEBUG_ASSERT(__kmp_init_serial);
3664 
3665 #ifdef KMP_GOMP_COMPAT
3666  if (__kmp_atomic_mode == 2) {
3667  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3668  } else
3669 #endif /* KMP_GOMP_COMPAT */
3670  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3671 
3672  (*f)(lhs, lhs, rhs);
3673 
3674 #ifdef KMP_GOMP_COMPAT
3675  if (__kmp_atomic_mode == 2) {
3676  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3677  } else
3678 #endif /* KMP_GOMP_COMPAT */
3679  __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3680 }
3681 
3682 // AC: same two routines as GOMP_atomic_start/end, but will be called by our
3683 // compiler; duplicated so as not to use third-party names in pure Intel code
3684 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3685 void __kmpc_atomic_start(void) {
3686  int gtid = __kmp_entry_gtid();
3687  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3688  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3689 }
3690 
3691 void __kmpc_atomic_end(void) {
3692  int gtid = __kmp_get_gtid();
3693  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3694  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3695 }
3696 
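// Illustrative sketch (comments only): for an atomic update with no
// specialized entry above, the compiler can bracket the update with these
// calls, e.g.
//   __kmpc_atomic_start();
//   x = user_defined_op(x, y);   // arbitrary update, executed under the lock
//   __kmpc_atomic_end();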
3697 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3698 
3699 // OpenMP 5.1 compare and swap
3700 
3715 bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
3716  return KMP_COMPARE_AND_STORE_ACQ8(x, e, d);
3717 }
3718 bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e,
3719  short d) {
3720  return KMP_COMPARE_AND_STORE_ACQ16(x, e, d);
3721 }
3722 bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e,
3723  kmp_int32 d) {
3724  return KMP_COMPARE_AND_STORE_ACQ32(x, e, d);
3725 }
3726 bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e,
3727  kmp_int64 d) {
3728  return KMP_COMPARE_AND_STORE_ACQ64(x, e, d);
3729 }
3730 
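// Descriptive note: each bool_*_cas entry atomically performs
//   if (*x == e) { *x = d; }
// and returns whether the store happened (true on success, false otherwise).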
3745 char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
3746  return KMP_COMPARE_AND_STORE_RET8(x, e, d);
3747 }
3748 short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e,
3749  short d) {
3750  return KMP_COMPARE_AND_STORE_RET16(x, e, d);
3751 }
3752 kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x,
3753  kmp_int32 e, kmp_int32 d) {
3754  return KMP_COMPARE_AND_STORE_RET32(x, e, d);
3755 }
3756 kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x,
3757  kmp_int64 e, kmp_int64 d) {
3758  return KMP_COMPARE_AND_STORE_RET64(x, e, d);
3759 }
3760 
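// Descriptive note: the val_*_cas entries return the value of *x observed by
// the compare-and-store (the old value); callers compare it with e to learn
// whether the exchange took place.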
3777 bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3778  char d, char *pv) {
3779  char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3780  if (old == e)
3781  return true;
3782  KMP_ASSERT(pv != NULL);
3783  *pv = old;
3784  return false;
3785 }
3786 bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3787  short d, short *pv) {
3788  short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3789  if (old == e)
3790  return true;
3791  KMP_ASSERT(pv != NULL);
3792  *pv = old;
3793  return false;
3794 }
3795 bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3796  kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3797  kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3798  if (old == e)
3799  return true;
3800  KMP_ASSERT(pv != NULL);
3801  *pv = old;
3802  return false;
3803 }
3804 bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3805  kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3806  kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3807  if (old == e)
3808  return true;
3809  KMP_ASSERT(pv != NULL);
3810  *pv = old;
3811  return false;
3812 }
3813 
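// Descriptive note: the bool_*_cas_cpt entries return true and leave *pv
// untouched when the exchange succeeds; on failure they store the observed
// value of *x into *pv and return false.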
3830 char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3831  char d, char *pv) {
3832  char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3833  KMP_ASSERT(pv != NULL);
3834  *pv = old == e ? d : old;
3835  return old;
3836 }
3837 short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3838  short d, short *pv) {
3839  short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3840  KMP_ASSERT(pv != NULL);
3841  *pv = old == e ? d : old;
3842  return old;
3843 }
3844 kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3845  kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3846  kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3847  KMP_ASSERT(pv != NULL);
3848  *pv = old == e ? d : old;
3849  return old;
3850 }
3851 kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3852  kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3853  kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3854  KMP_ASSERT(pv != NULL);
3855  *pv = old == e ? d : old;
3856  return old;
3857 }
3858 
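// Descriptive note: the val_*_cas_cpt entries return the observed old value
// and store into *pv the value *x holds after the operation (d if the
// exchange succeeded, the old value otherwise).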
3859 // End OpenMP 5.1 compare + capture
3860 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3861 
3866 // end of file