1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp_atomic.h"
14 #include "kmp.h" // TRUE, asm routines prototypes
15 
16 typedef unsigned char uchar;
17 typedef unsigned short ushort;
18 
561 /*
562  * Global vars
563  */
564 
565 #ifndef KMP_GOMP_COMPAT
566 int __kmp_atomic_mode = 1; // Intel perf
567 #else
568 int __kmp_atomic_mode = 2; // GOMP compatibility
569 #endif /* KMP_GOMP_COMPAT */
570 
571 KMP_ALIGN(128)
572 
573 // Control access to all user coded atomics in Gnu compat mode
574 kmp_atomic_lock_t __kmp_atomic_lock;
575 // Control access to all user coded atomics for 1-byte fixed data types
576 kmp_atomic_lock_t __kmp_atomic_lock_1i;
577 // Control access to all user coded atomics for 2-byte fixed data types
578 kmp_atomic_lock_t __kmp_atomic_lock_2i;
579 // Control access to all user coded atomics for 4-byte fixed data types
580 kmp_atomic_lock_t __kmp_atomic_lock_4i;
581 // Control access to all user coded atomics for kmp_real32 data type
582 kmp_atomic_lock_t __kmp_atomic_lock_4r;
583 // Control access to all user coded atomics for 8-byte fixed data types
584 kmp_atomic_lock_t __kmp_atomic_lock_8i;
585 // Control access to all user coded atomics for kmp_real64 data type
586 kmp_atomic_lock_t __kmp_atomic_lock_8r;
588 // Control access to all user coded atomics for float complex data type
588 kmp_atomic_lock_t __kmp_atomic_lock_8c;
589 // Control access to all user coded atomics for long double data type
590 kmp_atomic_lock_t __kmp_atomic_lock_10r;
591 // Control access to all user coded atomics for _Quad data type
592 kmp_atomic_lock_t __kmp_atomic_lock_16r;
593 // Control access to all user coded atomics for double complex data type
594 kmp_atomic_lock_t __kmp_atomic_lock_16c;
595 // Control access to all user coded atomics for long double complex type
596 kmp_atomic_lock_t __kmp_atomic_lock_20c;
597 // Control access to all user coded atomics for _Quad complex data type
598 kmp_atomic_lock_t __kmp_atomic_lock_32c;
599 
600 /* 2007-03-02:
601  Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
602  on *_32 and *_32e. This is just a temporary workaround for the problem. It
603  seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
604  in assembler language. */
605 #define KMP_ATOMIC_VOLATILE volatile
606 
607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608 
609 static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610  return lhs.q + rhs.q;
611 }
612 static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613  return lhs.q - rhs.q;
614 }
615 static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616  return lhs.q * rhs.q;
617 }
618 static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619  return lhs.q / rhs.q;
620 }
621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622  return lhs.q < rhs.q;
623 }
624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625  return lhs.q > rhs.q;
626 }
627 
628 static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629  return lhs.q + rhs.q;
630 }
631 static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632  return lhs.q - rhs.q;
633 }
634 static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635  return lhs.q * rhs.q;
636 }
637 static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638  return lhs.q / rhs.q;
639 }
640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641  return lhs.q < rhs.q;
642 }
643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644  return lhs.q > rhs.q;
645 }
646 
647 static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
648  kmp_cmplx128_a4_t &rhs) {
649  return lhs.q + rhs.q;
650 }
651 static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
652  kmp_cmplx128_a4_t &rhs) {
653  return lhs.q - rhs.q;
654 }
655 static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
656  kmp_cmplx128_a4_t &rhs) {
657  return lhs.q * rhs.q;
658 }
659 static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
660  kmp_cmplx128_a4_t &rhs) {
661  return lhs.q / rhs.q;
662 }
663 
664 static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
665  kmp_cmplx128_a16_t &rhs) {
666  return lhs.q + rhs.q;
667 }
668 static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
669  kmp_cmplx128_a16_t &rhs) {
670  return lhs.q - rhs.q;
671 }
672 static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
673  kmp_cmplx128_a16_t &rhs) {
674  return lhs.q * rhs.q;
675 }
676 static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
677  kmp_cmplx128_a16_t &rhs) {
678  return lhs.q / rhs.q;
679 }
680 
681 #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
682 
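// The overloads above simply forward to the underlying .q member, which lets
// the generic OP_CRITICAL / OP_CMPXCHG macros below apply +, -, *, /, <, >
// directly to the aligned wrapper types. Illustrative sketch only (assumes the
// wrappers declared in kmp_atomic.h are constructible from a _Quad value):
//
//   Quad_a16_t a, b;
//   a = a + b;         // calls operator+ above, i.e. computes a.q + b.q
//   if (a < b) { ... } // calls operator< above, i.e. compares a.q < b.q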
683 // ATOMIC implementation routines -----------------------------------------
684 // One routine for each operation and operand type.
685 // All routine declarations look like
686 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
687 
688 #define KMP_CHECK_GTID \
689  if (gtid == KMP_GTID_UNKNOWN) { \
690  gtid = __kmp_entry_gtid(); \
691  } // check and get gtid when needed
692 
693 // Beginning of a definition (provides name, parameters, debug trace)
694 // TYPE_ID - operands' type and size (fixed* for signed, fixed*u for unsigned
695 // fixed-width integers)
696 // OP_ID - operation identifier (add, sub, mul, ...)
697 // TYPE - operands' type
698 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
699  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
700  TYPE *lhs, TYPE rhs) { \
701  KMP_DEBUG_ASSERT(__kmp_init_serial); \
702  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
703 
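// For illustration only (not compiled): ATOMIC_BEGIN(fixed4, add, kmp_int32,
// void) opens the following function definition; the closing brace is supplied
// by the macro that instantiates the body (e.g. ATOMIC_FIXED_ADD below):
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));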
704 // ------------------------------------------------------------------------
705 // Lock variables used for critical sections for various size operands
706 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
707 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
708 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
709 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
710 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
711 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
712 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
713 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
714 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
715 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
716 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
717 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
718 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
719 
720 // ------------------------------------------------------------------------
721 // Operation on *lhs, rhs bound by critical section
722 // OP - operator (it's supposed to contain an assignment)
723 // LCK_ID - lock identifier
724 // Note: don't check gtid as it should always be valid
725 // 1-, 2-byte: a valid gtid is expected; other sizes: check it before this macro
726 #define OP_CRITICAL(OP, LCK_ID) \
727  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
728  \
729  (*lhs) OP(rhs); \
730  \
731  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
732 
733 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
734  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
735  (*lhs) = (TYPE)((*lhs)OP rhs); \
736  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
737 
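// For illustration only (not compiled): OP_UPDATE_CRITICAL(kmp_cmplx64, +, 16c)
// expands roughly to the lock-protected read-modify-write below:
//
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
//   (*lhs) = (kmp_cmplx64)((*lhs) + rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);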
738 // ------------------------------------------------------------------------
739 // For GNU compatibility, we may need to use a critical section,
740 // even though it is not required by the ISA.
741 //
742 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
743 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
744 // critical section. On Intel(R) 64, all atomic operations are done with fetch
745 // and add or compare and exchange. Therefore, the FLAG parameter to this
746 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions that
747 // require a critical section, where we predict that they will be implemented
748 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
749 //
750 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
751 // the FLAG parameter should always be 1. If we know that we will be using
752 // a critical section, then we want to make certain that we use the generic
753 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
754 // locks that are specialized based upon the size or type of the data.
755 //
756 // If FLAG is 0, then we are relying on dead code elimination by the build
757 // compiler to get rid of the useless block of code, and save a needless
758 // branch at runtime.
759 
760 #ifdef KMP_GOMP_COMPAT
761 #define OP_GOMP_CRITICAL(OP, FLAG) \
762  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
763  KMP_CHECK_GTID; \
764  OP_CRITICAL(OP, 0); \
765  return; \
766  }
767 
768 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \
769  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
770  KMP_CHECK_GTID; \
771  OP_UPDATE_CRITICAL(TYPE, OP, 0); \
772  return; \
773  }
774 #else
775 #define OP_GOMP_CRITICAL(OP, FLAG)
776 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
777 #endif /* KMP_GOMP_COMPAT */
778 
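// For illustration only (not compiled): in a KMP_GOMP_COMPAT build,
// OP_UPDATE_GOMP_CRITICAL(kmp_real32, +, KMP_ARCH_X86) expands roughly to:
//
//   if ((KMP_ARCH_X86) && (__kmp_atomic_mode == 2)) {
//     if (gtid == KMP_GTID_UNKNOWN) {
//       gtid = __kmp_entry_gtid();
//     }
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); // generic lock (LCK_ID 0)
//     (*lhs) = (kmp_real32)((*lhs) + rhs);
//     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
//     return;
//   }
//
// With FLAG == 0 the whole block is dead code and is removed by the compiler.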
779 #if KMP_MIC
780 #define KMP_DO_PAUSE _mm_delay_32(1)
781 #else
782 #define KMP_DO_PAUSE
783 #endif /* KMP_MIC */
784 
785 // ------------------------------------------------------------------------
786 // Operation on *lhs, rhs using "compare_and_store" routine
787 // TYPE - operands' type
788 // BITS - size in bits, used to distinguish low level calls
789 // OP - operator
790 #define OP_CMPXCHG(TYPE, BITS, OP) \
791  { \
792  TYPE old_value, new_value; \
793  old_value = *(TYPE volatile *)lhs; \
794  new_value = (TYPE)(old_value OP rhs); \
795  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
796  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
797  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
798  KMP_DO_PAUSE; \
799  \
800  old_value = *(TYPE volatile *)lhs; \
801  new_value = (TYPE)(old_value OP rhs); \
802  } \
803  }
804 
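// For illustration only (not compiled): OP_CMPXCHG(kmp_real32, 32, +) expands
// roughly to the retry loop below. The value is re-read and the result
// recomputed until the 32-bit compare-and-store (operating on the raw bits of
// the float) succeeds:
//
//   kmp_real32 old_value, new_value;
//   old_value = *(kmp_real32 volatile *)lhs;
//   new_value = (kmp_real32)(old_value + rhs);
//   while (!KMP_COMPARE_AND_STORE_ACQ32(
//       (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//       *VOLATILE_CAST(kmp_int32 *) & new_value)) {
//     KMP_DO_PAUSE;
//     old_value = *(kmp_real32 volatile *)lhs;
//     new_value = (kmp_real32)(old_value + rhs);
//   }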
805 #if USE_CMPXCHG_FIX
806 // 2007-06-25:
807 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
808 // and win_32e are affected (I verified the asm). The compiler ignores the
809 // volatile qualifier of temp_val in the OP_CMPXCHG macro. This is a compiler
810 // problem; the related tracker is C76005, targeted to 11.0. I verified the asm
811 // of the workaround.
812 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
813  { \
814  struct _sss { \
815  TYPE cmp; \
816  kmp_int##BITS *vvv; \
817  }; \
818  struct _sss old_value, new_value; \
819  old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
820  new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
821  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
822  new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
823  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
824  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
825  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
826  KMP_DO_PAUSE; \
827  \
828  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
829  new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
830  } \
831  }
832 // end of the first part of the workaround for C78287
833 #endif // USE_CMPXCHG_FIX
834 
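// The workaround routes every read of the compared value through the .vvv
// pointer member, e.g. for kmp_cmplx32 (BITS == 64):
//
//   *old_value.vvv = *(volatile kmp_int64 *)lhs; // reload the 8-byte value
//
// Because old_value.vvv points at old_value.cmp, the affected compilers can no
// longer cache the value in a register the way they did when the plain
// volatile temporary was (incorrectly) ignored. This describes the intent as
// read from the macro above, not any documented compiler behavior.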
835 #if KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM)
836 // Undo the explicit type casts so that MSVC for ARM64 builds. Reuses the
837 // OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG.
838 #undef OP_CMPXCHG
839 #define OP_CMPXCHG(TYPE, BITS, OP) \
840  { \
841  struct _sss { \
842  TYPE cmp; \
843  kmp_int##BITS *vvv; \
844  }; \
845  struct _sss old_value, new_value; \
846  old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
847  new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
848  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
849  new_value.cmp = old_value.cmp OP rhs; \
850  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
851  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
852  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
853  KMP_DO_PAUSE; \
854  \
855  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
856  new_value.cmp = old_value.cmp OP rhs; \
857  } \
858  }
859 
860 #undef OP_UPDATE_CRITICAL
861 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
862  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
863  (*lhs) = (*lhs)OP rhs; \
864  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
865 
866 #endif // KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM)
867 
868 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
869 
870 // ------------------------------------------------------------------------
871 // X86 or X86_64: no alignment problems ====================================
872 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
873  GOMP_FLAG) \
874  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
875  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
876  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
877  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
878  }
879 // -------------------------------------------------------------------------
880 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
881  GOMP_FLAG) \
882  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
883  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
884  OP_CMPXCHG(TYPE, BITS, OP) \
885  }
886 #if USE_CMPXCHG_FIX
887 // -------------------------------------------------------------------------
888 // workaround for C78287 (complex(kind=4) data type)
889 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
890  MASK, GOMP_FLAG) \
891  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
892  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
893  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
894  }
895 // end of the second part of the workaround for C78287
896 #endif // USE_CMPXCHG_FIX
897 
898 #else
899 // -------------------------------------------------------------------------
900 // Code for other architectures that don't handle unaligned accesses.
901 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
902  GOMP_FLAG) \
903  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
904  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
905  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
906  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
907  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
908  } else { \
909  KMP_CHECK_GTID; \
910  OP_UPDATE_CRITICAL(TYPE, OP, \
911  LCK_ID) /* unaligned address - use critical */ \
912  } \
913  }
914 // -------------------------------------------------------------------------
915 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
916  GOMP_FLAG) \
917  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
918  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
919  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
920  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
921  } else { \
922  KMP_CHECK_GTID; \
923  OP_UPDATE_CRITICAL(TYPE, OP, \
924  LCK_ID) /* unaligned address - use critical */ \
925  } \
926  }
927 #if USE_CMPXCHG_FIX
928 // -------------------------------------------------------------------------
929 // workaround for C78287 (complex(kind=4) data type)
930 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
931  MASK, GOMP_FLAG) \
932  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
933  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
934  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
935  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
936  } else { \
937  KMP_CHECK_GTID; \
938  OP_UPDATE_CRITICAL(TYPE, OP, \
939  LCK_ID) /* unaligned address - use critical */ \
940  } \
941  }
942 // end of the second part of the workaround for C78287
943 #endif // USE_CMPXCHG_FIX
944 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
945 
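// For illustration only (not compiled): on x86/x86_64,
// ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, 0) generates roughly:
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//     // GOMP_FLAG == 0: the GOMP-compat critical path compiles away
//     KMP_TEST_THEN_ADD32(lhs, +rhs); // single atomic fetch-and-add
//   }
//
// The matching "sub" entry passes OP = '-', so the same macro emits
// KMP_TEST_THEN_ADD32(lhs, -rhs), which is the "OP used as a sign" trick
// mentioned above.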
946 // Routines for ATOMIC 4-byte operands addition and subtraction
947 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
948  0) // __kmpc_atomic_fixed4_add
949 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
950  0) // __kmpc_atomic_fixed4_sub
951 
952 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
953  KMP_ARCH_X86) // __kmpc_atomic_float4_add
954 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
955  KMP_ARCH_X86) // __kmpc_atomic_float4_sub
956 
957 // Routines for ATOMIC 8-byte operands addition and subtraction
958 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
959  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
960 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
961  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
962 
963 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
964  KMP_ARCH_X86) // __kmpc_atomic_float8_add
965 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
966  KMP_ARCH_X86) // __kmpc_atomic_float8_sub
967 
968 // ------------------------------------------------------------------------
969 // Entries definition for integer operands
970 // TYPE_ID - operands type and size (fixed4, float4)
971 // OP_ID - operation identifier (add, sub, mul, ...)
972 // TYPE - operand type
973 // BITS - size in bits, used to distinguish low level calls
974 // OP - operator (used in critical section)
975 // LCK_ID - lock identifier, used to possibly distinguish lock variable
976 // MASK - used for alignment check
977 
978 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
979 // ------------------------------------------------------------------------
980 // Routines for ATOMIC integer operands, other operators
981 // ------------------------------------------------------------------------
982 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
983 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
984  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
985 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
986  0) // __kmpc_atomic_fixed1_andb
987 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
988  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
989 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
990  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
991 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
992  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
993 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
994  0) // __kmpc_atomic_fixed1_orb
995 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
996  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
997 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
998  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
999 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
1000  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
1001 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
1002  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
1003 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
1004  0) // __kmpc_atomic_fixed1_xor
1005 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
1006  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
1007 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
1008  0) // __kmpc_atomic_fixed2_andb
1009 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
1010  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
1011 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
1012  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
1013 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
1014  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
1015 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
1016  0) // __kmpc_atomic_fixed2_orb
1017 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
1018  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
1019 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
1020  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
1021 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
1022  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
1023 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
1024  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
1025 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
1026  0) // __kmpc_atomic_fixed2_xor
1027 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
1028  0) // __kmpc_atomic_fixed4_andb
1029 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
1030  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
1031 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
1032  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
1033 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
1034  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
1035 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
1036  0) // __kmpc_atomic_fixed4_orb
1037 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
1038  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
1039 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
1040  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
1041 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
1042  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
1043 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
1044  0) // __kmpc_atomic_fixed4_xor
1045 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
1046  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
1047 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
1048  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
1049 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
1050  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
1051 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1052  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1053 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1054  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1055 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1056  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1057 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1058  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1059 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1060  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1061 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1062  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1063 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1064  KMP_ARCH_X86) // __kmpc_atomic_float4_div
1065 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1066  KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1067 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1068  KMP_ARCH_X86) // __kmpc_atomic_float8_div
1069 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1070  KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1071 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
1072 
1073 /* ------------------------------------------------------------------------ */
1074 /* Routines for C/C++ Reduction operators && and || */
1075 
1076 // ------------------------------------------------------------------------
1077 // Need separate macros for &&, || because there is no combined-assignment form
1078 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1079 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1080  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1081  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1082  OP_CRITICAL(= *lhs OP, LCK_ID) \
1083  }
1084 
1085 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1086 
1087 // ------------------------------------------------------------------------
1088 // X86 or X86_64: no alignment problems ===================================
1089 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1090  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1091  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1092  OP_CMPXCHG(TYPE, BITS, OP) \
1093  }
1094 
1095 #else
1096 // ------------------------------------------------------------------------
1097 // Code for other architectures that don't handle unaligned accesses.
1098 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1099  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1100  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1101  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1102  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1103  } else { \
1104  KMP_CHECK_GTID; \
1105  OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
1106  } \
1107  }
1108 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1109 
1110 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1111  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1112 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1113  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1114 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1115  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1116 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1117  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1118 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1119  0) // __kmpc_atomic_fixed4_andl
1120 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1121  0) // __kmpc_atomic_fixed4_orl
1122 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1123  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1124 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1125  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1126 
1127 /* ------------------------------------------------------------------------- */
1128 /* Routines for Fortran operators that have no C counterpart: */
1129 /* MAX, MIN, .EQV., .NEQV. */
1130 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
1131 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
1132 
1133 // -------------------------------------------------------------------------
1134 // MIN and MAX need separate macros
1135 // OP - operator used to check whether any action is still needed
1136 #define MIN_MAX_CRITSECT(OP, LCK_ID) \
1137  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1138  \
1139  if (*lhs OP rhs) { /* still need actions? */ \
1140  *lhs = rhs; \
1141  } \
1142  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1143 
1144 // -------------------------------------------------------------------------
1145 #ifdef KMP_GOMP_COMPAT
1146 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
1147  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1148  KMP_CHECK_GTID; \
1149  MIN_MAX_CRITSECT(OP, 0); \
1150  return; \
1151  }
1152 #else
1153 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1154 #endif /* KMP_GOMP_COMPAT */
1155 
1156 // -------------------------------------------------------------------------
1157 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1158  { \
1159  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1160  TYPE old_value; \
1161  temp_val = *lhs; \
1162  old_value = temp_val; \
1163  while (old_value OP rhs && /* still need actions? */ \
1164  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
1165  (kmp_int##BITS *)lhs, \
1166  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1167  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
1168  temp_val = *lhs; \
1169  old_value = temp_val; \
1170  } \
1171  }
1172 
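// For illustration only (not compiled): MIN_MAX_CMPXCHG(kmp_real32, 32, <), as
// used by __kmpc_atomic_float4_max, expands roughly to:
//
//   kmp_real32 KMP_ATOMIC_VOLATILE temp_val;
//   kmp_real32 old_value;
//   temp_val = *lhs;
//   old_value = temp_val;
//   while (old_value < rhs && /* still need actions? */
//          !KMP_COMPARE_AND_STORE_ACQ32(
//              (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//              *VOLATILE_CAST(kmp_int32 *) & rhs)) {
//     temp_val = *lhs;
//     old_value = temp_val;
//   }
//
// Note that OP is the "still need action" test: for max it is '<' (replace
// only while the current value is smaller than rhs), for min it is '>'.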
1173 // -------------------------------------------------------------------------
1174 // 1-byte, 2-byte operands - use critical section
1175 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1176  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1177  if (*lhs OP rhs) { /* need actions? */ \
1178  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1179  MIN_MAX_CRITSECT(OP, LCK_ID) \
1180  } \
1181  }
1182 
1183 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1184 
1185 // -------------------------------------------------------------------------
1186 // X86 or X86_64: no alignment problems ====================================
1187 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1188  GOMP_FLAG) \
1189  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1190  if (*lhs OP rhs) { \
1191  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1192  MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1193  } \
1194  }
1195 
1196 #else
1197 // -------------------------------------------------------------------------
1198 // Code for other architectures that don't handle unaligned accesses.
1199 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1200  GOMP_FLAG) \
1201  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1202  if (*lhs OP rhs) { \
1203  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1204  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1205  MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1206  } else { \
1207  KMP_CHECK_GTID; \
1208  MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
1209  } \
1210  } \
1211  }
1212 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1213 
1214 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1215  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1216 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1217  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1218 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1219  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1220 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1221  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1222 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1223  0) // __kmpc_atomic_fixed4_max
1224 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1225  0) // __kmpc_atomic_fixed4_min
1226 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1227  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1228 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1229  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1230 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1231  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1232 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1233  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1234 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1235  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1236 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1237  KMP_ARCH_X86) // __kmpc_atomic_float8_min
1238 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1239 MIN_MAX_CRITICAL(float10, max, long double, <, 10r,
1240  1) // __kmpc_atomic_float10_max
1241 MIN_MAX_CRITICAL(float10, min, long double, >, 10r,
1242  1) // __kmpc_atomic_float10_min
1243 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1244 #if KMP_HAVE_QUAD
1245 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1246  1) // __kmpc_atomic_float16_max
1247 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1248  1) // __kmpc_atomic_float16_min
1249 #if (KMP_ARCH_X86)
1250 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1251  1) // __kmpc_atomic_float16_max_a16
1252 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1253  1) // __kmpc_atomic_float16_min_a16
1254 #endif // (KMP_ARCH_X86)
1255 #endif // KMP_HAVE_QUAD
1256 // ------------------------------------------------------------------------
1257 // .EQV. needs separate macros because it requires a complement (~)
1258 // OP is ignored for critical sections; ^= ~ is used instead
1259 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1260  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1261  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1262  OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \
1263  }
1264 
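// For illustration only (not compiled): x = x .EQV. b is implemented as
// x ^= ~b. In the critical-section path the OP text "^= (TYPE) ~" is pasted in
// front of (rhs), giving for kmp_int8:
//
//   (*lhs) ^= (kmp_int8) ~(rhs);
//
// In the compare-and-swap path (ATOMIC_CMPX_EQV below) OP is "^~", so the new
// value is computed as old_value ^ ~rhs.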
1265 // ------------------------------------------------------------------------
1266 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1267 // ------------------------------------------------------------------------
1268 // X86 or X86_64: no alignment problems ===================================
1269 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1270  GOMP_FLAG) \
1271  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1272  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1273  OP_CMPXCHG(TYPE, BITS, OP) \
1274  }
1275 // ------------------------------------------------------------------------
1276 #else
1277 // ------------------------------------------------------------------------
1278 // Code for other architectures that don't handle unaligned accesses.
1279 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1280  GOMP_FLAG) \
1281  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1282  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \
1283  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1284  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1285  } else { \
1286  KMP_CHECK_GTID; \
1287  OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \
1288  } \
1289  }
1290 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1291 
1292 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1293  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1294 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1295  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1296 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1297  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1298 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1299  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1300 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1301  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1302 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1303  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1304 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1305  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1306 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1307  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1308 
1309 // ------------------------------------------------------------------------
1310 // Routines for Extended types: long double, _Quad, complex flavours (use
1311 // critical section)
1312 // TYPE_ID, OP_ID, TYPE - detailed above
1313 // OP - operator
1314 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1315 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1316  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1317  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1318  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1319  }
1320 
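// For illustration only (not compiled): ATOMIC_CRITICAL(cmplx8, add,
// kmp_cmplx64, +, 16c, 1) generates roughly (debug assert/trace and the
// GOMP-compat early-return path omitted):
//
//   void __kmpc_atomic_cmplx8_add(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs,
//                                 kmp_cmplx64 rhs) {
//     ...
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
//     (*lhs) = (kmp_cmplx64)((*lhs) + rhs);
//     __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
//   }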
1321 /* ------------------------------------------------------------------------- */
1322 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1323 // routines for long double type
1324 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1325  1) // __kmpc_atomic_float10_add
1326 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1327  1) // __kmpc_atomic_float10_sub
1328 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1329  1) // __kmpc_atomic_float10_mul
1330 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1331  1) // __kmpc_atomic_float10_div
1332 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1333 #if KMP_HAVE_QUAD
1334 // routines for _Quad type
1335 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1336  1) // __kmpc_atomic_float16_add
1337 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1338  1) // __kmpc_atomic_float16_sub
1339 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1340  1) // __kmpc_atomic_float16_mul
1341 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1342  1) // __kmpc_atomic_float16_div
1343 #if (KMP_ARCH_X86)
1344 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1345  1) // __kmpc_atomic_float16_add_a16
1346 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1347  1) // __kmpc_atomic_float16_sub_a16
1348 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1349  1) // __kmpc_atomic_float16_mul_a16
1350 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1351  1) // __kmpc_atomic_float16_div_a16
1352 #endif // (KMP_ARCH_X86)
1353 #endif // KMP_HAVE_QUAD
1354 // routines for complex types
1355 
1356 #if USE_CMPXCHG_FIX
1357 // workaround for C78287 (complex(kind=4) data type)
1358 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1359  1) // __kmpc_atomic_cmplx4_add
1360 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1361  1) // __kmpc_atomic_cmplx4_sub
1362 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1363  1) // __kmpc_atomic_cmplx4_mul
1364 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1365  1) // __kmpc_atomic_cmplx4_div
1366 // end of the workaround for C78287
1367 #else
1368 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1369 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1370 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1371 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1372 #endif // USE_CMPXCHG_FIX
1373 
1374 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1375 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1376 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1377 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1378 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1379 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1380  1) // __kmpc_atomic_cmplx10_add
1381 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1382  1) // __kmpc_atomic_cmplx10_sub
1383 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1384  1) // __kmpc_atomic_cmplx10_mul
1385 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1386  1) // __kmpc_atomic_cmplx10_div
1387 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1388 #if KMP_HAVE_QUAD
1389 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1390  1) // __kmpc_atomic_cmplx16_add
1391 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1392  1) // __kmpc_atomic_cmplx16_sub
1393 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1394  1) // __kmpc_atomic_cmplx16_mul
1395 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1396  1) // __kmpc_atomic_cmplx16_div
1397 #if (KMP_ARCH_X86)
1398 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1399  1) // __kmpc_atomic_cmplx16_add_a16
1400 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1401  1) // __kmpc_atomic_cmplx16_sub_a16
1402 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1403  1) // __kmpc_atomic_cmplx16_mul_a16
1404 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1405  1) // __kmpc_atomic_cmplx16_div_a16
1406 #endif // (KMP_ARCH_X86)
1407 #endif // KMP_HAVE_QUAD
1408 
1409 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1410 // Supported only on IA-32 architecture and Intel(R) 64
1411 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1412 
1413 // ------------------------------------------------------------------------
1414 // Operation on *lhs, rhs bound by critical section
1415 // OP - operator (it's supposed to contain an assignment)
1416 // LCK_ID - lock identifier
1417 // Note: don't check gtid as it should always be valid
1418 // 1, 2-byte - expect valid parameter, other - check before this macro
1419 #define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1420  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1421  \
1422  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
1423  \
1424  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1425 
1426 #ifdef KMP_GOMP_COMPAT
1427 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \
1428  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1429  KMP_CHECK_GTID; \
1430  OP_CRITICAL_REV(TYPE, OP, 0); \
1431  return; \
1432  }
1433 
1434 #else
1435 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
1436 #endif /* KMP_GOMP_COMPAT */
1437 
1438 // Beginning of a definition (provides name, parameters, debug trace)
1439 // TYPE_ID - operands' type and size (fixed* for signed, fixed*u for unsigned
1440 // fixed-width integers)
1441 // OP_ID - operation identifier (add, sub, mul, ...)
1442 // TYPE - operands' type
1443 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1444  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
1445  TYPE *lhs, TYPE rhs) { \
1446  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1447  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1448 
1449 // ------------------------------------------------------------------------
1450 // Operation on *lhs, rhs using "compare_and_store" routine
1451 // TYPE - operands' type
1452 // BITS - size in bits, used to distinguish low level calls
1453 // OP - operator
1454 // Note: temp_val introduced in order to force the compiler to read
1455 // *lhs only once (w/o it the compiler reads *lhs twice)
1456 #define OP_CMPXCHG_REV(TYPE, BITS, OP) \
1457  { \
1458  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1459  TYPE old_value, new_value; \
1460  temp_val = *lhs; \
1461  old_value = temp_val; \
1462  new_value = (TYPE)(rhs OP old_value); \
1463  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
1464  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1465  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
1466  KMP_DO_PAUSE; \
1467  \
1468  temp_val = *lhs; \
1469  old_value = temp_val; \
1470  new_value = (TYPE)(rhs OP old_value); \
1471  } \
1472  }
1473 
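// For illustration only (not compiled): the only difference from OP_CMPXCHG is
// the operand order. OP_CMPXCHG_REV(kmp_real64, 64, /), as used by
// __kmpc_atomic_float8_div_rev, recomputes
//
//   new_value = (kmp_real64)(rhs / old_value); // i.e. *lhs = rhs / *lhs
//
// on every retry of the 64-bit compare-and-store loop.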
1474 // -------------------------------------------------------------------------
1475 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
1476  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1477  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1478  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1479  }
1480 
1481 // ------------------------------------------------------------------------
1482 // Entries definition for integer operands
1483 // TYPE_ID - operands type and size (fixed4, float4)
1484 // OP_ID - operation identifier (add, sub, mul, ...)
1485 // TYPE - operand type
1486 // BITS - size in bits, used to distinguish low level calls
1487 // OP - operator (used in critical section)
1488 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1489 
1490 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1491 // ------------------------------------------------------------------------
1492 // Routines for ATOMIC integer operands, other operators
1493 // ------------------------------------------------------------------------
1494 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1495 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1496  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1497 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1498  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1499 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1500  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1501 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1502  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1503 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1504  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1505 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1506  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1507 
1508 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1509  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1510 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1511  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1512 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1513  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1514 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1515  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1516 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1517  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1518 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1519  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1520 
1521 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1522  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1523 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1524  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1525 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1526  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1527 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1528  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1529 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1530  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1531 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1532  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1533 
1534 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1535  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1536 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1537  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1538 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1539  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1540 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1541  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1542 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1543  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1544 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1545  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1546 
1547 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1548  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1549 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1550  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1551 
1552 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1553  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1554 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1555  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1556 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1557 
1558 // ------------------------------------------------------------------------
1559 // Routines for Extended types: long double, _Quad, complex flavours (use
1560 // critical section)
1561 // TYPE_ID, OP_ID, TYPE - detailed above
1562 // OP - operator
1563 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1564 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1565  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1566  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1567  OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1568  }
1569 
1570 /* ------------------------------------------------------------------------- */
1571 // routines for long double type
1572 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1573  1) // __kmpc_atomic_float10_sub_rev
1574 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1575  1) // __kmpc_atomic_float10_div_rev
1576 #if KMP_HAVE_QUAD
1577 // routines for _Quad type
1578 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1579  1) // __kmpc_atomic_float16_sub_rev
1580 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1581  1) // __kmpc_atomic_float16_div_rev
1582 #if (KMP_ARCH_X86)
1583 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1584  1) // __kmpc_atomic_float16_sub_a16_rev
1585 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1586  1) // __kmpc_atomic_float16_div_a16_rev
1587 #endif // KMP_ARCH_X86
1588 #endif // KMP_HAVE_QUAD
1589 
1590 // routines for complex types
1591 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1592  1) // __kmpc_atomic_cmplx4_sub_rev
1593 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1594  1) // __kmpc_atomic_cmplx4_div_rev
1595 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1596  1) // __kmpc_atomic_cmplx8_sub_rev
1597 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1598  1) // __kmpc_atomic_cmplx8_div_rev
1599 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1600  1) // __kmpc_atomic_cmplx10_sub_rev
1601 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1602  1) // __kmpc_atomic_cmplx10_div_rev
1603 #if KMP_HAVE_QUAD
1604 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1605  1) // __kmpc_atomic_cmplx16_sub_rev
1606 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1607  1) // __kmpc_atomic_cmplx16_div_rev
1608 #if (KMP_ARCH_X86)
1609 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1610  1) // __kmpc_atomic_cmplx16_sub_a16_rev
1611 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1612  1) // __kmpc_atomic_cmplx16_div_a16_rev
1613 #endif // KMP_ARCH_X86
1614 #endif // KMP_HAVE_QUAD
1615 
1616 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1617 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1618 
1619 /* ------------------------------------------------------------------------ */
1620 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1621 /* Note: to reduce the total number of type combinations, it is assumed    */
1622 /* that the compiler converts the RHS to the longest floating-point type,  */
1623 /* i.e. _Quad, before calling any of these routines.                       */
1624 /* The conversion to _Quad is done by the compiler during the calculation, */
1625 /* and the conversion back to TYPE happens before the assignment, as in:   */
1626 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                   */
1627 /* A performance penalty is expected because of the software emulation.    */
1628 /* ------------------------------------------------------------------------ */
1629 
1630 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1631  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
1632  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
1633  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1634  KA_TRACE(100, \
1635  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
1636  gtid));
1637 
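// For illustration only (not compiled): ATOMIC_BEGIN_MIX(float4, kmp_real32,
// add, fp, _Quad) opens
//
//   void __kmpc_atomic_float4_add_fp(ident_t *id_ref, int gtid,
//                                    kmp_real32 *lhs, _Quad rhs) {
//
// and the update code produced by the macros below computes
// (kmp_real32)(old_value + rhs): old_value is promoted to _Quad for the '+',
// and the result is converted back to kmp_real32 when it is stored.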
1638 // -------------------------------------------------------------------------
1639 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
1640  GOMP_FLAG) \
1641  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1642  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1643  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1644  }
1645 
1646 // -------------------------------------------------------------------------
1647 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1648 // -------------------------------------------------------------------------
1649 // X86 or X86_64: no alignment problems ====================================
1650 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1651  LCK_ID, MASK, GOMP_FLAG) \
1652  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1653  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1654  OP_CMPXCHG(TYPE, BITS, OP) \
1655  }
1656 // -------------------------------------------------------------------------
1657 #else
1658 // ------------------------------------------------------------------------
1659 // Code for other architectures that don't handle unaligned accesses.
1660 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1661  LCK_ID, MASK, GOMP_FLAG) \
1662  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1663  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1664  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1665  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1666  } else { \
1667  KMP_CHECK_GTID; \
1668  OP_UPDATE_CRITICAL(TYPE, OP, \
1669  LCK_ID) /* unaligned address - use critical */ \
1670  } \
1671  }
1672 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1673 
1674 // -------------------------------------------------------------------------
1675 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1676 // -------------------------------------------------------------------------
1677 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
1678  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
1679  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1680  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1681  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1682  }
1683 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
1684  LCK_ID, GOMP_FLAG) \
1685  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1686  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1687  OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1688  }
1689 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1690 
1691 // RHS=float8
1692 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1693  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1694 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1695  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1696 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1697  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1698 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1699  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1700 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1701  0) // __kmpc_atomic_fixed4_mul_float8
1702 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1703  0) // __kmpc_atomic_fixed4_div_float8
1704 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1705  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1706 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1707  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1708 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1709  KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1710 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1711  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1712 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1713  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1714 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1715  KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1716 
1717 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1718 // use them)
1719 #if KMP_HAVE_QUAD
1720 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1721  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1722 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1723  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1724 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1725  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1726 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1727  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1728 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1729  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1730 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1731  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1732 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1733  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1734 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1735  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1736 
1737 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1738  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1739 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1740  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1741 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1742  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1743 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1744  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1745 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1746  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1747 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1748  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1749 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1750  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1751 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1752  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1753 
1754 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1755  0) // __kmpc_atomic_fixed4_add_fp
1756 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1757  0) // __kmpc_atomic_fixed4u_add_fp
1758 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1759  0) // __kmpc_atomic_fixed4_sub_fp
1760 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1761  0) // __kmpc_atomic_fixed4u_sub_fp
1762 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1763  0) // __kmpc_atomic_fixed4_mul_fp
1764 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1765  0) // __kmpc_atomic_fixed4u_mul_fp
1766 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1767  0) // __kmpc_atomic_fixed4_div_fp
1768 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1769  0) // __kmpc_atomic_fixed4u_div_fp
1770 
1771 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1772  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1773 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1774  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1775 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1776  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1777 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1778  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1779 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1780  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1781 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1782  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1783 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1784  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1785 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1786  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1787 
1788 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1789  KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1790 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1791  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1792 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1793  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1794 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1795  KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1796 
1797 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1798  KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1799 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1800  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1801 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1802  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1803 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1804  KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1805 
1806 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1807 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1808  1) // __kmpc_atomic_float10_add_fp
1809 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1810  1) // __kmpc_atomic_float10_sub_fp
1811 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1812  1) // __kmpc_atomic_float10_mul_fp
1813 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1814  1) // __kmpc_atomic_float10_div_fp
1815 
1816 // Reverse operations
1817 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1818  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1819 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1820  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1821 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1822  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1823 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1824  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1825 
1826 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1827  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1828 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1829  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1830 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1831  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1832 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1833  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1834 
1835 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1836  0) // __kmpc_atomic_fixed4_sub_rev_fp
1837 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1838  0) // __kmpc_atomic_fixed4u_sub_rev_fp
1839 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1840  0) // __kmpc_atomic_fixed4_div_rev_fp
1841 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1842  0) // __kmpc_atomic_fixed4u_div_rev_fp
1843 
1844 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1845  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1846 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1847  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1848 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1849  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1850 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1851  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1852 
1853 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1854  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1855 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1856  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1857 
1858 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1859  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1860 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1861  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1862 
1863 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1864  1) // __kmpc_atomic_float10_sub_rev_fp
1865 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1866  1) // __kmpc_atomic_float10_div_rev_fp
1867 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1868 
1869 #endif // KMP_HAVE_QUAD
1870 
1871 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1872 // ------------------------------------------------------------------------
1873 // X86 or X86_64: no alignment problems ====================================
1874 #if USE_CMPXCHG_FIX
1875 // workaround for C78287 (complex(kind=4) data type)
1876 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1877  LCK_ID, MASK, GOMP_FLAG) \
1878  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1879  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1880  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1881  }
1882 // end of the second part of the workaround for C78287
1883 #else
1884 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1885  LCK_ID, MASK, GOMP_FLAG) \
1886  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1887  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1888  OP_CMPXCHG(TYPE, BITS, OP) \
1889  }
1890 #endif // USE_CMPXCHG_FIX
1891 #else
1892 // ------------------------------------------------------------------------
1893 // Code for other architectures that don't handle unaligned accesses.
1894 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1895  LCK_ID, MASK, GOMP_FLAG) \
1896  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1897  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1898  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1899  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1900  } else { \
1901  KMP_CHECK_GTID; \
1902  OP_UPDATE_CRITICAL(TYPE, OP, \
1903  LCK_ID) /* unaligned address - use critical */ \
1904  } \
1905  }
1906 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1907 
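// Illustrative sketch (documentation only, not part of the library): the MASK
// argument is consumed only by the non-x86 variant of ATOMIC_CMPXCHG_CMPLX
// above.  It is spliced after "0x", so for the cmplx4 entries below (MASK == 7)
// the test selects 8-byte aligned addresses, which is what the 64-bit
// compare-and-store in OP_CMPXCHG requires; a hypothetical helper with the
// same logic would be:
//
//   static bool cmplx4_can_use_cmpxchg(const void *lhs) {
//     return ((kmp_uintptr_t)lhs & 0x7) == 0; // 8-byte aligned -> lock-free
//   }
//
// Unaligned addresses fall back to the per-type critical section instead.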
1908 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1909  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1910 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1911  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1912 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1913  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1914 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1915  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1916 
1917 // READ, WRITE, CAPTURE
1918 
1919 // ------------------------------------------------------------------------
1920 // Atomic READ routines
1921 
1922 // ------------------------------------------------------------------------
1923 // Beginning of a definition (provides name, parameters, debug trace)
1924 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1925 // fixed)
1926 // OP_ID - operation identifier (add, sub, mul, ...)
1927 // TYPE - operands' type
1928 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1929  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1930  TYPE *loc) { \
1931  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1932  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1933 
1934 // ------------------------------------------------------------------------
1935 // Atomic read of *loc using the "compare_and_store_ret" routine
1936 // TYPE - operands' type
1937 // BITS - size in bits, used to distinguish low level calls
1938 // OP - operator
1939 // Note: temp_val introduced in order to force the compiler to read
1940 // *loc only once (w/o it the compiler reads *loc twice)
1941 // TODO: check if it is still necessary
1942 // Return old value regardless of the result of the "compare & swap" operation
1943 #define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1944  { \
1945  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1946  union f_i_union { \
1947  TYPE f_val; \
1948  kmp_int##BITS i_val; \
1949  }; \
1950  union f_i_union old_value; \
1951  temp_val = *loc; \
1952  old_value.f_val = temp_val; \
1953  old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1954  (kmp_int##BITS *)loc, \
1955  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1956  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1957  new_value = old_value.f_val; \
1958  return new_value; \
1959  }
1960 
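// Illustrative sketch (documentation only): the effect of OP_CMPXCHG_READ for
// a 64-bit floating type, with the volatile temp_val detail omitted.  The
// compare-and-store uses the same value as both the expected and the new
// contents, so *loc is never modified, but KMP_COMPARE_AND_STORE_RET64
// returns the current contents atomically; the union then reinterprets those
// bits as the floating-point result:
//
//   union f_i_union { kmp_real64 f_val; kmp_int64 i_val; } old_value;
//   old_value.f_val = *loc;                               // initial guess
//   old_value.i_val = KMP_COMPARE_AND_STORE_RET64(
//       (kmp_int64 *)loc, old_value.i_val, old_value.i_val); // no-op exchange
//   return old_value.f_val;                               // atomic 64-bit read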
1961 // -------------------------------------------------------------------------
1962 // Atomic read of *loc bound by critical section
1963 // OP - operator (it's supposed to contain an assignment)
1964 // LCK_ID - lock identifier
1965 // Note: don't check gtid as it should always be valid
1966 // 1, 2-byte - expect valid parameter, other - check before this macro
1967 #define OP_CRITICAL_READ(OP, LCK_ID) \
1968  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1969  \
1970  new_value = (*loc); \
1971  \
1972  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1973 
1974 // -------------------------------------------------------------------------
1975 #ifdef KMP_GOMP_COMPAT
1976 #define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1977  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1978  KMP_CHECK_GTID; \
1979  OP_CRITICAL_READ(OP, 0); \
1980  return new_value; \
1981  }
1982 #else
1983 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1984 #endif /* KMP_GOMP_COMPAT */
1985 
1986 // -------------------------------------------------------------------------
1987 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1988  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1989  TYPE new_value; \
1990  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1991  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1992  return new_value; \
1993  }
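// Illustrative note (documentation only): for the fixed-point readers defined
// with ATOMIC_FIXED_READ further below, OP is '+', so the body reduces to an
// atomic fetch-and-add of zero whose return value is the current contents of
// *loc.  Roughly, for the 4-byte case with GOMP_FLAG == 0:
//
//   kmp_int32 __kmpc_atomic_fixed4_rd(ident_t *id_ref, int gtid,
//                                     kmp_int32 *loc) {
//     return KMP_TEST_THEN_ADD32(loc, +0); // fetch-and-add 0 == atomic load
//   }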
1994 // -------------------------------------------------------------------------
1995 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1996  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1997  TYPE new_value; \
1998  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1999  OP_CMPXCHG_READ(TYPE, BITS, OP) \
2000  }
2001 // ------------------------------------------------------------------------
2002 // Routines for Extended types: long double, _Quad, complex flavours (use
2003 // critical section)
2004 // TYPE_ID, OP_ID, TYPE - detailed above
2005 // OP - operator
2006 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2007 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2008  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
2009  TYPE new_value; \
2010  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
2011  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
2012  return new_value; \
2013  }
2014 
2015 // ------------------------------------------------------------------------
2016 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
2017 // value doesn't work.
2018 // Let's return the read value through the additional parameter.
2019 #if (KMP_OS_WINDOWS)
2020 
2021 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
2022  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2023  \
2024  (*out) = (*loc); \
2025  \
2026  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2027 // ------------------------------------------------------------------------
2028 #ifdef KMP_GOMP_COMPAT
2029 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
2030  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2031  KMP_CHECK_GTID; \
2032  OP_CRITICAL_READ_WRK(OP, 0); \
2033  }
2034 #else
2035 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
2036 #endif /* KMP_GOMP_COMPAT */
2037 // ------------------------------------------------------------------------
2038 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2039  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
2040  TYPE *loc) { \
2041  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2042  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2043 
2044 // ------------------------------------------------------------------------
2045 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2046  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2047  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
2048  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
2049  }
2050 
2051 #endif // KMP_OS_WINDOWS
2052 
2053 // ------------------------------------------------------------------------
2054 // TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
2055 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2056 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2057  KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2058 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2059  KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2060 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2061  KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2062 
2063 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2064 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2065  KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2066 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2067  KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2068 
2069 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2070  1) // __kmpc_atomic_float10_rd
2071 #if KMP_HAVE_QUAD
2072 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2073  1) // __kmpc_atomic_float16_rd
2074 #endif // KMP_HAVE_QUAD
2075 
2076 // Fix for CQ220361 on Windows* OS
2077 #if (KMP_OS_WINDOWS)
2078 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2079  1) // __kmpc_atomic_cmplx4_rd
2080 #else
2081 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2082  1) // __kmpc_atomic_cmplx4_rd
2083 #endif // (KMP_OS_WINDOWS)
2084 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2085  1) // __kmpc_atomic_cmplx8_rd
2086 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2087  1) // __kmpc_atomic_cmplx10_rd
2088 #if KMP_HAVE_QUAD
2089 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2090  1) // __kmpc_atomic_cmplx16_rd
2091 #if (KMP_ARCH_X86)
2092 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2093  1) // __kmpc_atomic_float16_a16_rd
2094 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2095  1) // __kmpc_atomic_cmplx16_a16_rd
2096 #endif // (KMP_ARCH_X86)
2097 #endif // KMP_HAVE_QUAD
2098 
2099 // ------------------------------------------------------------------------
2100 // Atomic WRITE routines
2101 
2102 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2103  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2104  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2105  KMP_XCHG_FIXED##BITS(lhs, rhs); \
2106  }
2107 // ------------------------------------------------------------------------
2108 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2109  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2110  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2111  KMP_XCHG_REAL##BITS(lhs, rhs); \
2112  }
2113 
2114 // ------------------------------------------------------------------------
2115 // Operation on *lhs, rhs using "compare_and_store" routine
2116 // TYPE - operands' type
2117 // BITS - size in bits, used to distinguish low level calls
2118 // OP - operator
2119 // Note: temp_val introduced in order to force the compiler to read
2120 // *lhs only once (w/o it the compiler reads *lhs twice)
2121 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2122  { \
2123  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2124  TYPE old_value, new_value; \
2125  temp_val = *lhs; \
2126  old_value = temp_val; \
2127  new_value = rhs; \
2128  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2129  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2130  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2131  temp_val = *lhs; \
2132  old_value = temp_val; \
2133  new_value = rhs; \
2134  } \
2135  }
2136 
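// Illustrative sketch (documentation only): OP_CMPXCHG_WR implements an
// atomic store as a compare-and-swap loop - reread *lhs until the exchange
// from the observed old value to rhs succeeds.  Roughly, for the 64-bit
// integer case (the real macro additionally goes through the volatile
// temp_val and pointer casts so that floating types reuse the same path):
//
//   kmp_int64 old_value = *lhs;
//   while (!KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)lhs, old_value, rhs))
//     old_value = *lhs; // another thread wrote in between; retry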
2137 // -------------------------------------------------------------------------
2138 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2139  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2140  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2141  OP_CMPXCHG_WR(TYPE, BITS, OP) \
2142  }
2143 
2144 // ------------------------------------------------------------------------
2145 // Routines for Extended types: long double, _Quad, complex flavours (use
2146 // critical section)
2147 // TYPE_ID, OP_ID, TYPE - detailed above
2148 // OP - operator
2149 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2150 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2151  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2152  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2153  OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2154  }
2155 // -------------------------------------------------------------------------
2156 
2157 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2158  KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2159 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2160  KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2161 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2162  KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2163 #if (KMP_ARCH_X86)
2164 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2165  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2166 #else
2167 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2168  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2169 #endif // (KMP_ARCH_X86)
2170 
2171 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2172  KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2173 #if (KMP_ARCH_X86)
2174 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2175  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2176 #else
2177 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2178  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2179 #endif // (KMP_ARCH_X86)
2180 
2181 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2182  1) // __kmpc_atomic_float10_wr
2183 #if KMP_HAVE_QUAD
2184 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2185  1) // __kmpc_atomic_float16_wr
2186 #endif // KMP_HAVE_QUAD
2187 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2188 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2189  1) // __kmpc_atomic_cmplx8_wr
2190 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2191  1) // __kmpc_atomic_cmplx10_wr
2192 #if KMP_HAVE_QUAD
2193 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2194  1) // __kmpc_atomic_cmplx16_wr
2195 #if (KMP_ARCH_X86)
2196 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2197  1) // __kmpc_atomic_float16_a16_wr
2198 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2199  1) // __kmpc_atomic_cmplx16_a16_wr
2200 #endif // (KMP_ARCH_X86)
2201 #endif // KMP_HAVE_QUAD
2202 
2203 // ------------------------------------------------------------------------
2204 // Atomic CAPTURE routines
2205 
2206 // Beginning of a definition (provides name, parameters, debug trace)
2207 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2208 // fixed)
2209 // OP_ID - operation identifier (add, sub, mul, ...)
2210 // TYPE - operands' type
2211 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2212  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2213  TYPE *lhs, TYPE rhs, int flag) { \
2214  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2215  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2216 
2217 // -------------------------------------------------------------------------
2218 // Operation on *lhs, rhs bound by critical section
2219 // OP - operator (it's supposed to contain an assignment)
2220 // LCK_ID - lock identifier
2221 // Note: don't check gtid as it should always be valid
2222 // 1, 2-byte - expect valid parameter, other - check before this macro
2223 #define OP_CRITICAL_CPT(OP, LCK_ID) \
2224  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2225  \
2226  if (flag) { \
2227  (*lhs) OP rhs; \
2228  new_value = (*lhs); \
2229  } else { \
2230  new_value = (*lhs); \
2231  (*lhs) OP rhs; \
2232  } \
2233  \
2234  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2235  return new_value;
2236 
2237 #define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
2238  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2239  \
2240  if (flag) { \
2241  (*lhs) = (TYPE)((*lhs)OP rhs); \
2242  new_value = (*lhs); \
2243  } else { \
2244  new_value = (*lhs); \
2245  (*lhs) = (TYPE)((*lhs)OP rhs); \
2246  } \
2247  \
2248  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2249  return new_value;
2250 
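// Illustrative note (documentation only): in both critical-section capture
// macros above, 'flag' selects which value the routine hands back.  With
// flag != 0 the update happens first and the new value is returned
// (v = (x OP= rhs)); with flag == 0 the pre-update value is captured
// ({ v = x; x OP= rhs; }).  Worked example with *lhs == 10, rhs == 5, OP '+':
//   flag != 0  ->  *lhs becomes 15, the routine returns 15
//   flag == 0  ->  *lhs becomes 15, the routine returns 10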
2251 // ------------------------------------------------------------------------
2252 #ifdef KMP_GOMP_COMPAT
2253 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
2254  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2255  KMP_CHECK_GTID; \
2256  OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
2257  }
2258 #else
2259 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
2260 #endif /* KMP_GOMP_COMPAT */
2261 
2262 // ------------------------------------------------------------------------
2263 // Operation on *lhs, rhs using "compare_and_store" routine
2264 // TYPE - operands' type
2265 // BITS - size in bits, used to distinguish low level calls
2266 // OP - operator
2267 // Note: temp_val introduced in order to force the compiler to read
2268 // *lhs only once (w/o it the compiler reads *lhs twice)
2269 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2270  { \
2271  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2272  TYPE old_value, new_value; \
2273  temp_val = *lhs; \
2274  old_value = temp_val; \
2275  new_value = (TYPE)(old_value OP rhs); \
2276  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2277  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2278  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2279  temp_val = *lhs; \
2280  old_value = temp_val; \
2281  new_value = (TYPE)(old_value OP rhs); \
2282  } \
2283  if (flag) { \
2284  return new_value; \
2285  } else \
2286  return old_value; \
2287  }
2288 
2289 // -------------------------------------------------------------------------
2290 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2291  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2292  TYPE new_value; \
2293  (void)new_value; \
2294  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2295  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2296  }
2297 
2298 // -------------------------------------------------------------------------
2299 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2300  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2301  TYPE old_value, new_value; \
2302  (void)new_value; \
2303  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2304  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2305  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2306  if (flag) { \
2307  return old_value OP rhs; \
2308  } else \
2309  return old_value; \
2310  }
2311 // -------------------------------------------------------------------------
2312 
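// Illustrative sketch (documentation only, hypothetical caller): one way an
// OpenMP compiler could lower an atomic capture onto the fixed4 entry point
// generated just below; the names x, e, v, loc_ref and gtid are placeholders.
//
//   // #pragma omp atomic capture
//   // { v = x; x = x + e; }        // capture the old value -> flag == 0
//   v = __kmpc_atomic_fixed4_add_cpt(&loc_ref, gtid, &x, e, /*flag=*/0);
//
//   // v = x += e;                  // capture the new value -> flag == 1
//   v = __kmpc_atomic_fixed4_add_cpt(&loc_ref, gtid, &x, e, /*flag=*/1);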
2313 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2314  0) // __kmpc_atomic_fixed4_add_cpt
2315 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2316  0) // __kmpc_atomic_fixed4_sub_cpt
2317 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2318  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2319 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2320  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2321 
2322 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2323  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2324 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2325  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2326 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2327  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2328 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2329  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2330 
2331 // ------------------------------------------------------------------------
2332 // Entries definition for integer operands
2333 // TYPE_ID - operands type and size (fixed4, float4)
2334 // OP_ID - operation identifier (add, sub, mul, ...)
2335 // TYPE - operand type
2336 // BITS - size in bits, used to distinguish low level calls
2337 // OP - operator (used in critical section)
2338 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
2339 // ------------------------------------------------------------------------
2340 // Routines for ATOMIC integer operands, other operators
2341 // ------------------------------------------------------------------------
2342 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2343 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2344  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2345 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2346  0) // __kmpc_atomic_fixed1_andb_cpt
2347 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2348  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2349 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2350  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2351 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2352  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2353 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2354  0) // __kmpc_atomic_fixed1_orb_cpt
2355 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2356  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2357 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2358  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2359 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2360  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2361 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2362  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2363 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2364  0) // __kmpc_atomic_fixed1_xor_cpt
2365 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2366  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2367 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2368  0) // __kmpc_atomic_fixed2_andb_cpt
2369 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2370  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2371 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2372  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2373 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2374  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2375 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2376  0) // __kmpc_atomic_fixed2_orb_cpt
2377 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2378  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2379 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2380  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2381 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2382  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2383 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2384  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2385 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2386  0) // __kmpc_atomic_fixed2_xor_cpt
2387 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2388  0) // __kmpc_atomic_fixed4_andb_cpt
2389 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2390  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2391 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2392  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2393 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2394  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2395 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2396  0) // __kmpc_atomic_fixed4_orb_cpt
2397 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2398  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2399 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2400  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2401 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2402  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2403 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2404  0) // __kmpc_atomic_fixed4_xor_cpt
2405 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2406  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2407 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2408  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2409 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2410  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2411 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2412  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2413 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2414  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2415 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2416  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2417 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2418  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2419 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2420  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2421 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2422  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2423 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2424  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2425 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2426  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2427 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2428  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2429 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2430  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2431 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2432 
2433 // CAPTURE routines for mixed types RHS=float16
2434 #if KMP_HAVE_QUAD
2435 
2436 // Beginning of a definition (provides name, parameters, debug trace)
2437 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2438 // fixed)
2439 // OP_ID - operation identifier (add, sub, mul, ...)
2440 // TYPE - operands' type
2441 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2442  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2443  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2444  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2445  KA_TRACE(100, \
2446  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2447  gtid));
2448 
2449 // -------------------------------------------------------------------------
2450 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2451  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2452  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2453  TYPE new_value; \
2454  (void)new_value; \
2455  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2456  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2457  }
2458 
2459 // -------------------------------------------------------------------------
2460 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2461  LCK_ID, GOMP_FLAG) \
2462  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2463  TYPE new_value; \
2464  (void)new_value; \
2465  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2466  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2467  }
2468 
2469 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2470  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2471 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2472  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2473 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2474  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2475 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2476  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2477 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2478  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2479 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2480  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2481 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2482  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2483 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2484  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2485 
2486 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2487  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2488 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2489  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2490 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2491  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2492 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2493  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2494 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2495  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2496 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2497  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2498 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2499  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2500 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2501  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2502 
2503 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2504  0) // __kmpc_atomic_fixed4_add_cpt_fp
2505 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2506  0) // __kmpc_atomic_fixed4u_add_cpt_fp
2507 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2508  0) // __kmpc_atomic_fixed4_sub_cpt_fp
2509 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2510  0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2511 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2512  0) // __kmpc_atomic_fixed4_mul_cpt_fp
2513 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2514  0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2515 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2516  0) // __kmpc_atomic_fixed4_div_cpt_fp
2517 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2518  0) // __kmpc_atomic_fixed4u_div_cpt_fp
2519 
2520 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2521  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2522 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2523  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2524 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2525  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2526 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2527  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2528 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2529  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2530 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2531  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2532 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2533  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2534 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2535  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2536 
2537 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2538  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2539 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2540  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2541 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2542  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2543 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2544  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2545 
2546 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2547  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2548 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2549  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2550 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2551  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2552 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2553  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2554 
2555 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2556  1) // __kmpc_atomic_float10_add_cpt_fp
2557 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2558  1) // __kmpc_atomic_float10_sub_cpt_fp
2559 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2560  1) // __kmpc_atomic_float10_mul_cpt_fp
2561 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2562  1) // __kmpc_atomic_float10_div_cpt_fp
2563 
2564 #endif // KMP_HAVE_QUAD
2565 
2566 // ------------------------------------------------------------------------
2567 // Routines for C/C++ Reduction operators && and ||
2568 
2569 // -------------------------------------------------------------------------
2570 // Operation on *lhs, rhs bound by critical section
2571 // OP - operator (it's supposed to contain an assignment)
2572 // LCK_ID - lock identifier
2573 // Note: don't check gtid as it should always be valid
2574 // 1, 2-byte - expect valid parameter, other - check before this macro
2575 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2576  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2577  \
2578  if (flag) { \
2579  new_value OP rhs; \
2580  (*lhs) = new_value; \
2581  } else { \
2582  new_value = (*lhs); \
2583  (*lhs) OP rhs; \
2584  } \
2585  \
2586  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2587 
2588 // ------------------------------------------------------------------------
2589 #ifdef KMP_GOMP_COMPAT
2590 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2591  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2592  KMP_CHECK_GTID; \
2593  OP_CRITICAL_L_CPT(OP, 0); \
2594  return new_value; \
2595  }
2596 #else
2597 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2598 #endif /* KMP_GOMP_COMPAT */
2599 
2600 // ------------------------------------------------------------------------
2601 // Need separate macros for &&, || because there is no combined assignment
2602 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2603  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2604  TYPE new_value; \
2605  (void)new_value; \
2606  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2607  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2608  }
2609 
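// Illustrative note (documentation only): C has no '&&=' / '||=' operator, so
// the GOMP fallback is handed the complete assignment '= *lhs OP' while the
// lock-free path reuses OP_CMPXCHG_CPT with the bare logical operator.  For
// the fixed4_andl_cpt entry below the update step is effectively
//   new_value = (kmp_int32)(old_value && rhs);
// installed with a 32-bit compare-and-store as in the other capture routines.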
2610 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2611  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2612 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2613  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2614 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2615  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2616 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2617  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2618 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2619  0) // __kmpc_atomic_fixed4_andl_cpt
2620 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2621  0) // __kmpc_atomic_fixed4_orl_cpt
2622 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2623  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2624 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2625  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2626 
2627 // -------------------------------------------------------------------------
2628 // Routines for Fortran operators that have no direct C counterpart:
2629 // MAX, MIN, .EQV., .NEQV.
2630 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2631 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2632 
2633 // -------------------------------------------------------------------------
2634 // MIN and MAX need separate macros
2635 // OP - comparison operator used to check whether any action is needed
2636 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2637  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2638  \
2639  if (*lhs OP rhs) { /* still need actions? */ \
2640  old_value = *lhs; \
2641  *lhs = rhs; \
2642  if (flag) \
2643  new_value = rhs; \
2644  else \
2645  new_value = old_value; \
2646  } else { \
2647  new_value = *lhs; \
2648  } \
2649  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2650  return new_value;
2651 
2652 // -------------------------------------------------------------------------
2653 #ifdef KMP_GOMP_COMPAT
2654 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
2655  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2656  KMP_CHECK_GTID; \
2657  MIN_MAX_CRITSECT_CPT(OP, 0); \
2658  }
2659 #else
2660 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2661 #endif /* KMP_GOMP_COMPAT */
2662 
2663 // -------------------------------------------------------------------------
2664 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2665  { \
2666  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2667  /*TYPE old_value; */ \
2668  temp_val = *lhs; \
2669  old_value = temp_val; \
2670  while (old_value OP rhs && /* still need actions? */ \
2671  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
2672  (kmp_int##BITS *)lhs, \
2673  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2674  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
2675  temp_val = *lhs; \
2676  old_value = temp_val; \
2677  } \
2678  if (flag) \
2679  return rhs; \
2680  else \
2681  return old_value; \
2682  }
2683 
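// Illustrative note (documentation only): the loop above retries the
// compare-and-store only while 'old_value OP rhs' still holds, i.e. while rhs
// would still replace the current extremum.  For the max_cpt entries below OP
// is '<': with *lhs == 3 and rhs == 7, 3 < 7 holds and 7 is installed; if
// another thread meanwhile stored 9, the reread yields 9, 9 < 7 fails and the
// loop exits without writing.  Either way the block then returns rhs when
// flag is set and the last observed old_value otherwise.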
2684 // -------------------------------------------------------------------------
2685 // 1-byte, 2-byte operands - use critical section
2686 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2687  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2688  TYPE new_value, old_value; \
2689  if (*lhs OP rhs) { /* need actions? */ \
2690  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2691  MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2692  } \
2693  return *lhs; \
2694  }
2695 
2696 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2697  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2698  TYPE new_value, old_value; \
2699  (void)new_value; \
2700  if (*lhs OP rhs) { \
2701  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2702  MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2703  } \
2704  return *lhs; \
2705  }
2706 
2707 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2708  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2709 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2710  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2711 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2712  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2713 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2714  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2715 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2716  0) // __kmpc_atomic_fixed4_max_cpt
2717 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2718  0) // __kmpc_atomic_fixed4_min_cpt
2719 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2720  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2721 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2722  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2723 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2724  KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2725 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2726  KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2727 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2728  KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2729 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2730  KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2731 MIN_MAX_CRITICAL_CPT(float10, max_cpt, long double, <, 10r,
2732  1) // __kmpc_atomic_float10_max_cpt
2733 MIN_MAX_CRITICAL_CPT(float10, min_cpt, long double, >, 10r,
2734  1) // __kmpc_atomic_float10_min_cpt
2735 #if KMP_HAVE_QUAD
2736 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2737  1) // __kmpc_atomic_float16_max_cpt
2738 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2739  1) // __kmpc_atomic_float16_min_cpt
2740 #if (KMP_ARCH_X86)
2741 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2742  1) // __kmpc_atomic_float16_max_a16_cpt
2743 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2744  1) // __kmpc_atomic_float16_min_a16_cpt
2745 #endif // (KMP_ARCH_X86)
2746 #endif // KMP_HAVE_QUAD
2747 
2748 // ------------------------------------------------------------------------
2749 #ifdef KMP_GOMP_COMPAT
2750 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2751  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2752  KMP_CHECK_GTID; \
2753  OP_CRITICAL_CPT(OP, 0); \
2754  }
2755 #else
2756 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2757 #endif /* KMP_GOMP_COMPAT */
2758 // ------------------------------------------------------------------------
2759 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2760  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2761  TYPE new_value; \
2762  (void)new_value; \
2763  OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
2764  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2765  }
2766 
2767 // ------------------------------------------------------------------------
2768 
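// Illustrative note (documentation only): the eqv_cpt entries below pass the
// operator '^~' into OP_CMPXCHG_CPT, so the update step computes
// old_value ^ ~rhs, which equals ~(old_value ^ rhs) - bitwise equivalence,
// with a result bit set exactly where the two operands agree.  Viewed as
// 8-bit patterns: 0x0F ^ ~0x33 == ~(0x0F ^ 0x33) == 0xC3.  The neqv_cpt
// entries use plain '^' (bitwise non-equivalence).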
2769 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2770  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2771 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2772  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2773 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2774  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2775 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2776  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2777 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2778  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2779 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2780  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2781 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2782  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2783 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2784  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2785 
2786 // ------------------------------------------------------------------------
2787 // Routines for Extended types: long double, _Quad, complex flavours (use
2788 // critical section)
2789 // TYPE_ID, OP_ID, TYPE - detailed above
2790 // OP - operator
2791 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2792 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2793  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2794  TYPE new_value; \
2795  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2796  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2797  }
2798 
2799 // ------------------------------------------------------------------------
2800 // Workaround for cmplx4. Regular routines with return value don't work
2801 // on Win_32e. Let's return captured values through the additional parameter.
2802 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
2803  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2804  \
2805  if (flag) { \
2806  (*lhs) OP rhs; \
2807  (*out) = (*lhs); \
2808  } else { \
2809  (*out) = (*lhs); \
2810  (*lhs) OP rhs; \
2811  } \
2812  \
2813  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2814  return;
2815 // ------------------------------------------------------------------------
2816 
2817 #ifdef KMP_GOMP_COMPAT
2818 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
2819  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2820  KMP_CHECK_GTID; \
2821  OP_CRITICAL_CPT_WRK(OP## =, 0); \
2822  }
2823 #else
2824 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2825 #endif /* KMP_GOMP_COMPAT */
2826 // ------------------------------------------------------------------------
2827 
2828 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2829  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2830  TYPE rhs, TYPE *out, int flag) { \
2831  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2832  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2833 // ------------------------------------------------------------------------
2834 
2835 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2836  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2837  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
2838  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
2839  }
2840 // The end of workaround for cmplx4
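// Illustrative sketch (documentation only): unlike the value-returning
// capture routines, the cmplx4 entries generated with the _WRK macros return
// the captured value through the extra 'out' parameter, e.g. the add flavour
// below has roughly this signature:
//
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag);
//
// A hypothetical caller reads *out after the call instead of using a return
// value, which sidesteps the Win_32e return-value problem described above.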
2841 
2842 /* ------------------------------------------------------------------------- */
2843 // routines for long double type
2844 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2845  1) // __kmpc_atomic_float10_add_cpt
2846 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2847  1) // __kmpc_atomic_float10_sub_cpt
2848 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2849  1) // __kmpc_atomic_float10_mul_cpt
2850 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2851  1) // __kmpc_atomic_float10_div_cpt
2852 #if KMP_HAVE_QUAD
2853 // routines for _Quad type
2854 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2855  1) // __kmpc_atomic_float16_add_cpt
2856 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2857  1) // __kmpc_atomic_float16_sub_cpt
2858 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2859  1) // __kmpc_atomic_float16_mul_cpt
2860 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2861  1) // __kmpc_atomic_float16_div_cpt
2862 #if (KMP_ARCH_X86)
2863 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2864  1) // __kmpc_atomic_float16_add_a16_cpt
2865 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2866  1) // __kmpc_atomic_float16_sub_a16_cpt
2867 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2868  1) // __kmpc_atomic_float16_mul_a16_cpt
2869 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2870  1) // __kmpc_atomic_float16_div_a16_cpt
2871 #endif // (KMP_ARCH_X86)
2872 #endif // KMP_HAVE_QUAD
2873 
2874 // routines for complex types
2875 
2876 // cmplx4 routines to return void
2877 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2878  1) // __kmpc_atomic_cmplx4_add_cpt
2879 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2880  1) // __kmpc_atomic_cmplx4_sub_cpt
2881 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2882  1) // __kmpc_atomic_cmplx4_mul_cpt
2883 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2884  1) // __kmpc_atomic_cmplx4_div_cpt
2885 
2886 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2887  1) // __kmpc_atomic_cmplx8_add_cpt
2888 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2889  1) // __kmpc_atomic_cmplx8_sub_cpt
2890 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2891  1) // __kmpc_atomic_cmplx8_mul_cpt
2892 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2893  1) // __kmpc_atomic_cmplx8_div_cpt
2894 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2895  1) // __kmpc_atomic_cmplx10_add_cpt
2896 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2897  1) // __kmpc_atomic_cmplx10_sub_cpt
2898 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2899  1) // __kmpc_atomic_cmplx10_mul_cpt
2900 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2901  1) // __kmpc_atomic_cmplx10_div_cpt
2902 #if KMP_HAVE_QUAD
2903 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2904  1) // __kmpc_atomic_cmplx16_add_cpt
2905 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2906  1) // __kmpc_atomic_cmplx16_sub_cpt
2907 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2908  1) // __kmpc_atomic_cmplx16_mul_cpt
2909 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2910  1) // __kmpc_atomic_cmplx16_div_cpt
2911 #if (KMP_ARCH_X86)
2912 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2913  1) // __kmpc_atomic_cmplx16_add_a16_cpt
2914 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2915  1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2916 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2917  1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2918 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2919  1) // __kmpc_atomic_cmplx16_div_a16_cpt
2920 #endif // (KMP_ARCH_X86)
2921 #endif // KMP_HAVE_QUAD
2922 
2923 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; }
2924 // { x = expr binop x; v = x; } -- for non-commutative operations.
2925 // Supported only on IA-32 architecture and Intel(R) 64
2926 
2927 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2928 // -------------------------------------------------------------------------
2929 // Operation on *lhs, rhs bound by critical section
2930 // OP - operator (it's supposed to contain an assignment)
2931 // LCK_ID - lock identifier
2932 // Note: don't check gtid as it should always be valid
2933 // 1, 2-byte - expect valid parameter, other - check before this macro
2934 #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
2935  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2936  \
2937  if (flag) { \
2938  /*temp_val = (*lhs);*/ \
2939  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2940  new_value = (*lhs); \
2941  } else { \
2942  new_value = (*lhs); \
2943  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2944  } \
2945  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2946  return new_value;
2947 
2948 // ------------------------------------------------------------------------
2949 #ifdef KMP_GOMP_COMPAT
2950 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \
2951  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2952  KMP_CHECK_GTID; \
2953  OP_CRITICAL_CPT_REV(TYPE, OP, 0); \
2954  }
2955 #else
2956 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
2957 #endif /* KMP_GOMP_COMPAT */
2958 
2959 // ------------------------------------------------------------------------
2960 // Operation on *lhs, rhs using "compare_and_store" routine
2961 // TYPE - operands' type
2962 // BITS - size in bits, used to distinguish low level calls
2963 // OP - operator
2964 // Note: temp_val introduced in order to force the compiler to read
2965 // *lhs only once (w/o it the compiler reads *lhs twice)
2966 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2967  { \
2968  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2969  TYPE old_value, new_value; \
2970  temp_val = *lhs; \
2971  old_value = temp_val; \
2972  new_value = (TYPE)(rhs OP old_value); \
2973  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2974  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2975  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2976  temp_val = *lhs; \
2977  old_value = temp_val; \
2978  new_value = (TYPE)(rhs OP old_value); \
2979  } \
2980  if (flag) { \
2981  return new_value; \
2982  } else \
2983  return old_value; \
2984  }
2985 
2986 // -------------------------------------------------------------------------
2987 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2988  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2989  TYPE new_value; \
2990  (void)new_value; \
2991  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
2992  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2993  }
2994 
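// Illustrative note (documentation only): the *_cpt_rev entries below compute
// *lhs = rhs OP *lhs (operands reversed), which only matters for the
// non-commutative operators instantiated here.  Worked example for
// fixed4_sub_cpt_rev with *lhs == 10 and rhs == 3: *lhs becomes 3 - 10 == -7,
// and the routine returns -7 when flag != 0, otherwise the pre-update value
// 10.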
2995 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2996  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2997 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2998  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2999 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
3000  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
3001 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
3002  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
3003 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
3004  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
3005 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
3006  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
3007 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
3008  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
3009 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
3010  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
3011 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
3012  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
3013 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
3014  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
3015 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
3016  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
3017 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
3018  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
3019 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
3020  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
3021 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
3022  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
3023 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
3024  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
3025 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
3026  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
3027 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
3028  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
3029 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
3030  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
3031 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
3032  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
3033 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
3034  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
3035 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
3036  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
3037 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
3038  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
3039 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
3040  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
3041 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
3042  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
3043 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
3044  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
3045 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
3046  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
3047 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
3048  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
3049 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
3050  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
3051 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
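// Each line above instantiates one entry point.  For example, the fixed4
// subtraction line generates, roughly (see ATOMIC_BEGIN_CPT for the exact
// prototype):
//
//   kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev(ident_t *id_ref, int gtid,
//                                              kmp_int32 *lhs, kmp_int32 rhs,
//                                              int flag);
//
// which a compiler may call when lowering
//   #pragma omp atomic capture
//   { x = expr - x; v = x; }
// for a 4-byte signed integer x.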
3052 
3053 // ------------------------------------------------------------------------
3054 // Routines for Extended types: long double, _Quad, complex flavours (use
3055 // critical section)
3056 // TYPE_ID, OP_ID, TYPE - detailed above
3057 // OP - operator
3058 // LCK_ID - lock identifier, used to possibly distinguish lock variable
3059 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
3060  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
3061  TYPE new_value; \
3062  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
3063  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3064  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
3065  }
3066 
3067 /* ------------------------------------------------------------------------- */
3068 // routines for long double type
3069 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
3070  1) // __kmpc_atomic_float10_sub_cpt_rev
3071 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
3072  1) // __kmpc_atomic_float10_div_cpt_rev
3073 #if KMP_HAVE_QUAD
3074 // routines for _Quad type
3075 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
3076  1) // __kmpc_atomic_float16_sub_cpt_rev
3077 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
3078  1) // __kmpc_atomic_float16_div_cpt_rev
3079 #if (KMP_ARCH_X86)
3080 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3081  1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3082 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3083  1) // __kmpc_atomic_float16_div_a16_cpt_rev
3084 #endif // (KMP_ARCH_X86)
3085 #endif // KMP_HAVE_QUAD
3086 
3087 // routines for complex types
3088 
3089 // ------------------------------------------------------------------------
3090 // Workaround for cmplx4. Regular routines with a return value don't work
3091 // on Win_32e, so the captured value is returned through an additional parameter.
3092 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3093  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3094  \
3095  if (flag) { \
3096  (*lhs) = (rhs)OP(*lhs); \
3097  (*out) = (*lhs); \
3098  } else { \
3099  (*out) = (*lhs); \
3100  (*lhs) = (rhs)OP(*lhs); \
3101  } \
3102  \
3103  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3104  return;
3105 // ------------------------------------------------------------------------
3106 
3107 #ifdef KMP_GOMP_COMPAT
3108 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
3109  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3110  KMP_CHECK_GTID; \
3111  OP_CRITICAL_CPT_REV_WRK(OP, 0); \
3112  }
3113 #else
3114 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3115 #endif /* KMP_GOMP_COMPAT */
3116 // ------------------------------------------------------------------------
3117 
3118 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
3119  GOMP_FLAG) \
3120  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
3121  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
3122  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3123  }
3124 // The end of workaround for cmplx4
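// For cmplx4 the generated routines therefore return void and write the
// captured value through 'out'; roughly (see ATOMIC_BEGIN_WRK for the exact
// prototype):
//
//   void __kmpc_atomic_cmplx4_sub_cpt_rev(ident_t *id_ref, int gtid,
//                                         kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                         kmp_cmplx32 *out, int flag);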
3125 
3126 // !!! TODO: check if we need to return void for cmplx4 routines
3127 // cmplx4 routines to return void
3128 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3129  1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3130 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3131  1) // __kmpc_atomic_cmplx4_div_cpt_rev
3132 
3133 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3134  1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3135 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3136  1) // __kmpc_atomic_cmplx8_div_cpt_rev
3137 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3138  1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3139 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3140  1) // __kmpc_atomic_cmplx10_div_cpt_rev
3141 #if KMP_HAVE_QUAD
3142 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3143  1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3144 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3145  1) // __kmpc_atomic_cmplx16_div_cpt_rev
3146 #if (KMP_ARCH_X86)
3147 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3148  1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3149 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3150  1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3151 #endif // (KMP_ARCH_X86)
3152 #endif // KMP_HAVE_QUAD
3153 
3154 // Capture reverse for mixed type: RHS=float16
3155 #if KMP_HAVE_QUAD
3156 
3157 // Beginning of a definition (provides name, parameters, debug trace)
3158 // TYPE_ID - operand's type and size (fixed*, fixed*u for signed, unsigned
3159 // fixed)
3160 // OP_ID - operation identifier (add, sub, mul, ...)
3161 // TYPE - operands' type
3162 // -------------------------------------------------------------------------
3163 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
3164  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
3165  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3166  TYPE new_value; \
3167  (void)new_value; \
3168  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3169  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
3170  }
3171 
3172 // -------------------------------------------------------------------------
3173 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3174  LCK_ID, GOMP_FLAG) \
3175  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3176  TYPE new_value; \
3177  (void)new_value; \
3178  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
3179  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
3180  }
3181 
3182 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3183  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3184 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3185  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3186 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3187  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3188 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3189  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3190 
3191 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3192  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3193 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3194  1,
3195  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3196 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3197  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3198 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3199  1,
3200  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3201 
3202 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3203  3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3204 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3205  4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3206 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3207  3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3208 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3209  4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3210 
3211 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3212  7,
3213  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3214 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3215  8i, 7,
3216  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3217 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3218  7,
3219  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3220 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3221  8i, 7,
3222  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3223 
3224 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3225  4r, 3,
3226  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3227 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3228  4r, 3,
3229  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3230 
3231 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3232  8r, 7,
3233  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3234 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3235  8r, 7,
3236  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3237 
3238 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3239  10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3240 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3241  10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3242 
3243 #endif // KMP_HAVE_QUAD
3244 
3245 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3246 
3247 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3248  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3249  TYPE rhs) { \
3250  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3251  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3252 
3253 #define CRITICAL_SWP(LCK_ID) \
3254  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3255  \
3256  old_value = (*lhs); \
3257  (*lhs) = rhs; \
3258  \
3259  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3260  return old_value;
3261 
3262 // ------------------------------------------------------------------------
3263 #ifdef KMP_GOMP_COMPAT
3264 #define GOMP_CRITICAL_SWP(FLAG) \
3265  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3266  KMP_CHECK_GTID; \
3267  CRITICAL_SWP(0); \
3268  }
3269 #else
3270 #define GOMP_CRITICAL_SWP(FLAG)
3271 #endif /* KMP_GOMP_COMPAT */
3272 
3273 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3274  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3275  TYPE old_value; \
3276  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3277  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
3278  return old_value; \
3279  }
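// For a lock-free size, capture-write reduces to a single atomic exchange.
// A minimal sketch of what one instantiation boils down to (illustration only;
// 'swp_32' is a hypothetical name):
//
//   kmp_int32 swp_32(kmp_int32 *lhs, kmp_int32 rhs) {
//     return KMP_XCHG_FIXED32(lhs, rhs); // returns the previous value of *lhs
//   }
//
// so
//   #pragma omp atomic capture
//   { v = x; x = expr; }
// can be lowered to one call that returns the old value of x.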
3280 // ------------------------------------------------------------------------
3281 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3282  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3283  TYPE old_value; \
3284  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3285  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
3286  return old_value; \
3287  }
3288 
3289 // ------------------------------------------------------------------------
3290 #define CMPXCHG_SWP(TYPE, BITS) \
3291  { \
3292  TYPE KMP_ATOMIC_VOLATILE temp_val; \
3293  TYPE old_value, new_value; \
3294  temp_val = *lhs; \
3295  old_value = temp_val; \
3296  new_value = rhs; \
3297  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
3298  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
3299  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
3300  temp_val = *lhs; \
3301  old_value = temp_val; \
3302  new_value = rhs; \
3303  } \
3304  return old_value; \
3305  }
3306 
3307 // -------------------------------------------------------------------------
3308 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3309  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3310  TYPE old_value; \
3311  (void)old_value; \
3312  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3313  CMPXCHG_SWP(TYPE, BITS) \
3314  }
3315 
3316 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3317 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3318 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3319 
3320 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3321  KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3322 
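// Note: IA-32 has no single-instruction 8-byte exchange, which is presumably
// why the 64-bit swaps below take the compare-and-store path on that
// architecture instead of KMP_XCHG_FIXED64/KMP_XCHG_REAL64.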
3323 #if (KMP_ARCH_X86)
3324 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3325  KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3326 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3327  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3328 #else
3329 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3330 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3331  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3332 #endif // (KMP_ARCH_X86)
3333 
3334 // ------------------------------------------------------------------------
3335 // Routines for Extended types: long double, _Quad, complex flavours (use
3336 // critical section)
3337 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3338  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3339  TYPE old_value; \
3340  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3341  CRITICAL_SWP(LCK_ID) \
3342  }
3343 
3344 // ------------------------------------------------------------------------
3345 // !!! TODO: check if we need to return void for cmplx4 routines
3346 // Workaround for cmplx4. Regular routines with a return value don't work
3347 // on Win_32e, so the captured value is returned through an additional parameter.
3348 
3349 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3350  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3351  TYPE rhs, TYPE *out) { \
3352  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3353  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3354 
3355 #define CRITICAL_SWP_WRK(LCK_ID) \
3356  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3357  \
3358  tmp = (*lhs); \
3359  (*lhs) = (rhs); \
3360  (*out) = tmp; \
3361  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3362  return;
3363 // ------------------------------------------------------------------------
3364 
3365 #ifdef KMP_GOMP_COMPAT
3366 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
3367  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3368  KMP_CHECK_GTID; \
3369  CRITICAL_SWP_WRK(0); \
3370  }
3371 #else
3372 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3373 #endif /* KMP_GOMP_COMPAT */
3374 // ------------------------------------------------------------------------
3375 
3376 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3377  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3378  TYPE tmp; \
3379  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
3380  CRITICAL_SWP_WRK(LCK_ID) \
3381  }
3382 // The end of workaround for cmplx4
3383 
3384 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3385 #if KMP_HAVE_QUAD
3386 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3387 #endif // KMP_HAVE_QUAD
3388 // cmplx4 routine to return void
3389 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
3390 
3391 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
3392 // __kmpc_atomic_cmplx4_swp
3393 
3394 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3395 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3396 #if KMP_HAVE_QUAD
3397 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3398 #if (KMP_ARCH_X86)
3399 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3400  1) // __kmpc_atomic_float16_a16_swp
3401 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3402  1) // __kmpc_atomic_cmplx16_a16_swp
3403 #endif // (KMP_ARCH_X86)
3404 #endif // KMP_HAVE_QUAD
3405 
3406 // End of OpenMP 4.0 Capture
3407 
3408 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3409 
3410 #undef OP_CRITICAL
3411 
3412 /* ------------------------------------------------------------------------ */
3413 /* Generic atomic routines */
3414 
3415 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3416  void (*f)(void *, void *, void *)) {
3417  KMP_DEBUG_ASSERT(__kmp_init_serial);
3418 
3419  if (
3420 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3421  FALSE /* must use lock */
3422 #else
3423  TRUE
3424 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3425  ) {
3426  kmp_int8 old_value, new_value;
3427 
3428  old_value = *(kmp_int8 *)lhs;
3429  (*f)(&new_value, &old_value, rhs);
3430 
3431  /* TODO: Should this be acquire or release? */
3432  while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3433  *(kmp_int8 *)&new_value)) {
3434  KMP_CPU_PAUSE();
3435 
3436  old_value = *(kmp_int8 *)lhs;
3437  (*f)(&new_value, &old_value, rhs);
3438  }
3439 
3440  return;
3441  } else {
3442  // All 1-byte data is of integer data type.
3443 
3444 #ifdef KMP_GOMP_COMPAT
3445  if (__kmp_atomic_mode == 2) {
3446  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3447  } else
3448 #endif /* KMP_GOMP_COMPAT */
3449  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3450 
3451  (*f)(lhs, lhs, rhs);
3452 
3453 #ifdef KMP_GOMP_COMPAT
3454  if (__kmp_atomic_mode == 2) {
3455  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3456  } else
3457 #endif /* KMP_GOMP_COMPAT */
3458  __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3459  }
3460 }
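// Usage sketch for the generic entry points (illustration only; 'xor_char',
// 'x' and 'operand' are hypothetical): the compiler supplies a callback that
// computes "*out = *lhs_val OP *rhs", and the runtime applies it either inside
// the CAS loop above or under the size-class lock.
//
//   static void xor_char(void *out, void *lhs_val, void *rhs) {
//     *(char *)out = (char)(*(char *)lhs_val ^ *(char *)rhs);
//   }
//   ...
//   __kmpc_atomic_1(id_ref, gtid, &x, &operand, xor_char); // x ^= operand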
3461 
3462 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3463  void (*f)(void *, void *, void *)) {
3464  if (
3465 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3466  FALSE /* must use lock */
3467 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3468  TRUE /* no alignment problems */
3469 #else
3470  !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3471 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3472  ) {
3473  kmp_int16 old_value, new_value;
3474 
3475  old_value = *(kmp_int16 *)lhs;
3476  (*f)(&new_value, &old_value, rhs);
3477 
3478  /* TODO: Should this be acquire or release? */
3479  while (!KMP_COMPARE_AND_STORE_ACQ16(
3480  (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3481  KMP_CPU_PAUSE();
3482 
3483  old_value = *(kmp_int16 *)lhs;
3484  (*f)(&new_value, &old_value, rhs);
3485  }
3486 
3487  return;
3488  } else {
3489  // All 2-byte data is of integer data type.
3490 
3491 #ifdef KMP_GOMP_COMPAT
3492  if (__kmp_atomic_mode == 2) {
3493  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3494  } else
3495 #endif /* KMP_GOMP_COMPAT */
3496  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3497 
3498  (*f)(lhs, lhs, rhs);
3499 
3500 #ifdef KMP_GOMP_COMPAT
3501  if (__kmp_atomic_mode == 2) {
3502  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3503  } else
3504 #endif /* KMP_GOMP_COMPAT */
3505  __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3506  }
3507 }
3508 
3509 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3510  void (*f)(void *, void *, void *)) {
3511  KMP_DEBUG_ASSERT(__kmp_init_serial);
3512 
3513  if (
3514 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3515 // Gomp compatibility is broken if this routine is called for floats.
3516 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3517  TRUE /* no alignment problems */
3518 #else
3519  !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3520 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3521  ) {
3522  kmp_int32 old_value, new_value;
3523 
3524  old_value = *(kmp_int32 *)lhs;
3525  (*f)(&new_value, &old_value, rhs);
3526 
3527  /* TODO: Should this be acquire or release? */
3528  while (!KMP_COMPARE_AND_STORE_ACQ32(
3529  (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3530  KMP_CPU_PAUSE();
3531 
3532  old_value = *(kmp_int32 *)lhs;
3533  (*f)(&new_value, &old_value, rhs);
3534  }
3535 
3536  return;
3537  } else {
3538  // Use __kmp_atomic_lock_4i for all 4-byte data,
3539  // even if it isn't of integer data type.
3540 
3541 #ifdef KMP_GOMP_COMPAT
3542  if (__kmp_atomic_mode == 2) {
3543  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3544  } else
3545 #endif /* KMP_GOMP_COMPAT */
3546  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3547 
3548  (*f)(lhs, lhs, rhs);
3549 
3550 #ifdef KMP_GOMP_COMPAT
3551  if (__kmp_atomic_mode == 2) {
3552  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3553  } else
3554 #endif /* KMP_GOMP_COMPAT */
3555  __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3556  }
3557 }
3558 
3559 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3560  void (*f)(void *, void *, void *)) {
3561  KMP_DEBUG_ASSERT(__kmp_init_serial);
3562  if (
3563 
3564 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3565  FALSE /* must use lock */
3566 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3567  TRUE /* no alignment problems */
3568 #else
3569  !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3570 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3571  ) {
3572  kmp_int64 old_value, new_value;
3573 
3574  old_value = *(kmp_int64 *)lhs;
3575  (*f)(&new_value, &old_value, rhs);
3576  /* TODO: Should this be acquire or release? */
3577  while (!KMP_COMPARE_AND_STORE_ACQ64(
3578  (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3579  KMP_CPU_PAUSE();
3580 
3581  old_value = *(kmp_int64 *)lhs;
3582  (*f)(&new_value, &old_value, rhs);
3583  }
3584 
3585  return;
3586  } else {
3587  // Use __kmp_atomic_lock_8i for all 8-byte data,
3588  // even if it isn't of integer data type.
3589 
3590 #ifdef KMP_GOMP_COMPAT
3591  if (__kmp_atomic_mode == 2) {
3592  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3593  } else
3594 #endif /* KMP_GOMP_COMPAT */
3595  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3596 
3597  (*f)(lhs, lhs, rhs);
3598 
3599 #ifdef KMP_GOMP_COMPAT
3600  if (__kmp_atomic_mode == 2) {
3601  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3602  } else
3603 #endif /* KMP_GOMP_COMPAT */
3604  __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3605  }
3606 }
3607 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3608 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3609  void (*f)(void *, void *, void *)) {
3610  KMP_DEBUG_ASSERT(__kmp_init_serial);
3611 
3612 #ifdef KMP_GOMP_COMPAT
3613  if (__kmp_atomic_mode == 2) {
3614  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3615  } else
3616 #endif /* KMP_GOMP_COMPAT */
3617  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3618 
3619  (*f)(lhs, lhs, rhs);
3620 
3621 #ifdef KMP_GOMP_COMPAT
3622  if (__kmp_atomic_mode == 2) {
3623  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3624  } else
3625 #endif /* KMP_GOMP_COMPAT */
3626  __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3627 }
3628 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3629 
3630 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3631  void (*f)(void *, void *, void *)) {
3632  KMP_DEBUG_ASSERT(__kmp_init_serial);
3633 
3634 #ifdef KMP_GOMP_COMPAT
3635  if (__kmp_atomic_mode == 2) {
3636  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3637  } else
3638 #endif /* KMP_GOMP_COMPAT */
3639  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3640 
3641  (*f)(lhs, lhs, rhs);
3642 
3643 #ifdef KMP_GOMP_COMPAT
3644  if (__kmp_atomic_mode == 2) {
3645  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3646  } else
3647 #endif /* KMP_GOMP_COMPAT */
3648  __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3649 }
3650 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3651 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3652  void (*f)(void *, void *, void *)) {
3653  KMP_DEBUG_ASSERT(__kmp_init_serial);
3654 
3655 #ifdef KMP_GOMP_COMPAT
3656  if (__kmp_atomic_mode == 2) {
3657  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3658  } else
3659 #endif /* KMP_GOMP_COMPAT */
3660  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3661 
3662  (*f)(lhs, lhs, rhs);
3663 
3664 #ifdef KMP_GOMP_COMPAT
3665  if (__kmp_atomic_mode == 2) {
3666  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3667  } else
3668 #endif /* KMP_GOMP_COMPAT */
3669  __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3670 }
3671 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3672 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3673  void (*f)(void *, void *, void *)) {
3674  KMP_DEBUG_ASSERT(__kmp_init_serial);
3675 
3676 #ifdef KMP_GOMP_COMPAT
3677  if (__kmp_atomic_mode == 2) {
3678  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3679  } else
3680 #endif /* KMP_GOMP_COMPAT */
3681  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3682 
3683  (*f)(lhs, lhs, rhs);
3684 
3685 #ifdef KMP_GOMP_COMPAT
3686  if (__kmp_atomic_mode == 2) {
3687  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3688  } else
3689 #endif /* KMP_GOMP_COMPAT */
3690  __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3691 }
3692 
3693 // AC: same two routines as GOMP_atomic_start/end, but will be called by our
3694 // compiler; duplicated so as not to use third-party names in pure Intel code
3695 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3696 void __kmpc_atomic_start(void) {
3697  int gtid = __kmp_entry_gtid();
3698  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3699  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3700 }
3701 
3702 void __kmpc_atomic_end(void) {
3703  int gtid = __kmp_get_gtid();
3704  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3705  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3706 }
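// Usage sketch (illustration only; 'some_complicated_update' is hypothetical):
// a compiler that cannot lower an atomic update to one of the typed entry
// points may bracket it with these calls, serializing on __kmp_atomic_lock:
//
//   __kmpc_atomic_start();
//   x = some_complicated_update(x);
//   __kmpc_atomic_end();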
3707 
3708 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3709 
3710 // OpenMP 5.1 compare and swap
3711 
3726 bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
3727  return KMP_COMPARE_AND_STORE_ACQ8(x, e, d);
3728 }
3729 bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e,
3730  short d) {
3731  return KMP_COMPARE_AND_STORE_ACQ16(x, e, d);
3732 }
3733 bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e,
3734  kmp_int32 d) {
3735  return KMP_COMPARE_AND_STORE_ACQ32(x, e, d);
3736 }
3737 bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e,
3738  kmp_int64 d) {
3739  return KMP_COMPARE_AND_STORE_ACQ64(x, e, d);
3740 }
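// The bool_*_cas routines implement the non-capturing "atomic compare" form;
// a sketch for a 4-byte integer x:
//
//   #pragma omp atomic compare
//   if (x == e) { x = d; }
//
// may lower to __kmpc_atomic_bool_4_cas(loc, gtid, &x, e, d), which reports
// whether the store was performed.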
3741 
3756 char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
3757  return KMP_COMPARE_AND_STORE_RET8(x, e, d);
3758 }
3759 short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e,
3760  short d) {
3761  return KMP_COMPARE_AND_STORE_RET16(x, e, d);
3762 }
3763 kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x,
3764  kmp_int32 e, kmp_int32 d) {
3765  return KMP_COMPARE_AND_STORE_RET32(x, e, d);
3766 }
3767 kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x,
3768  kmp_int64 e, kmp_int64 d) {
3769  return KMP_COMPARE_AND_STORE_RET64(x, e, d);
3770 }
3771 
3788 bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3789  char d, char *pv) {
3790  char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3791  if (old == e)
3792  return true;
3793  KMP_ASSERT(pv != NULL);
3794  *pv = old;
3795  return false;
3796 }
3797 bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3798  short d, short *pv) {
3799  short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3800  if (old == e)
3801  return true;
3802  KMP_ASSERT(pv != NULL);
3803  *pv = old;
3804  return false;
3805 }
3806 bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3807  kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3808  kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3809  if (old == e)
3810  return true;
3811  KMP_ASSERT(pv != NULL);
3812  *pv = old;
3813  return false;
3814 }
3815 bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3816  kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3817  kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3818  if (old == e)
3819  return true;
3820  KMP_ASSERT(pv != NULL);
3821  *pv = old;
3822  return false;
3823 }
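// The bool_*_cas_cpt routines additionally capture the old value when the
// comparison fails; a sketch for a 4-byte integer x:
//
//   #pragma omp atomic compare capture
//   { r = x == e; if (r) { x = d; } else { v = x; } }
//
// may lower to r = __kmpc_atomic_bool_4_cas_cpt(loc, gtid, &x, e, d, &v).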
3824 
3841 char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3842  char d, char *pv) {
3843  char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3844  KMP_ASSERT(pv != NULL);
3845  *pv = old == e ? d : old;
3846  return old;
3847 }
3848 short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3849  short d, short *pv) {
3850  short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3851  KMP_ASSERT(pv != NULL);
3852  *pv = old == e ? d : old;
3853  return old;
3854 }
3855 kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3856  kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3857  kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3858  KMP_ASSERT(pv != NULL);
3859  *pv = old == e ? d : old;
3860  return old;
3861 }
3862 kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3863  kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3864  kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3865  KMP_ASSERT(pv != NULL);
3866  *pv = old == e ? d : old;
3867  return old;
3868 }
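// The val_*_cas_cpt routines return the old value and capture the value of x
// after the operation (d if the exchange happened, the unchanged value
// otherwise); a sketch for a 4-byte integer x:
//
//   #pragma omp atomic compare capture
//   { if (x == e) { x = d; } v = x; }
//
// may lower to __kmpc_atomic_val_4_cas_cpt(loc, gtid, &x, e, d, &v).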
3869 
3870 // End OpenMP 5.1 compare + capture
3871 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3872 
3877 // end of file