最終更新:2012-03-01 (木) 19:49:30 (2759d)  

emmintrin.h はてなブックマークを見る
Top / emmintrin.h

SSE2のヘッダ。

Principal header file for Willamette New Instruction intrinsics

関数

DP, arithmetic

  • __m128d _mm_add_sd(__m128d _A, __m128d _B);
  • __m128d _mm_add_pd(__m128d _A, __m128d _B);
  • __m128d _mm_sub_sd(__m128d _A, __m128d _B);
  • __m128d _mm_sub_pd(__m128d _A, __m128d _B);
  • __m128d _mm_mul_sd(__m128d _A, __m128d _B);
  • __m128d _mm_mul_pd(__m128d _A, __m128d _B);
  • __m128d _mm_sqrt_sd(__m128d _A, __m128d _B);
  • __m128d _mm_sqrt_pd(__m128d _A);
  • __m128d _mm_div_sd(__m128d _A, __m128d _B);
  • __m128d _mm_div_pd(__m128d _A, __m128d _B);
  • __m128d _mm_min_sd(__m128d _A, __m128d _B);
  • __m128d _mm_min_pd(__m128d _A, __m128d _B);
  • __m128d _mm_max_sd(__m128d _A, __m128d _B);
  • __m128d _mm_max_pd(__m128d _A, __m128d _B);

DP, logicals

  • __m128d _mm_and_pd(__m128d _A, __m128d _B);
  • __m128d _mm_andnot_pd(__m128d _A, __m128d _B);
  • __m128d _mm_or_pd(__m128d _A, __m128d _B);
  • __m128d _mm_xor_pd(__m128d _A, __m128d _B);

DP, comparisons

  • __m128d _mm_cmpeq_sd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpeq_pd(__m128d _A, __m128d _B);
  • __m128d _mm_cmplt_sd(__m128d _A, __m128d _B);
  • __m128d _mm_cmplt_pd(__m128d _A, __m128d _B);
  • __m128d _mm_cmple_sd(__m128d _A, __m128d _B);
  • __m128d _mm_cmple_pd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpgt_sd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpgt_pd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpge_sd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpge_pd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpneq_sd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpneq_pd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpnlt_sd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpnlt_pd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpnle_sd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpnle_pd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpngt_sd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpngt_pd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpnge_sd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpnge_pd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpord_pd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpord_sd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpunord_pd(__m128d _A, __m128d _B);
  • __m128d _mm_cmpunord_sd(__m128d _A, __m128d _B);
  • int _mm_comieq_sd(__m128d _A, __m128d _B);
  • int _mm_comilt_sd(__m128d _A, __m128d _B);
  • int _mm_comile_sd(__m128d _A, __m128d _B);
  • int _mm_comigt_sd(__m128d _A, __m128d _B);
  • int _mm_comige_sd(__m128d _A, __m128d _B);
  • int _mm_comineq_sd(__m128d _A, __m128d _B);
  • int _mm_ucomieq_sd(__m128d _A, __m128d _B);
  • int _mm_ucomilt_sd(__m128d _A, __m128d _B);
  • int _mm_ucomile_sd(__m128d _A, __m128d _B);
  • int _mm_ucomigt_sd(__m128d _A, __m128d _B);
  • int _mm_ucomige_sd(__m128d _A, __m128d _B);
  • int _mm_ucomineq_sd(__m128d _A, __m128d _B);

DP, converts

  • __m128d _mm_cvtepi32_pd(__m128i _A);
  • __m128i _mm_cvtpd_epi32(__m128d _A);
  • __m128i _mm_cvttpd_epi32(__m128d _A);
  • __m128 _mm_cvtepi32_ps(__m128i _A);
  • __m128i _mm_cvtps_epi32(__m128 _A);
  • __m128i _mm_cvttps_epi32(__m128 _A);
  • __m128 _mm_cvtpd_ps(__m128d _A);
  • __m128d _mm_cvtps_pd(__m128 _A);
  • __m128 _mm_cvtsd_ss(__m128 _A, __m128d _B);
  • __m128d _mm_cvtss_sd(__m128d _A, __m128 _B);
  • int _mm_cvtsd_si32(__m128d _A);
  • int _mm_cvttsd_si32(__m128d _A);
  • __m128d _mm_cvtsi32_sd(__m128d _A, int _B);
  • __m64 _mm_cvtpd_pi32(__m128d _A);
  • __m64 _mm_cvttpd_pi32(__m128d _A);
  • __m128d _mm_cvtpi32_pd(__m64 _A);

DP, misc

  • __m128d _mm_unpackhi_pd(__m128d _A, __m128d _B);
  • __m128d _mm_unpacklo_pd(__m128d _A, __m128d _B);
  • int _mm_movemask_pd(__m128d _A);
  • __m128d _mm_shuffle_pd(__m128d _A, __m128d _B, int _I);

DP, loads

  • __m128d _mm_load_pd(double const*_Dp);
  • __m128d _mm_load1_pd(double const*_Dp);
  • __m128d _mm_loadr_pd(double const*_Dp);
  • __m128d _mm_loadu_pd(double const*_Dp);
  • __m128d _mm_load_sd(double const*_Dp);
  • __m128d _mm_loadh_pd(__m128d _A, double const*_Dp);
  • __m128d _mm_loadl_pd(__m128d _A, double const*_Dp);

DP, sets

DP, stores

  • void _mm_store_sd(double *_Dp, __m128d _A);
  • void _mm_store1_pd(double *_Dp, __m128d _A);
  • void _mm_store_pd(double *_Dp, __m128d _A);
  • void _mm_storeu_pd(double *_Dp, __m128d _A);
  • void _mm_storer_pd(double *_Dp, __m128d _A);
  • void _mm_storeh_pd(double *_Dp, __m128d _A);
  • void _mm_storel_pd(double *_Dp, __m128d _A);

Integer, arithmetic

  • __m128i _mm_add_epi8(__m128i _A, __m128i _B);
  • __m128i _mm_add_epi16(__m128i _A, __m128i _B);
  • __m128i _mm_add_epi32(__m128i _A, __m128i _B);
  • __m64 _mm_add_si64(__m64 _A, __m64 _B);
  • __m128i _mm_add_epi64(__m128i _A, __m128i _B);
  • __m128i _mm_adds_epi8(__m128i _A, __m128i _B);
  • __m128i _mm_adds_epi16(__m128i _A, __m128i _B);
  • __m128i _mm_adds_epu8(__m128i _A, __m128i _B);
  • __m128i _mm_adds_epu16(__m128i _A, __m128i _B);
  • __m128i _mm_avg_epu8(__m128i _A, __m128i _B);
  • __m128i _mm_avg_epu16(__m128i _A, __m128i _B);
  • __m128i _mm_madd_epi16(__m128i _A, __m128i _B);
  • __m128i _mm_max_epi16(__m128i _A, __m128i _B);
  • __m128i _mm_max_epu8(__m128i _A, __m128i _B);
  • __m128i _mm_min_epi16(__m128i _A, __m128i _B);
  • __m128i _mm_min_epu8(__m128i _A, __m128i _B);
  • __m128i _mm_mulhi_epi16(__m128i _A, __m128i _B);
  • __m128i _mm_mulhi_epu16(__m128i _A, __m128i _B);
  • __m128i _mm_mullo_epi16(__m128i _A, __m128i _B);
  • __m64 _mm_mul_su32(__m64 _A, __m64 _B);
  • __m128i _mm_mul_epu32(__m128i _A, __m128i _B);
  • __m128i _mm_sad_epu8(__m128i _A, __m128i _B);
  • __m128i _mm_sub_epi8(__m128i _A, __m128i _B);
  • __m128i _mm_sub_epi16(__m128i _A, __m128i _B);
  • __m128i _mm_sub_epi32(__m128i _A, __m128i _B);
  • __m64 _mm_sub_si64(__m64 _A, __m64 _B);
  • __m128i _mm_sub_epi64(__m128i _A, __m128i _B);
  • __m128i _mm_subs_epi8(__m128i _A, __m128i _B);
  • __m128i _mm_subs_epi16(__m128i _A, __m128i _B);
  • __m128i _mm_subs_epu8(__m128i _A, __m128i _B);
  • __m128i _mm_subs_epu16(__m128i _A, __m128i _B);

Integer, logicals

  • __m128i _mm_and_si128(__m128i _A, __m128i _B);
  • __m128i _mm_andnot_si128(__m128i _A, __m128i _B);
  • __m128i _mm_or_si128(__m128i _A, __m128i _B);
  • __m128i _mm_xor_si128(__m128i _A, __m128i _B);

Integer, shifts

  • __m128i _mm_slli_si128(__m128i _A, int _Imm);
  • __m128i _mm_slli_epi16(__m128i _A, int _Count);
  • __m128i _mm_sll_epi16(__m128i _A, __m128i _Count);
  • __m128i _mm_slli_epi32(__m128i _A, int _Count);
  • __m128i _mm_sll_epi32(__m128i _A, __m128i _Count);
  • __m128i _mm_slli_epi64(__m128i _A, int _Count);
  • __m128i _mm_sll_epi64(__m128i _A, __m128i _Count);
  • __m128i _mm_srai_epi16(__m128i _A, int _Count);
  • __m128i _mm_sra_epi16(__m128i _A, __m128i _Count);
  • __m128i _mm_srai_epi32(__m128i _A, int _Count);
  • __m128i _mm_sra_epi32(__m128i _A, __m128i _Count);
  • __m128i _mm_srli_si128(__m128i _A, int _Imm);
  • __m128i _mm_srli_epi16(__m128i _A, int _Count);
  • __m128i _mm_srl_epi16(__m128i _A, __m128i _Count);
  • __m128i _mm_srli_epi32(__m128i _A, int _Count);
  • __m128i _mm_srl_epi32(__m128i _A, __m128i _Count);
  • __m128i _mm_srli_epi64(__m128i _A, int _Count);
  • __m128i _mm_srl_epi64(__m128i _A, __m128i _Count);

Integer, comparisons

  • __m128i _mm_cmpeq_epi8(__m128i _A, __m128i _B);
  • __m128i _mm_cmpeq_epi16(__m128i _A, __m128i _B);
  • __m128i _mm_cmpeq_epi32(__m128i _A, __m128i _B);
  • __m128i _mm_cmpgt_epi8(__m128i _A, __m128i _B);
  • __m128i _mm_cmpgt_epi16(__m128i _A, __m128i _B);
  • __m128i _mm_cmpgt_epi32(__m128i _A, __m128i _B);
  • __m128i _mm_cmplt_epi8(__m128i _A, __m128i _B);
  • __m128i _mm_cmplt_epi16(__m128i _A, __m128i _B);
  • __m128i _mm_cmplt_epi32(__m128i _A, __m128i _B);

Integer, converts

  • __m128i _mm_cvtsi32_si128(int _A);
  • int _mm_cvtsi128_si32(__m128i _A);

Integer, misc

Integer, loads

Integer, sets

  • __m128i _mm_set_epi64(__m64 _Q1, __m64 _Q0);
  • __m128i _mm_set_epi32(int _I3, int _I2, int _I1, int _I0);
  • __m128i _mm_set_epi16(short _W7, short _W6, short _W5, short _W4, short _W3, short _W2, short _W1, short _W0);
  • __m128i _mm_set_epi8(char _B15, char _B14, char _B13, char _B12, char _B11, char _B10, char _B9, char _B8, char _B7, char _B6, char _B5, char _B4, char _B3, char _B2, char _B1, char _B0);
  • __m128i _mm_set1_epi64(__m64 _Q);
  • __m128i _mm_set1_epi32(int _I);
  • __m128i _mm_set1_epi16(short _W);
  • __m128i _mm_set1_epi8(char _B);
  • __m128i _mm_setl_epi64(__m128i _Q);
  • __m128i _mm_setr_epi64(__m64 _Q0, __m64 _Q1);
  • __m128i _mm_setr_epi32(int _I0, int _I1, int _I2, int _I3);
  • __m128i _mm_setr_epi16(short _W0, short _W1, short _W2, short _W3, short _W4, short _W5, short _W6, short _W7);
  • __m128i _mm_setr_epi8(char _B0, char _B1, char _B2, char _B3, char _B4, char _B5, char _B6, char _B7, char _B8, char _B9, char _B10, char _B11, char _B12, char _B13, char _B14, char _B15);
  • __m128i _mm_setzero_si128(void);

Integer, stores

  • void _mm_store_si128(__m128i *_P, __m128i _B);
  • void _mm_storeu_si128(__m128i *_P, __m128i _B);
  • void _mm_storel_epi64(__m128i *_P, __m128i _Q);
  • void _mm_maskmoveu_si128(__m128i _D, __m128i _N, char *_P);

Integer, moves

  • __m128i _mm_move_epi64(__m128i _Q);
  • __m128i _mm_movpi64_epi64(__m64 _Q);
  • __m64 _mm_movepi64_pi64(__m128i _Q);

Cacheability support

  • void _mm_stream_pd(double *_Dp, __m128d _A);
  • void _mm_stream_si128(__m128i *_P, __m128i _A);
  • void _mm_clflush(void const*_P);
  • void _mm_lfence(void);
  • void _mm_mfence(void);
  • void _mm_stream_si32(int *_P, int _I);
  • void _mm_pause(void);

New convert to float

Support for casting between various SP, DP, INT vector types.

  • Note that these casts perform no conversion of values; they only change the type.

Support for 64-bit extension intrinsics

  • __int64 _mm_cvtsd_si64(__m128d);
  • __int64 _mm_cvttsd_si64(__m128d);
  • __m128d _mm_cvtsi64_sd(__m128d, __int64);
  • __m128i _mm_cvtsi64_si128(__int64);
  • __int64 _mm_cvtsi128_si64(__m128i);

関連