libosmocore 1.10.0.27-aec32
Osmocom core library
conv_acc_sse_impl.h File Reference

Accelerated Viterbi decoder implementation: the actual definitions, which are included by both conv_acc_sse.c and conv_acc_sse_avx.c. More...

Go to the source code of this file.

Macros

#define __always_inline   inline __attribute__((always_inline))
 
#define SSE_BUTTERFLY(M0, M1, M2, M3, M4)
 
#define _I8_SHUFFLE_MASK   15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0
 
#define SSE_DEINTERLEAVE_K5(M0, M1, M2, M3)
 
#define SSE_DEINTERLEAVE_K7(M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, M10, M11, M12, M13, M14, M15)
 
#define SSE_BRANCH_METRIC_N2(M0, M1, M2, M3, M4, M6, M7)
 
#define SSE_BRANCH_METRIC_N4(M0, M1, M2, M3, M4, M5)
 
#define SSE_MINPOS(M0, M1)
 
#define SSE_NORMALIZE_K5(M0, M1, M2, M3)
 
#define SSE_NORMALIZE_K7(M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, M10, M11)
 

Functions

static __always_inline void _sse_metrics_k5_n2 (const int16_t *val, const int16_t *out, int16_t *sums, int16_t *paths, int norm)
 
static __always_inline void _sse_metrics_k5_n4 (const int16_t *val, const int16_t *out, int16_t *sums, int16_t *paths, int norm)
 
static __always_inline void _sse_metrics_k7_n2 (const int16_t *val, const int16_t *out, int16_t *sums, int16_t *paths, int norm)
 
static __always_inline void _sse_metrics_k7_n4 (const int16_t *val, const int16_t *out, int16_t *sums, int16_t *paths, int norm)
 

Variables

int sse41_supported
 

Detailed Description

Accelerated Viterbi decoder implementation: the actual definitions, which are included by both conv_acc_sse.c and conv_acc_sse_avx.c.

Macro Definition Documentation

◆ __always_inline

#define __always_inline   inline __attribute__((always_inline))

◆ _I8_SHUFFLE_MASK

#define _I8_SHUFFLE_MASK   15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0

◆ SSE_BRANCH_METRIC_N2

#define SSE_BRANCH_METRIC_N2 (   M0,
  M1,
  M2,
  M3,
  M4,
  M6,
  M7 
)
Value:
{ \
M0 = _mm_sign_epi16(M4, M0); \
M1 = _mm_sign_epi16(M4, M1); \
M2 = _mm_sign_epi16(M4, M2); \
M3 = _mm_sign_epi16(M4, M3); \
M6 = _mm_hadds_epi16(M0, M1); \
M7 = _mm_hadds_epi16(M2, M3); \
}

◆ SSE_BRANCH_METRIC_N4

#define SSE_BRANCH_METRIC_N4 (   M0,
  M1,
  M2,
  M3,
  M4,
  M5 
)
Value:
{ \
M0 = _mm_sign_epi16(M4, M0); \
M1 = _mm_sign_epi16(M4, M1); \
M2 = _mm_sign_epi16(M4, M2); \
M3 = _mm_sign_epi16(M4, M3); \
M0 = _mm_hadds_epi16(M0, M1); \
M1 = _mm_hadds_epi16(M2, M3); \
M5 = _mm_hadds_epi16(M0, M1); \
}

◆ SSE_BUTTERFLY

#define SSE_BUTTERFLY (   M0,
  M1,
  M2,
  M3,
  M4 
)
Value:
{ \
M3 = _mm_adds_epi16(M0, M2); \
M4 = _mm_subs_epi16(M1, M2); \
M0 = _mm_subs_epi16(M0, M2); \
M1 = _mm_adds_epi16(M1, M2); \
M2 = _mm_max_epi16(M3, M4); \
M3 = _mm_or_si128(_mm_cmpgt_epi16(M3, M4), _mm_cmpeq_epi16(M3, M4)); \
M4 = _mm_max_epi16(M0, M1); \
M1 = _mm_or_si128(_mm_cmpgt_epi16(M0, M1), _mm_cmpeq_epi16(M0, M1)); \
}

◆ SSE_DEINTERLEAVE_K5

#define SSE_DEINTERLEAVE_K5 (   M0,
  M1,
  M2,
  M3 
)
Value:
{ \
M2 = _mm_set_epi8(_I8_SHUFFLE_MASK); \
M0 = _mm_shuffle_epi8(M0, M2); \
M1 = _mm_shuffle_epi8(M1, M2); \
M2 = _mm_unpacklo_epi64(M0, M1); \
M3 = _mm_unpackhi_epi64(M0, M1); \
}
References: _I8_SHUFFLE_MASK (definition: conv_acc_sse_impl.h:74).

◆ SSE_DEINTERLEAVE_K7

#define SSE_DEINTERLEAVE_K7 (   M0,
  M1,
  M2,
  M3,
  M4,
  M5,
  M6,
  M7,
  M8,
  M9,
  M10,
  M11,
  M12,
  M13,
  M14,
  M15 
)
Value:
{ \
M8 = _mm_set_epi8(_I8_SHUFFLE_MASK); \
M0 = _mm_shuffle_epi8(M0, M8); \
M1 = _mm_shuffle_epi8(M1, M8); \
M2 = _mm_shuffle_epi8(M2, M8); \
M3 = _mm_shuffle_epi8(M3, M8); \
M4 = _mm_shuffle_epi8(M4, M8); \
M5 = _mm_shuffle_epi8(M5, M8); \
M6 = _mm_shuffle_epi8(M6, M8); \
M7 = _mm_shuffle_epi8(M7, M8); \
M8 = _mm_unpacklo_epi64(M0, M1); \
M9 = _mm_unpackhi_epi64(M0, M1); \
M10 = _mm_unpacklo_epi64(M2, M3); \
M11 = _mm_unpackhi_epi64(M2, M3); \
M12 = _mm_unpacklo_epi64(M4, M5); \
M13 = _mm_unpackhi_epi64(M4, M5); \
M14 = _mm_unpacklo_epi64(M6, M7); \
M15 = _mm_unpackhi_epi64(M6, M7); \
}

◆ SSE_MINPOS

#define SSE_MINPOS (   M0,
  M1 
)
Value:
{ \
M1 = _mm_shuffle_epi32(M0, _MM_SHUFFLE(0, 0, 3, 2)); \
M0 = _mm_min_epi16(M0, M1); \
M1 = _mm_shufflelo_epi16(M0, _MM_SHUFFLE(0, 0, 3, 2)); \
M0 = _mm_min_epi16(M0, M1); \
M1 = _mm_shufflelo_epi16(M0, _MM_SHUFFLE(0, 0, 0, 1)); \
M0 = _mm_min_epi16(M0, M1); \
}

◆ SSE_NORMALIZE_K5

#define SSE_NORMALIZE_K5 (   M0,
  M1,
  M2,
  M3 
)
Value:
{ \
M2 = _mm_min_epi16(M0, M1); \
SSE_MINPOS(M2, M3) \
SSE_BROADCAST(M2) \
M0 = _mm_subs_epi16(M0, M2); \
M1 = _mm_subs_epi16(M1, M2); \
}

◆ SSE_NORMALIZE_K7

#define SSE_NORMALIZE_K7 (   M0,
  M1,
  M2,
  M3,
  M4,
  M5,
  M6,
  M7,
  M8,
  M9,
  M10,
  M11 
)
Value:
{ \
M8 = _mm_min_epi16(M0, M1); \
M9 = _mm_min_epi16(M2, M3); \
M10 = _mm_min_epi16(M4, M5); \
M11 = _mm_min_epi16(M6, M7); \
M8 = _mm_min_epi16(M8, M9); \
M10 = _mm_min_epi16(M10, M11); \
M8 = _mm_min_epi16(M8, M10); \
SSE_MINPOS(M8, M9) \
SSE_BROADCAST(M8) \
M0 = _mm_subs_epi16(M0, M8); \
M1 = _mm_subs_epi16(M1, M8); \
M2 = _mm_subs_epi16(M2, M8); \
M3 = _mm_subs_epi16(M3, M8); \
M4 = _mm_subs_epi16(M4, M8); \
M5 = _mm_subs_epi16(M5, M8); \
M6 = _mm_subs_epi16(M6, M8); \
M7 = _mm_subs_epi16(M7, M8); \
}

Function Documentation

◆ _sse_metrics_k5_n2()

static __always_inline void _sse_metrics_k5_n2 ( const int16_t *  val,
const int16_t *  out,
int16_t *  sums,
int16_t *  paths,
int  norm 
)
static

◆ _sse_metrics_k5_n4()

static __always_inline void _sse_metrics_k5_n4 ( const int16_t *  val,
const int16_t *  out,
int16_t *  sums,
int16_t *  paths,
int  norm 
)
static

◆ _sse_metrics_k7_n2()

static __always_inline void _sse_metrics_k7_n2 ( const int16_t *  val,
const int16_t *  out,
int16_t *  sums,
int16_t *  paths,
int  norm 
)
static

◆ _sse_metrics_k7_n4()

static __always_inline void _sse_metrics_k7_n4 ( const int16_t *  val,
const int16_t *  out,
int16_t *  sums,
int16_t *  paths,
int  norm 
)
static

Variable Documentation

◆ sse41_supported

int sse41_supported
extern

Referenced by osmo_conv_init().