libosmocore 1.9.0.196-9975
Osmocom core library
conv_acc_neon_impl.h File Reference

Accelerated Viterbi decoder implementation: straight port of SSE to NEON based on Tom Tsou's work. More...

Go to the source code of this file.

Macros

#define __always_inline   inline __attribute__((always_inline))
 
#define NEON_BUTTERFLY(M0, M1, M2, M3, M4)
 
#define NEON_DEINTERLEAVE_K5(M0, M1, M2, M3)
 
#define NEON_DEINTERLEAVE_K7(M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, M10, M11, M12, M13, M14, M15)
 
#define NEON_BRANCH_METRIC_N2(M0, M1, M2, M3, M4, M6, M7)
 
#define NEON_BRANCH_METRIC_N4(M0, M1, M2, M3, M4, M5)
 
#define NEON_NORMALIZE_K5(M0, M1, M2, M3)
 
#define NEON_NORMALIZE_K7(M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, M10, M11)
 

Functions

__always_inline void _neon_metrics_k5_n2 (const int16_t *val, const int16_t *outa, int16_t *sumsa, int16_t *paths, int norm)
 
__always_inline void _neon_metrics_k5_n4 (const int16_t *val, const int16_t *outa, int16_t *sumsa, int16_t *paths, int norm)
 
static __always_inline void _neon_metrics_k7_n2 (const int16_t *val, const int16_t *outa, int16_t *sumsa, int16_t *paths, int norm)
 
static __always_inline void _neon_metrics_k7_n4 (const int16_t *val, const int16_t *outa, int16_t *sumsa, int16_t *paths, int norm)
 

Detailed Description

Accelerated Viterbi decoder implementation: straight port of SSE to NEON based on Tom Tsou's work.

Macro Definition Documentation

◆ __always_inline

#define __always_inline   inline __attribute__((always_inline))

◆ NEON_BRANCH_METRIC_N2

#define NEON_BRANCH_METRIC_N2 (   M0,
  M1,
  M2,
  M3,
  M4,
  M6,
  M7 
)
Value:
{ \
M0 = vmulq_s16(M4, M0); \
M1 = vmulq_s16(M4, M1); \
M2 = vmulq_s16(M4, M2); \
M3 = vmulq_s16(M4, M3); \
M6 = vcombine_s16(vpadd_s16(vget_low_s16(M0), vget_high_s16(M0)), vpadd_s16(vget_low_s16(M1), vget_high_s16(M1))); \
M7 = vcombine_s16(vpadd_s16(vget_low_s16(M2), vget_high_s16(M2)), vpadd_s16(vget_low_s16(M3), vget_high_s16(M3))); \
}

◆ NEON_BRANCH_METRIC_N4

#define NEON_BRANCH_METRIC_N4 (   M0,
  M1,
  M2,
  M3,
  M4,
  M5 
)
Value:
{ \
M0 = vmulq_s16(M4, M0); \
M1 = vmulq_s16(M4, M1); \
M2 = vmulq_s16(M4, M2); \
M3 = vmulq_s16(M4, M3); \
int16x4_t t1 = vpadd_s16(vpadd_s16(vget_low_s16(M0), vget_high_s16(M0)), vpadd_s16(vget_low_s16(M1), vget_high_s16(M1))); \
int16x4_t t2 = vpadd_s16(vpadd_s16(vget_low_s16(M2), vget_high_s16(M2)), vpadd_s16(vget_low_s16(M3), vget_high_s16(M3))); \
M5 = vcombine_s16(t1, t2); \
}

◆ NEON_BUTTERFLY

#define NEON_BUTTERFLY (   M0,
  M1,
  M2,
  M3,
  M4 
)
Value:
{ \
M3 = vqaddq_s16(M0, M2); \
M4 = vqsubq_s16(M1, M2); \
M0 = vqsubq_s16(M0, M2); \
M1 = vqaddq_s16(M1, M2); \
M2 = vmaxq_s16(M3, M4); \
M3 = vreinterpretq_s16_u16(vcgtq_s16(M3, M4)); \
M4 = vmaxq_s16(M0, M1); \
M1 = vreinterpretq_s16_u16(vcgtq_s16(M0, M1)); \
}

◆ NEON_DEINTERLEAVE_K5

#define NEON_DEINTERLEAVE_K5 (   M0,
  M1,
  M2,
  M3 
)
Value:
{ \
int16x8x2_t tmp; \
tmp = vuzpq_s16(M0, M1); \
M2 = tmp.val[0]; \
M3 = tmp.val[1]; \
}

◆ NEON_DEINTERLEAVE_K7

#define NEON_DEINTERLEAVE_K7 (   M0,
  M1,
  M2,
  M3,
  M4,
  M5,
  M6,
  M7,
  M8,
  M9,
  M10,
  M11,
  M12,
  M13,
  M14,
  M15 
)
Value:
{ \
int16x8x2_t tmp; \
tmp = vuzpq_s16(M0, M1); \
M8 = tmp.val[0]; M9 = tmp.val[1]; \
tmp = vuzpq_s16(M2, M3); \
M10 = tmp.val[0]; M11 = tmp.val[1]; \
tmp = vuzpq_s16(M4, M5); \
M12 = tmp.val[0]; M13 = tmp.val[1]; \
tmp = vuzpq_s16(M6, M7); \
M14 = tmp.val[0]; M15 = tmp.val[1]; \
}

◆ NEON_NORMALIZE_K5

#define NEON_NORMALIZE_K5 (   M0,
  M1,
  M2,
  M3 
)
Value:
{ \
M2 = vminq_s16(M0, M1); \
int16x4_t t = vpmin_s16(vget_low_s16(M2), vget_high_s16(M2)); \
t = vpmin_s16(t, t); \
t = vpmin_s16(t, t); \
M2 = vdupq_lane_s16(t, 0); \
M0 = vqsubq_s16(M0, M2); \
M1 = vqsubq_s16(M1, M2); \
}

◆ NEON_NORMALIZE_K7

#define NEON_NORMALIZE_K7 (   M0,
  M1,
  M2,
  M3,
  M4,
  M5,
  M6,
  M7,
  M8,
  M9,
  M10,
  M11 
)
Value:
{ \
M8 = vminq_s16(M0, M1); \
M9 = vminq_s16(M2, M3); \
M10 = vminq_s16(M4, M5); \
M11 = vminq_s16(M6, M7); \
M8 = vminq_s16(M8, M9); \
M10 = vminq_s16(M10, M11); \
M8 = vminq_s16(M8, M10); \
int16x4_t t = vpmin_s16(vget_low_s16(M8), vget_high_s16(M8)); \
t = vpmin_s16(t, t); \
t = vpmin_s16(t, t); \
M8 = vdupq_lane_s16(t, 0); \
M0 = vqsubq_s16(M0, M8); \
M1 = vqsubq_s16(M1, M8); \
M2 = vqsubq_s16(M2, M8); \
M3 = vqsubq_s16(M3, M8); \
M4 = vqsubq_s16(M4, M8); \
M5 = vqsubq_s16(M5, M8); \
M6 = vqsubq_s16(M6, M8); \
M7 = vqsubq_s16(M7, M8); \
}

Function Documentation

◆ _neon_metrics_k5_n2()

__always_inline void _neon_metrics_k5_n2 ( const int16_t *  val,
const int16_t *  outa,
int16_t *  sumsa,
int16_t *  paths,
int  norm 
)

◆ _neon_metrics_k5_n4()

__always_inline void _neon_metrics_k5_n4 ( const int16_t *  val,
const int16_t *  outa,
int16_t *  sumsa,
int16_t *  paths,
int  norm 
)

◆ _neon_metrics_k7_n2()

static __always_inline void _neon_metrics_k7_n2 ( const int16_t *  val,
const int16_t *  outa,
int16_t *  sumsa,
int16_t *  paths,
int  norm 
)
static

◆ _neon_metrics_k7_n4()

static __always_inline void _neon_metrics_k7_n4 ( const int16_t *  val,
const int16_t *  outa,
int16_t *  sumsa,
int16_t *  paths,
int  norm 
)
static