From 1c4c1a6f4d0e8ffab24419d136fbfe698a201d24 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Mon, 15 Sep 2014 20:02:50 -0400
Subject: tilegx: optimize string copy_byte() internal function

We can use one "shufflebytes" instruction instead of 3 "bfins"
instructions to optimize the string functions.
---
 sysdeps/tile/tilegx/string-endian.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'sysdeps')

diff --git a/sysdeps/tile/tilegx/string-endian.h b/sysdeps/tile/tilegx/string-endian.h
index 0c4d51766d..47333891e0 100644
--- a/sysdeps/tile/tilegx/string-endian.h
+++ b/sysdeps/tile/tilegx/string-endian.h
@@ -36,12 +36,11 @@
 #define REVCZ(x) __insn_ctz(x)
 #endif
 
-/* Create eight copies of the byte in a uint64_t. */
+/* Create eight copies of the byte in a uint64_t. Byte Shuffle uses
+   the bytes of srcB as the index into the dest vector to select a
+   byte. With all indices of zero, the first byte is copied into all
+   the other bytes. */
 static inline uint64_t copy_byte(uint8_t byte)
 {
-  uint64_t word = byte;
-  word = __insn_bfins(word, word, 8, 15);
-  word = __insn_bfins(word, word, 16, 31);
-  word = __insn_bfins(word, word, 32, 63);
-  return word;
+  return __insn_shufflebytes(byte, 0, 0);
 }
-- 
cgit 1.4.1