Skip to content

Commit

Permalink
use tuple type for load size factor
Browse files Browse the repository at this point in the history
  • Loading branch information
saucecontrol committed Nov 3, 2024
1 parent 972cd2a commit 600f0d7
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 37 deletions.
4 changes: 2 additions & 2 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3142,7 +3142,7 @@ const static insTupleType insTupleTypeInfos[] =
// Return Value:
// true if this instruction has tuple type info.
//
inline bool hasTupleTypeInfo(instruction ins)
inline bool emitter::hasTupleTypeInfo(instruction ins)
{
assert((unsigned)ins < ArrLen(insTupleTypeInfos));
return (insTupleTypeInfos[ins] != INS_TT_NONE);
Expand All @@ -3157,7 +3157,7 @@ inline bool hasTupleTypeInfo(instruction ins)
// Return Value:
// the tuple type info for a given CPU instruction.
//
insTupleType emitter::insTupleTypeInfo(instruction ins) const
insTupleType emitter::insTupleTypeInfo(instruction ins)
{
assert((unsigned)ins < ArrLen(insTupleTypeInfos));
return insTupleTypeInfos[ins];
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/jit/emitxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,8 @@ code_t AddVexPrefixIfNeededAndNotPresent(instruction ins, code_t code, emitAttr
return code;
}

insTupleType insTupleTypeInfo(instruction ins) const;
static bool hasTupleTypeInfo(instruction ins);
static insTupleType insTupleTypeInfo(instruction ins);

//------------------------------------------------------------------------
// HasKMaskRegisterDest: Temporary check to identify instructions that can
Expand Down
71 changes: 37 additions & 34 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9212,59 +9212,62 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
}

// These are widening instructions. A load can be contained if the source is large enough
// after taking into account the multiplier of source to target element size. Most of these
// double the width, but a few instruction forms have higher multipliers (ex: PMOVZXBQ)
// after taking into account the multiplier of source to target element size.

const unsigned sizeof_baseType = genTypeSize(parentNode->GetSimdBaseType());
unsigned widenFactor = 2;
const var_types parentBaseType = parentNode->GetSimdBaseType();
const instruction parentInstruction = HWIntrinsicInfo::lookupIns(parentIntrinsicId, parentBaseType);

switch (parentIntrinsicId)
assert(emitter::hasTupleTypeInfo(parentInstruction));

const insTupleType tupleType = emitter::insTupleTypeInfo(parentInstruction);
const unsigned parentSize = parentNode->GetSimdSize();
unsigned widenFactor = 0;

switch (tupleType)
{
case NI_SSE41_ConvertToVector128Int32:
case NI_AVX2_ConvertToVector256Int32:
case NI_AVX512F_ConvertToVector512Int32:
case NI_AVX512F_ConvertToVector512UInt32:
case INS_TT_FULL:
case INS_TT_FULL_MEM:
{
if (sizeof_baseType == 1)
{
widenFactor = 4;
}
widenFactor = 1;
break;
}

case NI_SSE41_ConvertToVector128Int64:
case NI_AVX2_ConvertToVector256Int64:
case NI_AVX512F_ConvertToVector512Int64:
case NI_AVX512F_ConvertToVector512UInt64:
case INS_TT_HALF:
case INS_TT_HALF_MEM:
{
if (sizeof_baseType == 1)
{
widenFactor = 8;
}
else if (sizeof_baseType == 2)
{
widenFactor = 4;
}
widenFactor = 2;
break;
}
case INS_TT_QUARTER_MEM:
{
widenFactor = 4;
break;
}
case INS_TT_EIGHTH_MEM:
{
widenFactor = 8;
break;
}
case INS_TT_MOVDDUP:
{
widenFactor = parentSize == 16 ? 2 : 1;
break;
}

default:
{
unreached();
break;
}
}

const unsigned expectedSize = parentSize / widenFactor;
const unsigned operandSize = genTypeSize(childNode->TypeGet());
const unsigned expectedSize = genTypeSize(parentNode->TypeGet()) / widenFactor;

if (expectedSize < 16)
{
// If we need less than a full vector:
// * In MinOpts, we should never contain aligned loads because they will not
// fault on hardware not supporting VEX encoding as a full vector load would.
// * We can always contain an unaligned load of sufficient size because there are
// no alignment requirements below vector size.
// * We can contain a SIMD scalar load, provided the element type is large enough.
// We do not need to consider alignment for non-VEX encoding here because we're
// loading less than a full vector.
//
// We can also contain a SIMD scalar load, provided the element type is large enough.

supportsAlignedSIMDLoads = !comp->opts.MinOpts();
supportsGeneralLoads = (operandSize >= expectedSize);
Expand Down

0 comments on commit 600f0d7

Please sign in to comment.