From fc298f595aa2cc4e5faf99cb8deddb7089c684bf Mon Sep 17 00:00:00 2001 From: rodiazet Date: Mon, 30 Sep 2024 16:01:30 +0200 Subject: [PATCH] eof: Support `DATALOADN` (EIP-7480) --- libevmasm/Assembly.cpp | 44 ++++++++++++++++++- libevmasm/Assembly.h | 5 +++ libevmasm/AssemblyItem.cpp | 7 +++ libevmasm/AssemblyItem.h | 4 ++ libevmasm/Instruction.cpp | 2 + libevmasm/Instruction.h | 1 + liblangutil/EVMVersion.cpp | 3 ++ libyul/backends/evm/AbstractAssembly.h | 3 ++ libyul/backends/evm/EVMDialect.cpp | 20 +++++++++ libyul/backends/evm/EthAssemblyAdapter.cpp | 5 +++ libyul/backends/evm/EthAssemblyAdapter.h | 2 + libyul/backends/evm/NoOutputAssembly.cpp | 5 +++ libyul/backends/evm/NoOutputAssembly.h | 2 + .../strict_asm_eof_dataloadn_prague/args | 1 + .../strict_asm_eof_dataloadn_prague/input.yul | 9 ++++ .../strict_asm_eof_dataloadn_prague/output | 27 ++++++++++++ .../EVMInstructionInterpreter.cpp | 2 + 17 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 test/cmdlineTests/strict_asm_eof_dataloadn_prague/args create mode 100644 test/cmdlineTests/strict_asm_eof_dataloadn_prague/input.yul create mode 100644 test/cmdlineTests/strict_asm_eof_dataloadn_prague/output diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index 00496a06c415..11c3f18dbe1a 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -717,6 +717,11 @@ AssemblyItem Assembly::newImmutableAssignment(std::string const& _identifier) return AssemblyItem{AssignImmutable, h}; } +AssemblyItem Assembly::newAuxDataLoadN(size_t _offset) +{ + return AssemblyItem{AuxDataLoadN, _offset}; +} + Assembly& Assembly::optimise(OptimiserSettings const& _settings) { optimiseInternal(_settings, {}); @@ -1348,6 +1353,23 @@ std::map Assembly::findReferencedContainers() const return replacements; } +std::optional Assembly::findMaxAuxDataLoadNOffset() const +{ + std::optional result = std::nullopt; + for (auto&& codeSection: m_codeSections) + for (AssemblyItem const& i: codeSection.items) + if 
(i.type() == AuxDataLoadN) + { + solAssert(i.data() <= std::numeric_limits::max(), "Invalid auxdataloadn index value."); + auto const offset = static_cast(i.data()); + if (!result.has_value() || offset > result.value()) + result = offset; + + } + + return result; +} + LinkerObject const& Assembly::assembleEOF() const { solAssert(m_eofVersion.has_value() && m_eofVersion == 1); @@ -1362,11 +1384,14 @@ LinkerObject const& Assembly::assembleEOF() const "Expected the first code section to have zero inputs and be non-returning." ); + auto const maxAuxDataLoadNOffset = findMaxAuxDataLoadNOffset(); + // Insert EOF1 header. auto [headerBytecode, codeSectionSizeOffsets, dataSectionSizeOffset] = createEOFHeader(referencedSubIds); ret.bytecode = headerBytecode; m_tagPositionsInBytecode = std::vector(m_usedTags, std::numeric_limits::max()); + std::map dataSectionRef; for (auto&& [codeSectionIndex, codeSection]: m_codeSections | ranges::views::enumerate) { @@ -1380,6 +1405,7 @@ LinkerObject const& Assembly::assembleEOF() const switch (item.type()) { case Operation: + solAssert(item.instruction() != Instruction::DATALOADN); solAssert(!(item.instruction() >= Instruction::PUSH0 && item.instruction() <= Instruction::PUSH32)); ret.bytecode += assembleOperation(item); break; @@ -1402,6 +1428,14 @@ LinkerObject const& Assembly::assembleEOF() const case Tag: ret.bytecode += assembleTag(item, ret.bytecode.size(), false); break; + case AuxDataLoadN: + { + assertThrow(item.data() <= std::numeric_limits::max(), AssemblyException, "Invalid auxdataloadn position."); + ret.bytecode.push_back(uint8_t(Instruction::DATALOADN)); + dataSectionRef[ret.bytecode.size()] = static_cast(item.data()); + appendBigEndianUint16(ret.bytecode, item.data()); + break; + } default: solThrow(InvalidOpcode, "Unexpected opcode while assembling."); } @@ -1423,7 +1457,15 @@ LinkerObject const& Assembly::assembleEOF() const ret.bytecode += m_auxiliaryData; - auto dataLength = ret.bytecode.size() - dataStart; + auto 
appendedDataAndAuxDataSize = ret.bytecode.size() - dataStart; + + // If some data was already added to data section we need to update data section refs accordingly + if (appendedDataAndAuxDataSize > 0) + for (auto [pos, val] : dataSectionRef) + setBigEndian(ret.bytecode, pos, 2, val + appendedDataAndAuxDataSize); + + auto dataLength = appendedDataAndAuxDataSize + (maxAuxDataLoadNOffset.has_value() ? (maxAuxDataLoadNOffset.value() + 32u) : 0u); + setBigEndianUint16(ret.bytecode, dataSectionSizeOffset, dataLength); return ret; diff --git a/libevmasm/Assembly.h b/libevmasm/Assembly.h index de1ac20f2015..38c1d52eeeb9 100644 --- a/libevmasm/Assembly.h +++ b/libevmasm/Assembly.h @@ -79,6 +79,7 @@ class Assembly AssemblyItem newPushLibraryAddress(std::string const& _identifier); AssemblyItem newPushImmutable(std::string const& _identifier); AssemblyItem newImmutableAssignment(std::string const& _identifier); + AssemblyItem newAuxDataLoadN(size_t offset); AssemblyItem const& append(AssemblyItem _i); AssemblyItem const& append(bytes const& _data) { return append(newData(_data)); } @@ -91,6 +92,7 @@ class Assembly void appendLibraryAddress(std::string const& _identifier) { append(newPushLibraryAddress(_identifier)); } void appendImmutable(std::string const& _identifier) { append(newPushImmutable(_identifier)); } void appendImmutableAssignment(std::string const& _identifier) { append(newImmutableAssignment(_identifier)); } + void appendAuxDataLoadN(size_t offset) { append(newAuxDataLoadN(offset));} void appendVerbatim(bytes _data, size_t _arguments, size_t _returnVariables) { @@ -240,6 +242,9 @@ class Assembly /// Returns map from m_subs to an index of subcontainer in the final EOF bytecode std::map findReferencedContainers() const; + /// Returns max AuxDataLoadN offset for the assembly. + /// TODO: Can and should be merged with findReferencedContainers to avoid additional run + std::optional findMaxAuxDataLoadNOffset() const; /// Assemble bytecode for AssemblyItem type.
[[nodiscard]] bytes assembleOperation(AssemblyItem const& _item) const; diff --git a/libevmasm/AssemblyItem.cpp b/libevmasm/AssemblyItem.cpp index 1bc805780145..b69c8a58ab92 100644 --- a/libevmasm/AssemblyItem.cpp +++ b/libevmasm/AssemblyItem.cpp @@ -161,6 +161,8 @@ size_t AssemblyItem::bytesRequired(size_t _addressLength, langutil::EVMVersion _ } case VerbatimBytecode: return std::get<2>(*m_verbatimBytecode).size(); + case AuxDataLoadN: + return 1 + 2; default: break; } @@ -203,6 +205,8 @@ size_t AssemblyItem::returnValues() const return 0; case VerbatimBytecode: return std::get<1>(*m_verbatimBytecode); + case AuxDataLoadN: + return 1; default: break; } @@ -327,6 +331,9 @@ std::string AssemblyItem::toAssemblyText(Assembly const& _assembly) const case VerbatimBytecode: text = std::string("verbatimbytecode_") + util::toHex(std::get<2>(*m_verbatimBytecode)); break; + case AuxDataLoadN: + text = "auxdataloadn(" + std::to_string(static_cast(data())) + ")"; + break; default: assertThrow(false, InvalidOpcode, ""); } diff --git a/libevmasm/AssemblyItem.h b/libevmasm/AssemblyItem.h index f06c72bac2ff..226a126c368c 100644 --- a/libevmasm/AssemblyItem.h +++ b/libevmasm/AssemblyItem.h @@ -51,6 +51,10 @@ enum AssemblyItemType PushDeployTimeAddress, ///< Push an address to be filled at deploy time. Should not be touched by the optimizer. PushImmutable, ///< Push the currently unknown value of an immutable variable. The actual value will be filled in by the constructor. AssignImmutable, ///< Assigns the current value on the stack to an immutable variable. Only valid during creation code. + + /// Loads 32 bytes from static auxiliary data of EOF data section. The offset does *not* have to be always from the beginning + /// of the data EOF section. More details here: https://github.com/ipsilon/eof/blob/main/spec/eof.md#data-section-lifecycle + AuxDataLoadN, VerbatimBytecode ///< Contains data that is inserted into the bytecode code section without modification. 
}; diff --git a/libevmasm/Instruction.cpp b/libevmasm/Instruction.cpp index 943f34ee4c1f..b9bb21227c29 100644 --- a/libevmasm/Instruction.cpp +++ b/libevmasm/Instruction.cpp @@ -168,6 +168,7 @@ std::map const solidity::evmasm::c_instructions = { "LOG2", Instruction::LOG2 }, { "LOG3", Instruction::LOG3 }, { "LOG4", Instruction::LOG4 }, + { "DATALOADN", Instruction::DATALOADN }, { "CREATE", Instruction::CREATE }, { "CALL", Instruction::CALL }, { "CALLCODE", Instruction::CALLCODE }, @@ -252,6 +253,7 @@ static std::map const c_instructionInfo = {Instruction::MSIZE, {"MSIZE", 0, 0, 1, false, Tier::Base}}, {Instruction::GAS, {"GAS", 0, 0, 1, false, Tier::Base}}, {Instruction::JUMPDEST, {"JUMPDEST", 0, 0, 0, true, Tier::Special}}, + {Instruction::DATALOADN, {"DATALOADN", 2, 0, 1, true, Tier::Low}}, {Instruction::PUSH0, {"PUSH0", 0, 0, 1, false, Tier::Base}}, {Instruction::PUSH1, {"PUSH1", 1, 0, 1, false, Tier::VeryLow}}, {Instruction::PUSH2, {"PUSH2", 2, 0, 1, false, Tier::VeryLow}}, diff --git a/libevmasm/Instruction.h b/libevmasm/Instruction.h index a3629eec2db8..2ed773cc9e7b 100644 --- a/libevmasm/Instruction.h +++ b/libevmasm/Instruction.h @@ -182,6 +182,7 @@ enum class Instruction: uint8_t LOG3, ///< Makes a log entry; 3 topics. LOG4, ///< Makes a log entry; 4 topics. 
+ DATALOADN = 0xd1, ///< load data from EOF data section CREATE = 0xf0, ///< create a new account with associated code CALL, ///< message-call into an account CALLCODE, ///< message-call with another account's code only diff --git a/liblangutil/EVMVersion.cpp b/liblangutil/EVMVersion.cpp index 4245938f4e88..d76b4bbaffaa 100644 --- a/liblangutil/EVMVersion.cpp +++ b/liblangutil/EVMVersion.cpp @@ -76,6 +76,9 @@ bool EVMVersion::hasOpcode(Instruction _opcode, std::optional _eofVersi case Instruction::EXTCODECOPY: case Instruction::GAS: return !_eofVersion.has_value(); + // Instructions below available only in EOF + case Instruction::DATALOADN: + return _eofVersion.has_value() && this->m_version >= prague(); default: return true; } diff --git a/libyul/backends/evm/AbstractAssembly.h b/libyul/backends/evm/AbstractAssembly.h index fe45008962c8..ed477446e8ef 100644 --- a/libyul/backends/evm/AbstractAssembly.h +++ b/libyul/backends/evm/AbstractAssembly.h @@ -112,6 +112,9 @@ class AbstractAssembly /// Appends an assignment to an immutable variable. virtual void appendImmutableAssignment(std::string const& _identifier) = 0; + /// Appends a 32-byte data load from the EOF data section at position _dataOffset. + virtual void appendAuxDataLoadN(size_t _dataOffset) = 0; + /// Appends data to the very end of the bytecode. Repeated calls concatenate.
virtual void appendToAuxiliaryData(bytes const& _data) = 0; diff --git a/libyul/backends/evm/EVMDialect.cpp b/libyul/backends/evm/EVMDialect.cpp index 0beec06b51ea..39001a105895 100644 --- a/libyul/backends/evm/EVMDialect.cpp +++ b/libyul/backends/evm/EVMDialect.cpp @@ -199,6 +199,7 @@ std::map createBuiltins(langutil::EVMVersion _ev opcode != evmasm::Instruction::JUMP && opcode != evmasm::Instruction::JUMPI && opcode != evmasm::Instruction::JUMPDEST && + opcode != evmasm::Instruction::DATALOADN && _evmVersion.hasOpcode(opcode, _eofVersion) && !prevRandaoException(name) ) @@ -342,6 +343,25 @@ std::map createBuiltins(langutil::EVMVersion _ev _assembly.appendImmutable(formatLiteral(std::get(_call.arguments.front()))); } )); + + if (_eofVersion.has_value()) + { + builtins.emplace(createFunction( + "auxdataloadn", + 1, + 1, + SideEffects{}, + {LiteralKind::String}, + []( + FunctionCall const& _call, + AbstractAssembly& _assembly, + BuiltinContext& + ) { + yulAssert(_call.arguments.size() == 1, ""); + _assembly.appendAuxDataLoadN(std::stoul(formatLiteral(std::get(_call.arguments.front())))); + } + )); + } } return builtins; } diff --git a/libyul/backends/evm/EthAssemblyAdapter.cpp b/libyul/backends/evm/EthAssemblyAdapter.cpp index e3a2e01e1b1f..de235b3f725f 100644 --- a/libyul/backends/evm/EthAssemblyAdapter.cpp +++ b/libyul/backends/evm/EthAssemblyAdapter.cpp @@ -175,6 +175,11 @@ void EthAssemblyAdapter::appendImmutableAssignment(std::string const& _identifie m_assembly.appendImmutableAssignment(_identifier); } +void EthAssemblyAdapter::appendAuxDataLoadN(size_t _dataOffset) +{ + m_assembly.appendAuxDataLoadN(_dataOffset); +} + void EthAssemblyAdapter::markAsInvalid() { m_assembly.markAsInvalid(); diff --git a/libyul/backends/evm/EthAssemblyAdapter.h b/libyul/backends/evm/EthAssemblyAdapter.h index 011081dedaac..6960f1f6bb8e 100644 --- a/libyul/backends/evm/EthAssemblyAdapter.h +++ b/libyul/backends/evm/EthAssemblyAdapter.h @@ -65,6 +65,8 @@ class EthAssemblyAdapter: 
public AbstractAssembly void appendImmutable(std::string const& _identifier) override; void appendImmutableAssignment(std::string const& _identifier) override; + void appendAuxDataLoadN(size_t dataOffset) override; + void markAsInvalid() override; langutil::EVMVersion evmVersion() const override; diff --git a/libyul/backends/evm/NoOutputAssembly.cpp b/libyul/backends/evm/NoOutputAssembly.cpp index c444a71435c0..5a12d0c2799d 100644 --- a/libyul/backends/evm/NoOutputAssembly.cpp +++ b/libyul/backends/evm/NoOutputAssembly.cpp @@ -129,6 +129,11 @@ void NoOutputAssembly::appendImmutableAssignment(std::string const&) yulAssert(false, "setimmutable not implemented."); } +void NoOutputAssembly::appendAuxDataLoadN(size_t) +{ + yulAssert(false, "auxdataloadn not implemented."); +} + NoOutputEVMDialect::NoOutputEVMDialect(EVMDialect const& _copyFrom): EVMDialect(_copyFrom.evmVersion(), _copyFrom.eofVersion(), _copyFrom.providesObjectAccess()) { diff --git a/libyul/backends/evm/NoOutputAssembly.h b/libyul/backends/evm/NoOutputAssembly.h index 8d7dda0bb50c..3aa7c60a4666 100644 --- a/libyul/backends/evm/NoOutputAssembly.h +++ b/libyul/backends/evm/NoOutputAssembly.h @@ -75,6 +75,8 @@ class NoOutputAssembly: public AbstractAssembly void appendImmutable(std::string const& _identifier) override; void appendImmutableAssignment(std::string const& _identifier) override; + void appendAuxDataLoadN(size_t) override; + void markAsInvalid() override {} langutil::EVMVersion evmVersion() const override { return m_evmVersion; } diff --git a/test/cmdlineTests/strict_asm_eof_dataloadn_prague/args b/test/cmdlineTests/strict_asm_eof_dataloadn_prague/args new file mode 100644 index 000000000000..0078e6e68e36 --- /dev/null +++ b/test/cmdlineTests/strict_asm_eof_dataloadn_prague/args @@ -0,0 +1 @@ + --strict-assembly --experimental-eof-version 1 --evm-version prague --asm --ir-optimized --bin --debug-info none diff --git a/test/cmdlineTests/strict_asm_eof_dataloadn_prague/input.yul 
b/test/cmdlineTests/strict_asm_eof_dataloadn_prague/input.yul new file mode 100644 index 000000000000..7068838225fd --- /dev/null +++ b/test/cmdlineTests/strict_asm_eof_dataloadn_prague/input.yul @@ -0,0 +1,9 @@ +object "a" { + code { + mstore(0, auxdataloadn("0")) + return(0, 32) + } + + data "data1" "Hello, World!" +} + diff --git a/test/cmdlineTests/strict_asm_eof_dataloadn_prague/output b/test/cmdlineTests/strict_asm_eof_dataloadn_prague/output new file mode 100644 index 000000000000..1e296aba70a3 --- /dev/null +++ b/test/cmdlineTests/strict_asm_eof_dataloadn_prague/output @@ -0,0 +1,27 @@ + +======= strict_asm_eof_dataloadn_prague/input.yul (EVM) ======= + +Pretty printed source: +object "a" { + code { + { + mstore(0, auxdataloadn("0")) + return(0, 32) + } + } + data "data1" hex"48656c6c6f2c20576f726c6421" +} + + +Binary representation: +ef0001010004020001000904002d000080ffffd1000d5f5260205ff348656c6c6f2c20576f726c6421 + +Text representation: + auxdataloadn(0) + 0x00 + mstore + 0x20 + 0x00 + return +stop +data_acaf3289d7b601cbd114fb36c4d29c85bbfd5e133f14cb355c3fd8d99367964f 48656c6c6f2c20576f726c6421 diff --git a/test/tools/yulInterpreter/EVMInstructionInterpreter.cpp b/test/tools/yulInterpreter/EVMInstructionInterpreter.cpp index d2cbdb11b360..2bb5594f46f8 100644 --- a/test/tools/yulInterpreter/EVMInstructionInterpreter.cpp +++ b/test/tools/yulInterpreter/EVMInstructionInterpreter.cpp @@ -487,6 +487,8 @@ u256 EVMInstructionInterpreter::eval( case Instruction::SWAP14: case Instruction::SWAP15: case Instruction::SWAP16: + // TODO: Not sure about it. + case Instruction::DATALOADN: { yulAssert(false, ""); return 0;