Fork me on GitHub
pikachu's Blog

Function Selector and Argument Encoding

前言

  • 最近看了一些奇怪的东西,触及到了知识盲区,就水了一篇博客记录一下
  • 写的也不是很专业,只是按照个人理解,有错误的话欢迎及时交流 👀
  • 参考了 👉 官方文档 👈

Function Selector

  • 定义就不介绍了,不太清楚的可自行百度,直接来看例子(一看就懂,就不解释了)
    • 参数包含结构体,相当于把结构体拆分成单个参数,只不过这些参数用 () 括起来
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
pragma solidity >=0.4.16 <0.9.0;
pragma experimental ABIEncoderV2;

/// @title Demo
/// @notice Example contract whose functions cover static, dynamic, struct and nested
///         parameter types, used to illustrate function selectors and argument encoding.
/// @dev No function reads its arguments, so parameter names are omitted or commented
///      out; only the parameter types appear in the canonical signatures below.
contract Demo {
    /// @dev Appears as the tuple `(string,string,uint256)` in canonical signatures.
    struct Test {
        string name;
        string policies;
        uint num;
    }

    /// @notice Marker value; every `testN` function sets it to 1.
    uint public x;

    /// @notice Canonical signature: `test1(bytes3)`.
    function test1(bytes3) public { x = 1; }

    /// @notice Canonical signature: `test2(bytes3[2])`.
    function test2(bytes3[2] memory) public { x = 1; }

    /// @notice Canonical signature: `test3(uint32,bool)`.
    /// @dev The first parameter was previously named `x`, which shadowed the state
    ///      variable: `x = 1` then only overwrote the argument and storage was never
    ///      written. The names are commented out so the assignment targets storage.
    function test3(uint32 /* x */, bool /* y */) public { x = 1; }

    /// @notice Canonical signature: `test4(uint256,uint32[],bytes10,bytes)`.
    function test4(uint, uint32[] memory, bytes10, bytes memory) public { x = 1; }

    /// @notice Canonical signature: `test5(uint256,(string,string,uint256))`.
    function test5(uint, Test memory /* test */) public { x = 1; }

    /// @notice Canonical signature: `test6(uint256,(string,string,uint256)[])`.
    function test6(uint, Test[] memory /* tests */) public { x = 1; }

    /// @notice Canonical signature: `test7(uint256[][],string[])`.
    function test7(uint[][] memory, string[] memory) public { x = 1; }
}

/* 函数选择器
{
"0d2032f1": "test1(bytes3)",
"2b231dad": "test2(bytes3[2])",
"92e92919": "test3(uint32,bool)",
"4d189ce2": "test4(uint256,uint32[],bytes10,bytes)",
"4ca373dc": "test5(uint256,(string,string,uint256))",
"ccc5bdd2": "test6(uint256,(string,string,uint256)[])",
"cc80bc65": "test7(uint256[][],string[])",
"0c55699c": "x()"
}
*/

Function Selector and Argument Encoding

  • 动态类型的数据,比如动态数组,(含变长成员的)结构体,变长字节,其编码后依次存储其 offset、length、data

    • 先把参数顺序存储:如果是定长数据类型,直接存储其 data,如果是变长数据类型,先存储其 offset
    • 顺序遍历变长数据:先存储 offset,对于第一个变长数据,先存储其 offset = 0x20 * number ( number 是函数参数的个数 );对于下一个变长数据,其 offset = offset_of_prev + 0x20 + 0x20 * number (第一个 0x20 是存储前一个变长数据的长度占用的大小,number 是前一个变长数据的数据部分占用的 32 字节槽位个数;对于元素为定长类型的数组,即其元素个数)
    • 顺序遍历变长数据:存储完 offset ,接着就是遍历每个变长数据,分别存储其 length 和 data
    • ( ps: 对于结构体这样的类型,存储的时候可把结构体内元素看成是一个新函数的参数,这样的话,对于结构体中的第一个变长数据,其 offset = 0x20 * num,num 是结构体元素的个数 )
  • 对于上述的合约例子,其函数调用最终编码如下

  1. test1("0x112233")
1
2
0x0d2032f1                                                             // function selector
0 - 0x1122330000000000000000000000000000000000000000000000000000000000 // data of first parameter
  1. test2(["0x112233", "0x445566"])
1
2
3
0x2b231dad                                                             // function selector
0 - 0x1122330000000000000000000000000000000000000000000000000000000000 // first data of first parameter
1 - 0x4455660000000000000000000000000000000000000000000000000000000000 // second data of first parameter
  1. test3(0x123, 1)
1
2
3
0x92e92919                                                             // function selector
0 - 0x0000000000000000000000000000000000000000000000000000000000000123 // data of first parameter
1 - 0x0000000000000000000000000000000000000000000000000000000000000001 // data of second parameter
  1. test4(0x123, ["0x11221122", "0x33443344"], "0x31323334353637383930", "0x3132333435")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
0x4d189ce2                                                             // function selector
0 - 0x0000000000000000000000000000000000000000000000000000000000000123 // data of first parameter
1 - 0x0000000000000000000000000000000000000000000000000000000000000080 // offset of second parameter
2 - 0x3132333435363738393000000000000000000000000000000000000000000000 // data of third parameter
3 - 0x00000000000000000000000000000000000000000000000000000000000000e0 // offset of fourth parameter
4 - 0x0000000000000000000000000000000000000000000000000000000000000002 // length of second parameter
5 - 0x0000000000000000000000000000000000000000000000000000000011221122 // first data of second parameter
6 - 0x0000000000000000000000000000000000000000000000000000000033443344 // second data of second parameter
7 - 0x0000000000000000000000000000000000000000000000000000000000000005 // length of fourth parameter
8 - 0x3132333435000000000000000000000000000000000000000000000000000000 // data of fourth parameter

/* 一些解释说明
data of first parameter: uint定长类型,直接存储其data
offset of second parameter: uint32[]动态数组,先存储其offset=0x20*4 (4代表函数参数的个数)
data of third parameter: bytes10定长类型,直接存储其data
offset of fourth parameter: bytes变长类型,先存储其offset=0x80+0x20*3=0xe0 (0x80是前一个变长类型的offset,3是前一个变长类型存储其长度和两个元素占用的插槽个数)
length of second parameter: 存储完data或者offset后,便开始存储变长数据的length和data,这里是第二个参数的长度
first data of second parameter: 第二个参数的第一个数据
second data of second parameter: 第二个参数的第二个数据
length of fourth parameter: 上面就把第二个参数(第一个变长数据)存储完成,这里就是存储下一个变长数据(第四个参数)的长度
data of fourth parameter: 第四个参数的数据
*/
  1. test5(0x123, ["cxy", "pika", 123])
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
0x4ca373dc                                                             // function selector
0 - 0x0000000000000000000000000000000000000000000000000000000000000123 // data of first parameter
1 - 0x0000000000000000000000000000000000000000000000000000000000000040 // offset of second parameter
2 - 0x0000000000000000000000000000000000000000000000000000000000000060 // first data offset of second parameter
3 - 0x00000000000000000000000000000000000000000000000000000000000000a0 // second data offset of second parameter
4 - 0x000000000000000000000000000000000000000000000000000000000000007b // third data of second parameter
5 - 0x0000000000000000000000000000000000000000000000000000000000000003 // first data length of second parameter
6 - 0x6378790000000000000000000000000000000000000000000000000000000000 // first data of second parameter
7 - 0x0000000000000000000000000000000000000000000000000000000000000004 // second data length of second parameter
8 - 0x70696b6100000000000000000000000000000000000000000000000000000000 // second data of second parameter

/* 一些解释说明
data of first parameter: uint定长类型,直接存储其data
offset of second parameter: 结构体,先存储其offset=0x20*2 (2代表函数参数的个数)
first data offset of second parameter: 结构体内元素可当成函数参数拆分,有三个元素,因第一个元素是string类型,所以先存储其offset=0x20*3=0x60
second data offset of second parameter: 结构体第二个元素是string类型,先存储其offset=0x60+0x20+0x20=0xa0 (第一个0x20是存储第一个string的长度所占大小,第二个0x20是存储第一个string的数据所占大小)
third data of second parameter: 结构体第三个元素是uint定长类型,直接存储其data
first data length of second parameter: 存储结构体第一个元素的length
first data of second parameter: 存储结构体第一个元素的data
second data length of second parameter: 存储结构体第二个元素的length
second data of second parameter: 存储结构体第二个元素的data
*/
  1. test6(0x123, [["cxy1", "pika1", 123], ["cxy2", "pika2", 456]])
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
由于是结构体数组,所以需要拆分,由内向外。内部是两个结构体,分别来看其encoding

对于["cxy1", "pika1", 123]结构体,其encoding如下(直接当成函数参数encoding)
0 - 0x0000000000000000000000000000000000000000000000000000000000000060 // offset of "cxy1"
1 - 0x00000000000000000000000000000000000000000000000000000000000000a0 // offset of "pika1"
2 - 0x000000000000000000000000000000000000000000000000000000000000007b // encoding of 123
3 - 0x0000000000000000000000000000000000000000000000000000000000000004 // length of "cxy1"
4 - 0x6378793100000000000000000000000000000000000000000000000000000000 // encoding of "cxy1"
5 - 0x0000000000000000000000000000000000000000000000000000000000000005 // length of "pika1"
6 - 0x70696b6131000000000000000000000000000000000000000000000000000000 // encoding of "pika1"

对于["cxy2", "pika2", 456]结构体,其encoding如下(直接当成函数参数encoding)
0 - 0x0000000000000000000000000000000000000000000000000000000000000060 // offset of "cxy2"
1 - 0x00000000000000000000000000000000000000000000000000000000000000a0 // offset of "pika2"
2 - 0x00000000000000000000000000000000000000000000000000000000000001c8 // encoding of 456
3 - 0x0000000000000000000000000000000000000000000000000000000000000004 // length of "cxy2"
4 - 0x6378793200000000000000000000000000000000000000000000000000000000 // encoding of "cxy2"
5 - 0x0000000000000000000000000000000000000000000000000000000000000005 // length of "pika2"
6 - 0x70696b6132000000000000000000000000000000000000000000000000000000 // encoding of "pika2"

由于是结构体,所以还需要["cxy1", "pika1", 123]的offset和["cxy2", "pika2", 456]的offset,如下
0 - a // offset of ["cxy1", "pika1", 123]
1 - b // offset of ["cxy2", "pika2", 456]
2 - 0x0000000000000000000000000000000000000000000000000000000000000060 // offset of "cxy1"
3 - 0x00000000000000000000000000000000000000000000000000000000000000a0 // offset of "pika1"
4 - 0x000000000000000000000000000000000000000000000000000000000000007b // encoding of 123
5 - 0x0000000000000000000000000000000000000000000000000000000000000004 // length of "cxy1"
6 - 0x6378793100000000000000000000000000000000000000000000000000000000 // encoding of "cxy1"
7 - 0x0000000000000000000000000000000000000000000000000000000000000005 // length of "pika1"
8 - 0x70696b6131000000000000000000000000000000000000000000000000000000 // encoding of "pika1"
9 - 0x0000000000000000000000000000000000000000000000000000000000000060 // offset of "cxy2"
10- 0x00000000000000000000000000000000000000000000000000000000000000a0 // offset of "pika2"
11- 0x00000000000000000000000000000000000000000000000000000000000001c8 // encoding of 456
12- 0x0000000000000000000000000000000000000000000000000000000000000004 // length of "cxy2"
13- 0x6378793200000000000000000000000000000000000000000000000000000000 // encoding of "cxy2"
14- 0x0000000000000000000000000000000000000000000000000000000000000005 // length of "pika2"
15- 0x70696b6132000000000000000000000000000000000000000000000000000000 // encoding of "pika2"
a指向offset of "cxy1",所以a=0x20*2=0x40
b指向offset of "cxy2",所以b=0x20*9=0x120

由于是结构体数组,结构体外面是数组,所以要按照动态数组encoding的方法,如下
0 - c // offset of [["cxy1", "pika1", 123], ["cxy2", "pika2", 456]]
1 - 0x0000000000000000000000000000000000000000000000000000000000000002 // count of second parameter
2 - 0x0000000000000000000000000000000000000000000000000000000000000040 // offset of ["cxy1", "pika1", 123]
3 - 0x0000000000000000000000000000000000000000000000000000000000000120 // offset of ["cxy2", "pika2", 456]
4 - 0x0000000000000000000000000000000000000000000000000000000000000060 // offset of "cxy1"
5 - 0x00000000000000000000000000000000000000000000000000000000000000a0 // offset of "pika1"
6 - 0x000000000000000000000000000000000000000000000000000000000000007b // encoding of 123
7 - 0x0000000000000000000000000000000000000000000000000000000000000004 // length of "cxy1"
8 - 0x6378793100000000000000000000000000000000000000000000000000000000 // encoding of "cxy1"
9 - 0x0000000000000000000000000000000000000000000000000000000000000005 // length of "pika1"
10- 0x70696b6131000000000000000000000000000000000000000000000000000000 // encoding of "pika1"
11- 0x0000000000000000000000000000000000000000000000000000000000000060 // offset of "cxy2"
12- 0x00000000000000000000000000000000000000000000000000000000000000a0 // offset of "pika2"
13- 0x00000000000000000000000000000000000000000000000000000000000001c8 // encoding of 456
14- 0x0000000000000000000000000000000000000000000000000000000000000004 // length of "cxy2"
15- 0x6378793200000000000000000000000000000000000000000000000000000000 // encoding of "cxy2"
16- 0x0000000000000000000000000000000000000000000000000000000000000005 // length of "pika2"
17- 0x70696b6132000000000000000000000000000000000000000000000000000000 // encoding of "pika2"
c是函数参数的第二个参数,是动态类型,所以offset c = 0x20*2 = 0x40

所以总的encoding如下
0xccc5bdd2 // function selector
0 - 0x0000000000000000000000000000000000000000000000000000000000000123 // encoding of 0x123
1 - 0x0000000000000000000000000000000000000000000000000000000000000040 // offset of second parameter
2 - 0x0000000000000000000000000000000000000000000000000000000000000002 // count of second parameter
3 - 0x0000000000000000000000000000000000000000000000000000000000000040 // offset of ["cxy1", "pika1", 123]
4 - 0x0000000000000000000000000000000000000000000000000000000000000120 // offset of ["cxy2", "pika2", 456]
5 - 0x0000000000000000000000000000000000000000000000000000000000000060 // offset of "cxy1"
6 - 0x00000000000000000000000000000000000000000000000000000000000000a0 // offset of "pika1"
7 - 0x000000000000000000000000000000000000000000000000000000000000007b // encoding of 123
8 - 0x0000000000000000000000000000000000000000000000000000000000000004 // length of "cxy1"
9 - 0x6378793100000000000000000000000000000000000000000000000000000000 // encoding of "cxy1"
10- 0x0000000000000000000000000000000000000000000000000000000000000005 // length of "pika1"
11- 0x70696b6131000000000000000000000000000000000000000000000000000000 // encoding of "pika1"
12- 0x0000000000000000000000000000000000000000000000000000000000000060 // offset of "cxy2"
13- 0x00000000000000000000000000000000000000000000000000000000000000a0 // offset of "pika2"
14- 0x00000000000000000000000000000000000000000000000000000000000001c8 // encoding of 456
15- 0x0000000000000000000000000000000000000000000000000000000000000004 // length of "cxy2"
16- 0x6378793200000000000000000000000000000000000000000000000000000000 // encoding of "cxy2"
17- 0x0000000000000000000000000000000000000000000000000000000000000005 // length of "pika2"
18- 0x70696b6132000000000000000000000000000000000000000000000000000000 // encoding of "pika2"
  1. test7([[1, 2], [3]], ["one", "two", "three"])
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
同理进行由内向外的拆分,首先是[[1, 2], [3]]动态数组中的[1, 2]和[3]两个动态数组
0 - a // offset of [1, 2]
1 - b // offset of [3]
2 - 0x0000000000000000000000000000000000000000000000000000000000000002 // count for [1, 2]
3 - 0x0000000000000000000000000000000000000000000000000000000000000001 // encoding of 1
4 - 0x0000000000000000000000000000000000000000000000000000000000000002 // encoding of 2
5 - 0x0000000000000000000000000000000000000000000000000000000000000001 // count for [3]
6 - 0x0000000000000000000000000000000000000000000000000000000000000003 // encoding of 3
a指向[1, 2]的开始,所以a=0x20*2=0x40
b指向[3]的开始,所以b=0x20*5=0xa0

然后是[[1, 2], [3]]动态数组本身的encoding
0 - c // offset of [[1, 2], [3]]
1 - 0x0000000000000000000000000000000000000000000000000000000000000002 // count for [[1, 2], [3]]
2 - 0x0000000000000000000000000000000000000000000000000000000000000040 // offset of [1, 2]
3 - 0x00000000000000000000000000000000000000000000000000000000000000a0 // offset of [3]
4 - 0x0000000000000000000000000000000000000000000000000000000000000002 // count for [1, 2]
5 - 0x0000000000000000000000000000000000000000000000000000000000000001 // encoding of 1
6 - 0x0000000000000000000000000000000000000000000000000000000000000002 // encoding of 2
7 - 0x0000000000000000000000000000000000000000000000000000000000000001 // count for [3]
8 - 0x0000000000000000000000000000000000000000000000000000000000000003 // encoding of 3
c指向[[1, 2], [3]]的开始,所以c=0x20*2=0x40

其次是["one", "two", "three"]动态数组中每个string的encoding
0 - d // offset for "one"
1 - e // offset for "two"
2 - f // offset for "three"
3 - 0x0000000000000000000000000000000000000000000000000000000000000003 // count for "one"
4 - 0x6f6e650000000000000000000000000000000000000000000000000000000000 // encoding of "one"
5 - 0x0000000000000000000000000000000000000000000000000000000000000003 // count for "two"
6 - 0x74776f0000000000000000000000000000000000000000000000000000000000 // encoding of "two"
7 - 0x0000000000000000000000000000000000000000000000000000000000000005 // count for "three"
8 - 0x7468726565000000000000000000000000000000000000000000000000000000 // encoding of "three"
d指向“one”的开始,所以d=0x20*3=0x60
e指向“two”的开始,所以e=0x20*5=0xa0
f指向“three”的开始,所以f=0x20*7=0xe0

然后是["one", "two", "three"]动态数组本身的encoding
0 - g // offset of ["one", "two", "three"]
1 - 0x0000000000000000000000000000000000000000000000000000000000000003 // count for ["one", "two", "three"]
2 - 0x0000000000000000000000000000000000000000000000000000000000000060 // offset for "one"
3 - 0x00000000000000000000000000000000000000000000000000000000000000a0 // offset for "two"
4 - 0x00000000000000000000000000000000000000000000000000000000000000e0 // offset for "three"
5 - 0x0000000000000000000000000000000000000000000000000000000000000003 // count for "one"
6 - 0x6f6e650000000000000000000000000000000000000000000000000000000000 // encoding of "one"
7 - 0x0000000000000000000000000000000000000000000000000000000000000003 // count for "two"
8 - 0x74776f0000000000000000000000000000000000000000000000000000000000 // encoding of "two"
9 - 0x0000000000000000000000000000000000000000000000000000000000000005 // count for "three"
10- 0x7468726565000000000000000000000000000000000000000000000000000000 // encoding of "three"
这里g先不进行计算,因为涉及到函数参数整体的一个encoding

上面已经把[[1, 2], [3]]和["one", "two", "three"]分析完毕,最后就是其作为一个整体进行encoding
0 - 0x0000000000000000000000000000000000000000000000000000000000000040 // offset of [[1, 2], [3]]
1 - g // offset of ["one", "two", "three"]
2 - 0x0000000000000000000000000000000000000000000000000000000000000002 // count for [[1, 2], [3]]
3 - 0x0000000000000000000000000000000000000000000000000000000000000040 // offset of [1, 2]
4 - 0x00000000000000000000000000000000000000000000000000000000000000a0 // offset of [3]
5 - 0x0000000000000000000000000000000000000000000000000000000000000002 // count for [1, 2]
6 - 0x0000000000000000000000000000000000000000000000000000000000000001 // encoding of 1
7 - 0x0000000000000000000000000000000000000000000000000000000000000002 // encoding of 2
8 - 0x0000000000000000000000000000000000000000000000000000000000000001 // count for [3]
9 - 0x0000000000000000000000000000000000000000000000000000000000000003 // encoding of 3
10- 0x0000000000000000000000000000000000000000000000000000000000000003 // count for ["one", "two", "three"]
11- 0x0000000000000000000000000000000000000000000000000000000000000060 // offset for "one"
12- 0x00000000000000000000000000000000000000000000000000000000000000a0 // offset for "two"
13- 0x00000000000000000000000000000000000000000000000000000000000000e0 // offset for "three"
14- 0x0000000000000000000000000000000000000000000000000000000000000003 // count for "one"
15- 0x6f6e650000000000000000000000000000000000000000000000000000000000 // encoding of "one"
16- 0x0000000000000000000000000000000000000000000000000000000000000003 // count for "two"
17- 0x74776f0000000000000000000000000000000000000000000000000000000000 // encoding of "two"
18- 0x0000000000000000000000000000000000000000000000000000000000000005 // count for "three"
19- 0x7468726565000000000000000000000000000000000000000000000000000000 // encoding of "three"
g指向字符串数组的开始,所以g=0x20*10=0x140

所以总的selector+encoding如下所示
0xcc80bc65 // function selector
0 - 0x0000000000000000000000000000000000000000000000000000000000000040 // offset of [[1, 2], [3]]
1 - 0x0000000000000000000000000000000000000000000000000000000000000140 // offset of ["one", "two", "three"]
2 - 0x0000000000000000000000000000000000000000000000000000000000000002 // count for [[1, 2], [3]]
3 - 0x0000000000000000000000000000000000000000000000000000000000000040 // offset of [1, 2]
4 - 0x00000000000000000000000000000000000000000000000000000000000000a0 // offset of [3]
5 - 0x0000000000000000000000000000000000000000000000000000000000000002 // count for [1, 2]
6 - 0x0000000000000000000000000000000000000000000000000000000000000001 // encoding of 1
7 - 0x0000000000000000000000000000000000000000000000000000000000000002 // encoding of 2
8 - 0x0000000000000000000000000000000000000000000000000000000000000001 // count for [3]
9 - 0x0000000000000000000000000000000000000000000000000000000000000003 // encoding of 3
10- 0x0000000000000000000000000000000000000000000000000000000000000003 // count for ["one", "two", "three"]
11- 0x0000000000000000000000000000000000000000000000000000000000000060 // offset for "one"
12- 0x00000000000000000000000000000000000000000000000000000000000000a0 // offset for "two"
13- 0x00000000000000000000000000000000000000000000000000000000000000e0 // offset for "three"
14- 0x0000000000000000000000000000000000000000000000000000000000000003 // count for "one"
15- 0x6f6e650000000000000000000000000000000000000000000000000000000000 // encoding of "one"
16- 0x0000000000000000000000000000000000000000000000000000000000000003 // count for "two"
17- 0x74776f0000000000000000000000000000000000000000000000000000000000 // encoding of "two"
18- 0x0000000000000000000000000000000000000000000000000000000000000005 // count for "three"
19- 0x7468726565000000000000000000000000000000000000000000000000000000 // encoding of "three"
---------------- The End ----------------
谢谢大爷~

Author:pikachu
Link:https://hitcxy.com/2021/argument-encoding/
Contact:hitcxy.cn@gmail.com
本文基于 知识共享署名-相同方式共享 4.0 国际许可协议发布
转载请注明出处,谢谢!