32
32
.globl ulp_stack_helper
33
33
.type ulp_stack_helper, @function
34
34
35
+ # __tls_get_addr
36
+ .globl __tls_get_addr
37
+ .type __tls_get_addr, @function
38
+
35
39
.section ".text"
36
40
.align 2
37
41
.p2align 4 ,,15
40
44
.globl trampoline_routine
41
45
.type trampoline_routine, @function
42
46
trampoline_routine:
43
- .cfi_startproc
47
+ .cfi_startproc
44
48
45
49
# Concatenate two registers from prologue.
46
50
rldimi %r6, %r5, 32 , 0
47
51
48
52
# Move the target function ptr to control register so we can free r6.
49
53
mtctr %r6
50
54
51
- # Load the ulp_stack into r5 through r13 (thread local storage ptr)
52
- addis %r5, %r13 , ulp_stack@tprel@ha
53
- addi %r5, %r5, ulp_stack@tprel@l
55
+ # Save all volatile registers
56
+ # r5 & r6 are designated temp regs, having data already on stack.
57
+ # After return from expand_ulp_stack, both regs construct values
58
+ # before use.
59
+ std %r2, -24 (%r1)
60
+ std %r3, -32 (%r1)
61
+ std %r12 , -40 (%r1)
62
+ mflr %r2
63
+ std %r2, -48 (%r1)
64
+
65
+ # Move stack register
66
+ addi %r1, %r1, -(48 + 32 + 8 ) # 32 + 8 for padding
67
+
68
+ # Fix TOC. %r12 must be pointing to the address of trampoline_routine.
69
+ addis %r2,%r12 , .TOC.-trampoline_routine@ha
70
+ addi %r2,%r2 , .TOC.-trampoline_routine@l
71
+
72
+ # Load ulp_stack
73
+ addis %r3, %r2, ulp_stack@got@tlsgd@ha
74
+ addi %r3, %r3, ulp_stack@got@tlsgd@l
54
75
55
- # Load real_size
56
- ld %r6, ULP_STACK_REAL_SIZE(%r5) # Load real_size (allocated by mmap )
57
- ld %r5, ULP_STACK_USED_SIZE(%r5) # Load used_size (currently in use)
76
+ # Get address of ulp_stack
77
+ bl __tls_get_addr(ulp_stack@tlsgd )
78
+ nop
58
79
59
- # Check if we have space.
60
- cmpd %cr0 , %r6, %r5
80
+ # Load ulp_stack attributes
81
+ ld %r6, ULP_STACK_REAL_SIZE(%r3) # Load real_size (allocated by mmap)
82
+ ld %r5, ULP_STACK_USED_SIZE(%r3) # Load used_size
83
+
84
+ # Check if we have space
85
+ cmpd %cr0 , %r6, %r5
61
86
ble %cr0 , .Lexpand_ulp_stack
62
87
63
88
.Lcontinue_ulp_prologue:
64
89
65
- # Reload the ulp_stack into r5 through r13 (thread local storage ptr)
66
- addis %r5, %r13 , ulp_stack@tprel@ha
67
- addi %r5, %r5, ulp_stack@tprel@l
90
+ # Here we must ensure that %r3 points to ulp_stack. If we are here from
91
+ # the .Lexpand_ulp_stack, then r3 will point to it because
92
+ # ulp_stack_helper returned it.
68
93
69
94
# Load used_size
70
- ld %r6, ULP_STACK_USED_SIZE(%r5 )
95
+ ld %r6, ULP_STACK_USED_SIZE(%r3 )
71
96
72
97
# Update top_of_stack in the struct field.
73
98
addi %r6, %r6, 16
74
- std %r6, ULP_STACK_USED_SIZE(%r5 ) # Store new used size value.
99
+ std %r6, ULP_STACK_USED_SIZE(%r3 ) # Store new used size value.
75
100
76
101
# Load stack ptr
77
- ld %r5, ULP_STACK_PTR(%r5 )
102
+ ld %r5, ULP_STACK_PTR(%r3 )
78
103
79
104
# Store TOC
80
- add %r5, %r5, %r6 # ulp_stack + used_size
105
+ add %r5, %r5, %r6 # ulp_stack_ptr + used_size
106
+
107
+ # Restore stack register.
108
+ addi %r1, %r1, (48 + 32 + 8 )
109
+
110
+ # Load original LR
111
+ ld %r2, -48 (%r1)
112
+ mtlr %r2
113
+
114
+ # Load original TOC
115
+ ld %r6, -24 (%r1)
81
116
82
117
# At this point, %r5 points to 16 bytes ahead of the slot where we shall
83
118
# save TOC. Hence we have to subtract 16 bytes of the storing location,
@@ -86,29 +121,55 @@ trampoline_routine:
86
121
# +----------------------------------v
87
122
# | TOC1 | LR1 || ... || _8b_ | _8b_ |
88
123
# +----------------------------------+
89
- std %r2, -16 (%r5) # store in *(ulp_stack + used_size - 16)
90
- mflr %r2
124
+ std %r6, -16 (%r5) # store in *(ulp_stack + used_size - 16)
91
125
std %r2, -8 (%r5) # store in *(ulp_stack + used_size - 8)
92
126
127
+
93
128
# Restore registers
94
- ld %r5, -8 (%r1) # Restore register.
95
- ld %r6, -16 (%r1) # Restore register.
129
+ ld %r5, -8 (%r1) # Restore register.
130
+ ld %r6, -16 (%r1) # Restore register.
131
+ #ld %r2, -24(%r1) # r2 was already loaded
132
+ ld %r3, -32 (%r1)
133
+ ld %r12 , -40 (%r1)
96
134
97
- # Jump to target function
135
+ # jump to target function
98
136
mfctr %r12
99
137
bctrl
100
138
101
- # Load the ulp_stack into r5 through r13 (thread local storage ptr)
102
- addis %r5, %r13 , ulp_stack@tprel@ha
103
- addi %r5, %r5, ulp_stack@tprel@l
139
+ # Save return registers and ones used by __tls_get_addr.
140
+ std %r3, -8 (%r1)
141
+ std %r12 , -16 (%r1)
142
+
143
+ # Move stack register
144
+ addi %r1, %r1, -(16 + 32 + 8 ) # 32 + 8 for padding
145
+
146
+ # Do a trick to load PC into LR register.
147
+ bl .return_to_caller
148
+ .return_to_caller:
149
+ mflr %r12
150
+
151
+ # Get the function address.
152
+ addi %r12 , %r12 , trampoline_routine - .return_to_caller
153
+
154
+ # Fix TOC. %r12 must be pointing to the address of trampoline_routine.
155
+ addis %r2,%r12 , .TOC.-trampoline_routine@ha
156
+ addi %r2,%r2 , .TOC.-trampoline_routine@l
157
+
158
+ # Load ulp_stack
159
+ addis %r3, %r2, ulp_stack@got@tlsgd@ha
160
+ addi %r3, %r3, ulp_stack@got@tlsgd@l
161
+
162
+ # Get address of ulp_stack
163
+ bl __tls_get_addr(ulp_stack@tlsgd)
164
+ nop
104
165
105
166
# Dereference ulp_stack.
106
- ld %r6, ULP_STACK_USED_SIZE(%r5 )
167
+ ld %r6, ULP_STACK_USED_SIZE(%r3 )
107
168
addi %r6, %r6, -16 # Sub 16 bytes because the first entry stores the top of stack, and we need to store 2 longs.
108
- std %r6, ULP_STACK_USED_SIZE(%r5 ) # Store new used_size value.
169
+ std %r6, ULP_STACK_USED_SIZE(%r3 ) # Store new used_size value.
109
170
110
171
# Load ulp_stack ptr field.
111
- ld %r5, ULP_STACK_PTR(%r5 )
172
+ ld %r5, ULP_STACK_PTR(%r3 )
112
173
113
174
# Point to the top of stack but two, these two entries are popped in
114
175
# previous step and accessed in next step (stack size decremented before access).
@@ -119,73 +180,68 @@ trampoline_routine:
119
180
ld %r8 , 8 (%r5) # Restore LR
120
181
mtlr %r8 # Load LR
121
182
122
- # Return execution to caller.
183
+ # Restore used registers
184
+ addi %r1, %r1, (16 + 32 + 8 ) # 32 + 8 for padding
185
+ ld %r3, -8 (%r1)
186
+ ld %r12 , -16 (%r1)
187
+
188
+ # Return.
123
189
blr
124
190
125
191
.Lexpand_ulp_stack:
126
192
127
193
# Save all volatile registers
128
194
# r5 & r6 are designated temp regs, having data already on stack.
195
+ # r0, r2 & r12 are temp regs as well in this slow path.
129
196
# After return from expand_ulp_stack, both regs construct values
130
197
# before use.
131
- std %r2, -24 (%r1)
132
- std %r3, -32 (%r1)
133
- std %r4, -40 (%r1)
134
- std %r7, -48 (%r1)
135
- std %r8 , -56 (%r1)
136
- std %r9 , -64 (%r1)
137
- std %r10 , -72 (%r1)
138
- std %r11 , -80 (%r1)
139
- std %r12 , -88 (%r1)
140
- mfctr %r3
141
- std %r3, -96 (%r1)
142
- mflr %r3,
143
- std %r3, -104 (%r1)
144
-
145
- # As per ppc64le ABIv2, the minimum stack frame is of 32 bytes and
146
- # additional 8 bytes padding is needed for alignment in stack frame.
147
- # The regs stored in redzone must have this 32+8 bytes padding to form
148
- # auxiliary stack frame before calling ulp_stack_helper which will
149
- # have its own proper stack frame.
150
-
151
- # Move stack register
152
- addi %r1, %r1, -(104 + 32 + 8 ) # 32 + 8 for padding
153
-
154
- # Fix TOC. %r12 must be pointing to the address of trampoline_routine.
155
- addis %r2,%r12 , .TOC.-trampoline_routine@ha
156
- addi %r2,%r2 , .TOC.-trampoline_routine@l
198
+ std %r4, -8 (%r1)
199
+ std %r7, -16 (%r1)
200
+ std %r8 , -24 (%r1)
201
+ std %r9 , -32 (%r1)
202
+ std %r10 , -40 (%r1)
203
+ std %r11 , -48 (%r1)
204
+ mfctr %r4
205
+ std %r4, -56 (%r1)
206
+ mflr %r4,
207
+ std %r4, -64 (%r1)
208
+
209
+ # Setup stack frame
210
+ addi %r1, %r1, -(64 + 32 + 8 )
157
211
158
212
# Call C helper routine.
159
- bl ulp_stack_helper
213
+ bl ulp_stack_helper
160
214
nop
161
215
162
- # Restore stack register.
163
- addi %r1, %r1, (104 + 32 + 8 )
164
-
165
- # Restore registers
166
- ld %r3, -104 (%r1)
167
- mtlr %r3
168
- ld %r3, -96 (%r1)
169
- mtctr %r3
170
- ld %r12 , -88 (%r1)
171
- ld %r11 , -80 (%r1)
172
- ld %r10 , -72 (%r1)
173
- ld %r9 , -64 (%r1)
174
- ld %r8 , -56 (%r1)
175
- ld %r7, -48 (%r1)
176
- ld %r4, -40 (%r1)
177
- ld %r3, -32 (%r1)
178
- ld %r2, -24 (%r1)
179
-
216
+ # Restore stack frame
217
+ addi %r1, %r1, (64 + 32 + 8 )
218
+
219
+ # Load back registers.
220
+ ld %r7, -16 (%r1)
221
+ ld %r8 , -24 (%r1)
222
+ ld %r9 , -32 (%r1)
223
+ ld %r10 , -40 (%r1)
224
+ ld %r11 , -48 (%r1)
225
+ ld %r4, -56 (%r1)
226
+ mtctr %r4
227
+ ld %r4, -64 (%r1)
228
+ mtlr %r4
229
+ ld %r4, -8 (%r1)
230
+
231
+ # Continue execution
180
232
b .Lcontinue_ulp_prologue
181
233
182
234
.long 0
183
235
.byte 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
184
236
.cfi_endproc
185
237
.size trampoline_routine,.-trampoline_routine
186
238
187
- .globl ulp_prologue
188
- .type ulp_prologue, @function
239
+ # The following function needs to be placed in .data, as it is a template to be
240
+ # copied in the prologue of the patched function. Placing this in .data avoids
241
+ # text relocations.
242
+ .section ".data"
243
+ .globl ulp_prologue
244
+ .type ulp_prologue, @function
189
245
ulp_prologue:
190
246
.cfi_startproc
191
247
std %r5, -8 (%r1) # Save one register used as function parameter
@@ -195,7 +251,7 @@ ulp_prologue:
195
251
lis %r5, trampoline_routine@highest #0x1122
196
252
ori %r5, %r5, trampoline_routine@higher #0x3344
197
253
lis %r12 , trampoline_routine@high #0x5566
198
- ori %r12 , %r12 , trampoline_routine@l #0x7788
254
+ ori %r12 , %r12 , trampoline_routine@l #0x7788
199
255
200
256
# Concatenate two registers
201
257
rldimi %r12 , %r5, 32 , 0
@@ -223,7 +279,7 @@ ulp_prologue_end = .
223
279
.size ulp_prologue,.-ulp_prologue
224
280
ulp_prologue_padding_end = .
225
281
226
- .section ".data "
282
+ .section ".rodata "
227
283
.align 2
228
284
.type ulp_prologue_size, @object
229
285
.size ulp_prologue_size, 4
0 commit comments