32
32
.globl ulp_stack_helper
33
33
.type ulp_stack_helper, @function
34
34
35
+ # __tls_get_addr
36
+ .globl __tls_get_addr
37
+ .type __tls_get_addr, @function
38
+
35
39
.section ".text"
36
40
.align 2
37
41
.p2align 4 ,,15
40
44
.globl trampoline_routine
41
45
.type trampoline_routine, @function
42
46
trampoline_routine:
43
- .cfi_startproc
47
+ .cfi_startproc
44
48
45
49
# Concatenate two registers from prologue.
46
50
rldimi %r6, %r5, 32 , 0
47
51
48
52
# Move the target function ptr to control register so we can free r6.
49
53
mtctr %r6
50
54
51
- # Load the ulp_stack into r5 through r13 (thread local storage ptr)
52
- addis %r5, %r13 , ulp_stack@tprel@ha
53
- addi %r5, %r5, ulp_stack@tprel@l
55
+ # Save all volatile registers
56
+ # r5 & r6 are designated temp regs, having data already on stack.
57
+ # After return from expand_ulp_stack, both regs construct values
58
+ # before use.
59
+ std %r0, -24 (%r1)
60
+ std %r2, -32 (%r1)
61
+ std %r3, -40 (%r1)
62
+ std %r12 , -48 (%r1)
63
+ mflr %r2
64
+ std %r2, -56 (%r1)
65
+
66
+ # Move stack register
67
+ addi %r1, %r1, -(56 + 32 + 8 ) # 32 + 8 for padding
54
68
55
- # Load real_size
56
- ld %r6, ULP_STACK_REAL_SIZE(%r5) # Load real_size (allocated by mmap)
57
- ld %r5, ULP_STACK_USED_SIZE(%r5) # Load used_size (currently in use)
69
+ # Fix TOC. %r12 must be pointing to the address of trampoline_routine.
70
+ addis %r2, %r12 , .TOC.-trampoline_routine@ha
71
+ addi %r2,%r2 , .TOC.-trampoline_routine@l
58
72
59
- # Check if we have space.
60
- cmpd %cr0 , %r6, %r5
73
+ # Load ulp_stack
74
+ addis %r3, %r2, ulp_stack@got@tlsgd@ha
75
+ addi %r3, %r3, ulp_stack@got@tlsgd@l
76
+
77
+ # Get address of ulp_stack
78
+ bl __tls_get_addr(ulp_stack@tlsgd)
79
+ nop
80
+
81
+ # Load ulp_stack attributes
82
+ ld %r6, ULP_STACK_REAL_SIZE(%r3) # Load real_size (allocated by mmap)
83
+ ld %r5, ULP_STACK_USED_SIZE(%r3) # Load used_size
84
+
85
+ # Check if we have space
86
+ cmpd %cr0 , %r6, %r5
61
87
ble %cr0 , .Lexpand_ulp_stack
62
88
63
89
.Lcontinue_ulp_prologue:
64
90
65
- # Reload the ulp_stack into r5 through r13 (thread local storage ptr)
66
- addis %r5, %r13 , ulp_stack@tprel@ha
67
- addi %r5, %r5, ulp_stack@tprel@l
91
+ # Here we must ensure that %r3 points to ulp_stack. If we are here from
92
+ # the .Lexpand_ulp_stack, then r3 will point to it because
93
+ # ulp_stack_helper returned it.
68
94
69
95
# Load used_size
70
- ld %r6, ULP_STACK_USED_SIZE(%r5 )
96
+ ld %r6, ULP_STACK_USED_SIZE(%r3 )
71
97
72
98
# Update top_of_stack in the struct field.
73
99
addi %r6, %r6, 16
74
- std %r6, ULP_STACK_USED_SIZE(%r5 ) # Store new used size value.
100
+ std %r6, ULP_STACK_USED_SIZE(%r3 ) # Store new used size value.
75
101
76
102
# Load stack ptr
77
- ld %r5, ULP_STACK_PTR(%r5 )
103
+ ld %r5, ULP_STACK_PTR(%r3 )
78
104
79
105
# Store TOC
80
- add %r5, %r5, %r6 # ulp_stack + used_size
106
+ add %r5, %r5, %r6 # ulp_stack_ptr + used_size
107
+
108
+ # Restore stack register.
109
+ addi %r1, %r1, (56 + 32 + 8 )
110
+
111
+ # Load original LR
112
+ ld %r2, -56 (%r1)
113
+ mtlr %r2
114
+
115
+ # Load original TOC
116
+ ld %r2, -32 (%r1)
81
117
82
118
# At this point, %r5 points to 16 bytes ahead of the slot where we shall
83
119
# save TOC. Hence we have to subtract 16 bytes of the storing location,
@@ -90,25 +126,54 @@ trampoline_routine:
90
126
mflr %r2
91
127
std %r2, -8 (%r5) # store in *(ulp_stack + used_size - 8)
92
128
93
- # Restore registers
94
- ld %r5, -8 (%r1) # Restore register.
95
- ld %r6, -16 (%r1) # Restore register.
96
129
97
- # Jump to target function
130
+ # Restore registers
131
+ ld %r5, -8 (%r1) # Restore register.
132
+ ld %r6, -16 (%r1) # Restore register.
133
+ ld %r0, -24 (%r1)
134
+ #ld %r2, -32(%r1) # r2 was already loaded
135
+ ld %r3, -40 (%r1)
136
+ ld %r12 , -48 (%r1)
137
+
138
+ # jump to target function
98
139
mfctr %r12
99
140
bctrl
100
141
101
- # Load the ulp_stack into r5 through r13 (thread local storage ptr)
102
- addis %r5, %r13 , ulp_stack@tprel@ha
103
- addi %r5, %r5, ulp_stack@tprel@l
142
+ # Save return registers and ones used by __tls_get_addr.
143
+ std %r0, -8 (%r1)
144
+ std %r3, -16 (%r1)
145
+ std %r12 , -24 (%r1)
146
+
147
+ # Move stack register
148
+ addi %r1, %r1, -(24 + 32 + 8 ) # 32 + 8 for padding
149
+
150
+ # Do a trick to load PC into LR register.
151
+ bl .return_to_caller
152
+ .return_to_caller:
153
+ mflr %r12
154
+
155
+ # Get the function address.
156
+ addi %r12 , %r12 , trampoline_routine - .return_to_caller
157
+
158
+ # Fix TOC. %r12 must be pointing to the address of trampoline_routine.
159
+ addis %r2,%r12 , .TOC.-trampoline_routine@ha
160
+ addi %r2,%r2 , .TOC.-trampoline_routine@l
161
+
162
+ # Load ulp_stack
163
+ addis %r3, %r2, ulp_stack@got@tlsgd@ha
164
+ addi %r3, %r3, ulp_stack@got@tlsgd@l
165
+
166
+ # Get address of ulp_stack
167
+ bl __tls_get_addr(ulp_stack@tlsgd)
168
+ nop
104
169
105
170
# Dereference ulp_stack.
106
- ld %r6, ULP_STACK_USED_SIZE(%r5 )
171
+ ld %r6, ULP_STACK_USED_SIZE(%r3 )
107
172
addi %r6, %r6, -16 # Sub 16 bytes because the first entry stores the top of stack, and we need to store 2 longs.
108
- std %r6, ULP_STACK_USED_SIZE(%r5 ) # Store new used_size value.
173
+ std %r6, ULP_STACK_USED_SIZE(%r3 ) # Store new used_size value.
109
174
110
175
# Load ulp_stack ptr field.
111
- ld %r5, ULP_STACK_PTR(%r5 )
176
+ ld %r5, ULP_STACK_PTR(%r3 )
112
177
113
178
# Point to the top of stack but two, these two entries are popped in
114
179
# previous step and accessed in next step (stack size decremented before access).
@@ -119,73 +184,69 @@ trampoline_routine:
119
184
ld %r8 , 8 (%r5) # Restore LR
120
185
mtlr %r8 # Load LR
121
186
122
- # Return execution to caller.
187
+ # Restore used registers
188
+ addi %r1, %r1, (24 + 32 + 8 ) # 32 + 8 for padding
189
+ ld %r0, -8 (%r1)
190
+ ld %r3, -16 (%r1)
191
+ ld %r12 , -24 (%r1)
192
+
193
+ # Return.
123
194
blr
124
195
125
196
.Lexpand_ulp_stack:
126
197
127
198
# Save all volatile registers
128
199
# r5 & r6 are designated temp regs, having data already on stack.
200
+ # r0, r2 & r12 are already on the stack as well in this slow path.
129
201
# After return from expand_ulp_stack, both regs construct values
130
202
# before use.
131
- std %r2, -24 (%r1)
132
- std %r3, -32 (%r1)
133
- std %r4, -40 (%r1)
134
- std %r7, -48 (%r1)
135
- std %r8 , -56 (%r1)
136
- std %r9 , -64 (%r1)
137
- std %r10 , -72 (%r1)
138
- std %r11 , -80 (%r1)
139
- std %r12 , -88 (%r1)
140
- mfctr %r3
141
- std %r3, -96 (%r1)
142
- mflr %r3,
143
- std %r3, -104 (%r1)
144
-
145
- # As per ppc64le ABIv2, the minimum stack frame is of 32 bytes and
146
- # additional 8 bytes padding is needed for alignment in stack frame.
147
- # The regs stored in redzone must have this 32+8 bytes padding to form
148
- # auxiliary stack frame before calling ulp_stack_helper which will
149
- # have its own proper stack frame.
150
-
151
- # Move stack register
152
- addi %r1, %r1, -(104 + 32 + 8 ) # 32 + 8 for padding
153
-
154
- # Fix TOC. %r12 must be pointing to the address of trampoline_routine.
155
- addis %r2,%r12 , .TOC.-trampoline_routine@ha
156
- addi %r2,%r2 , .TOC.-trampoline_routine@l
203
+ std %r4, -8 (%r1)
204
+ std %r7, -16 (%r1)
205
+ std %r8 , -24 (%r1)
206
+ std %r9 , -32 (%r1)
207
+ std %r10 , -40 (%r1)
208
+ std %r11 , -48 (%r1)
209
+ mfctr %r4
210
+ std %r4, -56 (%r1)
211
+ mflr %r4,
212
+ std %r4, -64 (%r1)
213
+
214
+ # Setup stack frame
215
+ addi %r1, %r1, -(64 + 32 + 8 )
157
216
158
217
# Call C helper routine.
159
- bl ulp_stack_helper
218
+ bl ulp_stack_helper
160
219
nop
161
220
162
- # Restore stack register.
163
- addi %r1, %r1, (104 + 32 + 8 )
164
-
165
- # Restore registers
166
- ld %r3, -104 (%r1)
167
- mtlr %r3
168
- ld %r3, -96 (%r1)
169
- mtctr %r3
170
- ld %r12 , -88 (%r1)
171
- ld %r11 , -80 (%r1)
172
- ld %r10 , -72 (%r1)
173
- ld %r9 , -64 (%r1)
174
- ld %r8 , -56 (%r1)
175
- ld %r7, -48 (%r1)
176
- ld %r4, -40 (%r1)
177
- ld %r3, -32 (%r1)
178
- ld %r2, -24 (%r1)
179
-
221
+ # Restore stack frame
222
+ addi %r1, %r1, (64 + 32 + 8 )
223
+
224
+ # Load back registers.
225
+ ld %r7, -16 (%r1)
226
+ ld %r8 , -24 (%r1)
227
+ ld %r9 , -32 (%r1)
228
+ ld %r10 , -40 (%r1)
229
+ ld %r11 , -48 (%r1)
230
+ ld %r4, -56 (%r1)
231
+ mtctr %r4
232
+ ld %r4, -64 (%r1)
233
+ mtlr %r4
234
+ ld %r4, -8 (%r1)
235
+
236
+ # Continue execution
180
237
b .Lcontinue_ulp_prologue
181
238
182
239
.long 0
183
240
.byte 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
184
241
.cfi_endproc
185
242
.size trampoline_routine,.-trampoline_routine
186
243
187
- .globl ulp_prologue
188
- .type ulp_prologue, @function
244
+ # The following function needs to be placed in .data, as it is a template to be
245
+ # copied into the prologue of the patched function. Placing this in .data avoids
246
+ # text relocations.
247
+ .section ".data"
248
+ .globl ulp_prologue
249
+ .type ulp_prologue, @function
189
250
ulp_prologue:
190
251
.cfi_startproc
191
252
std %r5, -8 (%r1) # Save one register used as function parameter
@@ -195,7 +256,7 @@ ulp_prologue:
195
256
lis %r5, trampoline_routine@highest #0x1122
196
257
ori %r5, %r5, trampoline_routine@higher #0x3344
197
258
lis %r12 , trampoline_routine@high #0x5566
198
- ori %r12 , %r12 , trampoline_routine@l #0x7788
259
+ ori %r12 , %r12 , trampoline_routine@l #0x7788
199
260
200
261
# Concatenate two registers
201
262
rldimi %r12 , %r5, 32 , 0
@@ -223,7 +284,7 @@ ulp_prologue_end = .
223
284
.size ulp_prologue,.-ulp_prologue
224
285
ulp_prologue_padding_end = .
225
286
226
- .section ".data "
287
+ .section ".rodata "
227
288
.align 2
228
289
.type ulp_prologue_size, @object
229
290
.size ulp_prologue_size, 4
0 commit comments