···11+From 6001db79c477b03eacc7e7049560921fb54b7845 Mon Sep 17 00:00:00 2001
22+From: Richard Sandiford <richard.sandiford@arm.com>
33+Date: Mon, 7 Sep 2020 20:15:36 +0100
44+Subject: [PATCH] lra: Avoid cycling on certain subreg reloads [PR96796]
55+66+This PR is about LRA cycling for a reload of the form:
77+88+----------------------------------------------------------------------------
99+Changing pseudo 196 in operand 1 of insn 103 on equiv [r105:DI*0x8+r140:DI]
1010+ Creating newreg=287, assigning class ALL_REGS to slow/invalid mem r287
1111+ Creating newreg=288, assigning class ALL_REGS to slow/invalid mem r288
1212+ 103: r203:SI=r288:SI<<0x1+r196:DI#0
1313+ REG_DEAD r196:DI
1414+ Inserting slow/invalid mem reload before:
1515+ 316: r287:DI=[r105:DI*0x8+r140:DI]
1616+ 317: r288:SI=r287:DI#0
1717+----------------------------------------------------------------------------
1818+1919+The problem is with r287. We rightly give it a broad starting class of
2020+POINTER_AND_FP_REGS (reduced from ALL_REGS by preferred_reload_class).
2121+However, we never make forward progress towards narrowing it down to
2222+a specific choice of class (POINTER_REGS or FP_REGS).
2323+2424+I think in practice we rely on two things to narrow a reload pseudo's
2525+class down to a specific choice:
2626+2727+(1) a restricted class is specified when the pseudo is created
2828+2929+ This happens for input address reloads, where the class is taken
3030+ from the target's chosen base register class. It also happens
3131+ for simple REG reloads, where the class is taken from the chosen
3232+ alternative's constraints.
3333+3434+(2) uses of the reload pseudo as a direct input operand
3535+3636+ In this case get_reload_reg tries to reuse the existing register
3737+ and narrow its class, instead of creating a new reload pseudo.
3838+3939+However, neither occurs here. As described above, r287 rightly
4040+starts out with a wide choice of class, ultimately derived from
4141+ALL_REGS, so we don't get (1). And as the comments in the PR
4242+explain, r287 is never used as an input reload, only the subreg is,
4343+so we don't get (2):
4444+4545+----------------------------------------------------------------------------
4646+ Choosing alt 13 in insn 317: (0) r (1) w {*movsi_aarch64}
4747+ Creating newreg=291, assigning class FP_REGS to r291
4848+ 317: r288:SI=r291:SI
4949+ Inserting insn reload before:
5050+ 320: r291:SI=r287:DI#0
5151+----------------------------------------------------------------------------
5252+5353+IMO, in this case we should rely on the reload of insn 316 to narrow
5454+down the class of r287. Currently we do:
5555+5656+----------------------------------------------------------------------------
5757+ Choosing alt 7 in insn 316: (0) r (1) m {*movdi_aarch64}
5858+ Creating newreg=289 from oldreg=287, assigning class GENERAL_REGS to r289
5959+ 316: r289:DI=[r105:DI*0x8+r140:DI]
6060+ Inserting insn reload after:
6161+ 318: r287:DI=r289:DI
6262+---------------------------------------------------
6363+6464+i.e. we create a new pseudo register r289 and give *that* pseudo
6565+GENERAL_REGS instead. This is because get_reload_reg only narrows
6666+down the existing class for OP_IN and OP_INOUT, not OP_OUT.
6767+6868+But if we have a reload pseudo in a reload instruction and have chosen
6969+a specific class for the reload pseudo, I think we should simply install
7070+it for OP_OUT reloads too, if the class is a subset of the existing class.
7171+We will need to pick such a register whatever happens (for r289 in the
7272+example above). And as explained in the PR, doing this actually avoids
7373+an unnecessary move via the FP registers too.
7474+7575+The patch is quite aggressive in that it does this for all reload
7676+pseudos in all reload instructions. I wondered about reusing the
7777+condition for a reload move in in_class_p:
7878+7979+ INSN_UID (curr_insn) >= new_insn_uid_start
8080+ && curr_insn_set != NULL
8181+ && ((OBJECT_P (SET_SRC (curr_insn_set))
8282+ && ! CONSTANT_P (SET_SRC (curr_insn_set)))
8383+ || (GET_CODE (SET_SRC (curr_insn_set)) == SUBREG
8484+ && OBJECT_P (SUBREG_REG (SET_SRC (curr_insn_set)))
8585+ && ! CONSTANT_P (SUBREG_REG (SET_SRC (curr_insn_set)))))))
8686+8787+but I can't really justify that on first principles. I think we
8888+should apply the rule consistently until we have a specific reason
8989+for doing otherwise.
9090+9191+gcc/
9292+ PR rtl-optimization/96796
9393+ * lra-constraints.c (in_class_p): Add a default-false
9494+ allow_all_reload_class_changes_p parameter. Do not treat
9595+ reload moves specially when the parameter is true.
9696+ (get_reload_reg): Try to narrow the class of an existing OP_OUT
9797+ reload if we're reloading a reload pseudo in a reload instruction.
9898+9999+gcc/testsuite/
100100+ PR rtl-optimization/96796
101101+ * gcc.c-torture/compile/pr96796.c: New test.
102102+---
103103+ gcc/lra-constraints.c | 54 ++++++++++++++----
104104+ gcc/testsuite/gcc.c-torture/compile/pr96796.c | 55 +++++++++++++++++++
105105+ 2 files changed, 99 insertions(+), 10 deletions(-)
106106+ create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr96796.c
107107+108108+diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
109109+index 580da9c3ed6..161b721efb1 100644
110110+--- a/gcc/lra-constraints.c
111111++++ b/gcc/lra-constraints.c
112112+@@ -236,12 +236,17 @@ get_reg_class (int regno)
113113+ CL. Use elimination first if REG is a hard register. If REG is a
114114+ reload pseudo created by this constraints pass, assume that it will
115115+ be allocated a hard register from its allocno class, but allow that
116116+- class to be narrowed to CL if it is currently a superset of CL.
117117++ class to be narrowed to CL if it is currently a superset of CL and
118118++ if either:
119119++
120120++ - ALLOW_ALL_RELOAD_CLASS_CHANGES_P is true or
121121++ - the instruction we're processing is not a reload move.
122122+123123+ If NEW_CLASS is nonnull, set *NEW_CLASS to the new allocno class of
124124+ REGNO (reg), or NO_REGS if no change in its class was needed. */
125125+ static bool
126126+-in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class)
127127++in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class,
128128++ bool allow_all_reload_class_changes_p = false)
129129+ {
130130+ enum reg_class rclass, common_class;
131131+ machine_mode reg_mode;
132132+@@ -266,7 +271,8 @@ in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class)
133133+ typically moves that have many alternatives, and restricting
134134+ reload pseudos for one alternative may lead to situations
135135+ where other reload pseudos are no longer allocatable. */
136136+- || (INSN_UID (curr_insn) >= new_insn_uid_start
137137++ || (!allow_all_reload_class_changes_p
138138++ && INSN_UID (curr_insn) >= new_insn_uid_start
139139+ && curr_insn_set != NULL
140140+ && ((OBJECT_P (SET_SRC (curr_insn_set))
141141+ && ! CONSTANT_P (SET_SRC (curr_insn_set)))
142142+@@ -598,13 +604,12 @@ canonicalize_reload_addr (rtx addr)
143143+ return addr;
144144+ }
145145+146146+-/* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse already
147147+- created input reload pseudo (only if TYPE is not OP_OUT). Don't
148148+- reuse pseudo if IN_SUBREG_P is true and the reused pseudo should be
149149+- wrapped up in SUBREG. The result pseudo is returned through
150150+- RESULT_REG. Return TRUE if we created a new pseudo, FALSE if we
151151+- reused the already created input reload pseudo. Use TITLE to
152152+- describe new registers for debug purposes. */
153153++/* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse an existing
154154++ reload pseudo. Don't reuse an existing reload pseudo if IN_SUBREG_P
155155++ is true and the reused pseudo should be wrapped up in a SUBREG.
156156++ The result pseudo is returned through RESULT_REG. Return TRUE if we
157157++ created a new pseudo, FALSE if we reused an existing reload pseudo.
158158++ Use TITLE to describe new registers for debug purposes. */
159159+ static bool
160160+ get_reload_reg (enum op_type type, machine_mode mode, rtx original,
161161+ enum reg_class rclass, bool in_subreg_p,
162162+@@ -616,6 +621,35 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original,
163163+164164+ if (type == OP_OUT)
165165+ {
166166++ /* Output reload registers tend to start out with a conservative
167167++ choice of register class. Usually this is ALL_REGS, although
168168++ a target might narrow it (for performance reasons) through
169169++ targetm.preferred_reload_class. It's therefore quite common
170170++ for a reload instruction to require a more restrictive class
171171++ than the class that was originally assigned to the reload register.
172172++
173173++ In these situations, it's more efficient to refine the choice
174174++ of register class rather than create a second reload register.
175175++ This also helps to avoid cycling for registers that are only
176176++ used by reload instructions. */
177177++ if (REG_P (original)
178178++ && (int) REGNO (original) >= new_regno_start
179179++ && INSN_UID (curr_insn) >= new_insn_uid_start
180180++ && in_class_p (original, rclass, &new_class, true))
181181++ {
182182++ unsigned int regno = REGNO (original);
183183++ if (lra_dump_file != NULL)
184184++ {
185185++ fprintf (lra_dump_file, " Reuse r%d for output ", regno);
186186++ dump_value_slim (lra_dump_file, original, 1);
187187++ }
188188++ if (new_class != lra_get_allocno_class (regno))
189189++ lra_change_class (regno, new_class, ", change to", false);
190190++ if (lra_dump_file != NULL)
191191++ fprintf (lra_dump_file, "\n");
192192++ *result_reg = original;
193193++ return false;
194194++ }
195195+ *result_reg
196196+ = lra_create_new_reg_with_unique_value (mode, original, rclass, title);
197197+ return true;
198198+diff --git a/gcc/testsuite/gcc.c-torture/compile/pr96796.c b/gcc/testsuite/gcc.c-torture/compile/pr96796.c
199199+new file mode 100644
200200+index 00000000000..8808e62fe77
201201+--- /dev/null
202202++++ b/gcc/testsuite/gcc.c-torture/compile/pr96796.c
203203+@@ -0,0 +1,55 @@
204204++/* { dg-additional-options "-fcommon" } */
205205++
206206++struct S0 {
207207++ signed f0 : 8;
208208++ unsigned f1;
209209++ unsigned f4;
210210++};
211211++struct S1 {
212212++ long f3;
213213++ char f4;
214214++} g_3_4;
215215++
216216++int g_5, func_1_l_32, func_50___trans_tmp_31;
217217++static struct S0 g_144, g_834, g_1255, g_1261;
218218++
219219++int g_273[120] = {};
220220++int *g_555;
221221++char **g_979;
222222++static int g_1092_0;
223223++static int g_1193;
224224++int safe_mul_func_int16_t_s_s(int si1, int si2) { return si1 * si2; }
225225++static struct S0 *func_50();
226226++int func_1() { func_50(g_3_4, g_5, func_1_l_32, 8, 3); }
227227++void safe_div_func_int64_t_s_s(int *);
228228++void safe_mod_func_uint32_t_u_u(struct S0);
229229++struct S0 *func_50(int p_51, struct S0 p_52, struct S1 p_53, int p_54,
230230++ int p_55) {
231231++ int __trans_tmp_30;
232232++ char __trans_tmp_22;
233233++ short __trans_tmp_19;
234234++ long l_985_1;
235235++ long l_1191[8];
236236++ safe_div_func_int64_t_s_s(g_273);
237237++ __builtin_printf((char*)g_1261.f4);
238238++ safe_mod_func_uint32_t_u_u(g_834);
239239++ g_144.f0 += 1;
240240++ for (;;) {
241241++ struct S1 l_1350 = {&l_1350};
242242++ for (; p_53.f3; p_53.f3 -= 1)
243243++ for (; g_1193 <= 2; g_1193 += 1) {
244244++ __trans_tmp_19 = safe_mul_func_int16_t_s_s(l_1191[l_985_1 + p_53.f3],
245245++ p_55 % (**g_979 = 10));
246246++ __trans_tmp_22 = g_1255.f1 * p_53.f4;
247247++ __trans_tmp_30 = __trans_tmp_19 + __trans_tmp_22;
248248++ if (__trans_tmp_30)
249249++ g_1261.f0 = p_51;
250250++ else {
251251++ g_1255.f0 = p_53.f3;
252252++ int *l_1422 = g_834.f0 = g_144.f4 != (*l_1422)++ > 0 < 0 ^ 51;
253253++ g_555 = ~0;
254254++ g_1092_0 |= func_50___trans_tmp_31;
255255++ }
256256++ }
257257++ }
258258++}
259259+--
260260+2.18.4
261261+
+7-1
pkgs/development/compilers/gcc/9/default.nix
···5858 inherit (stdenv) buildPlatform hostPlatform targetPlatform;
59596060 patches =
6161- optional (targetPlatform != hostPlatform) ../libstdc++-target.patch
6161+ # Fix ICE: Max. number of generated reload insns per insn is achieved (90)
6262+ #
6363+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96796
6464+ #
6565+ # This patch can most likely be removed once a release newer than 9.3.0 is used.
6666+ [ ./avoid-cycling-subreg-reloads.patch ]
6767+ ++ optional (targetPlatform != hostPlatform) ../libstdc++-target.patch
6268 ++ optional noSysDirs ../no-sys-dirs.patch
6369 /* ++ optional (hostPlatform != buildPlatform) (fetchpatch { # XXX: Refine when this should be applied
6470 url = "https://git.busybox.net/buildroot/plain/package/gcc/${version}/0900-remove-selftests.patch?id=11271540bfe6adafbc133caf6b5b902a816f5f02";