@@ -160,3 +160,136 @@ void flint_mpn_mulmod_preinvn_2(mp_ptr r,
160160 r [1 ] = r1 ;
161161 }
162162}
163+
164+ void flint_mpn_fmmamod_preinvn (mp_ptr r ,
165+ mp_srcptr a , mp_srcptr b ,
166+ mp_srcptr e , mp_srcptr f ,
167+ mp_size_t n ,
168+ mp_srcptr d , mp_srcptr dinv , ulong norm )
169+ {
170+ mp_ptr t , u ;
171+ ulong cy ;
172+ TMP_INIT ;
173+
174+ TMP_START ;
175+ t = TMP_ALLOC ((7 * n ) * sizeof (mp_limb_t ));
176+ u = t + (5 * n );
177+
178+ if (a == b )
179+ flint_mpn_sqr (t , a , n );
180+ else
181+ flint_mpn_mul_n (t , a , b , n );
182+
183+ if (e == f )
184+ flint_mpn_sqr (u , e , n );
185+ else
186+ flint_mpn_mul_n (u , e , f , n );
187+
188+ if (norm )
189+ {
190+ mpn_add_n (t , t , u , 2 * n );
191+ cy = mpn_lshift (t , t , 2 * n , norm );
192+ }
193+ else
194+ {
195+ cy = mpn_add_n (t , t , u , 2 * n );
196+ }
197+
198+ if (cy != 0 || mpn_cmp (t + n , d , n ) >= 0 )
199+ {
200+ mpn_sub_n (t + n , t + n , d , n );
201+ }
202+
203+ flint_mpn_mul_or_mulhigh_n (t + 3 * n , t + n , dinv , n );
204+ mpn_add_n (t + 4 * n , t + 4 * n , t + n , n );
205+
206+ /* note: we rely on the fact that mul_or_mullow_n actually
207+ writes at least n + 1 limbs */
208+ flint_mpn_mul_or_mullow_n (t + 2 * n , t + 4 * n , d , n );
209+ cy = t [n ] - t [3 * n ] - mpn_sub_n (r , t , t + 2 * n , n );
210+
211+ while (cy > 0 )
212+ cy -= mpn_sub_n (r , r , d , n );
213+
214+ if (mpn_cmp (r , d , n ) >= 0 )
215+ mpn_sub_n (r , r , d , n );
216+
217+ FLINT_ASSERT (mpn_cmp (r , d , n ) < 0 );
218+
219+ if (norm )
220+ mpn_rshift (r , r , n , norm );
221+
222+ TMP_END ;
223+ }
224+
225+ void flint_mpn_fmmamod_preinvn_2 (mp_ptr r ,
226+ mp_srcptr a , mp_srcptr b ,
227+ mp_srcptr e , mp_srcptr f ,
228+ mp_srcptr d , mp_srcptr dinv , ulong norm )
229+ {
230+ mp_limb_t cy , b0 , b1 , r0 , r1 ;
231+ mp_limb_t f0 , f1 ;
232+ mp_limb_t t [10 ], u [4 ];
233+
234+ if (norm )
235+ {
236+ /* mpn_lshift(b, b, n, norm) */
237+ b0 = (b [0 ] << norm );
238+ b1 = (b [1 ] << norm ) | (b [0 ] >> (FLINT_BITS - norm ));
239+ f0 = (f [0 ] << norm );
240+ f1 = (f [1 ] << norm ) | (f [0 ] >> (FLINT_BITS - norm ));
241+ }
242+ else
243+ {
244+ b0 = b [0 ];
245+ b1 = b [1 ];
246+ f0 = f [0 ];
247+ f1 = f [1 ];
248+ }
249+
250+ /* mpn_mul_n(t, a, b, n) */
251+ FLINT_MPN_MUL_2X2 (t [3 ], t [2 ], t [1 ], t [0 ], a [1 ], a [0 ], b1 , b0 );
252+ /* mpn_mul_n(u, e, f, n) */
253+ FLINT_MPN_MUL_2X2 (u [3 ], u [2 ], u [1 ], u [0 ], e [1 ], e [0 ], f1 , f0 );
254+ add_sssssaaaaaaaaaa (cy , t [3 ], t [2 ], t [1 ], t [0 ],
255+ 0 , t [3 ], t [2 ], t [1 ], t [0 ],
256+ 0 , u [3 ], u [2 ], u [1 ], u [0 ]);
257+ if (cy || mpn_cmp (t + 2 , d , 2 ) >= 0 )
258+ sub_ddmmss (t [3 ], t [2 ], t [3 ], t [2 ], d [1 ], d [0 ]);
259+
260+ /* mpn_mul_n(t + 3*n, t + n, dinv, n) */
261+ FLINT_MPN_MUL_2X2 (t [9 ], t [8 ], t [7 ], t [6 ], t [3 ], t [2 ], dinv [1 ], dinv [0 ]);
262+
263+ /* mpn_add_n(t + 4*n, t + 4*n, t + n, n) */
264+ add_ssaaaa (t [9 ], t [8 ], t [9 ], t [8 ], t [3 ], t [2 ]);
265+
266+ /* mpn_mul_n(t + 2*n, t + 4*n, d, n) */
267+ FLINT_MPN_MUL_3P2X2 (t [6 ], t [5 ], t [4 ], t [9 ], t [8 ], d [1 ], d [0 ]);
268+
269+ /* cy = t[n] - t[3*n] - mpn_sub_n(r, t, t + 2*n, n) */
270+ sub_dddmmmsss (cy , r1 , r0 , t [2 ], t [1 ], t [0 ], t [6 ], t [5 ], t [4 ]);
271+
272+ while (cy > 0 )
273+ {
274+ /* cy -= mpn_sub_n(r, r, d, n) */
275+ sub_dddmmmsss (cy , r1 , r0 , cy , r1 , r0 , 0 , d [1 ], d [0 ]);
276+ }
277+
278+ if ((r1 > d [1 ]) || (r1 == d [1 ] && r0 >= d [0 ]))
279+ {
280+ /* mpn_sub_n(r, r, d, n) */
281+ sub_ddmmss (r1 , r0 , r1 , r0 , d [1 ], d [0 ]);
282+ }
283+
284+ if (norm )
285+ {
286+ r [0 ] = (r0 >> norm ) | (r1 << (FLINT_BITS - norm ));
287+ r [1 ] = (r1 >> norm );
288+ }
289+ else
290+ {
291+ r [0 ] = r0 ;
292+ r [1 ] = r1 ;
293+ }
294+ }
295+
0 commit comments