/* mpn_toomN2_mul -- Multiply {ap,an} and {bp,bn} where an is much larger
   than bn, or, more accurately, 2an > 3bn > 18.

   Contributed to the GNU project by Alberto Zanoni, based on code by
   Torbjorn Granlund and Marco Bodrato.

   The idea of applying Toom methods to unbalanced multiplication is
   due to Marco Bodrato and Alberto Zanoni.

   REFERENCES: Alberto Zanoni "Some Toom-Cook methods for even long
     integers", Acta Universitatis Apulensis, Mathematics-Informatics,
     Special Issue, Proceedings of the International Conference on
     Theory and Applications of Mathematics and Informatics, ICTAMI
     2009, Alba Iulia, Romania. Daniel Breaz, Nicoleta Breaz, Dorin
     Wainberg, eds. pp. 807--828. Aeternitas Publishing House,
     September 2009

Copyright 2006, 2007, 2008, 2009 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.

*/

#include "gmp.h"
#include "gmp-impl.h"

/*
  Iterative Toom-2.5 method for very unbalanced factors.
  Evaluate in: -1, 0, +1, +inf

    <-s-><--n--><--n--><--n--> ....  <--n--><--n--><--n--><--n-->
     ___ ______ ______ ______ ______ ______ ______ ______ ______ 
  a:|ad_|a(d-1)|a(d-2)|a(d-3)|_····_|___a3_|___a2_|___a1_|___a0_|
  b:                                               |__b1_|___b0_|
	                                           <--t--><--n-->

  If b1 + b0 (and b1 - b0 as well) is even, divide by 2 both
  evaluations. Doing so, one can avoid ALL later shifts, as what we
  have to compute is

  (a0+a1+a2)(b0+b1) + (a0-a1+a2)(b0-b1)             b0+b1             b0-b1
  ------------------------------------- = (a0+a1+a2)----- + (a0-a1+a2)-----
                    2                                 2                 2   
  and

  (a0+a1+a2)(b0+b1) - (a0-a1+a2)(b0-b1)             b0+b1             b0-b1
  ------------------------------------- = (a0+a1+a2)----- - (a0-a1+a2)------
                    2                                 2                 2

  Evaluation of a is done iteratively. Evaluation of b just ONCE.

  v0  =  ai               * b0       # Ai(0)*B(0)
  v1  = (ai+a(i+1)+a(i+2))*(b0 + b1) # Ai(1)*B(1)      ai_h  <= 2 ; bh <= 1
  vm1 = (ai-a(i+1)+a(i+2))*(b0 - b1) # Ai(-1)*B(-1)   |ai_h| <= 1 ; bh = 0
  vinf=            a(i+2) *      b1  # Ai(inf)*B(inf)
*/

#define TOOMN2_MUL_N_REC(p, a, b, n, ws) \
  do {					 \
    mpn_mul_n (p, a, b, n);		 \
  } while (0)


#ifdef ONLY_B_EVEN

#define mpn_toomN2_mul_ONLY_B_EVEN mpn_toomN2_mul

void
mpn_toomN2_mul_ONLY_B_EVEN (mp_ptr pp,
		mp_srcptr ap, mp_size_t an,
		mp_srcptr bp, mp_size_t bn,
		mp_ptr scratch)
{
  mp_size_t    n, s, t, evenEvaluationsOfB, evenEvaluations = 0;
  int          vm1_neg_b, vm1_neg, c_n = 0, c_2n = 0, c_3n, cy;
  mp_limb_t    keep;
  mp_srcptr    a0;
  mp_ptr       asm1, bs1, vinf, evalProdMinus1;

  t         = bn >> 1;         /* Number of limbs of b1 (either n or n-1) */
  n         = bn - t;          /* Number of limbs of a0, a1 and b0.       */

  ASSERT (n-2 < t && t <= n);

#define a0_a2   (pp)			/* n+ carry (cy)  */
#define as1     (pp + n)		/* n+1            */
  bs1 = pp + an + bn - 2 * n - 1;	/* n+1 (top)      */
#define bsm1    (bs1 + n + 1)		/* n              */

#define b0      (bp)
#define b1      (bp + n)
#define a1      (a0 + n)                /* Pointers to middle and high    */
#define a2      (a1 + n)                /* parts of current section of a. */

#define v0	(pp)			/* 2n             */
#define vm1	(pp + n)		/* 2n+1           */
#define vinfp	(scratch)		/* bn (max 2n)    */
  vinf = vinfp;
#define v1	(scratch + 2 * n)	/* 2n+1+1         */
#define scratch_out  (scratch + 4 * n + 2)

  evenEvaluationsOfB = ((b0[0] ^ b1[0]) & 1) - 1; /* 0 or ~0             */
  a0 = (evenEvaluationsOfB) ? bs1 : b0;
  if (t == n)                            /* Compute bs1, bsm1 just ONCE. */
    {
      bs1[n] = mpn_add_n (bs1, b0, b1, n);        /* b0 + b1: carry OK.  */
      if (evenEvaluationsOfB)
	mpn_rshift (bs1, bs1, n+1, 1);            /* n limbs, now.       */

      if (mpn_cmp (a0, b1, n) < 0)
	{
	  mpn_sub_n (bsm1, b1, a0, n);
	  vm1_neg_b = 1;
	}
      else
	{
	  mpn_sub_n (bsm1, a0, b1, n);
	  vm1_neg_b = 0;
	}
    }
  else /* t == n - 1 */
    {
      bs1[t] = b0[t] + mpn_add_n (bs1, b0, b1, t);
      bs1[n] = (bs1[t] < b0[t]);
      if (evenEvaluationsOfB)
	mpn_rshift (bs1, bs1, n+1, 1);     /* n limbs, now.              */

      if (a0[t] == 0 && mpn_cmp (a0, b1, t) < 0)
	{
	  mpn_sub_n (bsm1, b1, a0, t);
	  bsm1[t]   = 0;
	  vm1_neg_b = 1;
	}
      else
	{
	  bsm1[t] = a0[t] - mpn_sub_n (bsm1, a0, b1, t);
	  vm1_neg_b = 0;
	}
    }
  ASSERT (bs1[n] <= 1);
  ASSERT (an >= 3*n);
  a0 = ap; s = n;
  do {
    cy = mpn_add (a0_a2, a0, n, a2, s);              /* a0+a2: consider carry. */
    /*   n   n    n   n    n                      n    n    n     n            */
    /* |   |   |    |   |a0_a2| pp            ___|   |   |Hprec|Lprec| scratch */

    as1[n] = cy + mpn_add_n (as1, a0_a2, a1, n);     /*   as1 = a0 + a1 + a2   */
    /* |   |   |   .|as1|a0_a2| pp            ___|   |   |Hprec|Lprec| scratch */
    ASSERT (as1[n] <= 2);

    /* Evaluation in 1 : v1, 2n+1(+1, the last one is 0) limbs.                */
    /* (evenEvaluationsOfB) ? (1/2 1/2 1/2 1/2) : (1 1 1 1)                    */

    TOOMN2_MUL_N_REC(v1, as1, bs1, n+1, scratch_out);
    /* |   |   |    |    |a0_a2| pp     __|Z| H1_| L1_|Hprec|Lprec| scratch    */
#if HAVE_NATIVE_mpn_add_n_sub_n
    asm1 = pp;
#else
    asm1 = pp + 2*(n & evenEvaluationsOfB);
#endif

    /* Evaluation in -1. If no carry in a0 + a2 and a0 + a2 < a1 ...           */
    if (cy == 0 && mpn_cmp (a0_a2, a1, n) < 0)
      {
	mpn_sub_n (asm1, a1, a0_a2, n);                 /* -(a0 - a1 + a2) > 0 */
	vm1_neg = vm1_neg_b ^ 1;                   /* keep the absolute value  */
      }
    else /* Borrow */
      {
	cy -= mpn_sub_n (asm1, a0_a2, a1, n);              /* a0 - a1 + a2 > 0 */
	vm1_neg = vm1_neg_b;
      }
    /* |   |   |asm1|   |     | pp   __|Z| H1_| L1_|Hprec|Lprec| scratch       */
    /* or                                                                      */
    /* |   |   |    |   |asm1 | pp   __|Z| H1_| L1_|Hprec|Lprec| scratch       */
    
    ASSERT (cy <= 1);
#if HAVE_NATIVE_mpn_add_n_sub_n
    evalProdMinus1 = pp + n;
#else
    evalProdMinus1 = pp + (n & ~(evenEvaluationsOfB));
#endif
    /* vm1, 2n+1 limbs                          Recursive multiplication in -1. */
    TOOMN2_MUL_N_REC(evalProdMinus1, asm1, bsm1, n, scratch_out); /*(-1 1 -1 1) */
    if (cy)                                          /* or  (-1/2 1/2 -1/2 1/2) */
      cy = mpn_add_n(evalProdMinus1+n, evalProdMinus1+n, bsm1, n);
    /*   n   n    n   n    n                      n    n    n     n             */
    /* |   |   |    |Hm1_|Lm1_| pp        ___|Z| H1_| L1_|Hprec|Lprec| scratch  */
    /* or                                                                       */
    /*   n   n    n    n    n                      n    n    n     n            */
    /* |   |   |Hm1_|Lm1_|    | pp        ___|Z| H1_| L1_|Hprec|Lprec| scratch  */

    /** BEGINNING OF THE INTERPOLATION CODE : DECOUPLING **/
    if ( evenEvaluationsOfB )
      {
	if (vm1_neg)                             /* If vm1 is negative          */
	  {
#if HAVE_NATIVE_mpn_add_n_sub_n
	    c_3n     = mpn_add_n_sub_n (v1, vm1, v1, vm1, 2*n );
	    vm1[2*n] = v1[2*n] - cy - (c_3n & 1);     /* Most meaningful limbs. */
	    v1[2*n] += cy + (c_3n>>1);
#else
	    vm1[2*n] = v1[2*n] - cy - mpn_sub_n (vm1+n, v1+n, evalProdMinus1+n, n);
    /* |   |   |X|Hm1 |Hm1_|Lm1_| pp       ___|Z| H1_| L1_|Hprec|Lprec| scratch */
	    v1[2*n] += cy + mpn_add_n (v1+n, v1+n, evalProdMinus1+n, n);
    /* |   |   |X|Hm1 |    |Lm1_| pp       ___|Z| H1 | L1_|Hprec|Lprec| scratch */
	    cy = mpn_sub_n (vm1, v1, evalProdMinus1, n);
	    MPN_DECR_U(vm1+n, n+1, cy);
    /* |   |   |X|Hm1 |Lm1 |Lm1_| pp       ___|Z| H1 | L1_|Hprec|Lprec| scratch */
	    cy = mpn_add_n (v1, v1, evalProdMinus1, n);
	    MPN_INCR_U(v1+n, n+1, cy);
    /* |   |   |X|Hm1 |Lm1 |    | pp       ___|Z| H1 | L1 |Hprec|Lprec| scratch */
#endif
	  }
	else
	  {
#if HAVE_NATIVE_mpn_add_n_sub_n
	    c_3n     = mpn_add_n_sub_n (vm1, v1, v1, vm1, 2*n );
	    vm1[2*n] = v1[2*n] + cy + (c_3n>>1);      /* Most meaningful limbs. */
	    v1[2*n] -= cy + (c_3n & 1);
#else
	    vm1[2*n] = v1[2*n] + cy + mpn_add_n (vm1+n, v1+n, evalProdMinus1+n, n);
    /* |   |   |X|Hm1 |Hm1_|Lm1_| pp       ___|Z| H1_| L1_|Hprec|Lprec| scratch */
	    v1[2*n] += cy - mpn_sub_n (v1+n,  v1+n, evalProdMinus1+n, n);
    /* |   |   |X|Hm1 |    |Lm1_| pp       ___|Z| H1 | L1_|Hprec|Lprec| scratch */
	    cy = mpn_add_n (vm1, v1, evalProdMinus1, n);
	    MPN_INCR_U(vm1+n, n+1, cy);
    /* |   |   |X|Hm1 |Lm1 |Lm1_| pp       ___|Z| H1 | L1_|Hprec|Lprec| scratch */
	    cy = mpn_sub_n (v1, v1, evalProdMinus1, n);
	    MPN_DECR_U(v1+n, n+1, cy);
    /* |   |   |X|Hm1 |Lm1 |    | pp       ___|Z| H1 | L1 |Hprec|Lprec| scratch */
#endif
	  }
      }
    else
      {
	vm1[2*n] = cy;
	if (vm1_neg)                                      /* If vm1 is negative */
	  {
#if HAVE_NATIVE_mpn_rsh1sub_n
	    mpn_rsh1sub_n (vm1, v1, vm1, 2*n + 1);
#else                                                     /* Decoupling begins. */
	    mpn_sub_n (vm1, v1, vm1, 2*n + 1);            /* (0 2 0 2)          */
	    mpn_rshift (vm1, vm1, 2*n + 1, 1);            /* (0 1 0 1)          */
#endif
	  }
	else
	  {
#if HAVE_NATIVE_mpn_rsh1add_n
	    mpn_rsh1add_n (vm1, v1, vm1, 2*n + 1);
#else                                                     /* Decoupling begins. */
	    mpn_add_n (vm1, v1, vm1, 2*n + 1);            /* (0 2 0 2)          */
	    mpn_rshift (vm1, vm1, 2*n + 1, 1);            /* (0 1 0 1)          */
#endif
	  }
	mpn_sub_n (v1, v1, vm1, 2*n+1);                   /* (1 0 1 0)          */
      }
    /* |   |   |X|Hm1|Lm1 |  | pp          ___|Z| H1 | L1 |Hprec|Lprec| scratch */

    v1[2*n] += mpn_add_n(v1+n, v1+n, vm1, n);
    /* |    |  |X|Hm1|   |   | pp          ___|Q| H1 | L1 |Hprec|Lprec| scratch */
    /*                                          | Lm1|                          */
    /* v0, 2n limbs */                       /* Recursive multiplication for 0. */
    TOOMN2_MUL_N_REC(v0, a0, b0, n, scratch_out);           /* (0 0 0 1)        */
    /* |    |  |X|Hm1| H0| L0| pp          ___|Q| H1 | L1 |Hprec|Lprec| scratch */
    /*                                          | Lm1|                          */
    v1[2*n] -= mpn_sub_n (v1+n, v1+n, v0, n);
    /* |    |  |X|Hm1| H0| L0| pp          ___|Y| H1 | L1 |Hprec|Lprec| scratch */
    /*                                          | Lm1|                          */
    /*                                          | -L0|                          */

    if ( LIKELY(ap != a0) )         /* Not first time: precedent result to add. */
      {
	c_n  = mpn_add_n (v0, v0, vinfp, n);      /* Carry for the first part   */
	c_2n = mpn_add (v1, v1, n, vinfp + n, t); /* ... and the second part.   */
      }
    /*                   |Lprec|                     |Hprec|                    */
    /* |    |  |X|Hm1| H0| L0  | pp        ___|Y| H1 | L1  |    |     | scratch */
    /*                  / \                     | Lm1|\____                     */
    /*                  c_n                     | -L0| c_2n                     */

    cy = vm1[2*n];
    if (LIKELY (s >= t) ) mpn_mul (vinf, a2, s, b1, t);
    else                  mpn_mul (vinf, b1, t, a2, s);
    ASSERT (s + t > n);
    /*                   |Lprec|                     |Hprec|                    */
    /* |    |   | Hm1| H0| L0  | pp        ___|Y| H1 | L1  |Hinf|Linf | scratch */
    /*                                          | Lm1|\____                     */
    /*                                          | -L0| c_2n                     */
    /* or (last time, if (an % 3*n) > 2*n + 2)                                  */
    /*                   |Lprec|                     |Hprec|                    */
    /* |Hinf|Linf|Hm1| H0| L0  | pp        ___|Y| H1 | L1  |    |     | scratch */
    /*                                          | Lm1|\____                     */
    /*                                          | -L0| c_2n                     */
    
    c_3n = mpn_sub_n(v0+n, vinf, v0+n, n);        /* Two-complement if negative */
    /*                    |Lprec|                    |Hprec|                    */
    /* |    |   | Hm1|Linf| L0  | pp       ___|Y| H1 | L1  |Hinf|     | scratch */
    /*           ____/-H0                       | Lm1|\____                     */
    /*           c_3n                           | -L0| c_2n                     */
    /* or (last time, if (an % 3*n) > 2*n + 2)                                  */
    /*                    |Lprec|                    |Hprec|                    */
    /* |Hinf|    |Hm1|Linf| L0  | pp       ___|Y| H1 | L1  |    |     | scratch */
    /*           ____/-H0                       | Lm1|\____                     */
    /*           c_3n                           | -L0| c_2n                     */

    cy = cy - c_3n + mpn_add_n(vinf, vm1+n, pp+n, n);
    /*                    |Lprec|                    |Hprec|     Hm1            */
    /* |    |   |    |Linf| L0  | pp       ___|Y| H1 | L1  |Hinf|Linf | scratch */
    /*           ____/-H0                       | Lm1|        __/-H0            */
    /*           c_3n                           | -L0|        cy                */
    /* or (last time, if (an % 3*n) > 2*n + 2)                                  */
    /*        Hm1         |Lprec|                    |Hprec|                    */
    /* |Hinf|Linf|   |Linf| L0  | pp       ___|Y| H1 | L1  |    |     | scratch */
    /*    __/-H0       -H0                      | Lm1|\____                     */
    /*    cy                                    | -L0| c_2n                     */

    c_2n += c_3n - mpn_sub_n(pp+n, v1, pp+n, n);
    /*               |Hprec|Lprec|                               Hm1            */
    /* |Hinf|   |    |-Linf| L0  | pp      ___|Y| H1 |     |Hinf|Linf | scratch */
    /*           ____/ H0                       | Lm1|        __/-H0            */
    /*           c_2n  L1                       | -L0|        cy                */
    /* or (last time, if (an % 3*n) > 2*n + 2)                                  */
    /*        Hm1    |Hprec|Lprec|                                              */
    /* |Hinf|Linf|   |-Linf| L0  | pp      ___|Y| H1 |     |    |     | scratch */
    /*    __/-H0  ___/ H0                       | Lm1|                          */
    /*    cy      c_2n L1                       | -L0|                          */

    c_3n = v1[2*n] - mpn_sub(pp+2*n, v1+n, n, vinf+n, t+s-n);
    /*            Lm1 |Hprec|Lprec|                               Hm1           */
    /* |    |   | -L0 | H0  | L0  | pp       ___|    |     |Hinf|Linf | scratch */
    /*      ____/-Hinf|-Linf                                  __/-H0            */
    /*      c_3n   H1 | L1                                    cy                */
    /* or (last time, if (an % 3*n) > 2*n + 2)                                  */
    /*       Hm1   Lm1 |Hprec|Lprec|                                            */
    /* |Hinf|Linf|-Hinf|-Linf| L0  | pp      ___|    |     |    |     | scratch */
    /*       -H0   -L0 |  H0                                                    */
    /*              H1    L1                                                    */

    keep    = pp[3*n];                /* Final carry/borrows propagation.       */
    pp[3*n] = c_3n + 1;
    MPN_INCR_U(pp+n, 2*n+1, c_n);     /* c_n  */

    if (c_2n >= 0)                    /* c_2n */
      MPN_INCR_U(pp+2*n, n+1, c_2n);
    else
      MPN_DECR_U(pp+2*n, n+1, -c_2n);

    c_3n        = pp[3*n] - 1;
    pp[3*n]     = keep;
    keep        = vinf[n+t-1];
    vinf[n+t-1] = 1;
    if (c_3n >= 0)                    /* c_3n */
      MPN_INCR_U(vinf, n+t, c_3n);
    else
      MPN_DECR_U(vinf, n+t, -c_3n);

    MPN_INCR_U(vinf+n, t+s-n, cy);    /* cy   */

    vinf[n+t-1] += keep - 1;

    a0 += 3*n;
    pp += 3*n;
    an -= 3*n;
    if (an <= 3*n) /* Last time. */
      {
	s = an - 2*n;
	if (s < 3) break;
	vinf = pp + 3*n;
      }
  } while(1);

  /* Highest remaining multiplication and recombination. */
  if ( an > 0 )
    {
      if ( an > bn ) mpn_mul(pp, a0, an, bp, bn);
      else           mpn_mul(pp, bp, bn, a0, an);

      cy = mpn_add_n (pp, pp, vinfp, bn);
      MPN_INCR_U (pp + bn, an, cy);
    }

#undef b0
#undef b1
#undef v0
#undef v1
#undef vinfp
#undef vm1
#undef a0_a2
#undef bsm1
#undef asm1
#undef bs1
#undef as1

#undef a1
#undef a2
#undef scratch_out
}

#endif  /* ONLY_B_EVEN */

//////////////////////////////////////////////////////////////

#ifdef A_AND_B_EVEN

/* If b1 + b0 (and b1 - b0 as well) is even, we divide by 2 both
   evaluations. Doing so, we can avoid ALL later shifts, as what we
   have to compute is

  (a0+a1+a2)(b0+b1) + (a0-a1+a2)(b0-b1)             b0+b1             b0-b1
  ------------------------------------- = (a0+a1+a2)----- + (a0-a1+a2)-----
                    2                                 2                 2   
  and

  (a0+a1+a2)(b0+b1) - (a0-a1+a2)(b0-b1)             b0+b1             b0-b1
  ------------------------------------- = (a0+a1+a2)----- - (a0-a1+a2)------
                    2                                 2                 2

  If it is odd, check if the same happens for the generic section, 
                                              so that half shift
        a0+a1+a2          a0-a1+a2            can be saved for it.
  ... = --------(b0+b1) - --------(b0-b1)
           2                 2

  Evaluation of a is done iteratively. Evaluation of b just ONCE.

  v0  =  ai               * b0       # Ai(0)*B(0)
  v1  = (ai+a(i+1)+a(i+2))*(b0 + b1) # Ai(1)*B(1)      ai_h  <= 2 ; bh <= 1
  vm1 = (ai-a(i+1)+a(i+2))*(b0 - b1) # Ai(-1)*B(-1)   |ai_h| <= 1 ; bh = 0
  vinf=            a(i+2) *      b1  # Ai(inf)*B(inf)
*/

#define mpn_toomN2_mul_a_and_b_even mpn_toomN2_mul

void
mpn_toomN2_mul_a_and_b_even (mp_ptr pp,
		mp_srcptr ap, mp_size_t an,
		mp_srcptr bp, mp_size_t bn,
		mp_ptr scratch)
{
  mp_size_t    n, s, t, evenEvaluationsOfB, evenEvaluations;
  int          vm1_neg_b, vm1_neg, c_n = 0, c_2n = 0, c_3n, cy;
  mp_limb_t    keep;
  mp_srcptr    a0;
  mp_ptr       asm1, bs1, vinf, evalProdMinus1;

  t         = bn >> 1;         /* Number of limbs of b1 (either n or n-1) */
  n         = bn - t;          /* Number of limbs of a0, a1 and b0.       */

  ASSERT (n-2 < t && t <= n);

#define a0_a2   (pp)			/* n+ carry (cy)  */
#define as1     (pp + n)		/* n+1            */
  bs1 = pp + an + bn - 2 * n - 1;	/* n+1 (top)      */
#define bsm1    (bs1 + n + 1)		/* n              */

#define b0      (bp)
#define b1      (bp + n)
#define a1      (a0 + n)                /* Pointers to middle and high    */
#define a2      (a1 + n)                /* parts of current section of a. */

#define v0	(pp)			/* 2n             */
#define vm1	(pp + n)		/* 2n+1           */
#define vinfp	(scratch)		/* bn (max 2n)    */
  vinf = vinfp;
#define v1	(scratch + 2 * n)	/* 2n+1+1         */
#define scratch_out  (scratch + 4 * n + 2)

  evenEvaluationsOfB = ((b0[0] ^ b1[0]) & 1) - 1; /* 0 or ~0             */
  a0 = (evenEvaluationsOfB) ? bs1 : b0;
  if (t == n)                            /* Compute bs1, bsm1 just ONCE. */
    {
      bs1[n] = mpn_add_n (bs1, b0, b1, n);        /* b0 + b1: carry OK.  */
      if (evenEvaluationsOfB)
	mpn_rshift (bs1, bs1, n+1, 1);            /* n limbs, now.       */

      if (mpn_cmp (a0, b1, n) < 0)
	{
	  mpn_sub_n (bsm1, b1, a0, n);
	  vm1_neg_b = 1;
	}
      else
	{
	  mpn_sub_n (bsm1, a0, b1, n);
	  vm1_neg_b = 0;
	}
    }
  else /* t == n - 1 */
    {
      bs1[t] = b0[t] + mpn_add_n (bs1, b0, b1, t);
      bs1[n] = (bs1[t] < b0[t]);
      if (evenEvaluationsOfB)
	mpn_rshift (bs1, bs1, n+1, 1);            /* n limbs, now.       */

      if (a0[t] == 0 && mpn_cmp (a0, b1, t) < 0)
	{
	  mpn_sub_n (bsm1, b1, a0, t);
	  bsm1[t]   = 0;
	  vm1_neg_b = 1;
	}
      else
	{
	  bsm1[t] = a0[t] - mpn_sub_n (bsm1, a0, b1, t);
	  vm1_neg_b = 0;
	}
    }
  ASSERT (bs1[n] <= 1);
  ASSERT (an >= 3*n);
  a0 = ap; s = n;
  do {
    cy = mpn_add (a0_a2, a0, n, a2, s);              /* a0+a2: consider carry. */
    /*   n   n    n   n    n                      n    n    n     n            */
    /* |   |   |    |   |a0_a2| pp           ___|   |    |Hprec|Lprec| scratch */

    as1[n] = cy + mpn_add_n (as1, a0_a2, a1, n);     /*   as1 = a0 + a1 + a2   */
    /* |   |   |   .|as1|a0_a2| pp           ___|   |    |Hprec|Lprec| scratch */
    ASSERT (as1[n] <= 2);

    /* Check if the evaluation of this section of a is even, and b's is not.   */
    /* If it is, divide evaluation in 1 by 2 (shift by 1).                     */
    if ( (evenEvaluations = (((as1[0] & 1)-1) & (~evenEvaluationsOfB))) )
      {
	mpn_rshift (as1, as1, n+1, 1);
	ASSERT (as1[n] <= 1);
      }
    /* Evaluation in 1 : v1, 2n+1(+1, the last one is 0) limbs.                */
    /* (evenEvaluations|evenEvaluationsOfB) ? (1/2 1/2 1/2 1/2) : (1 1 1 1)    */

    TOOMN2_MUL_N_REC(v1, as1, bs1, n+1, scratch_out);
    evalProdMinus1 = pp + (n & evenEvaluations);
    cy             = (cy & ~ evenEvaluations) | (as1[n] & evenEvaluations);
    asm1           = pp + 2*(n & (evenEvaluations|=evenEvaluationsOfB));
    /* |   |   |    |    |a0_a2| pp     __|Z| H1_| L1_|Hprec|Lprec| scratch    */

    /* Evaluation in -1. If no carry in a0 + a2 and a0 + a2 < a1 ...           */
    if (cy == 0 && mpn_cmp (evalProdMinus1, a1, n) < 0)
      {
	mpn_sub_n (asm1, a1, evalProdMinus1, n);   /* -(a0 - a1 + a2) > 0      */
	vm1_neg = vm1_neg_b ^ 1;                   /* keep the absolute value  */
      }
    else /* Borrow */
      {
	cy -= mpn_sub_n (asm1, evalProdMinus1, a1, n);     /* a0 - a1 + a2 > 0 */
	vm1_neg = vm1_neg_b;
      }
    /* |   |   |asm1|   |     | pp   __|Z| H1_| L1_|Hprec|Lprec| scratch       */
    /* or                                                                      */
    /* |   |   |    |   |asm1 | pp   __|Z| H1_| L1_|Hprec|Lprec| scratch       */
    
    ASSERT (cy <= 1);
    evalProdMinus1 = pp + (n & ~(evenEvaluations));
    /* vm1, 2n+1 limbs                         Recursive multiplication in -1. */
    TOOMN2_MUL_N_REC(evalProdMinus1, asm1, bsm1, n, scratch_out); /*(-1 1 -1 1)*/
    if (cy)                                         /* or  (-1/2 1/2 -1/2 1/2) */
      cy = mpn_add_n(evalProdMinus1+n, evalProdMinus1+n, bsm1, n);
    /*   n   n    n   n    n                      n    n    n     n            */
    /* |   |   |    |Hm1_|Lm1_| pp        ___|Z| H1_| L1_|Hprec|Lprec| scratch */
    /* or                                                                      */
    /*   n   n    n    n    n                      n    n    n     n           */
    /* |   |   |Hm1_|Lm1_|    | pp        ___|Z| H1_| L1_|Hprec|Lprec| scratch */

    /** BEGINNING OF THE INTERPOLATION CODE **/
    if ( evenEvaluations )
      {
	if (vm1_neg)                             /* If vm1 is negative         */
	  {
	    vm1[2*n] = v1[2*n] - cy - mpn_sub_n (vm1+n, v1+n, evalProdMinus1+n, n);
    /* |   |   |X|Hm1 |Hm1_|Lm1_| pp      ___|Z| H1_| L1_|Hprec|Lprec| scratch */
	    v1[2*n] += cy + mpn_add_n (v1+n, v1+n, evalProdMinus1+n, n);
    /* |   |   |X|Hm1 |    |Lm1_| pp      ___|Z| H1 | L1_|Hprec|Lprec| scratch */
	    cy = mpn_sub_n (vm1, v1, evalProdMinus1, n);
	    MPN_DECR_U(vm1+n, n+1, cy);
    /* |   |   |X|Hm1 |Lm1 |Lm1_| pp      ___|Z| H1 | L1_|Hprec|Lprec| scratch */
	    cy = mpn_add_n (v1, v1, evalProdMinus1, n);
	    MPN_INCR_U(v1+n, n+1, cy);
    /* |   |   |X|Hm1 |Lm1 |    | pp      ___|Z| H1 | L1 |Hprec|Lprec| scratch */
	  }
	else
	  {
	    vm1[2*n] = v1[2*n] + cy + mpn_add_n (vm1+n, v1+n, evalProdMinus1+n, n);
    /* |   |   |X|Hm1 |Hm1_|Lm1_| pp      ___|Z| H1_| L1_|Hprec|Lprec| scratch */
	    v1[2*n] += cy - mpn_sub_n (v1+n,  v1+n, evalProdMinus1+n, n);
    /* |   |   |X|Hm1 |    |Lm1_| pp      ___|Z| H1 | L1_|Hprec|Lprec| scratch */
	    cy = mpn_add_n (vm1, v1, evalProdMinus1, n);
	    MPN_INCR_U(vm1+n, n+1, cy);
    /* |   |   |X|Hm1 |Lm1 |Lm1_| pp      ___|Z| H1 | L1_|Hprec|Lprec| scratch */
	    cy = mpn_sub_n (v1, v1, evalProdMinus1, n);
	    MPN_DECR_U(v1+n, n+1, cy);
    /* |   |   |X|Hm1 |Lm1 |    | pp      ___|Z| H1 | L1 |Hprec|Lprec| scratch */
	  }
      }
    else
      {
	vm1[2*n] = cy;
	if (vm1_neg)                                     /* If vm1 is negative */
	  {
#if HAVE_NATIVE_mpn_rsh1sub_n
	    mpn_rsh1sub_n (vm1, v1, vm1, 2*n + 1);
#else                                                    /* Decoupling begins. */
	    mpn_sub_n (vm1, v1, vm1, 2*n + 1);           /* (0 2 0 2)          */
	    mpn_rshift (vm1, vm1, 2*n + 1, 1);           /* (0 1 0 1)          */
#endif
	  }
	else
	  {
#if HAVE_NATIVE_mpn_rsh1add_n
	    mpn_rsh1add_n (vm1, v1, vm1, 2*n + 1);
#else                                                    /* Decoupling begins. */
	    mpn_add_n (vm1, v1, vm1, 2*n + 1);           /* (0 2 0 2)          */
	    mpn_rshift (vm1, vm1, 2*n + 1, 1);           /* (0 1 0 1)          */
#endif
	  }
	mpn_sub_n (v1, v1, vm1, 2*n+1);                  /* (1 0 1 0)          */
      }
    /* |   |   |X|Hm1|Lm1 |  | pp         ___|Z| H1 | L1 |Hprec|Lprec| scratch */

    v1[2*n] += mpn_add_n(v1+n, v1+n, vm1, n);
    /* |    |  |X|Hm1|   |   | pp         ___|Q| H1 | L1 |Hprec|Lprec| scratch */
    /*                                         | Lm1|                          */
    /* v0, 2n limbs */                      /* Recursive multiplication for 0. */
    TOOMN2_MUL_N_REC(v0, a0, b0, n, scratch_out);          /* (0 0 0 1)        */
    /* |    |  |X|Hm1| H0| L0| pp         ___|Q| H1 | L1 |Hprec|Lprec| scratch */
    /*                                         | Lm1|                          */
    v1[2*n] -= mpn_sub_n (v1+n, v1+n, v0, n);
    /* |    |  |X|Hm1| H0| L0| pp         ___|Y| H1 | L1 |Hprec|Lprec| scratch */
    /*                                         | Lm1|                          */
    /*                                         | -L0|                          */

    if ( LIKELY(ap != a0) )        /* Not first time: precedent result to add. */
      {
	c_n  = mpn_add_n (v0, v0, vinfp, n);      /* Carry for the first part  */
	c_2n = mpn_add (v1, v1, n, vinfp + n, t); /* ... and the second part.  */
      }
    /*                   |Lprec|                    |Hprec|                    */
    /* |    |  |X|Hm1| H0| L0  | pp       ___|Y| H1 | L1  |    |     | scratch */
    /*                  / \                    | Lm1|\____                     */
    /*                  c_n                    | -L0| c_2n                     */

    cy = vm1[2*n];
    if (LIKELY (s >= t) ) mpn_mul (vinf, a2, s, b1, t);
    else                  mpn_mul (vinf, b1, t, a2, s);
    ASSERT (s + t > n);
    /*                   |Lprec|                    |Hprec|                    */
    /* |    |   | Hm1| H0| L0  | pp       ___|Y| H1 | L1  |Hinf|Linf | scratch */
    /*                                         | Lm1|\____                     */
    /*                                         | -L0| c_2n                     */
    /* or (last time, if (an % 3*n) > 2*n + 2)                                 */
    /*                   |Lprec|                    |Hprec|                    */
    /* |Hinf|Linf|Hm1| H0| L0  | pp       ___|Y| H1 | L1  |    |     | scratch */
    /*                                         | Lm1|\____                     */
    /*                                         | -L0| c_2n                     */
    
    c_3n = mpn_sub_n(v0+n, vinf, v0+n, n);       /* Two-complement if negative */
    /*                    |Lprec|                   |Hprec|                    */
    /* |    |   | Hm1|Linf| L0  | pp      ___|Y| H1 | L1  |Hinf|     | scratch */
    /*           ____/-H0                      | Lm1|\____                     */
    /*           c_3n                          | -L0| c_2n                     */
    /* or (last time, if (an % 3*n) > 2*n + 2)                                 */
    /*                    |Lprec|                   |Hprec|                    */
    /* |Hinf|    |Hm1|Linf| L0  | pp      ___|Y| H1 | L1  |    |     | scratch */
    /*           ____/-H0                      | Lm1|\____                     */
    /*           c_3n                          | -L0| c_2n                     */

    cy = cy - c_3n + mpn_add_n(vinf, vm1+n, pp+n, n);
    /*                    |Lprec|                   |Hprec|     Hm1            */
    /* |    |   |    |Linf| L0  | pp      ___|Y| H1 | L1  |Hinf|Linf | scratch */
    /*           ____/-H0                      | Lm1|        __/-H0            */
    /*           c_3n                          | -L0|        cy                */
    /* or (last time, if (an % 3*n) > 2*n + 2)                                 */
    /*        Hm1         |Lprec|                   |Hprec|                    */
    /* |Hinf|Linf|   |Linf| L0  | pp      ___|Y| H1 | L1  |    |     | scratch */
    /*    __/-H0       -H0                     | Lm1|\____                     */
    /*    cy                                   | -L0| c_2n                     */

    c_2n += c_3n - mpn_sub_n(pp+n, v1, pp+n, n);
    /*               |Hprec|Lprec|                              Hm1            */
    /* |Hinf|   |    |-Linf| L0  | pp     ___|Y| H1 |     |Hinf|Linf | scratch */
    /*           ____/ H0                      | Lm1|        __/-H0            */
    /*           c_2n  L1                      | -L0|        cy                */
    /* or (last time, if (an % 3*n) > 2*n + 2)                                 */
    /*        Hm1    |Hprec|Lprec|                                             */
    /* |Hinf|Linf|   |-Linf| L0  | pp     ___|Y| H1 |     |    |     | scratch */
    /*    __/-H0  ___/ H0                      | Lm1|                          */
    /*    cy      c_2n L1                      | -L0|                          */

    c_3n = v1[2*n] - mpn_sub(pp+2*n, v1+n, n, vinf+n, t+s-n);
    /*            Lm1 |Hprec|Lprec|                              Hm1           */
    /* |    |   | -L0 | H0  | L0  | pp      ___|    |     |Hinf|Linf | scratch */
    /*      ____/-Hinf|-Linf                                 __/-H0            */
    /*      c_3n   H1 | L1                                   cy                */
    /* or (last time, if (an % 3*n) > 2*n + 2)                                 */
    /*       Hm1   Lm1 |Hprec|Lprec|                                           */
    /* |Hinf|Linf|-Hinf|-Linf| L0  | pp     ___|    |     |    |     | scratch */
    /*       -H0   -L0 |  H0                                                   */
    /*              H1    L1                                                   */

    keep    = pp[3*n];                /* Final carry/borrows propagation.      */
    pp[3*n] = c_3n + 1;
    MPN_INCR_U(pp+n, 2*n+1, c_n);     /* c_n  */

    if (c_2n >= 0)                    /* c_2n */
      MPN_INCR_U(pp+2*n, n+1, c_2n);
    else
      MPN_DECR_U(pp+2*n, n+1, -c_2n);

    c_3n        = pp[3*n] - 1;
    pp[3*n]     = keep;
    keep        = vinf[n+t-1];
    vinf[n+t-1] = 1;
    if (c_3n >= 0)                    /* c_3n */
      MPN_INCR_U(vinf, n+t, c_3n);
    else
      MPN_DECR_U(vinf, n+t, -c_3n);

    MPN_INCR_U(vinf+n, t+s-n, cy);    /* cy   */

    vinf[n+t-1] += keep - 1;

    a0 += 3*n;
    pp += 3*n;
    an -= 3*n;
    if (an <= 3*n)              /* Last time. */
      {
	s = an - 2*n;
	if (s < 3) break;
	vinf = pp + 3*n;
      }
  } while(1);

  /* Highest remaining multiplication and recombination. */
  if ( an > 0 )
    {
      if ( an > bn ) mpn_mul(pp, a0, an, bp, bn);
      else           mpn_mul(pp, bp, bn, a0, an);

      cy = mpn_add_n (pp, pp, vinfp, bn);
      MPN_INCR_U (pp + bn, an, cy);
    }

#undef b0
#undef b1
#undef v0
#undef v1
#undef vinfp
#undef vm1
#undef a0_a2
#undef bsm1
#undef asm1
#undef bs1
#undef as1

#undef a1
#undef a2
#undef scratch_out
}

#endif /* A_AND_B_EVEN */

