/*
	MPINTLIB.h
	Copyright (C) 2018 Paul C. Pratt and many others

	Multi Precision INTeger LIBrary

	Distantly descended from MacPGP code, which as far as I
	can tell allows derived works for non commercial use.
	Code that I have written is dual licensed as both GPL
	version 2 and the original MacPGP license.

	original MacPGP copryight notice follows:
*/

/*
	pgp.c -- main module for PGP.
	PGP: Pretty Good(tm) Privacy - public key cryptography
	for the masses.

	Synopsis:  PGP uses public-key encryption to protect
	E-mail. Communicate securely with people you've never
	met, with no secure channels needed for prior exchange
	of keys.  PGP is well featured and fast, with
	sophisticated key management, digital signatures, data
	compression, and good ergonomic design.

	The original PGP version 1.0 was written by Philip
	Zimmermann, of Phil's Pretty Good(tm) Software.  Many
	parts of later versions of PGP were developed by an
	international collaborative effort, involving a number
	of contributors, including major efforts by:
	Branko Lankester <branko@hacktic.nl>
	Hal Finney <74076.1041@compuserve.com>
	Peter Gutmann <pgut1@cs.aukuni.ac.nz>
	Other contributors who ported or translated or otherwise
	helped include:
	Jean-loup Gailly in France
	Hugh Kennedy in Germany
	Lutz Frank in Germany
	Cor Bosman in The Netherlands
	Felipe Rodriquez Svensson in The Netherlands
	Armando Ramos in Spain
	Miguel Angel Gallardo Ortiz in Spain
	Harry Bush and Maris Gabalins in Latvia
	Zygimantas Cepaitis in Lithuania
	Alexander Smishlajev
	Peter Suchkow and Andrew Chernov in Russia
	David Vincenzetti in Italy
	...and others.


	(c) Copyright 1990-1996 by Philip Zimmermann.  All
	rights reserved. The author assumes no liability for
	damages resulting from the use of this software, even if
	the damage results from defects in this software.  No
	warranty is expressed or implied.

	Note that while most PGP source modules bear Philip
	Zimmermann's copyright notice, many of them have been
	revised or entirely written by contributors who
	frequently failed to put their names in their code.
	Code that has been incorporated into PGP from other
	authors was either originally published in the public
	domain or is used with permission from the various
	authors.

	PGP is available for free to the public under certain
	restrictions. See the PGP User's Guide (included in the
	release package) for important information about
	licensing, patent restrictions on certain algorithms,
	trademarks, copyrights, and export controls.


	Philip Zimmermann may be reached at:
	Boulder Software Engineering
	3021 Eleventh Street
	Boulder, Colorado 80304  USA
	(303) 541-0140  (voice or FAX)
	email:  prz@acm.org


	PGP will run on MSDOS, Sun Unix, VAX/VMS, Ultrix, Atari
	ST, Commodore Amiga, and OS/2.  Note:  Don't try to do
	anything with this source code without looking at the
	PGP User's Guide.

	PGP combines the convenience of the
	Rivest-Shamir-Adleman (RSA) public key cryptosystem with
	the speed of fast conventional cryptographic algorithms,
	fast message digest algorithms, data compression, and
	sophisticated key management.  And PGP performs the RSA
	functions faster than most other software
	implementations. PGP is RSA public key cryptography for
	the masses.
*/

/* log2 of size in bits of mp_UNIT_t */
#ifndef mp_UNIT_ln2bsz
#define mp_UNIT_ln2bsz 4
#endif

#if 3 == mp_UNIT_ln2bsz
typedef ui3b mp_UNIT_t;
typedef ui4b mp_UNITp1_t;
	/* at least one bit wider than mp_UNIT_t */
typedef si4b mp_UNITp1_st;
#endif

#if 4 == mp_UNIT_ln2bsz
typedef ui4b mp_UNIT_t;
typedef ui5b mp_UNITp1_t;
	/* at least one bit wider than mp_UNIT_t */
typedef si5b mp_UNITp1_st;
#endif

#if 5 == mp_UNIT_ln2bsz
typedef ui5b mp_UNIT_t;
typedef ui6b mp_UNITp1_t;
	/* at least one bit wider than mp_UNIT_t */
typedef si6b mp_UNITp1_st;
#endif

typedef mp_UNIT_t *mp_UNIT_p;

#define mp_UNIT_ln2sz (mp_UNIT_ln2bsz - 3)
	/* log2 of size in bytes of mp_UNIT_t */
#define mp_UNIT_bsz (1UL << mp_UNIT_ln2bsz)
	/* number of bits in a mp_UNIT_t */
#define mp_UNIT_sz (1UL << mp_UNIT_ln2sz)
	/* number of bytes in a mp_UNIT_t */
#define mp_units2bits(n) ((n) << mp_UNIT_ln2bsz)
#define mp_units2bytes(n) ((n) << mp_UNIT_ln2sz)
#define mp_bits2units(n) (((n) + (mp_UNIT_bsz - 1)) \
	>> mp_UNIT_ln2bsz)
#define mp_bytes2units(n) (((n) + (mp_UNIT_sz - 1)) \
	>> mp_UNIT_ln2sz)
#define mp_highbit (((mp_UNIT_t) 1) << (mp_UNIT_bsz - 1))

#define bits2bytes(n) (((n) + 7) >> 3)


#define MIN_KEY_BITS  384
#define MAX_KEY_BITS 2048

#define MAX_BIT_PRECISION (MAX_KEY_BITS + (2 * mp_UNIT_bsz))
#define MAX_BYTE_PRECISION (MAX_BIT_PRECISION / 8)
#define MAX_UNIT_PRECISION (MAX_BIT_PRECISION / mp_UNIT_bsz)


LOCALVAR ui4r mp_precision = 0;
	/* units of precision for all routines */
/*
	mp_precision is the mp_UNIT_t precision last set by
	mp_precision_set.
	Initially, mp_precision_set() should be called to define
	mp_precision before using any of these other
	multiprecision library routines.
	i.e.:   mp_precision_set(MAX_UNIT_PRECISION);
*/

/* set working precision to specified number of bits. */
#define mp_precision_set(prec) (mp_precision = (prec))

#define power_of_2(b) ((mp_UNIT_t) 1 << (b))
	/* computes power-of-2 bit masks */

#define mp_bitmsk(n) power_of_2((n) & (mp_UNIT_bsz - 1))

LOCALPROC mp_move(register mp_UNIT_p dst, register mp_UNIT_p src)
{
	register ui4r i;

	for (i = mp_precision + 1; 0 != --i; ) {
		*dst++ = *src++;
	}
}

LOCALPROC mp_convertoMSB(mp_UNIT_p mp, ui3b *bp)
{
	ui4r i;
	mp_UNIT_t t2;
	ui3b *p1 = bp;
	mp_UNIT_p p2 = mp + mp_precision;

	for (i = mp_precision + 1; 0 != --i; ) {
		t2 = *--p2;

		*p1++ = t2 >> 8;
		*p1++ = t2 & 0x00FF;
	}
}

LOCALPROC mp_converfromMSB(mp_UNIT_p mp, ui3b *bp)
{
	ui4r i;
	mp_UNIT_t v1;
	mp_UNIT_t v2;
	ui3b *p1 = bp;
	mp_UNIT_p p2 = mp + mp_precision;

	for (i = mp_precision + 1; 0 != --i; ) {
		v1 = *p1++;
		v2 = *p1++;
		*--p2 = (v1 << 8) | v2;
	}
}

LOCALPROC mp_convertoLSB(mp_UNIT_p mp, ui3b *bp)
{
	ui4r i;
	mp_UNIT_t t2;
	ui3b *p1 = bp;
	mp_UNIT_p p2 = mp;

	for (i = mp_precision + 1; 0 != --i; ) {
		t2 = *p2++;

		*p1++ = t2 & 0x00FF;
		*p1++ = t2 >> 8;
	}
}

LOCALPROC mp_converfromLSB(mp_UNIT_p mp, ui3b *bp)
{
	ui4r i;
	mp_UNIT_t v1;
	mp_UNIT_t v2;
	ui3b *p1 = bp;
	mp_UNIT_p p2 = mp;

	for (i = mp_precision + 1; 0 != --i; ) {
		v2 = *p1++;
		v1 = *p1++;
		*p2++ = (v1 << 8) | v2;
	}
}

LOCALFUNC ui4r mp_significance(mp_UNIT_p r)
{
	register ui4r i;
	mp_UNIT_p r1p = r + mp_precision;

	for (i = mp_precision + 1; 0 != --i; ) {
		if (0 != *--r1p) {
			return i;
		}
	}

	return 0;
}

LOCALFUNC int mp_countbits(mp_UNIT_p r)
{
	mp_UNIT_t bitmask;
	mp_UNIT_t v;
	int bit;
	register ui4r i;
	mp_UNIT_p r1p = r + mp_precision;

	for (i = mp_precision + 1; 0 != --i; ) {
		v = *--r1p;
		if (0 != v) {
			bit = mp_units2bits(i);
			bitmask = mp_highbit;
			while (0 == (v & bitmask)) {
				bitmask >>= 1;
				--bit;
			}

			return bit;
		}
	}

	return 0;
}

#define mp_countbytes(r) ((mp_countbits(r) + 7) >> 3)

LOCALFUNC blnr mp_testbit(mp_UNIT_p r, int bit)
{
	return 0 != (r[bit >> mp_UNIT_ln2bsz] & mp_bitmsk(bit));
}

LOCALPROC mp_setbit(mp_UNIT_p r, int bit)
{
	r[bit >> mp_UNIT_ln2bsz] |= mp_bitmsk(bit);
}

LOCALPROC mp_SetRestZero(register mp_UNIT_p r, ui4r n)
{
	register ui4r i;

	r += n;

	for (i = (mp_precision - n) + 1; 0 != --i; ) {
		*r++ = 0;
	}
}

LOCALFUNC blnr mp_RestIsZero(register mp_UNIT_p r, ui4r n)
/*
	returns (mp_significance(r) <= n), but usually faster
	since starts checking at least significant units, which
	are more likely to be non zero.
*/
{
	register ui4r i;

	r += n;

	for (i = (mp_precision - n) + 1; 0 != --i; ) {
		if (0 != *r++) {
			return falseblnr;
		}
	}

	return trueblnr;
}

LOCALPROC mp_SetZero(register mp_UNIT_p r)
{
	mp_SetRestZero(r, 0);
}

LOCALPROC mp_SetOne(register mp_UNIT_p r)
{
	r[0] = 1;
	mp_SetRestZero(r, 1);
}

LOCALFUNC blnr mp_isZero(mp_UNIT_p r)
{
	return (mp_precision == 0)
		|| ((r[0] == 0) && mp_RestIsZero(r, 1));
}

LOCALFUNC blnr mp_isOne(mp_UNIT_p r)
{
	return (mp_precision != 0)
		&& ((r[0] == 1) && mp_RestIsZero(r, 1));
}

/* equal to */
LOCALFUNC blnr mp_eq(mp_UNIT_p r1, mp_UNIT_p r2)
{
	mp_UNIT_t v1;
	mp_UNIT_t v2;
	blnr v;
	register ui4r i;
	mp_UNIT_p r1p = r1 + mp_precision;
	mp_UNIT_p r2p = r2 + mp_precision;

	for (i = mp_precision + 1; 0 != --i; ) {
		v1 = *--r1p;
		v2 = *--r2p;

		if (v1 != v2) {
			v = falseblnr;
			goto l_exit;
		}
	}

	v = trueblnr;

l_exit:
	return v;
}

/* greater than or equal to */
LOCALFUNC blnr mp_ge(mp_UNIT_p r1, mp_UNIT_p r2)
{
	mp_UNIT_t v1;
	mp_UNIT_t v2;
	blnr v;
	register ui4r i;
	mp_UNIT_p r1p = r1 + mp_precision;
	mp_UNIT_p r2p = r2 + mp_precision;

	for (i = mp_precision + 1; 0 != --i; ) {
		v1 = *--r1p;
		v2 = *--r2p;

		if (v1 != v2) {
			v = (v1 >= v2);
			goto l_exit;
		}
	}

	v = trueblnr;

l_exit:
	return v;
}

/*
	multiprecision add, r2 to r1, result in r1
	return the final carry flag bit
*/
LOCALFUNC blnr mp_add(register mp_UNIT_p r1,
	register mp_UNIT_p r2)
{
	register ui4r i;
	register mp_UNITp1_t x = 0;

	for (i = mp_precision + 1; 0 != --i; ) {
		x += ((mp_UNITp1_t)*r1)
			+ ((mp_UNITp1_t)*r2++);
		*r1++ = x;
		x >>= mp_UNIT_bsz;
	}

	return (0 != x);
}

/*
	multiprecision subtract, r2 from r1, result in r1
	return the final carry/borrow flag bit
*/
LOCALFUNC blnr mp_sub(register mp_UNIT_p r1,
	register mp_UNIT_p r2)
{
	register ui4r i;
	register mp_UNITp1_t x = 0;

	for (i = mp_precision + 1; 0 != --i; ) {
		x += ((mp_UNITp1_t)*r1)
			- ((mp_UNITp1_t)*r2++);
		*r1++ = x;
		x = (mp_UNITp1_t)(((mp_UNITp1_st)x) >> mp_UNIT_bsz);
		/* x = - (((x) >> mp_UNIT_bsz) & 1); */
	}

	return (0 != x);
}

/*
	multiprecision rotate left 1 bit, result in r1.
	return the final carry flag bit
*/
LOCALFUNC blnr mp_rotate_left(register mp_UNIT_p r1)
{
	register ui4r i;
	register mp_UNITp1_t x = 0;

	for (i = mp_precision + 1; 0 != --i; ) {
		x += (((mp_UNITp1_t)*r1) << 1);
		*r1++ = x;
		x >>= mp_UNIT_bsz;
	}

	return (0 != x);
}

/*
	multiprecision rotate left 1 bit, result in r1.
	return the final carry flag bit
*/
LOCALFUNC blnr mp_rotate_leftc(register mp_UNIT_p r1,
	register blnr carry)
{
	register ui4r i;
	register mp_UNITp1_t x = carry ? 1 : 0;

	for (i = mp_precision + 1; 0 != --i; ) {
		x += (((mp_UNITp1_t)*r1) << 1);
		*r1++ = x;
		x >>= mp_UNIT_bsz;
	}

	return (0 != x);
}

/*
	Computes multiprecision prod = multiplicand * multiplier
	Uses "Russian peasant" multiply algorithm.
*/
LOCALFUNC tMyErr mp_mult(register mp_UNIT_p prod,
	register mp_UNIT_p multiplicand, register mp_UNIT_p multiplier)
{
	tMyErr err = kMyErr_noErr;
	int bits;

	mp_SetZero(prod);
	if (mp_isZero(multiplicand)) {
		goto l_exit; /* zero multiplicand means zero product */
	}

	bits = mp_countbits(multiplier);

	while (bits) {
		--bits;
		if (mp_rotate_left(prod)) {
#if DebugCheck
			dbglog_writeln("mp_rotate_left overflow in mp_mult");
#endif
			err = kMyErr_failr; /* over flow */
			goto l_exit;
		}
		if (mp_testbit(multiplier, bits)) {
			if (mp_add(prod, multiplicand)) {
#if DebugCheck
				dbglog_writeln("mp_add overflow in mp_mult");
#endif
				err = kMyErr_failr; /* over flow */
				goto l_exit;
			}
		}

		if (kMyErr_noErr != (err =
			CheckAbortRequested()))
		{
			goto l_exit;
		}
	}

l_exit:
	return ErrReportStack(err, "mp_mult");
}

/* Unsigned divide, treats both operands as positive. */
LOCALFUNC tMyErr mp_udiv(mp_UNIT_p remainder, mp_UNIT_p quotient,
	mp_UNIT_p dividend, mp_UNIT_p divisor)
{
	tMyErr err;
	int bits;

	if (mp_isZero(divisor)) {
		err = kMyErrParamErr;  /* zero divisor means divide error */
		goto l_exit;
	}

	mp_SetZero(remainder);
	mp_SetZero(quotient);

	bits = mp_countbits(dividend);

	while (bits) {
		--bits;
		if (mp_rotate_leftc(remainder, mp_testbit(dividend, bits))
			|| mp_ge(remainder, divisor))
		{
			(void) mp_sub(remainder, divisor);
			mp_setbit(quotient, bits);
		}

		if (kMyErr_noErr != (err =
			CheckAbortRequested()))
		{
			goto l_exit;
		}
	}

	err = kMyErr_noErr;

l_exit:
	return ErrReportStack(err, "mp_udiv");
}

/* Unsigned divide, treats both operands as positive. */
LOCALFUNC tMyErr mp_mod(mp_UNIT_p remainder,
	mp_UNIT_p dividend, mp_UNIT_p divisor)
{
	tMyErr err;
	int bits;

	if (mp_isZero(divisor)) {
		err = kMyErrParamErr;  /* zero divisor means divide error */
		goto l_exit;
	}

	mp_SetZero(remainder);

	bits = mp_countbits(dividend);

	while (bits) {
		--bits;
		if (mp_rotate_leftc(remainder, mp_testbit(dividend, bits))
			|| mp_ge(remainder, divisor))
		{
			(void) mp_sub(remainder, divisor);
		}

		if (kMyErr_noErr != (err =
			CheckAbortRequested()))
		{
			goto l_exit;
		}
	}

	err = kMyErr_noErr;

l_exit:
	return ErrReportStack(err, "mp_mod");
}

/*
	This function does a fast mod operation on a
	multiprecision dividend using a short integer modulus
	returning a short integer remainder. This is an unsigned
	divide.  It treats both operands as positive. It is used
	mainly for fast sieve searches for large primes.
*/
LOCALFUNC ui4r mp_shortmod(register mp_UNIT_p dividend,
	register ui4r divisor)
{
	register ui4r remainder;
	int bits;

	if (0 == divisor) {
		remainder = (ui4r) -1; /* zero divisor means divide error */
		goto l_exit;
	}

	remainder = 0;

	bits = mp_countbits(dividend);

	while (bits) {
		--bits;
		remainder <<= 1;
		if (mp_testbit(dividend, bits)) {
			remainder++;
		}

		if (remainder >= divisor) {
			remainder -= divisor;
		}
	}

l_exit:
	return remainder;
}

LOCALPROC mp_rotate3(mp_UNIT_p *x, mp_UNIT_p *y, mp_UNIT_p *z)
{
	mp_UNIT_p t = *x;

	*x = *y;
	*y = *z;
	*z = t;
}

/* Computes greatest common divisor via Euclid's algorithm. */
LOCALPROC mp_gcd(mp_UNIT_p result, mp_UNIT_p a, mp_UNIT_p n)
{
	mp_UNIT_t g0[MAX_UNIT_PRECISION];
	mp_UNIT_t g1[MAX_UNIT_PRECISION];
	mp_UNIT_t g2[MAX_UNIT_PRECISION];
	mp_UNIT_p g_prev = g0;
	mp_UNIT_p g_cur = g1;
	mp_UNIT_p g_next = g2;

	mp_move(g_prev, n);
	mp_move(g_cur, a);

	while (! mp_isZero(g_cur)) {
		(void) mp_mod(g_next, g_prev, g_cur);
		mp_rotate3(&g_prev, &g_cur, &g_next);
	}
	mp_move(result, g_prev);
}



/* Init multiprecision register r with short value. */
LOCALPROC mp_Ui4rSet(register mp_UNIT_p r, ui4r i)
{
#if 3 == mp_UNIT_ln2bsz
	r[0] = (i & 0xFF);
	r[1] = i >> mp_UNIT_bsz;
	mp_SetRestZero(r, 2);
#else
	r[0] = i;
	mp_SetRestZero(r, 1);
#endif
}

LOCALFUNC blnr mp_Ui4rEq(register mp_UNIT_p r, ui4r i)
{
	blnr v;

	if (mp_precision == 0) {
		v = (0 == i);
	} else {
#if 3 == mp_UNIT_ln2bsz
		v = (r[0] == (i & 0xFF))
			&& (r[1] == (i >> 8))
			&& mp_RestIsZero(r, 2);
#else
		v = (r[0] == i)
			&& mp_RestIsZero(r, 1);
#endif
	}

	return v;
}

/*
	multiprecision add small value
	return the final carry flag bit
*/
LOCALFUNC blnr mp_Ui4rAdd(register mp_UNIT_p r,
	ui4r v)
{
	ui4r n = mp_precision;
	register ui4r i;
	register mp_UNITp1_t x = 0;

#if 3 == mp_UNIT_ln2bsz
	x += ((mp_UNITp1_t)*r)
		+ (v & 0xFF);
	*r++ = x;
	x >>= mp_UNIT_bsz;
	--n;
	v >>= mp_UNIT_bsz;
#endif

	if (0 != n) {
		x += ((mp_UNITp1_t)*r)
			+ v;
		*r++ = x;
		x >>= mp_UNIT_bsz;
		--n;

		for (i = n + 1; (0 != --i) && (0 != x); ) {
			x += ((mp_UNITp1_t)*r);
			*r++ = x;
			x >>= mp_UNIT_bsz;
		}
	}

	return (0 != x);
}

/*
	multiprecision subtract, r2 from r1, result in r1
	return the final carry/borrow flag bit
*/
LOCALFUNC blnr mp_Ui4rSub(register mp_UNIT_p r,
	ui4r v)
{
	ui4r n = mp_precision;
	register ui4r i;
	register mp_UNITp1_t x = 0;

#if 3 == mp_UNIT_ln2bsz
	x += ((mp_UNITp1_t)*r)
		- (v & 0xFF);
	*r++ = x;
	x = (mp_UNITp1_t)(((mp_UNITp1_st)x) >> mp_UNIT_bsz);
	/* x = - (((x) >> mp_UNIT_bsz) & 1); */
	--n;
	v >>= mp_UNIT_bsz;
#endif

	if (0 != n) {
		x += ((mp_UNITp1_t)*r)
			- v;
		*r++ = x;
		x = (mp_UNITp1_t)(((mp_UNITp1_st)x) >> mp_UNIT_bsz);
		/* x = - (((x) >> mp_UNIT_bsz) & 1); */
		--n;

		for (i = n + 1; (0 != --i) && (0 != x); ) {
			x += ((mp_UNITp1_t)*r);
			*r++ = x;
			x = (mp_UNITp1_t)(((mp_UNITp1_st)x) >> mp_UNIT_bsz);
			/* x = - (((x) >> mp_UNIT_bsz) & 1); */
		}
	}

	return (0 != x);
}

/*
	Increment multiprecision integer r
	return the final carry flag bit
*/
LOCALFUNC blnr mp_inc(register mp_UNIT_p r)
{
	return mp_Ui4rAdd(r, 1);
}

/*
	Decrement multiprecision integer r
	return the final carry/borrow flag bit
*/
LOCALFUNC blnr mp_dec(register mp_UNIT_p r)
{
	return mp_Ui4rSub(r, 1);
}



LOCALVAR mp_UNIT_p mp_modulus = nullpr;

LOCALPROC mp_modadd(mp_UNIT_p r1, mp_UNIT_p r2)
{
	if (mp_add(r1, r2)
		|| (mp_ge(r1, mp_modulus)))
	{
		(void) mp_sub(r1, mp_modulus);
	}
}

LOCALPROC mp_modsub(mp_UNIT_p r1, mp_UNIT_p r2)
{
	if (mp_sub(r1, r2)) {
		(void) mp_add(r1, mp_modulus);
	}
}

LOCALPROC mp_mod2x(mp_UNIT_p r1)
{
	if (mp_rotate_left(r1)
		|| (mp_ge(r1, mp_modulus)))
	{
		(void) mp_sub(r1, mp_modulus);
	}
}

LOCALFUNC tMyErr mp_modmult(register mp_UNIT_p prod,
	mp_UNIT_p multiplicand, mp_UNIT_p multiplier)
{
	tMyErr err;
	int bits = mp_countbits(multiplier);

	if (0 == bits) {
		mp_SetZero(prod);
	} else {
		--bits;
		mp_move(prod, multiplicand);

		while (bits) {
			--bits;
			mp_mod2x(prod);
			if (mp_testbit(multiplier, bits)) {
				mp_modadd(prod, multiplicand);
			}

			if (kMyErr_noErr != (err =
				CheckAbortRequested()))
			{
				goto l_exit;
			}
		}
	}

	err = kMyErr_noErr;

l_exit:
	return ErrReportStack(err, "mp_modmult");
}

/*
	Russian peasant combined exponentiation/modulo algorithm.
	Calls modmult instead of mult.
	Computes:  expout = (expin**exponent) mod mp_modulus
	WARNING: All the arguments must be less than the modulus!
*/
LOCALFUNC tMyErr mp_modexp_0(register mp_UNIT_p expout,
	register mp_UNIT_p expin,
	register mp_UNIT_p exponent)
{
	tMyErr err;
	mp_UNIT_t product[MAX_UNIT_PRECISION];
	int bits;

	bits = mp_countbits(exponent);
	if (0 == bits) {
		mp_SetOne(expout);
		if (mp_isZero(expin)) {
			err = kMyErrParamErr;
			goto l_exit;
				/* 0 to the 0th power means return error */
		}
	} else {
		--bits;
		mp_move(expout, expin);

		while (0 != bits) {
			--bits;
			if (kMyErr_noErr != (err =
				mp_modmult(product, expout, expout)))
			{
				goto l_exit;
			}

			if (mp_testbit(exponent, bits)) {
				if (kMyErr_noErr != (err =
					mp_modmult(expout, product, expin)))
				{
					goto l_exit;
				}
			} else {
				mp_move(expout, product);
			}
		}
	}

	err = kMyErr_noErr;

l_exit:
	return ErrReportStack(err, "mp_modexp_0");
}



LOCALFUNC tMyErr mp_modexp(register mp_UNIT_p expout,
	register mp_UNIT_p expin, register mp_UNIT_p exponent,
	register mp_UNIT_p modulus)
{
	tMyErr err;
	ui4r modsig = mp_significance(modulus);

	if (0 == modsig) {
#if DebugCheck
		dbglog_writeln("0 == modsig");
#endif
		err = kMyErrParamErr;
	} else
	if (mp_ge(expin, modulus)) {
#if DebugCheck
		dbglog_writeln("expin too large");
#endif
		err = kMyErrParamErr;
	} else
	if (mp_ge(exponent, modulus)) {
#if DebugCheck
		dbglog_writeln("exponent too large");
#endif
		err = kMyErrParamErr;
	} else
	{
		ui4r save_precision = mp_precision;
		mp_UNIT_p save_modulus = mp_modulus;

		/* set smallest optimum precision for this modulus */
		mp_precision_set(modsig);
		mp_modulus = modulus;

		err = mp_modexp_0(expout, expin, exponent);

		mp_modulus = save_modulus;
		mp_precision_set(save_precision);

		mp_SetRestZero(expout, modsig);
	}

	return ErrReportStack(err, "mp_modexp");
}

/*
	This is a faster modexp for moduli with a known
	factorisation into two relatively prime factors p and q,
	and an input relatively prime to the modulus, the
	Chinese Remainder Theorem to do the computation mod p
	and mod q, and then combine the results.  This relies on
	a number of precomputed values, but does not actually
	require the modulus n or the exponent e.

	expout = expin ^ e mod (p * q).
	We form this by evaluating
	p2 = (expin ^ e) mod p and
	q2 = (expin ^ e) mod q
	and then combining the two by the CRT.

	Two optimisations of this are possible.  First, we can
	reduce expin modulo p and q before starting.

	Second, since we know the factorisation of p and q
	(trivially derived from the factorisation of n = p*q),
	and expin is relatively prime to both p and q, we can
	use Euler's theorem, expin ^ phi(m) = 1 (mod m), to
	throw away multiples of phi(p) or phi(q) in e.
	Letting ep = e mod phi(p)
		and eq = e mod phi(q)
	then combining these two speedups, we only need to
	evaluate
	p2 = ((expin mod p) ^ ep) mod p and
	q2 = ((expin mod q) ^ eq) mod q.

	Now we need to apply the CRT.  Starting with
	expout = p2 (mod p) and
	expout = q2 (mod q)
	we can say that expout = p2 + p * k, and if we assume
	that 0 <= p2 < p, then 0 <= expout < p * q for some
	0 <= k < q.  Since we want expout = q2 (mod q),
	then p * k = q2 - p2 (mod q).  Since p and q are
	relatively prime, p has a multiplicative inverse u mod
	q.  In other words, u = 1 / p (mod q).

	Multiplying by u on both sides gives
	k = u * (q2 - p2) (mod q). Since we want 0 <= k < q, we
	can thus find k as k = (u * (q2 - p2)) mod q.

	Once we have k, evaluating p2 + p * k is easy, and that
	gives us the result.

	In the detailed implementation, there is a temporary,
	temp, used to hold intermediate results, p2 is held in
	expout, and q2 is used as a temporary in the final step
	when it is no longer needed.  With that, you should be
	able to understand the code below.
*/
LOCALFUNC tMyErr mp_modexp_crt(mp_UNIT_p expout, mp_UNIT_p expin,
	mp_UNIT_p p, mp_UNIT_p q, mp_UNIT_p ep, mp_UNIT_p eq, mp_UNIT_p u)
{
	tMyErr err;
	mp_UNIT_t q2[MAX_UNIT_PRECISION];
	mp_UNIT_t temp[MAX_UNIT_PRECISION];

/*      First, compute p2 (physically held in M) */

/*      p2 = [ (expin mod p) ^ ep ] mod p               */

	if (kMyErr_noErr != (err =
		mp_mod(temp, expin, p)))
		/* temp = expin mod p  */
	{
		goto l_exit;
	}

	if (kMyErr_noErr != (err =
		mp_modexp(expout, temp, ep, p)))
	{
		mp_SetOne(expout);
		goto l_exit;
	}
/*      And the same thing for q2 */

/*      q2 = [ (expin mod q) ^ eq ] mod q               */

	if (kMyErr_noErr != (err =
		mp_mod(temp, expin, q)))
		/* temp = expin mod q  */
	{
		goto l_exit;
	}

	if (kMyErr_noErr != (err =
		mp_modexp(q2, temp, eq, q)))
	{
		mp_SetOne(expout);
		goto l_exit; /* error return */
	}
/*
	Now use the multiplicative inverse u to glue together the
	two halves.
*/
#if 0
/*
	This optimisation is useful if you commonly get small results,
	but for random results and large q, the odds of (1/q) of it
	being useful do not warrant the test.
*/
	if (! mp_eq(expout, q2)) {
#endif
		/* Find q2-p2 mod q */
		if (mp_sub(q2, expout)) {
			/* if the result went negative */
			mp_add(q2, q); /* add q to q2 */
		}

		/* expout = p2 + ( p * [(q2*u) mod q] ) */
		mp_mult(temp, q2, u); /* q2*u  */
		(void) mp_mod(q2, temp, q);  /* (q2*u) mod q */
		mp_mult(temp, p, q2); /* p * [(q2*u) mod q] */
		mp_add(expout, temp); /* expout = p2 + p * [...] */
#if 0
	}
#endif

	err = kMyErr_noErr;

l_exit:
	return ErrReportStack(err, "mp_modexp_crt");
}

LOCALPROC mp_inv(mp_UNIT_p x, mp_UNIT_p a, mp_UNIT_p n)
{
	mp_UNIT_t y[MAX_UNIT_PRECISION];
	mp_UNIT_t temp[MAX_UNIT_PRECISION];
	mp_UNIT_t g0[MAX_UNIT_PRECISION];
	mp_UNIT_t g1[MAX_UNIT_PRECISION];
	mp_UNIT_t g2[MAX_UNIT_PRECISION];
	mp_UNIT_t v0[MAX_UNIT_PRECISION];
	mp_UNIT_t v1[MAX_UNIT_PRECISION];
	mp_UNIT_t v2[MAX_UNIT_PRECISION];
/*
	mp_UNIT_t u0[MAX_UNIT_PRECISION];
	mp_UNIT_t u1[MAX_UNIT_PRECISION];
	mp_UNIT_t u2[MAX_UNIT_PRECISION];
*/
	mp_UNIT_p g_prev = g0;
	mp_UNIT_p g_cur = g1;
	mp_UNIT_p g_next = g2;
	mp_UNIT_p v_prev = v0;
	mp_UNIT_p v_cur = v1;
	mp_UNIT_p v_next = v2;
/*
	mp_UNIT_p u_prev = u0;
	mp_UNIT_p u_cur = u1;
	mp_UNIT_p u_next = u2;
*/
	ui4r modsig = mp_significance(n);

	mp_move(g_prev, n);
	mp_move(g_cur, a);
/*
	mp_SetOne(u_prev);
	mp_SetZero(g_cur);
*/
	mp_SetZero(v_prev);
	mp_SetOne(v_cur);

	{
		ui4r save_precision = mp_precision;
		mp_UNIT_p save_modulus = mp_modulus;

		mp_precision_set(modsig);
		mp_modulus = n;

		while (! mp_isZero(g_cur)) {
			/*
				we know that at this point,  g_cur = u_cur*n + v_cur*a
			*/
			mp_udiv(g_next, y, g_prev, g_cur);
			(void) mp_modmult(temp, y, v_cur);
			mp_move(v_next, v_prev);
			mp_modsub(v_next, temp);
/*
			mp_mult(temp, y, u_cur);
			mp_move(u_next, u_prev);
			mp_sub(u_next, temp);
*/
			mp_rotate3(&g_prev, &g_cur, &g_next);
			mp_rotate3(&v_prev, &v_cur, &v_next);
		}
		mp_move(x, v_prev);

#if DebugCheck
		(void) mp_modmult(y, x, a);
		if (! mp_isOne(y)) {
			dbglog_writeln("inv seems to be wrong");
		}
#endif

		mp_modulus = save_modulus;
		mp_precision_set(save_precision);

		mp_SetRestZero(x, modsig);
	}
}
