| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140 |
- /*Copyright (C) 2008-2009 Timothy B. Terriberry (tterribe@xiph.org)
- You can redistribute this library and/or modify it under the terms of the
- GNU Lesser General Public License as published by the Free Software
- Foundation; either version 2.1 of the License, or (at your option) any later
- version.*/
- //#include <stdlib.h>
- #include "util.h"
- /*Computes floor(sqrt(_val)) exactly.*/
- unsigned qr_isqrt(unsigned _val){
- unsigned g;
- unsigned b;
- int bshift;
- /*Uses the second method from
- http://www.azillionmonkeys.com/qed/sqroot.html
- The main idea is to search for the largest binary digit b such that
- (g+b)*(g+b) <= _val, and add it to the solution g.*/
- g=0;
- b=0x8000;
- for(bshift=16;bshift-->0;){
- unsigned t;
- t=(g<<1)+b<<bshift;
- if(t<=_val){
- g+=b;
- _val-=t;
- }
- b>>=1;
- }
- return g;
- }
- /*Computes sqrt(_x*_x+_y*_y) using CORDIC.
- This implementation is valid for all 32-bit inputs and returns a result
- accurate to about 27 bits of precision.
- It has been tested for all postiive 16-bit inputs, where it returns correctly
- rounded results in 99.998% of cases and the maximum error is
- 0.500137134862598032 (for _x=48140, _y=63018).
- Very nearly all results less than (1<<16) are correctly rounded.
- All Pythagorean triples with a hypotenuse of less than ((1<<27)-1) evaluate
- correctly, and the total bias over all Pythagorean triples is -0.04579, with
- a relative RMS error of 7.2864E-10 and a relative peak error of 7.4387E-9.*/
- unsigned qr_ihypot(int _x,int _y){
- unsigned x;
- unsigned y;
- int mask;
- int shift;
- int u;
- int v;
- int i;
- x=_x=abs(_x);
- y=_y=abs(_y);
- mask=-(x>y)&(_x^_y);
- x^=mask;
- y^=mask;
- _y^=mask;
- shift=31-qr_ilog(y);
- shift=QR_MAXI(shift,0);
- x=(unsigned)((x<<shift)*0x9B74EDAAULL>>32);
- _y=(int)((_y<<shift)*0x9B74EDA9LL>>32);
- u=x;
- mask=-(_y<0);
- x+=_y+mask^mask;
- _y-=u+mask^mask;
- u=x+1>>1;
- v=_y+1>>1;
- mask=-(_y<0);
- x+=v+mask^mask;
- _y-=u+mask^mask;
- for(i=1;i<16;i++){
- int r;
- u=x+1>>2;
- r=(1<<2*i)>>1;
- v=_y+r>>2*i;
- mask=-(_y<0);
- x+=v+mask^mask;
- _y=_y-(u+mask^mask)<<1;
- }
- return x+((1U<<shift)>>1)>>shift;
- }
- #if defined(__GNUC__) && defined(HAVE_FEATURES_H)
- # include <features.h>
- # if __GNUC_PREREQ(3,4)
- # include <limits.h>
- # if INT_MAX>=2147483647
- # define QR_CLZ0 sizeof(unsigned)*CHAR_BIT
- # define QR_CLZ(_x) (__builtin_clz(_x))
- # elif LONG_MAX>=2147483647L
- # define QR_CLZ0 sizeof(unsigned long)*CHAR_BIT
- # define QR_CLZ(_x) (__builtin_clzl(_x))
- # endif
- # endif
- #endif
- int qr_ilog(unsigned _v){
- #if defined(QR_CLZ)
- /*Note that __builtin_clz is not defined when _x==0, according to the gcc
- documentation (and that of the x86 BSR instruction that implements it), so
- we have to special-case it.*/
- return QR_CLZ0-QR_CLZ(_v)&-!!_v;
- #else
- int ret;
- int m;
- m=!!(_v&0xFFFF0000)<<4;
- _v>>=m;
- ret=m;
- m=!!(_v&0xFF00)<<3;
- _v>>=m;
- ret|=m;
- m=!!(_v&0xF0)<<2;
- _v>>=m;
- ret|=m;
- m=!!(_v&0xC)<<1;
- _v>>=m;
- ret|=m;
- ret|=!!(_v&0x2);
- return ret + !!_v;
- #endif
- }
- #if defined(QR_TEST_SQRT)
- #include <math.h>
- #include <stdio.h>
- /*Exhaustively test the integer square root function.*/
- int main(void){
- unsigned u;
- u=0;
- do{
- unsigned r;
- unsigned s;
- r=qr_isqrt(u);
- s=(int)sqrt(u);
- if(r!=s)printf("%u: %u!=%u\n",u,r,s);
- u++;
- }
- while(u);
- return 0;
- }
- #endif
|