![]() |
Object-oriented Scientific Computing Library: Version 0.910
|
00001 /* 00002 ------------------------------------------------------------------- 00003 00004 Copyright (C) 2006-2012, Andrew W. Steiner 00005 00006 This file is part of O2scl. 00007 00008 O2scl is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation; either version 3 of the License, or 00011 (at your option) any later version. 00012 00013 O2scl is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License 00019 along with O2scl. If not, see <http://www.gnu.org/licenses/>. 00020 00021 ------------------------------------------------------------------- 00022 */ 00023 /* 00024 * Copyright (C) 1996, 1997, 1998, 1999, 2000 Gerard Jungman 00025 * Copyright (C) 2001, 2007 Brian Gough 00026 * 00027 * This program is free software; you can redistribute it and/or modify 00028 * it under the terms of the GNU General Public License as published by 00029 * the Free Software Foundation; either version 3 of the License, or (at 00030 * your option) any later version. 00031 * 00032 * This program is distributed in the hope that it will be useful, but 00033 * WITHOUT ANY WARRANTY; without even the implied warranty of 00034 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00035 * General Public License for more details. 00036 * 00037 * You should have received a copy of the GNU General Public License 00038 * along with this program; if not, write to the Free Software 00039 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 00040 * 02110-1301, USA. 00041 */ 00042 /** \file cblas_base.h 00043 \brief O2scl basic linear algebra function templates 00044 00045 See \ref o2scl_cblas for more documentation on 00046 these functions. 00047 00048 \future Add float and complex versions. 00049 \future There are some Level-1 BLAS functions which are already 00050 present in vector.h. Should we move all of them to one place or 00051 the other? The ones in vector.h are generic in the sense that they 00052 can use doubles or floats, but the ones here can use either () or 00053 []. 00054 00055 */ 00056 00057 #ifdef DOXYGENP 00058 namespace o2scl_cblas { 00059 #endif 00060 00061 /// Matrix order, either column-major or row-major 00062 enum o2cblas_order {o2cblas_RowMajor=101, o2cblas_ColMajor=102}; 00063 00064 /// Transpose operations 00065 enum o2cblas_transpose {o2cblas_NoTrans=111, o2cblas_Trans=112, 00066 o2cblas_ConjTrans=113}; 00067 00068 /// Upper- or lower-triangular 00069 enum o2cblas_uplo {o2cblas_Upper=121, o2cblas_Lower=122}; 00070 00071 /// Unit or generic diagonal 00072 enum o2cblas_diag {o2cblas_NonUnit=131, o2cblas_Unit=132}; 00073 00074 /// Left or right sided operation 00075 enum o2cblas_side {o2cblas_Left=141, o2cblas_Right=142}; 00076 00077 /// \name Level-1 BLAS functions 00078 //@{ 00079 /** \brief Compute the absolute sum of vector elements 00080 00081 If \c alpha is zero, this function returns and performs 00082 no computations. 00083 */ 00084 template<class vec_t> double dasum(const size_t N, const vec_t &X) { 00085 double r=0.0; 00086 for(size_t i=0;i<N;i++) { 00087 r+=fabs(X[i]); 00088 } 00089 return r; 00090 } 00091 00092 /** \brief Compute \f$ y= \alpha x+y \f$ 00093 00094 If \c alpha is zero, this function returns and performs 00095 no computations. 00096 */ 00097 template<class vec_t, class vec2_t> 00098 void daxpy(const double alpha, const size_t N, const vec_t &X, 00099 vec2_t &Y) { 00100 00101 size_t i; 00102 00103 if (alpha == 0.0) { 00104 return; 00105 } 00106 00107 const size_t m=N % 4; 00108 00109 for (i=0;i<m;i++) { 00110 O2SCL_IX(Y,i)+=alpha*O2SCL_IX(X,i); 00111 } 00112 00113 for (i=m;i+3<N;i+=4) { 00114 O2SCL_IX(Y,i)+=alpha*O2SCL_IX(X,i); 00115 O2SCL_IX(Y,i+1)+=alpha*O2SCL_IX(X,i+1); 00116 O2SCL_IX(Y,i+2)+=alpha*O2SCL_IX(X,i+2); 00117 O2SCL_IX(Y,i+3)+=alpha*O2SCL_IX(X,i+3); 00118 } 00119 } 00120 00121 /// Compute \f$ r=x \cdot y \f$ 00122 template<class vec_t, class vec2_t> 00123 double ddot(const size_t N, const vec_t &X, const vec2_t &Y) { 00124 00125 double r=0.0; 00126 size_t i; 00127 00128 const size_t m=N % 4; 00129 00130 for (i=0;i<m;i++) { 00131 r+=O2SCL_IX(X,i)*O2SCL_IX(Y,i); 00132 } 00133 00134 for (i=m;i+3<N;i+=4) { 00135 r+=O2SCL_IX(X,i)*O2SCL_IX(Y,i); 00136 r+=O2SCL_IX(X,i+1)*O2SCL_IX(Y,i+1); 00137 r+=O2SCL_IX(X,i+2)*O2SCL_IX(Y,i+2); 00138 r+=O2SCL_IX(X,i+3)*O2SCL_IX(Y,i+3); 00139 } 00140 00141 return r; 00142 } 00143 00144 /** \brief Compute the norm of the vector \c X 00145 00146 \note The suffix "2" on the function name indicates that this 00147 computes the "2-norm", not that the norm is squared. 00148 00149 If \c N is less than or equal to zero, this function returns 00150 zero without calling the error handler. 00151 00152 This function works only with vectors which hold \c double. For 00153 the norm of a general floating point vector, see \ref 00154 vector_norm(). 00155 */ 00156 template<class vec_t> double dnrm2(const size_t N, const vec_t &X) { 00157 00158 double scale=0.0; 00159 double ssq=1.0; 00160 size_t i; 00161 00162 if (N == 0) { 00163 return 0; 00164 } else if (N == 1) { 00165 return fabs(O2SCL_IX(X,0)); 00166 } 00167 00168 for (i=0;i<N;i++) { 00169 const double x=O2SCL_IX(X,i); 00170 00171 if (x != 0.0) { 00172 const double ax=fabs(x); 00173 00174 if (scale<ax) { 00175 ssq=1.0+ssq*(scale/ax)*(scale/ax); 00176 scale=ax; 00177 } else { 00178 ssq+=(ax/scale)*(ax/scale); 00179 } 00180 } 00181 00182 } 00183 00184 return scale*sqrt(ssq); 00185 } 00186 00187 /** \brief Compute \f$ x=\alpha x \f$ 00188 */ 00189 template<class vec_t> 00190 void dscal(const double alpha, const size_t N, vec_t &X) { 00191 00192 size_t i; 00193 const size_t m=N % 4; 00194 00195 for (i=0;i<m;i++) { 00196 O2SCL_IX(X,i)*=alpha; 00197 } 00198 00199 for (i=m;i+3<N;i+=4) { 00200 O2SCL_IX(X,i)*=alpha; 00201 O2SCL_IX(X,i+1)*=alpha; 00202 O2SCL_IX(X,i+2)*=alpha; 00203 O2SCL_IX(X,i+3)*=alpha; 00204 } 00205 } 00206 //@} 00207 00208 /// \name Level-2 BLAS functions 00209 //@{ 00210 /** \brief Compute \f$ y=\alpha \left[\mathrm{op}(A)\right] x+ 00211 \beta y \f$. 00212 00213 If \c M or \c N is zero, or if \c alpha is zero and \c beta is 00214 one, this function performs no calculations and returns without 00215 calling the error handler. 00216 */ 00217 template<class mat_t, class vec_t> 00218 void dgemv(const enum o2cblas_order order, 00219 const enum o2cblas_transpose TransA, const size_t M, 00220 const size_t N, const double alpha, const mat_t &A, 00221 const vec_t &X, const double beta, vec_t &Y) { 00222 00223 size_t i, j; 00224 size_t lenX, lenY; 00225 00226 // If conjugate transpose is requested, just assume plain transpose 00227 const int Trans=(TransA != o2cblas_ConjTrans) ? TransA : o2cblas_Trans; 00228 00229 if (M == 0 || N == 0) { 00230 return; 00231 } 00232 00233 if (alpha == 0.0 && beta == 1.0) { 00234 return; 00235 } 00236 00237 if (Trans == o2cblas_NoTrans) { 00238 lenX=N; 00239 lenY=M; 00240 } else { 00241 lenX=M; 00242 lenY=N; 00243 } 00244 00245 /* form y := beta*y */ 00246 if (beta == 0.0) { 00247 size_t iy=0; 00248 for (i=0;i<lenY;i++) { 00249 O2SCL_IX(Y,iy)=0.0; 00250 iy++; 00251 } 00252 } else if (beta != 1.0) { 00253 size_t iy=0; 00254 for (i=0;i<lenY;i++) { 00255 O2SCL_IX(Y,iy) *= beta; 00256 iy++; 00257 } 00258 } 00259 00260 if (alpha == 0.0) { 00261 return; 00262 } 00263 00264 if ((order == o2cblas_RowMajor && Trans == o2cblas_NoTrans) || 00265 (order == o2cblas_ColMajor && Trans == o2cblas_Trans)) { 00266 00267 /* form y := alpha*A*x+y */ 00268 size_t iy=0; 00269 for (i=0;i<lenY;i++) { 00270 double temp=0.0; 00271 size_t ix=0; 00272 for (j=0;j<lenX;j++) { 00273 temp+=O2SCL_IX(X,ix)*O2SCL_IX2(A,i,j); 00274 ix++; 00275 } 00276 O2SCL_IX(Y,iy)+=alpha*temp; 00277 iy++; 00278 } 00279 00280 } else if ((order == o2cblas_RowMajor && Trans == o2cblas_Trans) || 00281 (order == o2cblas_ColMajor && Trans == o2cblas_NoTrans)) { 00282 00283 /* form y := alpha*A'*x+y */ 00284 size_t ix=0; 00285 for (j=0;j<lenX;j++) { 00286 const double temp=alpha*O2SCL_IX(X,ix); 00287 if (temp != 0.0) { 00288 size_t iy=0; 00289 for (i=0;i<lenY;i++) { 00290 O2SCL_IX(Y,iy)+=temp*O2SCL_IX2(A,j,i); 00291 iy++; 00292 } 00293 } 00294 ix++; 00295 } 00296 00297 } else { 00298 O2SCL_ERR("Unrecognized operation in dgemv().",o2scl::gsl_einval); 00299 } 00300 return; 00301 } 00302 00303 /** \brief Compute \f$ x=\mathrm{op} (A)^{-1} x \f$ 00304 00305 If \c N is zero, this function does nothing and returns zero. 00306 */ 00307 template<class mat_t, class vec_t> 00308 void dtrsv(const enum o2cblas_order order, 00309 const enum o2cblas_uplo Uplo, 00310 const enum o2cblas_transpose TransA, 00311 const enum o2cblas_diag Diag, 00312 const size_t M, const size_t N, const mat_t &A, vec_t &X) { 00313 00314 const int nonunit=(Diag == o2cblas_NonUnit); 00315 int ix, jx; 00316 int i, j; 00317 const int Trans=(TransA != o2cblas_ConjTrans) ? TransA : o2cblas_Trans; 00318 00319 if (N == 0) { 00320 return; 00321 } 00322 00323 /* form x := inv( A )*x */ 00324 00325 if ((order == o2cblas_RowMajor && Trans == o2cblas_NoTrans && 00326 Uplo == o2cblas_Upper) || 00327 (order == o2cblas_ColMajor && Trans == o2cblas_Trans && 00328 Uplo == o2cblas_Lower)) { 00329 00330 /* backsubstitution */ 00331 00332 // O2scl: Note that subtraction of 1 from size_t N here is ok 00333 // since we already handled the N=0 case above 00334 ix=(N-1); 00335 if (nonunit) { 00336 O2SCL_IX(X,ix)=O2SCL_IX(X,ix)/O2SCL_IX2(A,N-1,N-1); 00337 } 00338 ix--; 00339 00340 for (i=N-1;i>0 && i--;) { 00341 double tmp=O2SCL_IX(X,ix); 00342 jx=ix +1; 00343 for (j=i+1;j<((int)N);j++) { 00344 const double Aij=O2SCL_IX2(A,i,j); 00345 tmp-=Aij*O2SCL_IX(X,jx); 00346 jx++; 00347 } 00348 if (nonunit) { 00349 O2SCL_IX(X,ix)=tmp/O2SCL_IX2(A,i,i); 00350 } else { 00351 O2SCL_IX(X,ix)=tmp; 00352 } 00353 ix--; 00354 } 00355 00356 } else if ((order == o2cblas_RowMajor && Trans == o2cblas_NoTrans && 00357 Uplo == o2cblas_Lower) || 00358 (order == o2cblas_ColMajor && Trans == o2cblas_Trans && 00359 Uplo == o2cblas_Upper)) { 00360 00361 /* forward substitution */ 00362 ix=0; 00363 if (nonunit) { 00364 O2SCL_IX(X,ix)=O2SCL_IX(X,ix)/O2SCL_IX2(A,0,0); 00365 } 00366 ix++; 00367 for (i=1;i<((int)N);i++) { 00368 double tmp=O2SCL_IX(X,ix); 00369 jx=0; 00370 for (j=0;j<i;j++) { 00371 const double Aij=O2SCL_IX2(A,i,j); 00372 tmp-=Aij*O2SCL_IX(X,jx); 00373 jx++; 00374 } 00375 if (nonunit) { 00376 O2SCL_IX(X,ix)=tmp/O2SCL_IX2(A,i,i); 00377 } else { 00378 O2SCL_IX(X,ix)=tmp; 00379 } 00380 ix++; 00381 } 00382 00383 } else if ((order == o2cblas_RowMajor && Trans == o2cblas_Trans && 00384 Uplo == o2cblas_Upper) || 00385 (order == o2cblas_ColMajor && Trans == o2cblas_NoTrans && 00386 Uplo == o2cblas_Lower)) { 00387 00388 /* form x := inv( A' )*x */ 00389 00390 /* forward substitution */ 00391 ix=0; 00392 if (nonunit) { 00393 O2SCL_IX(X,ix)=O2SCL_IX(X,ix)/O2SCL_IX2(A,0,0); 00394 } 00395 ix++; 00396 for (i=1;i<((int)N);i++) { 00397 double tmp=O2SCL_IX(X,ix); 00398 jx=0; 00399 for (j=0;j<i;j++) { 00400 const double Aji=O2SCL_IX2(A,j,i); 00401 tmp-=Aji*O2SCL_IX(X,jx); 00402 jx++; 00403 } 00404 if (nonunit) { 00405 O2SCL_IX(X,ix)=tmp/O2SCL_IX2(A,i,i); 00406 } else { 00407 O2SCL_IX(X,ix)=tmp; 00408 } 00409 ix++; 00410 } 00411 00412 } else if ((order == o2cblas_RowMajor && Trans == o2cblas_Trans && 00413 Uplo == o2cblas_Lower) || 00414 (order == o2cblas_ColMajor && Trans == o2cblas_NoTrans && 00415 Uplo == o2cblas_Upper)) { 00416 00417 /* backsubstitution */ 00418 // O2scl: Note that subtraction of 1 from size_t N here is ok 00419 // since we already handled the N=0 case above 00420 ix=(N-1); 00421 if (nonunit) { 00422 O2SCL_IX(X,ix)=O2SCL_IX(X,ix)/O2SCL_IX2(A,N-1,N-1); 00423 } 00424 ix--; 00425 for (i=N-1;i>0 && i--;) { 00426 double tmp=O2SCL_IX(X,ix); 00427 jx=ix+1; 00428 for (j=i+1;j<((int)N);j++) { 00429 const double Aji=O2SCL_IX2(A,j,i); 00430 tmp-=Aji*O2SCL_IX(X,jx); 00431 jx++; 00432 } 00433 if (nonunit) { 00434 O2SCL_IX(X,ix)=tmp/O2SCL_IX2(A,i,i); 00435 } else { 00436 O2SCL_IX(X,ix)=tmp; 00437 } 00438 ix--; 00439 } 00440 00441 } else { 00442 O2SCL_ERR("Unrecognized operation in dtrsv().",o2scl::gsl_einval); 00443 } 00444 return; 00445 } 00446 00447 /** \brief Compute \f$ x=op(A) x \f$ for the triangular matrix \c A 00448 */ 00449 template<class mat_t, class vec_t> 00450 void dtrmv(const enum o2cblas_order Order, 00451 const enum o2cblas_uplo Uplo, 00452 const enum o2cblas_transpose TransA, 00453 const enum o2cblas_diag Diag, const size_t N, 00454 const mat_t &A, vec_t &x) { 00455 00456 int i, j; 00457 00458 const int nonunit=(Diag == o2cblas_NonUnit); 00459 const int Trans=(TransA != o2cblas_ConjTrans) ? TransA : o2cblas_Trans; 00460 00461 if ((Order == o2cblas_RowMajor && Trans == o2cblas_NoTrans && 00462 Uplo == o2cblas_Upper) || 00463 (Order == o2cblas_ColMajor && Trans == o2cblas_Trans && 00464 Uplo == o2cblas_Lower)) { 00465 00466 /* form x := A*x */ 00467 00468 for (i=0;i<N;i++) { 00469 double temp=0.0; 00470 const size_t j_min=i+1; 00471 const size_t j_max=N; 00472 size_t jx=j_min; 00473 for (j=j_min;j<j_max;j++) { 00474 temp+=O2SCL_IX(x,jx)*O2SCL_IX2(A,i,j); 00475 jx++; 00476 } 00477 if (nonunit) { 00478 O2SCL_IX(x,i)=temp+O2SCL_IX(x,i)*O2SCL_IX2(A,i,i); 00479 } else { 00480 O2SCL_IX(x,i)+=temp; 00481 } 00482 } 00483 00484 } else if ((Order == o2cblas_RowMajor && Trans == o2cblas_NoTrans && 00485 Uplo == o2cblas_Lower) || 00486 (Order == o2cblas_ColMajor && Trans == o2cblas_Trans && 00487 Uplo == o2cblas_Upper)) { 00488 00489 // O2scl: Note that subtraction of 1 from size_t N here is ok 00490 // since we already handled the N=0 case above 00491 size_t ix=N-1; 00492 for (i=N;i>0 && i--;) { 00493 double temp=0.0; 00494 const size_t j_min=0; 00495 const size_t j_max=i; 00496 size_t jx=j_min; 00497 for (j=j_min;j<j_max;j++) { 00498 temp+=O2SCL_IX(x,jx)*O2SCL_IX2(A,i,j); 00499 jx++; 00500 } 00501 if (nonunit) { 00502 O2SCL_IX(x,ix)=temp+O2SCL_IX(x,ix)*O2SCL_IX2(A,i,i); 00503 } else { 00504 O2SCL_IX(x,ix)+=temp; 00505 } 00506 ix--; 00507 } 00508 00509 } else if ((Order == o2cblas_RowMajor && Trans == o2cblas_Trans && 00510 Uplo == o2cblas_Upper) || 00511 (Order == o2cblas_ColMajor && Trans == o2cblas_NoTrans && 00512 Uplo == o2cblas_Lower)) { 00513 00514 /* form x := A'*x */ 00515 size_t ix=N-1; 00516 for (i=N;i>0 && i--;) { 00517 double temp=0.0; 00518 const size_t j_min=0; 00519 const size_t j_max=i; 00520 size_t jx=j_min; 00521 for (j=j_min;j<j_max;j++) { 00522 temp+=O2SCL_IX(x,jx)*O2SCL_IX2(A,j,i); 00523 jx++; 00524 } 00525 if (nonunit) { 00526 O2SCL_IX(x,ix)=temp+O2SCL_IX(x,ix)*O2SCL_IX2(A,i,i); 00527 } else { 00528 O2SCL_IX(x,ix)+=temp; 00529 } 00530 ix--; 00531 } 00532 00533 } else if ((Order == o2cblas_RowMajor && Trans == o2cblas_Trans && 00534 Uplo == o2cblas_Lower) || 00535 (Order == o2cblas_ColMajor && Trans == o2cblas_NoTrans && 00536 Uplo == o2cblas_Upper)) { 00537 00538 for (i=0;i<N;i++) { 00539 double temp=0.0; 00540 const size_t j_min=i+1; 00541 const size_t j_max=N; 00542 size_t jx=i+1; 00543 for (j=j_min;j<j_max;j++) { 00544 temp+=O2SCL_IX(x,jx)*O2SCL_IX2(A,j,i); 00545 jx++; 00546 } 00547 if (nonunit) { 00548 O2SCL_IX(x,i)=temp+O2SCL_IX(x,i)*O2SCL_IX2(A,i,i); 00549 } else { 00550 O2SCL_IX(x,i)+=temp; 00551 } 00552 } 00553 00554 } else { 00555 O2SCL_ERR("Unrecognized operation in dtrmv().", 00556 o2scl::gsl_einval); 00557 } 00558 00559 return; 00560 } 00561 //@} 00562 00563 00564 /// \name Level-3 BLAS functions 00565 //@{ 00566 /** \brief Compute \f$ y=\alpha \mathrm{op}(A) \mathrm{op}(B) + 00567 \beta C \f$ 00568 00569 \comment 00570 If \c Order is \c RowMajor, then the matrix \c A 00571 has \c M rows and \c K columns, the matrix \c B has 00572 \c K rows and \c N columns, and the 00573 matrix \c C has \c M rows and \c N columns. 00574 00575 Is this right? 00576 \endcommment 00577 00578 This function works for all values of \c Order, \c TransA, and 00579 \c TransB. 00580 */ 00581 template<class mat_t> 00582 void dgemm(const enum o2cblas_order Order, 00583 const enum o2cblas_transpose TransA, 00584 const enum o2cblas_transpose TransB, const size_t M, 00585 const size_t N, const size_t K, const double alpha, 00586 const mat_t &A, const mat_t &B, const double beta, mat_t &C) { 00587 00588 size_t i, j, k; 00589 size_t n1, n2; 00590 int TransF, TransG; 00591 00592 if (alpha == 0.0 && beta == 1.0) { 00593 return; 00594 } 00595 00596 /* 00597 This is a little more complicated than the original in GSL, 00598 which assigned the matrices A and B to variables *F and *G which 00599 then allowed some code duplication. We can't really do that 00600 here, since we don't have that kind of type info on A and B, so 00601 we just handle the two cases separately. 00602 */ 00603 00604 if (Order == o2cblas_RowMajor) { 00605 00606 n1=M; 00607 n2=N; 00608 00609 /* form y := beta*y */ 00610 if (beta == 0.0) { 00611 for (i=0;i<n1;i++) { 00612 for (j=0;j<n2;j++) { 00613 O2SCL_IX2(C,i,j)=0.0; 00614 } 00615 } 00616 } else if (beta != 1.0) { 00617 for (i=0;i<n1;i++) { 00618 for (j=0;j<n2;j++) { 00619 O2SCL_IX2(C,i,j)*=beta; 00620 } 00621 } 00622 } 00623 00624 if (alpha == 0.0) { 00625 return; 00626 } 00627 00628 TransF=(TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA; 00629 TransG=(TransB == o2cblas_ConjTrans) ? o2cblas_Trans : TransB; 00630 00631 if (TransF == o2cblas_NoTrans && TransG == o2cblas_NoTrans) { 00632 00633 /* form C := alpha*A*B+C */ 00634 00635 for (k=0;k<K;k++) { 00636 for (i=0;i<n1;i++) { 00637 const double temp=alpha*O2SCL_IX2(A,i,k); 00638 if (temp != 0.0) { 00639 for (j=0;j<n2;j++) { 00640 O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(B,k,j); 00641 } 00642 } 00643 } 00644 } 00645 00646 } else if (TransF == o2cblas_NoTrans && TransG == o2cblas_Trans) { 00647 00648 /* form C := alpha*A*B'+C */ 00649 00650 for (i=0;i<n1;i++) { 00651 for (j=0;j<n2;j++) { 00652 double temp=0.0; 00653 for (k=0;k<K;k++) { 00654 temp+=O2SCL_IX2(A,i,k)*O2SCL_IX2(B,j,k); 00655 } 00656 O2SCL_IX2(C,i,j)+=alpha*temp; 00657 } 00658 } 00659 00660 } else if (TransF == o2cblas_Trans && TransG == o2cblas_NoTrans) { 00661 00662 for (k=0;k<K;k++) { 00663 for (i=0;i<n1;i++) { 00664 const double temp=alpha*O2SCL_IX2(A,k,i); 00665 if (temp != 0.0) { 00666 for (j=0;j<n2;j++) { 00667 O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(B,k,j); 00668 } 00669 } 00670 } 00671 } 00672 00673 } else if (TransF == o2cblas_Trans && TransG == o2cblas_Trans) { 00674 00675 for (i=0;i<n1;i++) { 00676 for (j=0;j<n2;j++) { 00677 double temp=0.0; 00678 for (k=0;k<K;k++) { 00679 temp+=O2SCL_IX2(A,k,i)*O2SCL_IX2(B,j,k); 00680 } 00681 O2SCL_IX2(C,i,j)+=alpha*temp; 00682 } 00683 } 00684 00685 } else { 00686 O2SCL_ERR("Unrecognized operation in dgemm().",o2scl::gsl_einval); 00687 } 00688 00689 } else { 00690 00691 // Column-major case 00692 00693 n1=N; 00694 n2=M; 00695 00696 /* form y := beta*y */ 00697 if (beta == 0.0) { 00698 for (i=0;i<n1;i++) { 00699 for (j=0;j<n2;j++) { 00700 O2SCL_IX2(C,i,j)=0.0; 00701 } 00702 } 00703 } else if (beta != 1.0) { 00704 for (i=0;i<n1;i++) { 00705 for (j=0;j<n2;j++) { 00706 O2SCL_IX2(C,i,j)*=beta; 00707 } 00708 } 00709 } 00710 00711 if (alpha == 0.0) { 00712 return; 00713 } 00714 00715 TransF=(TransB == o2cblas_ConjTrans) ? o2cblas_Trans : TransB; 00716 TransG=(TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA; 00717 00718 if (TransF == o2cblas_NoTrans && TransG == o2cblas_NoTrans) { 00719 00720 /* form C := alpha*A*B+C */ 00721 00722 for (k=0;k<K;k++) { 00723 for (i=0;i<n1;i++) { 00724 const double temp=alpha*O2SCL_IX2(B,i,k); 00725 if (temp != 0.0) { 00726 for (j=0;j<n2;j++) { 00727 O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(A,k,j); 00728 } 00729 } 00730 } 00731 } 00732 00733 } else if (TransF == o2cblas_NoTrans && TransG == o2cblas_Trans) { 00734 00735 /* form C := alpha*A*B'+C */ 00736 00737 for (i=0;i<n1;i++) { 00738 for (j=0;j<n2;j++) { 00739 double temp=0.0; 00740 for (k=0;k<K;k++) { 00741 temp+=O2SCL_IX2(B,i,k)*O2SCL_IX2(A,j,k); 00742 } 00743 O2SCL_IX2(C,i,j)+=alpha*temp; 00744 } 00745 } 00746 00747 } else if (TransF == o2cblas_Trans && TransG == o2cblas_NoTrans) { 00748 00749 for (k=0;k<K;k++) { 00750 for (i=0;i<n1;i++) { 00751 const double temp=alpha*O2SCL_IX2(B,k,i); 00752 if (temp != 0.0) { 00753 for (j=0;j<n2;j++) { 00754 O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(A,k,j); 00755 } 00756 } 00757 } 00758 } 00759 00760 } else if (TransF == o2cblas_Trans && TransG == o2cblas_Trans) { 00761 00762 for (i=0;i<n1;i++) { 00763 for (j=0;j<n2;j++) { 00764 double temp=0.0; 00765 for (k=0;k<K;k++) { 00766 temp+=O2SCL_IX2(B,k,i)*O2SCL_IX2(A,j,k); 00767 } 00768 O2SCL_IX2(C,i,j)+=alpha*temp; 00769 } 00770 } 00771 00772 } else { 00773 O2SCL_ERR("Unrecognized operation in dgemm().",o2scl::gsl_einval); 00774 } 00775 } 00776 00777 return; 00778 } 00779 //@} 00780 00781 /// \name Helper BLAS functions - Subvectors 00782 //@{ 00783 /** \brief Compute \f$ y=\alpha x+y \f$ beginning with index \c ie 00784 and ending with index \c N-1 00785 00786 This function is used in \ref householder_hv(). 00787 00788 If \c alpha is identical with zero or <tt>N==ie</tt>, this 00789 function will perform no calculations and return without calling 00790 the error handler. 00791 00792 If <tt>ie</tt> is greater than <tt>N-1</tt> then the error 00793 handler will be called if \c O2SCL_NO_RANGE_CHECK is not 00794 defined. 00795 */ 00796 template<class vec_t, class vec2_t> 00797 void daxpy_subvec(const double alpha, const size_t N, const vec_t &X, 00798 vec2_t &Y, const size_t ie) { 00799 00800 size_t i; 00801 00802 if (alpha == 0.0) return; 00803 #if O2SCL_NO_RANGE_CHECK 00804 #else 00805 if (ie+1>N) { 00806 O2SCL_ERR("Invalid index in daxpy_subvec().",o2scl::gsl_einval); 00807 } 00808 #endif 00809 00810 const size_t m=(N-ie) % 4; 00811 00812 for (i=ie;i<ie+m;i++) { 00813 O2SCL_IX(Y,i)+=alpha*O2SCL_IX(X,i); 00814 } 00815 00816 for (;i+3<N;i+=4) { 00817 O2SCL_IX(Y,i)+=alpha*O2SCL_IX(X,i); 00818 O2SCL_IX(Y,i+1)+=alpha*O2SCL_IX(X,i+1); 00819 O2SCL_IX(Y,i+2)+=alpha*O2SCL_IX(X,i+2); 00820 O2SCL_IX(Y,i+3)+=alpha*O2SCL_IX(X,i+3); 00821 } 00822 } 00823 00824 /** \brief Compute \f$ r=x \cdot y \f$ beginning with index \c ie and 00825 ending with index \c N-1 00826 00827 This function is used in \ref householder_hv(). 00828 00829 If <tt>ie</tt> is greater than <tt>N-1</tt> then the error 00830 handler will be called if \c O2SCL_NO_RANGE_CHECK is not 00831 defined. 00832 */ 00833 template<class vec_t, class vec2_t> 00834 double ddot_subvec(const size_t N, const vec_t &X, const vec2_t &Y, 00835 const size_t ie) { 00836 double r=0.0; 00837 size_t i; 00838 00839 #if O2SCL_NO_RANGE_CHECK 00840 #else 00841 if (ie+1>N) { 00842 O2SCL_ERR("Invalid index in ddot_subvec().",o2scl::gsl_einval); 00843 } 00844 #endif 00845 00846 const size_t m=(N-ie) % 4; 00847 00848 for (i=ie;i<ie+m;i++) { 00849 r+=O2SCL_IX(X,i)*O2SCL_IX(Y,i); 00850 } 00851 00852 for (;i+3<N;i+=4) { 00853 r+=O2SCL_IX(X,i)*O2SCL_IX(Y,i); 00854 r+=O2SCL_IX(X,i+1)*O2SCL_IX(Y,i+1); 00855 r+=O2SCL_IX(X,i+2)*O2SCL_IX(Y,i+2); 00856 r+=O2SCL_IX(X,i+3)*O2SCL_IX(Y,i+3); 00857 } 00858 00859 return r; 00860 } 00861 00862 /** \brief Compute the norm of the vector \c X beginning with 00863 index \c ie and ending with index \c N-1 00864 00865 Used in \ref householder_transform(). 00866 00867 \note The suffix "2" on the function name indicates that this 00868 computes the "2-norm", not that the norm is squared. 00869 00870 If <tt>ie</tt> is greater than <tt>N-1</tt> then the error 00871 handler will be called if \c O2SCL_NO_RANGE_CHECK is not 00872 defined. 00873 */ 00874 template<class vec_t> 00875 double dnrm2_subvec(const size_t N, const vec_t &X, const size_t ie) { 00876 00877 double scale=0.0; 00878 double ssq=1.0; 00879 00880 #if O2SCL_NO_RANGE_CHECK 00881 #else 00882 if (ie+1>N) { 00883 O2SCL_ERR("Invalid index in dnrm2_subvec().",o2scl::gsl_einval); 00884 } 00885 #endif 00886 00887 if (ie+1==N) { 00888 return fabs(O2SCL_IX(X,ie)); 00889 } 00890 00891 for (size_t i=ie;i<N;i++) { 00892 const double x=O2SCL_IX(X,i); 00893 00894 if (x != 0.0) { 00895 const double ax=fabs(x); 00896 00897 if (scale<ax) { 00898 ssq=1.0+ssq*(scale/ax)*(scale/ax); 00899 scale=ax; 00900 } else { 00901 ssq+=(ax/scale)*(ax/scale); 00902 } 00903 } 00904 00905 } 00906 00907 return scale*sqrt(ssq); 00908 } 00909 00910 /** \brief Compute \f$ x=\alpha x \f$ beginning with index \c ie and 00911 ending with index \c N-1 00912 00913 This function is used in \ref householder_transform(). 00914 00915 If <tt>ie</tt> is greater than <tt>N-1</tt> then the error 00916 handler will be called if \c O2SCL_NO_RANGE_CHECK is not 00917 defined. 00918 */ 00919 template<class vec_t> 00920 void dscal_subvec(const double alpha, const size_t N, vec_t &X, 00921 const size_t ie) { 00922 00923 #if O2SCL_NO_RANGE_CHECK 00924 #else 00925 if (ie+1>N) { 00926 O2SCL_ERR("Invalid index in dscal_subvec().",o2scl::gsl_einval); 00927 } 00928 #endif 00929 00930 size_t i; 00931 const size_t m=(N-ie) % 4; 00932 00933 for (i=ie;i<ie+m;i++) { 00934 O2SCL_IX(X,i)*=alpha; 00935 } 00936 00937 for (;i+3<N;i+=4) { 00938 O2SCL_IX(X,i)*=alpha; 00939 O2SCL_IX(X,i+1)*=alpha; 00940 O2SCL_IX(X,i+2)*=alpha; 00941 O2SCL_IX(X,i+3)*=alpha; 00942 } 00943 } 00944 //@} 00945 00946 /// \name Helper BLAS functions - Subcolums of a matrix 00947 //@{ 00948 /** \brief Compute \f$ y=\alpha x+y \f$ for a subcolumn of a matrix 00949 00950 Given the matrix \c X, define the vector \c x as the column with 00951 index \c ic. This function computes \f$ y=\alpha x+y \f$ 00952 for elements in the vectors \c x and \c y from row \c ir to 00953 row \c <tt>M-1</tt> (inclusive). All other elements in \c 00954 x and \c y are not referenced. 00955 00956 Used in householder_hv_sub(). 00957 */ 00958 template<class mat_t, class vec_t> 00959 void daxpy_subcol(const double alpha, const size_t M, const mat_t &X, 00960 const size_t ir, const size_t ic, vec_t &y) { 00961 00962 #if O2SCL_NO_RANGE_CHECK 00963 #else 00964 if (ir+1>M) { 00965 O2SCL_ERR("Invalid index in daxpy_subcol().",o2scl::gsl_einval); 00966 } 00967 #endif 00968 00969 if (alpha == 0.0) { 00970 return; 00971 } 00972 00973 size_t i; 00974 const size_t m=(M-ir) % 4; 00975 00976 for (i=ir;i<m+ir;i++) { 00977 O2SCL_IX(y,i)+=alpha*O2SCL_IX2(X,i,ic); 00978 } 00979 00980 for (;i+3<M;i+=4) { 00981 O2SCL_IX(y,i)+=alpha*O2SCL_IX2(X,i,ic); 00982 O2SCL_IX(y,i+1)+=alpha*O2SCL_IX2(X,i+1,ic); 00983 O2SCL_IX(y,i+2)+=alpha*O2SCL_IX2(X,i+2,ic); 00984 O2SCL_IX(y,i+3)+=alpha*O2SCL_IX2(X,i+3,ic); 00985 } 00986 00987 return; 00988 } 00989 00990 /** \brief Compute \f$ r=x \cdot y \f$ for a subcolumn of a matrix 00991 00992 Given the matrix \c X, define the vector \c x as the column with 00993 index \c ic. This function computes \f$ r=x \cdot y \f$ 00994 for elements in the vectors \c x and \c y from row \c ir to 00995 row \c <tt>M-1</tt> (inclusive). All other elements in \c 00996 x and \c y are not referenced. 00997 00998 Used in householder_hv_sub(). 00999 */ 01000 template<class mat_t, class vec_t> 01001 double ddot_subcol(const size_t M, const mat_t &X, const size_t ir, 01002 const size_t ic, const vec_t &y) { 01003 #if O2SCL_NO_RANGE_CHECK 01004 #else 01005 if (ir+1>M) { 01006 O2SCL_ERR("Invalid index in ddot_subcol().",o2scl::gsl_einval); 01007 } 01008 #endif 01009 01010 double r=0.0; 01011 size_t i; 01012 const size_t m=(M-ir) % 4; 01013 01014 for (i=ir;i<m+ir;i++) { 01015 r+=O2SCL_IX2(X,i,ic)*O2SCL_IX(y,i); 01016 } 01017 01018 for (;i+3<M;i+=4) { 01019 r+=O2SCL_IX2(X,i,ic)*O2SCL_IX(y,i); 01020 r+=O2SCL_IX2(X,i+1,ic)*O2SCL_IX(y,i+1); 01021 r+=O2SCL_IX2(X,i+2,ic)*O2SCL_IX(y,i+2); 01022 r+=O2SCL_IX2(X,i+3,ic)*O2SCL_IX(y,i+3); 01023 } 01024 01025 return r; 01026 } 01027 01028 /** \brief Compute the norm of a subcolumn of a matrix 01029 01030 Given the matrix \c A, define the vector \c x as the column with 01031 index \c ic. This function computes the norm of the part of \c x 01032 from row \c ir to row \c <tt>M-1</tt> (inclusive). All other 01033 elements in \c x are not referenced. 01034 01035 if \c M is zero, then this function silently returns zero 01036 without calling the error handler. 01037 01038 This function is used in householder_transform_subcol(). 01039 01040 \note The suffix "2" on the function name indicates that 01041 this computes the "2-norm", not that the norm is squared. 01042 */ 01043 template<class mat_t> 01044 double dnrm2_subcol(const mat_t &A, const size_t ir, const size_t ic, 01045 const size_t M) { 01046 01047 double scale=0.0; 01048 double ssq=1.0; 01049 size_t i; 01050 01051 #if O2SCL_NO_RANGE_CHECK 01052 #else 01053 if (ir+1>M) { 01054 O2SCL_ERR("Invalid index in dnrm2_subcol().",o2scl::gsl_einval); 01055 } 01056 #endif 01057 01058 // Handle the one-element vector case separately 01059 if (ir+1 == M) { 01060 return fabs(O2SCL_IX2(A,ir,ic)); 01061 } 01062 01063 for (i=ir;i<M;i++) { 01064 const double x=O2SCL_IX2(A,i,ic); 01065 01066 if (x != 0.0) { 01067 const double ax=fabs(x); 01068 01069 if (scale<ax) { 01070 ssq=1.0+ssq*(scale/ax)*(scale/ax); 01071 scale=ax; 01072 } else { 01073 ssq+=(ax/scale)*(ax/scale); 01074 } 01075 } 01076 01077 } 01078 01079 return scale*sqrt(ssq); 01080 } 01081 01082 /** \brief Compute \f$ x=\alpha x \f$ for a subcolumn of a matrix 01083 01084 Given the matrix \c A, define the vector \c x as the column with 01085 index \c ic. This function computes \f$ x= \alpha x \f$ for 01086 elements in the vectors \c x from row \c ir to row \c 01087 <tt>M-1</tt> (inclusive). All other elements in \c x are not 01088 referenced. 01089 01090 Used in householder_transform_subcol(). 01091 */ 01092 template<class mat_t> 01093 void dscal_subcol(mat_t &A, const size_t ir, const size_t ic, 01094 const size_t M, const double alpha) { 01095 01096 #if O2SCL_NO_RANGE_CHECK 01097 #else 01098 if (ir+1>M) { 01099 O2SCL_ERR("Invalid index in dscal_subcol().",o2scl::gsl_einval); 01100 } 01101 #endif 01102 01103 size_t i; 01104 const size_t m=(M-ir) % 4; 01105 01106 for (i=ir;i<m+ir;i++) { 01107 O2SCL_IX2(A,i,ic)*=alpha; 01108 } 01109 01110 for (;i+3<M;i+=4) { 01111 O2SCL_IX2(A,i,ic)*=alpha; 01112 O2SCL_IX2(A,i+1,ic)*=alpha; 01113 O2SCL_IX2(A,i+2,ic)*=alpha; 01114 O2SCL_IX2(A,i+3,ic)*=alpha; 01115 } 01116 01117 return; 01118 } 01119 //@} 01120 01121 /// \name Helper BLAS functions - Subrows of a matrix 01122 //@{ 01123 /** \brief Compute \f$ y=\alpha x+y \f$ for a subrow of a matrix 01124 01125 Given the matrix \c X, define the vector \c x as the row with 01126 index \c ir. This function computes \f$ y=\alpha x+y \f$ for 01127 elements in the vectors \c x from column \c ic to column \c 01128 <tt>N-1</tt> (inclusive). All other elements in \c x and 01129 \c y are not referenced. 01130 01131 If <tt>ic</tt> is greater than <tt>N-1</tt> then the error 01132 handler will be called if \c O2SCL_NO_RANGE_CHECK is not 01133 defined. 01134 01135 Used in householder_hv_sub(). 01136 */ 01137 template<class mat_t, class vec_t> 01138 void daxpy_subrow(const double alpha, const size_t N, const mat_t &X, 01139 const size_t ir, const size_t ic, vec_t &Y) { 01140 01141 #if O2SCL_NO_RANGE_CHECK 01142 #else 01143 if (ic+1>N) { 01144 O2SCL_ERR("Invalid index in daxpy_subrow().",o2scl::gsl_einval); 01145 } 01146 #endif 01147 01148 if (alpha == 0.0) { 01149 return; 01150 } 01151 01152 size_t i; 01153 const size_t m=(N-ic) % 4; 01154 01155 for (i=ic;i<m+ic;i++) { 01156 O2SCL_IX(Y,i)+=alpha*O2SCL_IX2(X,ir,i); 01157 } 01158 01159 for (;i+3<N;i+=4) { 01160 O2SCL_IX(Y,i)+=alpha*O2SCL_IX2(X,ir,i); 01161 O2SCL_IX(Y,i+1)+=alpha*O2SCL_IX2(X,ir,i+1); 01162 O2SCL_IX(Y,i+2)+=alpha*O2SCL_IX2(X,ir,i+2); 01163 O2SCL_IX(Y,i+3)+=alpha*O2SCL_IX2(X,ir,i+3); 01164 } 01165 01166 return; 01167 } 01168 01169 /** \brief Compute \f$ r=x \cdot y \f$ for a subrow of a matrix 01170 01171 Given the matrix \c X, define the vector \c x as the row with 01172 index \c ir. This function computes \f$ r=x \cdot y \f$ for 01173 elements in the vectors \c x from column \c ic to column \c 01174 <tt>N-1</tt> (inclusive). All other elements in \c x and 01175 \c y are not referenced. 01176 01177 If <tt>ic</tt> is greater than <tt>N-1</tt> then the error 01178 handler will be called if \c O2SCL_NO_RANGE_CHECK is not 01179 defined. 01180 01181 Used in householder_hv_sub(). 01182 */ 01183 template<class mat_t, class vec_t> 01184 double ddot_subrow(const size_t N, const mat_t &X, const size_t ir, 01185 const size_t ic, const vec_t &Y) { 01186 01187 #if O2SCL_NO_RANGE_CHECK 01188 #else 01189 if (ic+1>N) { 01190 O2SCL_ERR("Invalid index in ddot_subrow().",o2scl::gsl_einval); 01191 } 01192 #endif 01193 01194 double r=0.0; 01195 size_t i; 01196 const size_t m=(N-ic) % 4; 01197 01198 for (i=ic;i<m+ic;i++) { 01199 r+=O2SCL_IX2(X,ir,i)*O2SCL_IX(Y,i); 01200 } 01201 01202 for (;i+3<N;i+=4) { 01203 r+=O2SCL_IX2(X,ir,i)*O2SCL_IX(Y,i); 01204 r+=O2SCL_IX2(X,ir,i+1)*O2SCL_IX(Y,i+1); 01205 r+=O2SCL_IX2(X,ir,i+2)*O2SCL_IX(Y,i+2); 01206 r+=O2SCL_IX2(X,ir,i+3)*O2SCL_IX(Y,i+3); 01207 } 01208 01209 return r; 01210 } 01211 01212 /** \brief Compute the norm of a subrow of a matrix 01213 01214 Given the matrix \c X, define the vector \c x as the row with 01215 index \c ir. This function computes the norm of the part of \c x 01216 from column \c ic to column \c <tt>N-1</tt> (inclusive). All 01217 other elements in \c x are not referenced. 01218 01219 \note The suffix "2" on the function name indicates that this 01220 computes the "2-norm", not that the norm is squared. 01221 */ 01222 template<class mat_t> 01223 double dnrm2_subrow(const mat_t &M, const size_t ir, const size_t ic, 01224 const size_t N) { 01225 01226 double scale=0.0; 01227 double ssq=1.0; 01228 size_t i; 01229 01230 if (ic+1==N) { 01231 return fabs(O2SCL_IX2(M,ir,ic)); 01232 } 01233 01234 for (i=ic;i<N;i++) { 01235 const double x=O2SCL_IX2(M,ir,i); 01236 01237 if (x != 0.0) { 01238 const double ax=fabs(x); 01239 01240 if (scale<ax) { 01241 ssq=1.0+ssq*(scale/ax)*(scale/ax); 01242 scale=ax; 01243 } else { 01244 ssq+=(ax/scale)*(ax/scale); 01245 } 01246 } 01247 01248 } 01249 01250 return scale*sqrt(ssq); 01251 } 01252 01253 /** \brief Compute \f$ x=\alpha x \f$ for a subrow of a matrix 01254 01255 Given the matrix \c A, define the vector \c x as the row with 01256 index \c ir. This function computes \f$ x = \alpha x \f$ for 01257 elements in the vectors \c x from column \c ic to column \c 01258 <tt>N-1</tt> (inclusive). All other elements in \c x and 01259 \c y are not referenced. 01260 01261 If <tt>ic</tt> is greater than <tt>N-1</tt> then the error 01262 handler will be called if \c O2SCL_NO_RANGE_CHECK is not 01263 defined. 01264 */ 01265 template<class mat_t> 01266 void dscal_subrow(mat_t &A, const size_t ir, const size_t ic, 01267 const size_t N, const double alpha) { 01268 01269 #if O2SCL_NO_RANGE_CHECK 01270 #else 01271 if (ic+1>N) { 01272 O2SCL_ERR("Invalid index in dscal_subrow().",o2scl::gsl_einval); 01273 } 01274 #endif 01275 01276 size_t i; 01277 const size_t m=(N-ic) % 4; 01278 01279 for (i=ic;i<m+ic;i++) { 01280 O2SCL_IX2(A,ir,i)*=alpha; 01281 } 01282 01283 for (;i+3<N;i+=4) { 01284 O2SCL_IX2(A,ir,i)*=alpha; 01285 O2SCL_IX2(A,ir,i+1)*=alpha; 01286 O2SCL_IX2(A,ir,i+2)*=alpha; 01287 O2SCL_IX2(A,ir,i+3)*=alpha; 01288 } 01289 01290 return; 01291 } 01292 //@} 01293 01294 #ifdef DOXYGENP 01295 } 01296 #endif 01297
Documentation generated with Doxygen. Provided under the GNU Free Documentation License (see License Information).