00001 /* 00002 ------------------------------------------------------------------- 00003 00004 Copyright (C) 2006, 2007, 2008, 2009, Andrew W. Steiner 00005 00006 This file is part of O2scl. 00007 00008 O2scl is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation; either version 3 of the License, or 00011 (at your option) any later version. 00012 00013 O2scl is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License 00019 along with O2scl. If not, see <http://www.gnu.org/licenses/>. 00020 00021 ------------------------------------------------------------------- 00022 */ 00023 /* 00024 * Copyright (C) 1996, 1997, 1998, 1999, 2000 Gerard Jungman 00025 * Copyright (C) 2001, 2007 Brian Gough 00026 * 00027 * This program is free software; you can redistribute it and/or modify 00028 * it under the terms of the GNU General Public License as published by 00029 * the Free Software Foundation; either version 3 of the License, or (at 00030 * your option) any later version. 00031 * 00032 * This program is distributed in the hope that it will be useful, but 00033 * WITHOUT ANY WARRANTY; without even the implied warranty of 00034 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00035 * General Public License for more details. 00036 * 00037 * You should have received a copy of the GNU General Public License 00038 * along with this program; if not, write to the Free Software 00039 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 00040 * 02110-1301, USA. 00041 */ 00042 /** \file cblas_base.h 00043 \brief O2scl basic linear algebra function templates 00044 00045 \future Convert to size_t and add float and complex versions 00046 */ 00047 00048 #ifdef DOXYGENP 00049 namespace o2scl_cblas { 00050 #endif 00051 00052 /// Matrix order, either column-major or row-major 00053 enum O2CBLAS_ORDER {O2cblasRowMajor=101, O2cblasColMajor=102}; 00054 00055 /// Transpose operations 00056 enum O2CBLAS_TRANSPOSE {O2cblasNoTrans=111, O2cblasTrans=112, 00057 O2cblasConjTrans=113}; 00058 00059 /// Upper- or lower-triangular 00060 enum O2CBLAS_UPLO {O2cblasUpper=121, O2cblasLower=122}; 00061 00062 /// Unit or generic diagonal 00063 enum O2CBLAS_DIAG {O2cblasNonUnit=131, O2cblasUnit=132}; 00064 00065 /// Left or right sided operation 00066 enum O2CBLAS_SIDE {O2cblasLeft=141, O2cblasRight=142}; 00067 00068 /// \name Standard BLAS functions 00069 //@{ 00070 /** \brief Compute \f$ y= \alpha x + y \f$ 00071 00072 If \c alpha is zero, this function returns and performs 00073 no computations. 00074 */ 00075 template<class vec_t, class vec2_t> 00076 void daxpy(const int N, const double alpha, const vec_t &X, 00077 vec2_t &Y) { 00078 00079 int i; 00080 00081 if (alpha == 0.0) { 00082 return; 00083 } 00084 00085 const int m = N % 4; 00086 00087 for (i = 0; i < m; i++) { 00088 O2SCL_IX(Y,i) += alpha * O2SCL_IX(X,i); 00089 } 00090 00091 for (i = m; i + 3 < N; i += 4) { 00092 O2SCL_IX(Y,i) += alpha * O2SCL_IX(X,i); 00093 O2SCL_IX(Y,i+1) += alpha * O2SCL_IX(X,i+1); 00094 O2SCL_IX(Y,i+2) += alpha * O2SCL_IX(X,i+2); 00095 O2SCL_IX(Y,i+3) += alpha * O2SCL_IX(X,i+3); 00096 } 00097 } 00098 00099 /// Compute \f$ r = x \cdot y \f$ 00100 template<class vec_t, class vec2_t> double 00101 ddot(const int N, const vec_t &X, const vec2_t &Y) { 00102 double r = 0.0; 00103 int i; 00104 00105 const int m = N % 4; 00106 00107 for (i = 0; i < m; i++) { 00108 r += O2SCL_IX(X,i) * O2SCL_IX(Y,i); 00109 } 00110 00111 for (i = m; i + 3 < N; i += 4) { 00112 r += O2SCL_IX(X,i) * O2SCL_IX(Y,i); 00113 r += O2SCL_IX(X,i+1) * O2SCL_IX(Y,i+1); 00114 r += O2SCL_IX(X,i+2) * O2SCL_IX(Y,i+2); 00115 r += O2SCL_IX(X,i+3) * O2SCL_IX(Y,i+3); 00116 } 00117 00118 return r; 00119 } 00120 00121 /// Compute \f$ x = \alpha x \f$ 00122 template<class vec_t> void 00123 dscal(const int N, const double alpha, vec_t &X) { 00124 int i; 00125 const int m = N % 4; 00126 00127 for (i = 0; i < m; i++) { 00128 O2SCL_IX(X,i)*=alpha; 00129 } 00130 00131 for (i = m; i + 3 < N; i += 4) { 00132 O2SCL_IX(X,i)*=alpha; 00133 O2SCL_IX(X,i+1)*=alpha; 00134 O2SCL_IX(X,i+2)*=alpha; 00135 O2SCL_IX(X,i+3)*=alpha; 00136 } 00137 } 00138 00139 /// Compute the norm of the vector \c X 00140 template<class vec_t> double dnrm2(const int N, const vec_t &X) { 00141 00142 double scale = 0.0; 00143 double ssq = 1.0; 00144 int i; 00145 00146 if (N <= 0) { 00147 return 0; 00148 } else if (N == 1) { 00149 return fabs(O2SCL_IX(X,0)); 00150 } 00151 00152 for (i = 0; i < N; i++) { 00153 const double x = O2SCL_IX(X,i); 00154 00155 if (x != 0.0) { 00156 const double ax = fabs(x); 00157 00158 if (scale < ax) { 00159 ssq = 1.0 + ssq * (scale / ax) * (scale / ax); 00160 scale = ax; 00161 } else { 00162 ssq += (ax / scale) * (ax / scale); 00163 } 00164 } 00165 00166 } 00167 00168 return scale * sqrt(ssq); 00169 } 00170 00171 /// Compute \f$ y = \alpha \mathrm{op}(A) x + \beta y \f$. 00172 template<class mat_t, class vec_t> 00173 int dgemv(const enum O2CBLAS_ORDER order, 00174 const enum O2CBLAS_TRANSPOSE TransA, 00175 const int M, const int N, const double alpha, const mat_t &A, 00176 const vec_t &X, const double beta, vec_t &Y) { 00177 00178 int i, j; 00179 int lenX, lenY; 00180 00181 const int Trans = (TransA != O2cblasConjTrans) ? TransA : O2cblasTrans; 00182 00183 if (M == 0 || N == 0) { 00184 return 0; 00185 } 00186 00187 if (alpha == 0.0 && beta == 1.0) { 00188 return 0; 00189 } 00190 00191 if (Trans == O2cblasNoTrans) { 00192 lenX = N; 00193 lenY = M; 00194 } else { 00195 lenX = M; 00196 lenY = N; 00197 } 00198 00199 /* form y := beta*y */ 00200 if (beta == 0.0) { 00201 int iy = 0; 00202 for (i = 0; i < lenY; i++) { 00203 O2SCL_IX(Y,iy) = 0.0; 00204 iy++; 00205 } 00206 } else if (beta != 1.0) { 00207 int iy = 0; 00208 for (i = 0; i < lenY; i++) { 00209 O2SCL_IX(Y,iy) *= beta; 00210 iy++; 00211 } 00212 } 00213 00214 if (alpha == 0.0) { 00215 return 0; 00216 } 00217 00218 if ((order == O2cblasRowMajor && Trans == O2cblasNoTrans) 00219 || (order == O2cblasColMajor && Trans == O2cblasTrans)) { 00220 00221 /* form y := alpha*A*x + y */ 00222 int iy = 0; 00223 for (i = 0; i < lenY; i++) { 00224 double temp = 0.0; 00225 int ix = 0; 00226 for (j = 0; j < lenX; j++) { 00227 temp += O2SCL_IX(X,ix) * O2SCL_IX2(A,i,j); 00228 ix++; 00229 } 00230 O2SCL_IX(Y,iy) += alpha * temp; 00231 iy++; 00232 } 00233 00234 } else if ((order == O2cblasRowMajor && Trans == O2cblasTrans) 00235 || (order == O2cblasColMajor && Trans == O2cblasNoTrans)) { 00236 00237 /* form y := alpha*A'*x + y */ 00238 int ix = 0; 00239 for (j = 0; j < lenX; j++) { 00240 const double temp = alpha * O2SCL_IX(X,ix); 00241 if (temp != 0.0) { 00242 int iy = 0; 00243 for (i = 0; i < lenY; i++) { 00244 O2SCL_IX(Y,iy) += temp * O2SCL_IX2(A,j,i); 00245 iy++; 00246 } 00247 } 00248 ix++; 00249 } 00250 00251 } else { 00252 O2SCL_ERR_RET("Unrecognized operation in dgemv().",o2scl::gsl_einval); 00253 } 00254 return 0; 00255 } 00256 00257 /** \brief Compute \f$ x = \mathrm{op} (A)^{-1} x \f$ 00258 00259 If \c N is zero, this function does nothing and returns zero. 00260 */ 00261 template<class mat_t, class vec_t> 00262 int dtrsv(const enum O2CBLAS_ORDER order, 00263 const enum O2CBLAS_UPLO Uplo, 00264 const enum O2CBLAS_TRANSPOSE TransA, 00265 const enum O2CBLAS_DIAG Diag, 00266 const int M, const int N, const mat_t &A, vec_t &X) { 00267 00268 const int nonunit = (Diag == O2cblasNonUnit); 00269 int ix, jx; 00270 int i, j; 00271 const int Trans = (TransA != O2cblasConjTrans) ? TransA : O2cblasTrans; 00272 00273 if (N == 0) { 00274 return 0; 00275 } 00276 00277 /* form x := inv( A )*x */ 00278 00279 if ((order == O2cblasRowMajor && Trans == O2cblasNoTrans && 00280 Uplo == O2cblasUpper) 00281 || (order == O2cblasColMajor && Trans == O2cblasTrans && 00282 Uplo == O2cblasLower)) { 00283 00284 /* backsubstitution */ 00285 ix = (N - 1); 00286 if (nonunit) { 00287 O2SCL_IX(X,ix) = O2SCL_IX(X,ix) / O2SCL_IX2(A,N-1,N-1); 00288 } 00289 ix--; 00290 00291 for (i = N - 1; i > 0 && i--;) { 00292 double tmp = O2SCL_IX(X,ix); 00293 jx = ix +1; 00294 for (j = i + 1; j < N; j++) { 00295 const double Aij = O2SCL_IX2(A,i,j); 00296 tmp -= Aij * O2SCL_IX(X,jx); 00297 jx++; 00298 } 00299 if (nonunit) { 00300 O2SCL_IX(X,ix) = tmp / O2SCL_IX2(A,i,i); 00301 } else { 00302 O2SCL_IX(X,ix) = tmp; 00303 } 00304 ix--; 00305 } 00306 00307 } else if ((order == O2cblasRowMajor && Trans == O2cblasNoTrans && 00308 Uplo == O2cblasLower) || 00309 (order == O2cblasColMajor && Trans == O2cblasTrans && 00310 Uplo == O2cblasUpper)) { 00311 00312 /* forward substitution */ 00313 ix = 0; 00314 if (nonunit) { 00315 O2SCL_IX(X,ix) = O2SCL_IX(X,ix) / O2SCL_IX2(A,0,0); 00316 } 00317 ix++; 00318 for (i = 1; i < N; i++) { 00319 double tmp = O2SCL_IX(X,ix); 00320 jx = 0; 00321 for (j = 0; j < i; j++) { 00322 const double Aij = O2SCL_IX2(A,i,j); 00323 tmp -= Aij * O2SCL_IX(X,jx); 00324 jx++; 00325 } 00326 if (nonunit) { 00327 O2SCL_IX(X,ix) = tmp / O2SCL_IX2(A,i,i); 00328 } else { 00329 O2SCL_IX(X,ix) = tmp; 00330 } 00331 ix++; 00332 } 00333 00334 } else if ((order == O2cblasRowMajor && Trans == O2cblasTrans && 00335 Uplo == O2cblasUpper) 00336 || (order == O2cblasColMajor && Trans == O2cblasNoTrans && 00337 Uplo == O2cblasLower)) { 00338 00339 /* form x := inv( A' )*x */ 00340 00341 /* forward substitution */ 00342 ix = 0; 00343 if (nonunit) { 00344 O2SCL_IX(X,ix) = O2SCL_IX(X,ix) / O2SCL_IX2(A,0,0); 00345 } 00346 ix++; 00347 for (i = 1; i < N; i++) { 00348 double tmp = O2SCL_IX(X,ix); 00349 jx = 0; 00350 for (j = 0; j < i; j++) { 00351 const double Aji = O2SCL_IX2(A,j,i); 00352 tmp -= Aji * O2SCL_IX(X,jx); 00353 jx++; 00354 } 00355 if (nonunit) { 00356 O2SCL_IX(X,ix) = tmp / O2SCL_IX2(A,i,i); 00357 } else { 00358 O2SCL_IX(X,ix) = tmp; 00359 } 00360 ix++; 00361 } 00362 00363 } else if ((order == O2cblasRowMajor && Trans == O2cblasTrans && 00364 Uplo == O2cblasLower) 00365 || (order == O2cblasColMajor && Trans == O2cblasNoTrans && 00366 Uplo == O2cblasUpper)) { 00367 00368 /* backsubstitution */ 00369 ix = (N - 1); 00370 if (nonunit) { 00371 O2SCL_IX(X,ix) = O2SCL_IX(X,ix) / O2SCL_IX2(A,N-1,N-1); 00372 } 00373 ix--; 00374 for (i = N - 1; i > 0 && i--;) { 00375 double tmp = O2SCL_IX(X,ix); 00376 jx = ix +1; 00377 for (j = i + 1; j < N; j++) { 00378 const double Aji = O2SCL_IX2(A,j,i); 00379 tmp -= Aji * O2SCL_IX(X,jx); 00380 jx++; 00381 } 00382 if (nonunit) { 00383 O2SCL_IX(X,ix) = tmp / O2SCL_IX2(A,i,i); 00384 } else { 00385 O2SCL_IX(X,ix) = tmp; 00386 } 00387 ix--; 00388 } 00389 00390 } else { 00391 O2SCL_ERR_RET("Unrecognized operation in dtrsv().",o2scl::gsl_einval); 00392 } 00393 return 0; 00394 } 00395 00396 //@} 00397 00398 /// \name Helper BLAS functions 00399 //@{ 00400 /** 00401 \brief Compute \f$ x = \alpha x \f$ beginning with index \c ie and 00402 ending with index \c N-1 00403 00404 Used in \ref householder_hv(). 00405 */ 00406 template<class vec_t, class vec2_t> 00407 void daxpy_subvec(const int N, const double alpha, const vec_t &X, 00408 vec2_t &Y, const int ie) { 00409 00410 int i; 00411 00412 if (alpha == 0.0) { 00413 return; 00414 } 00415 00416 const int m = (N-ie) % 4; 00417 00418 for (i = ie; i < ie+m; i++) { 00419 O2SCL_IX(Y,i) += alpha * O2SCL_IX(X,i); 00420 } 00421 00422 for (i = ie+m; i + 3 < N; i += 4) { 00423 O2SCL_IX(Y,i) += alpha * O2SCL_IX(X,i); 00424 O2SCL_IX(Y,i+1) += alpha * O2SCL_IX(X,i+1); 00425 O2SCL_IX(Y,i+2) += alpha * O2SCL_IX(X,i+2); 00426 O2SCL_IX(Y,i+3) += alpha * O2SCL_IX(X,i+3); 00427 } 00428 } 00429 00430 /** 00431 \brief Compute \f$ r = x \cdot y \f$ beginning with index \c ie and 00432 ending with index \c N-1 00433 00434 Used in \ref householder_hv(). 00435 */ 00436 template<class vec_t, class vec2_t> 00437 double ddot_subvec(const int N, const vec_t &X, const vec2_t &Y, 00438 const int ie) { 00439 double r = 0.0; 00440 int i; 00441 00442 const int m = (N-ie) % 4; 00443 00444 for (i = ie; i < ie+m; i++) { 00445 r += O2SCL_IX(X,i) * O2SCL_IX(Y,i); 00446 } 00447 00448 for (i = ie+m; i + 3 < N; i += 4) { 00449 r += O2SCL_IX(X,i) * O2SCL_IX(Y,i); 00450 r += O2SCL_IX(X,i+1) * O2SCL_IX(Y,i+1); 00451 r += O2SCL_IX(X,i+2) * O2SCL_IX(Y,i+2); 00452 r += O2SCL_IX(X,i+3) * O2SCL_IX(Y,i+3); 00453 } 00454 00455 return r; 00456 } 00457 00458 /** 00459 \brief Compute \f$ x = \alpha x \f$ beginning with index \c ie and 00460 ending with index \c N-1 00461 00462 Used in \ref householder_transform(). 00463 */ 00464 template<class vec_t> 00465 void dscal_subvec(const int N, const double alpha, vec_t &X, 00466 const int ie) { 00467 int i; 00468 const int m = (N-ie) % 4; 00469 00470 for (i = ie; i < ie+m; i++) { 00471 O2SCL_IX(X,i)*=alpha; 00472 } 00473 00474 for (i = ie+m; i + 3 < N; i += 4) { 00475 O2SCL_IX(X,i)*=alpha; 00476 O2SCL_IX(X,i+1)*=alpha; 00477 O2SCL_IX(X,i+2)*=alpha; 00478 O2SCL_IX(X,i+3)*=alpha; 00479 } 00480 } 00481 00482 /** 00483 \brief Compute the squared norm of the vector \c X beginning with 00484 index \c ie and ending with index \c N-1 00485 00486 Used in \ref householder_transform(). 00487 */ 00488 template<class vec_t> 00489 double dnrm2_subvec(const int N, const vec_t &X, const int ie) { 00490 00491 double scale = 0.0; 00492 double ssq = 1.0; 00493 int i; 00494 00495 if (N <= 0) { 00496 return 0; 00497 } else if (N == 1) { 00498 return fabs(O2SCL_IX(X,ie)); 00499 } 00500 00501 for (i = ie; i < N; i++) { 00502 const double x = O2SCL_IX(X,i); 00503 00504 if (x != 0.0) { 00505 const double ax = fabs(x); 00506 00507 if (scale < ax) { 00508 ssq = 1.0 + ssq * (scale / ax) * (scale / ax); 00509 scale = ax; 00510 } else { 00511 ssq += (ax / scale) * (ax / scale); 00512 } 00513 } 00514 00515 } 00516 00517 return scale * sqrt(ssq); 00518 } 00519 00520 /** 00521 \brief Compute the squared norm of the last \c N rows 00522 of a column of a matrix 00523 00524 Given matrix \c M, this computes the norm of the last \c N rows 00525 of the column with index \c ic, beginning with the element with 00526 index \c ir. If the matrix \c M has \c r rows, and \c c columns, 00527 then the parameter \c N should be \c r-ir. 00528 00529 Used in householder_transform_subcol(). 00530 00531 \future Could be made more efficient with a matrix-col like 00532 object 00533 */ 00534 template<class mat_t> 00535 double dnrm2_subcol(const mat_t &M, const size_t ir, const size_t ic, 00536 const size_t N) { 00537 00538 double scale = 0.0; 00539 double ssq = 1.0; 00540 size_t i; 00541 00542 if (N <= 0) { 00543 return 0; 00544 } else if (N == 1) { 00545 return fabs(O2SCL_IX2(M,ir,ic)); 00546 } 00547 00548 size_t tot=ir+N; 00549 for (i = ir; i < tot; i++) { 00550 const double x = O2SCL_IX2(M,i,ic); 00551 00552 if (x != 0.0) { 00553 const double ax = fabs(x); 00554 00555 if (scale < ax) { 00556 ssq = 1.0 + ssq * (scale / ax) * (scale / ax); 00557 scale = ax; 00558 } else { 00559 ssq += (ax / scale) * (ax / scale); 00560 } 00561 } 00562 00563 } 00564 00565 return scale * sqrt(ssq); 00566 } 00567 00568 /// Desc 00569 template<class mat_t, class mat_subcol_t> 00570 double dnrm2_subcol2(const mat_t &M, const size_t ir, const size_t ic, 00571 const size_t N) { 00572 00573 double scale = 0.0; 00574 double ssq = 1.0; 00575 size_t i; 00576 00577 if (N <= 0) { 00578 return 0; 00579 } else if (N == 1) { 00580 return fabs(O2SCL_IX2(M,ir,ic)); 00581 } 00582 00583 mat_subcol_t matcol(M,ic); 00584 00585 size_t tot=ir+N; 00586 for (i = ir; i < tot; i++) { 00587 const double x = O2SCL_IX(matcol,i); 00588 00589 if (x != 0.0) { 00590 const double ax = fabs(x); 00591 00592 if (scale < ax) { 00593 ssq = 1.0 + ssq * (scale / ax) * (scale / ax); 00594 scale = ax; 00595 } else { 00596 ssq += (ax / scale) * (ax / scale); 00597 } 00598 } 00599 00600 } 00601 00602 return scale * sqrt(ssq); 00603 } 00604 00605 /** 00606 \brief Compute \f$ x = \alpha x \f$ 00607 00608 Used in householder_transform_subcol(). 00609 */ 00610 template<class mat_t> 00611 void dscal_subcol(mat_t &A, const size_t ir, const size_t ic, 00612 const size_t n, const double alpha) { 00613 00614 size_t tot=ir+n; 00615 size_t i; 00616 const size_t m = (tot-ir) % 4; 00617 00618 for (i = ir; i < m+ir; i++) { 00619 O2SCL_IX2(A,i,ic)*=alpha; 00620 } 00621 00622 for (i = m+ir; i + 3 < tot; i += 4) { 00623 O2SCL_IX2(A,i,ic)*=alpha; 00624 O2SCL_IX2(A,i+1,ic)*=alpha; 00625 O2SCL_IX2(A,i+2,ic)*=alpha; 00626 O2SCL_IX2(A,i+3,ic)*=alpha; 00627 } 00628 00629 } 00630 00631 /** 00632 \brief Compute \f$ x = \alpha x \f$ for \ref householder_hv_sub() 00633 00634 Used in householder_hv_sub(). 00635 00636 \future Implement explicit loop unrolling 00637 */ 00638 template<class mat_t, class vec_t> 00639 void daxpy_hv_sub(const int N, const double alpha, const mat_t &X, 00640 vec_t &Y, const int ie) { 00641 00642 if (alpha == 0.0) { 00643 return; 00644 } 00645 00646 for(int i=ie+1;i<N;i++) { 00647 O2SCL_IX(Y,i)+=alpha*O2SCL_IX2(X,i,ie); 00648 } 00649 00650 /* 00651 const int m = N % 4; 00652 00653 for (i = 0; i < m; i++) { 00654 O2SCL_IX(X,i)*=alpha; 00655 } 00656 00657 for (i = m; i + 3 < N; i += 4) { 00658 O2SCL_IX(X,i)*=alpha; 00659 O2SCL_IX(X,i+1)*=alpha; 00660 O2SCL_IX(X,i+2)*=alpha; 00661 O2SCL_IX(X,i+3)*=alpha; 00662 } 00663 */ 00664 } 00665 00666 /** 00667 \brief Compute \f$ r = x \cdot y \f$ for \ref householder_hv_sub() 00668 00669 Used in householder_hv_sub(). 00670 00671 \future Implement explicit loop unrolling 00672 */ 00673 template<class mat_t, class vec_t> 00674 double ddot_hv_sub(const int N, const mat_t &X, const vec_t &Y, 00675 const int ie) { 00676 double r = 0.0; 00677 int i; 00678 00679 for(i=ie+1;i<N;i++) { 00680 r+=O2SCL_IX2(X,i,ie)*O2SCL_IX(Y,i); 00681 } 00682 return r; 00683 } 00684 00685 //@} 00686 00687 /// Compute \f$ y = \alpha \mathrm{op}(A) x + \beta y \f$. 00688 template<class mat_t> 00689 int dgemm(const enum O2CBLAS_ORDER Order, 00690 const enum O2CBLAS_TRANSPOSE TransA, 00691 const enum O2CBLAS_TRANSPOSE TransB, const int M, const int N, 00692 const int K, const double alpha, const mat_t &A, 00693 const mat_t &B, const double beta, mat_t &C) { 00694 00695 int i, j, k; 00696 int n1, n2; 00697 int TransF, TransG; 00698 00699 if (alpha == 0.0 && beta == 1.0) { 00700 return 0; 00701 } 00702 00703 /* 00704 This is a little more complicated than the original, which 00705 assigned the matrices A and B to variables *F and *G which then 00706 allowed some code duplication. We can't really do that here, 00707 since we don't have that kind of type info on A and B, so we 00708 just handle the two cases separately. 00709 */ 00710 00711 if (Order == O2cblasRowMajor) { 00712 00713 n1 = M; 00714 n2 = N; 00715 00716 /* form y := beta*y */ 00717 if (beta == 0.0) { 00718 for (i = 0; i < n1; i++) { 00719 for (j = 0; j < n2; j++) { 00720 O2SCL_IX2(C,i,j)=0.0; 00721 } 00722 } 00723 } else if (beta != 1.0) { 00724 for (i = 0; i < n1; i++) { 00725 for (j = 0; j < n2; j++) { 00726 O2SCL_IX2(C,i,j)*=beta; 00727 } 00728 } 00729 } 00730 00731 if (alpha == 0.0) { 00732 return 0; 00733 } 00734 00735 TransF = (TransA == O2cblasConjTrans) ? O2cblasTrans : TransA; 00736 TransG = (TransB == O2cblasConjTrans) ? O2cblasTrans : TransB; 00737 00738 if (TransF == O2cblasNoTrans && TransG == O2cblasNoTrans) { 00739 00740 /* form C := alpha*A*B + C */ 00741 00742 for (k = 0; k < K; k++) { 00743 for (i = 0; i < n1; i++) { 00744 const double temp = alpha * O2SCL_IX2(A,i,k); 00745 if (temp != 0.0) { 00746 for (j = 0; j < n2; j++) { 00747 O2SCL_IX2(C,i,j) += temp * O2SCL_IX2(B,k,j); 00748 } 00749 } 00750 } 00751 } 00752 00753 } else if (TransF == O2cblasNoTrans && TransG == O2cblasTrans) { 00754 00755 /* form C := alpha*A*B' + C */ 00756 00757 for (i = 0; i < n1; i++) { 00758 for (j = 0; j < n2; j++) { 00759 double temp = 0.0; 00760 for (k = 0; k < K; k++) { 00761 temp += O2SCL_IX2(A,i,k) * O2SCL_IX2(B,j,k); 00762 } 00763 O2SCL_IX2(C,i,j) += alpha * temp; 00764 } 00765 } 00766 00767 } else if (TransF == O2cblasTrans && TransG == O2cblasNoTrans) { 00768 00769 for (k = 0; k < K; k++) { 00770 for (i = 0; i < n1; i++) { 00771 const double temp = alpha * O2SCL_IX2(A,k,i); 00772 if (temp != 0.0) { 00773 for (j = 0; j < n2; j++) { 00774 O2SCL_IX2(C,i,j) += temp * O2SCL_IX2(B,k,j); 00775 } 00776 } 00777 } 00778 } 00779 00780 } else if (TransF == O2cblasTrans && TransG == O2cblasTrans) { 00781 00782 for (i = 0; i < n1; i++) { 00783 for (j = 0; j < n2; j++) { 00784 double temp = 0.0; 00785 for (k = 0; k < K; k++) { 00786 temp += O2SCL_IX2(A,k,i) * O2SCL_IX2(B,j,k); 00787 } 00788 O2SCL_IX2(C,i,j) += alpha * temp; 00789 } 00790 } 00791 00792 } else { 00793 O2SCL_ERR_RET("Unrecognized operation in dgemm().",o2scl::gsl_einval); 00794 } 00795 00796 } else { 00797 00798 n1 = N; 00799 n2 = M; 00800 00801 /* form y := beta*y */ 00802 if (beta == 0.0) { 00803 for (i = 0; i < n1; i++) { 00804 for (j = 0; j < n2; j++) { 00805 O2SCL_IX2(C,i,j)=0.0; 00806 } 00807 } 00808 } else if (beta != 1.0) { 00809 for (i = 0; i < n1; i++) { 00810 for (j = 0; j < n2; j++) { 00811 O2SCL_IX2(C,i,j)*=beta; 00812 } 00813 } 00814 } 00815 00816 if (alpha == 0.0) { 00817 return 0; 00818 } 00819 00820 TransF = (TransB == O2cblasConjTrans) ? O2cblasTrans : TransB; 00821 TransG = (TransA == O2cblasConjTrans) ? O2cblasTrans : TransA; 00822 00823 if (TransF == O2cblasNoTrans && TransG == O2cblasNoTrans) { 00824 00825 /* form C := alpha*A*B + C */ 00826 00827 for (k = 0; k < K; k++) { 00828 for (i = 0; i < n1; i++) { 00829 const double temp = alpha * O2SCL_IX2(B,i,k); 00830 if (temp != 0.0) { 00831 for (j = 0; j < n2; j++) { 00832 O2SCL_IX2(C,i,j) += temp * O2SCL_IX2(A,k,j); 00833 } 00834 } 00835 } 00836 } 00837 00838 } else if (TransF == O2cblasNoTrans && TransG == O2cblasTrans) { 00839 00840 /* form C := alpha*A*B' + C */ 00841 00842 for (i = 0; i < n1; i++) { 00843 for (j = 0; j < n2; j++) { 00844 double temp = 0.0; 00845 for (k = 0; k < K; k++) { 00846 temp += O2SCL_IX2(B,i,k) * O2SCL_IX2(A,j,k); 00847 } 00848 O2SCL_IX2(C,i,j) += alpha * temp; 00849 } 00850 } 00851 00852 } else if (TransF == O2cblasTrans && TransG == O2cblasNoTrans) { 00853 00854 for (k = 0; k < K; k++) { 00855 for (i = 0; i < n1; i++) { 00856 const double temp = alpha * O2SCL_IX2(B,k,i); 00857 if (temp != 0.0) { 00858 for (j = 0; j < n2; j++) { 00859 O2SCL_IX2(C,i,j) += temp * O2SCL_IX2(A,k,j); 00860 } 00861 } 00862 } 00863 } 00864 00865 } else if (TransF == O2cblasTrans && TransG == O2cblasTrans) { 00866 00867 for (i = 0; i < n1; i++) { 00868 for (j = 0; j < n2; j++) { 00869 double temp = 0.0; 00870 for (k = 0; k < K; k++) { 00871 temp += O2SCL_IX2(B,k,i) * O2SCL_IX2(A,j,k); 00872 } 00873 O2SCL_IX2(C,i,j) += alpha * temp; 00874 } 00875 } 00876 00877 } else { 00878 O2SCL_ERR_RET("Unrecognized operation in dgemm().",o2scl::gsl_einval); 00879 } 00880 } 00881 00882 return 0; 00883 } 00884 00885 #ifdef DOXYGENP 00886 } 00887 #endif 00888
Documentation generated with Doxygen and provided under the GNU Free Documentation License. See License Information for details.
Project hosting provided by
,
O2scl Sourceforge Project Page