00001 /* 00002 ------------------------------------------------------------------- 00003 00004 Copyright (C) 2006, 2007, 2008, 2009, Andrew W. Steiner 00005 00006 This file is part of O2scl. 00007 00008 O2scl is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation; either version 3 of the License, or 00011 (at your option) any later version. 00012 00013 O2scl is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License 00019 along with O2scl. If not, see <http://www.gnu.org/licenses/>. 00020 00021 ------------------------------------------------------------------- 00022 */ 00023 /* 00024 * Copyright (C) 1996, 1997, 1998, 1999, 2000 Gerard Jungman 00025 * Copyright (C) 2001, 2007 Brian Gough 00026 * 00027 * This program is free software; you can redistribute it and/or modify 00028 * it under the terms of the GNU General Public License as published by 00029 * the Free Software Foundation; either version 3 of the License, or (at 00030 * your option) any later version. 00031 * 00032 * This program is distributed in the hope that it will be useful, but 00033 * WITHOUT ANY WARRANTY; without even the implied warranty of 00034 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00035 * General Public License for more details. 00036 * 00037 * You should have received a copy of the GNU General Public License 00038 * along with this program; if not, write to the Free Software 00039 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 00040 * 02110-1301, USA. 00041 */ 00042 /** \file cblas_base.h 00043 \brief O2scl basic linear algebra function templates 00044 00045 \future Convert to size_t and add float and complex versions 00046 */ 00047 00048 #ifdef DOXYGENP 00049 namespace o2scl_cblas { 00050 #endif 00051 00052 /// Matrix order, either column-major or row-major 00053 enum O2CBLAS_ORDER {O2cblasRowMajor=101, O2cblasColMajor=102}; 00054 00055 /// Transpose operations 00056 enum O2CBLAS_TRANSPOSE {O2cblasNoTrans=111, O2cblasTrans=112, 00057 O2cblasConjTrans=113}; 00058 00059 /// Upper- or lower-triangular 00060 enum O2CBLAS_UPLO {O2cblasUpper=121, O2cblasLower=122}; 00061 00062 /// Unit or generic diagonal 00063 enum O2CBLAS_DIAG {O2cblasNonUnit=131, O2cblasUnit=132}; 00064 00065 /// Left or right sided operation 00066 enum O2CBLAS_SIDE {O2cblasLeft=141, O2cblasRight=142}; 00067 00068 /// \name Standard BLAS functions 00069 //@{ 00070 /// Compute \f$ y= \alpha x + y \f$ 00071 template<class vec_t, class vec2_t> void 00072 daxpy(const int N, const double alpha, const vec_t &X, 00073 vec2_t &Y) { 00074 00075 int i; 00076 00077 if (alpha == 0.0) { 00078 return; 00079 } 00080 00081 const int m = N % 4; 00082 00083 for (i = 0; i < m; i++) { 00084 O2SCL_IX(Y,i) += alpha * O2SCL_IX(X,i); 00085 } 00086 00087 for (i = m; i + 3 < N; i += 4) { 00088 O2SCL_IX(Y,i) += alpha * O2SCL_IX(X,i); 00089 O2SCL_IX(Y,i+1) += alpha * O2SCL_IX(X,i+1); 00090 O2SCL_IX(Y,i+2) += alpha * O2SCL_IX(X,i+2); 00091 O2SCL_IX(Y,i+3) += alpha * O2SCL_IX(X,i+3); 00092 } 00093 } 00094 00095 /// Compute \f$ r = x \cdot y \f$ 00096 template<class vec_t, class vec2_t> double 00097 ddot(const int N, const vec_t &X, const vec2_t &Y) { 00098 double r = 0.0; 00099 int i; 00100 00101 const int m = N % 4; 00102 00103 for (i = 0; i < m; i++) { 00104 r += O2SCL_IX(X,i) * O2SCL_IX(Y,i); 00105 } 00106 00107 for (i = m; i + 3 < N; i += 4) { 00108 r += O2SCL_IX(X,i) * O2SCL_IX(Y,i); 00109 r += O2SCL_IX(X,i+1) * O2SCL_IX(Y,i+1); 00110 r += O2SCL_IX(X,i+2) * O2SCL_IX(Y,i+2); 00111 r += O2SCL_IX(X,i+3) * O2SCL_IX(Y,i+3); 00112 } 00113 00114 return r; 00115 } 00116 00117 /// Compute \f$ x = \alpha x \f$ 00118 template<class vec_t> void 00119 dscal(const int N, const double alpha, vec_t &X) { 00120 int i; 00121 const int m = N % 4; 00122 00123 for (i = 0; i < m; i++) { 00124 O2SCL_IX(X,i)*=alpha; 00125 } 00126 00127 for (i = m; i + 3 < N; i += 4) { 00128 O2SCL_IX(X,i)*=alpha; 00129 O2SCL_IX(X,i+1)*=alpha; 00130 O2SCL_IX(X,i+2)*=alpha; 00131 O2SCL_IX(X,i+3)*=alpha; 00132 } 00133 } 00134 00135 /// Compute the squared norm of the vector \c X 00136 template<class vec_t> double dnrm2(const int N, const vec_t &X) { 00137 00138 double scale = 0.0; 00139 double ssq = 1.0; 00140 int i; 00141 00142 if (N <= 0) { 00143 return 0; 00144 } else if (N == 1) { 00145 return fabs(O2SCL_IX(X,0)); 00146 } 00147 00148 for (i = 0; i < N; i++) { 00149 const double x = O2SCL_IX(X,i); 00150 00151 if (x != 0.0) { 00152 const double ax = fabs(x); 00153 00154 if (scale < ax) { 00155 ssq = 1.0 + ssq * (scale / ax) * (scale / ax); 00156 scale = ax; 00157 } else { 00158 ssq += (ax / scale) * (ax / scale); 00159 } 00160 } 00161 00162 } 00163 00164 return scale * sqrt(ssq); 00165 } 00166 00167 /// Compute \f$ y = \alpha \mathrm{op}(A) x + \beta y \f$. 00168 template<class mat_t, class vec_t> 00169 int dgemv(const enum O2CBLAS_ORDER order, 00170 const enum O2CBLAS_TRANSPOSE TransA, 00171 const int M, const int N, const double alpha, const mat_t &A, 00172 const vec_t &X, const double beta, vec_t &Y) { 00173 00174 int i, j; 00175 int lenX, lenY; 00176 00177 const int Trans = (TransA != O2cblasConjTrans) ? TransA : O2cblasTrans; 00178 00179 if (M == 0 || N == 0) { 00180 return 0; 00181 } 00182 00183 if (alpha == 0.0 && beta == 1.0) { 00184 return 0; 00185 } 00186 00187 if (Trans == O2cblasNoTrans) { 00188 lenX = N; 00189 lenY = M; 00190 } else { 00191 lenX = M; 00192 lenY = N; 00193 } 00194 00195 /* form y := beta*y */ 00196 if (beta == 0.0) { 00197 int iy = 0; 00198 for (i = 0; i < lenY; i++) { 00199 O2SCL_IX(Y,iy) = 0.0; 00200 iy++; 00201 } 00202 } else if (beta != 1.0) { 00203 int iy = 0; 00204 for (i = 0; i < lenY; i++) { 00205 O2SCL_IX(Y,iy) *= beta; 00206 iy++; 00207 } 00208 } 00209 00210 if (alpha == 0.0) { 00211 return 0; 00212 } 00213 00214 if ((order == O2cblasRowMajor && Trans == O2cblasNoTrans) 00215 || (order == O2cblasColMajor && Trans == O2cblasTrans)) { 00216 00217 /* form y := alpha*A*x + y */ 00218 int iy = 0; 00219 for (i = 0; i < lenY; i++) { 00220 double temp = 0.0; 00221 int ix = 0; 00222 for (j = 0; j < lenX; j++) { 00223 temp += O2SCL_IX(X,ix) * O2SCL_IX2(A,i,j); 00224 ix++; 00225 } 00226 O2SCL_IX(Y,iy) += alpha * temp; 00227 iy++; 00228 } 00229 00230 } else if ((order == O2cblasRowMajor && Trans == O2cblasTrans) 00231 || (order == O2cblasColMajor && Trans == O2cblasNoTrans)) { 00232 00233 /* form y := alpha*A'*x + y */ 00234 int ix = 0; 00235 for (j = 0; j < lenX; j++) { 00236 const double temp = alpha * O2SCL_IX(X,ix); 00237 if (temp != 0.0) { 00238 int iy = 0; 00239 for (i = 0; i < lenY; i++) { 00240 O2SCL_IX(Y,iy) += temp * O2SCL_IX2(A,j,i); 00241 iy++; 00242 } 00243 } 00244 ix++; 00245 } 00246 00247 } else { 00248 O2SCL_ERR_RET("Unrecognized operation in dgemv().",o2scl::gsl_einval); 00249 } 00250 return 0; 00251 } 00252 00253 /// Compute \f$ x = \mathrm{op} (A)^{-1} x \f$ 00254 template<class mat_t, class vec_t> 00255 int dtrsv(const enum O2CBLAS_ORDER order, 00256 const enum O2CBLAS_UPLO Uplo, 00257 const enum O2CBLAS_TRANSPOSE TransA, 00258 const enum O2CBLAS_DIAG Diag, 00259 const int M, const int N, const mat_t &A, vec_t &X) { 00260 const int nonunit = (Diag == O2cblasNonUnit); 00261 int ix, jx; 00262 int i, j; 00263 const int Trans = (TransA != O2cblasConjTrans) ? TransA : O2cblasTrans; 00264 00265 if (N == 0) { 00266 return 0; 00267 } 00268 00269 /* form x := inv( A )*x */ 00270 00271 if ((order == O2cblasRowMajor && Trans == O2cblasNoTrans && 00272 Uplo == O2cblasUpper) 00273 || (order == O2cblasColMajor && Trans == O2cblasTrans && 00274 Uplo == O2cblasLower)) { 00275 00276 /* backsubstitution */ 00277 ix = (N - 1); 00278 if (nonunit) { 00279 O2SCL_IX(X,ix) = O2SCL_IX(X,ix) / O2SCL_IX2(A,N-1,N-1); 00280 } 00281 ix--; 00282 for (i = N - 1; i > 0 && i--;) { 00283 double tmp = O2SCL_IX(X,ix); 00284 jx = ix +1; 00285 for (j = i + 1; j < N; j++) { 00286 const double Aij = O2SCL_IX2(A,i,j); 00287 tmp -= Aij * O2SCL_IX(X,jx); 00288 jx++; 00289 } 00290 if (nonunit) { 00291 O2SCL_IX(X,ix) = tmp / O2SCL_IX2(A,i,i); 00292 } else { 00293 O2SCL_IX(X,ix) = tmp; 00294 } 00295 ix--; 00296 } 00297 00298 } else if ((order == O2cblasRowMajor && Trans == O2cblasNoTrans && 00299 Uplo == O2cblasLower) || 00300 (order == O2cblasColMajor && Trans == O2cblasTrans && 00301 Uplo == O2cblasUpper)) { 00302 00303 /* forward substitution */ 00304 ix = 0; 00305 if (nonunit) { 00306 O2SCL_IX(X,ix) = O2SCL_IX(X,ix) / O2SCL_IX2(A,0,0); 00307 } 00308 ix++; 00309 for (i = 1; i < N; i++) { 00310 double tmp = O2SCL_IX(X,ix); 00311 jx = 0; 00312 for (j = 0; j < i; j++) { 00313 const double Aij = O2SCL_IX2(A,i,j); 00314 tmp -= Aij * O2SCL_IX(X,jx); 00315 jx++; 00316 } 00317 if (nonunit) { 00318 O2SCL_IX(X,ix) = tmp / O2SCL_IX2(A,i,i); 00319 } else { 00320 O2SCL_IX(X,ix) = tmp; 00321 } 00322 ix++; 00323 } 00324 00325 } else if ((order == O2cblasRowMajor && Trans == O2cblasTrans && 00326 Uplo == O2cblasUpper) 00327 || (order == O2cblasColMajor && Trans == O2cblasNoTrans && 00328 Uplo == O2cblasLower)) { 00329 00330 /* form x := inv( A' )*x */ 00331 00332 /* forward substitution */ 00333 ix = 0; 00334 if (nonunit) { 00335 O2SCL_IX(X,ix) = O2SCL_IX(X,ix) / O2SCL_IX2(A,0,0); 00336 } 00337 ix++; 00338 for (i = 1; i < N; i++) { 00339 double tmp = O2SCL_IX(X,ix); 00340 jx = 0; 00341 for (j = 0; j < i; j++) { 00342 const double Aji = O2SCL_IX2(A,j,i); 00343 tmp -= Aji * O2SCL_IX(X,jx); 00344 jx++; 00345 } 00346 if (nonunit) { 00347 O2SCL_IX(X,ix) = tmp / O2SCL_IX2(A,i,i); 00348 } else { 00349 O2SCL_IX(X,ix) = tmp; 00350 } 00351 ix++; 00352 } 00353 00354 } else if ((order == O2cblasRowMajor && Trans == O2cblasTrans && 00355 Uplo == O2cblasLower) 00356 || (order == O2cblasColMajor && Trans == O2cblasNoTrans && 00357 Uplo == O2cblasUpper)) { 00358 00359 /* backsubstitution */ 00360 ix = (N - 1); 00361 if (nonunit) { 00362 O2SCL_IX(X,ix) = O2SCL_IX(X,ix) / O2SCL_IX2(A,N-1,N-1); 00363 } 00364 ix--; 00365 for (i = N - 1; i > 0 && i--;) { 00366 double tmp = O2SCL_IX(X,ix); 00367 jx = ix +1; 00368 for (j = i + 1; j < N; j++) { 00369 const double Aji = O2SCL_IX2(A,j,i); 00370 tmp -= Aji * O2SCL_IX(X,jx); 00371 jx++; 00372 } 00373 if (nonunit) { 00374 O2SCL_IX(X,ix) = tmp / O2SCL_IX2(A,i,i); 00375 } else { 00376 O2SCL_IX(X,ix) = tmp; 00377 } 00378 ix--; 00379 } 00380 00381 } else { 00382 O2SCL_ERR_RET("Unrecognized operation in dtrsv().",o2scl::gsl_einval); 00383 } 00384 return 0; 00385 } 00386 00387 //@} 00388 00389 /// \name Helper BLAS functions 00390 //@{ 00391 /** 00392 \brief Compute \f$ x = \alpha x \f$ beginning with index \c ie and 00393 ending with index \c N-1 00394 00395 Used in \ref householder_hv(). 00396 */ 00397 template<class vec_t, class vec2_t> 00398 void daxpy_subvec(const int N, const double alpha, const vec_t &X, 00399 vec2_t &Y, const int ie) { 00400 00401 int i; 00402 00403 if (alpha == 0.0) { 00404 return; 00405 } 00406 00407 const int m = (N-ie) % 4; 00408 00409 for (i = ie; i < ie+m; i++) { 00410 O2SCL_IX(Y,i) += alpha * O2SCL_IX(X,i); 00411 } 00412 00413 for (i = ie+m; i + 3 < N; i += 4) { 00414 O2SCL_IX(Y,i) += alpha * O2SCL_IX(X,i); 00415 O2SCL_IX(Y,i+1) += alpha * O2SCL_IX(X,i+1); 00416 O2SCL_IX(Y,i+2) += alpha * O2SCL_IX(X,i+2); 00417 O2SCL_IX(Y,i+3) += alpha * O2SCL_IX(X,i+3); 00418 } 00419 } 00420 00421 /** 00422 \brief Compute \f$ r = x \cdot y \f$ beginning with index \c ie and 00423 ending with index \c N-1 00424 00425 Used in \ref householder_hv(). 00426 */ 00427 template<class vec_t, class vec2_t> 00428 double ddot_subvec(const int N, const vec_t &X, const vec2_t &Y, 00429 const int ie) { 00430 double r = 0.0; 00431 int i; 00432 00433 const int m = (N-ie) % 4; 00434 00435 for (i = ie; i < ie+m; i++) { 00436 r += O2SCL_IX(X,i) * O2SCL_IX(Y,i); 00437 } 00438 00439 for (i = ie+m; i + 3 < N; i += 4) { 00440 r += O2SCL_IX(X,i) * O2SCL_IX(Y,i); 00441 r += O2SCL_IX(X,i+1) * O2SCL_IX(Y,i+1); 00442 r += O2SCL_IX(X,i+2) * O2SCL_IX(Y,i+2); 00443 r += O2SCL_IX(X,i+3) * O2SCL_IX(Y,i+3); 00444 } 00445 00446 return r; 00447 } 00448 00449 /** 00450 \brief Compute \f$ x = \alpha x \f$ beginning with index \c ie and 00451 ending with index \c N-1 00452 00453 Used in \ref householder_transform(). 00454 */ 00455 template<class vec_t> 00456 void dscal_subvec(const int N, const double alpha, vec_t &X, 00457 const int ie) { 00458 int i; 00459 const int m = (N-ie) % 4; 00460 00461 for (i = ie; i < ie+m; i++) { 00462 O2SCL_IX(X,i)*=alpha; 00463 } 00464 00465 for (i = ie+m; i + 3 < N; i += 4) { 00466 O2SCL_IX(X,i)*=alpha; 00467 O2SCL_IX(X,i+1)*=alpha; 00468 O2SCL_IX(X,i+2)*=alpha; 00469 O2SCL_IX(X,i+3)*=alpha; 00470 } 00471 } 00472 00473 /** 00474 \brief Compute the squared norm of the vector \c X beginning with 00475 index \c ie and ending with index \c N-1 00476 00477 Used in \ref householder_transform(). 00478 */ 00479 template<class vec_t> 00480 double dnrm2_subvec(const int N, const vec_t &X, const int ie) { 00481 00482 double scale = 0.0; 00483 double ssq = 1.0; 00484 int i; 00485 00486 if (N <= 0) { 00487 return 0; 00488 } else if (N == 1) { 00489 return fabs(O2SCL_IX(X,ie)); 00490 } 00491 00492 for (i = ie; i < N; i++) { 00493 const double x = O2SCL_IX(X,i); 00494 00495 if (x != 0.0) { 00496 const double ax = fabs(x); 00497 00498 if (scale < ax) { 00499 ssq = 1.0 + ssq * (scale / ax) * (scale / ax); 00500 scale = ax; 00501 } else { 00502 ssq += (ax / scale) * (ax / scale); 00503 } 00504 } 00505 00506 } 00507 00508 return scale * sqrt(ssq); 00509 } 00510 00511 /** 00512 \brief Compute the squared norm of the last \c N rows 00513 of a column of a matrix 00514 00515 Given matrix \c M, this computes the norm of the last \c N rows 00516 of the column with index \c ic, beginning with the element with 00517 index \c ir. If the matrix \c M has \c r rows, and \c c columns, 00518 then the parameter \c N should be \c r-ir. 00519 00520 Used in householder_transform_subcol(). 00521 00522 \future Could be made more efficient with a matrix-col like 00523 object 00524 */ 00525 template<class mat_t> 00526 double dnrm2_subcol(const mat_t &M, const size_t ir, const size_t ic, 00527 const size_t N) { 00528 00529 double scale = 0.0; 00530 double ssq = 1.0; 00531 size_t i; 00532 00533 if (N <= 0) { 00534 return 0; 00535 } else if (N == 1) { 00536 return fabs(O2SCL_IX2(M,ir,ic)); 00537 } 00538 00539 size_t tot=ir+N; 00540 for (i = ir; i < tot; i++) { 00541 const double x = O2SCL_IX2(M,i,ic); 00542 00543 if (x != 0.0) { 00544 const double ax = fabs(x); 00545 00546 if (scale < ax) { 00547 ssq = 1.0 + ssq * (scale / ax) * (scale / ax); 00548 scale = ax; 00549 } else { 00550 ssq += (ax / scale) * (ax / scale); 00551 } 00552 } 00553 00554 } 00555 00556 return scale * sqrt(ssq); 00557 } 00558 00559 /// Desc 00560 template<class mat_t, class mat_subcol_t> 00561 double dnrm2_subcol2(const mat_t &M, const size_t ir, const size_t ic, 00562 const size_t N) { 00563 00564 double scale = 0.0; 00565 double ssq = 1.0; 00566 size_t i; 00567 00568 if (N <= 0) { 00569 return 0; 00570 } else if (N == 1) { 00571 return fabs(O2SCL_IX2(M,ir,ic)); 00572 } 00573 00574 mat_subcol_t matcol(M,ic); 00575 00576 size_t tot=ir+N; 00577 for (i = ir; i < tot; i++) { 00578 const double x = O2SCL_IX(matcol,i); 00579 00580 if (x != 0.0) { 00581 const double ax = fabs(x); 00582 00583 if (scale < ax) { 00584 ssq = 1.0 + ssq * (scale / ax) * (scale / ax); 00585 scale = ax; 00586 } else { 00587 ssq += (ax / scale) * (ax / scale); 00588 } 00589 } 00590 00591 } 00592 00593 return scale * sqrt(ssq); 00594 } 00595 00596 /** 00597 \brief Compute \f$ x = \alpha x \f$ 00598 00599 Used in householder_transform_subcol(). 00600 */ 00601 template<class mat_t> 00602 void dscal_subcol(mat_t &A, const size_t ir, const size_t ic, 00603 const size_t n, const double alpha) { 00604 00605 size_t tot=ir+n; 00606 size_t i; 00607 const size_t m = (tot-ir) % 4; 00608 00609 for (i = ir; i < m+ir; i++) { 00610 O2SCL_IX2(A,i,ic)*=alpha; 00611 } 00612 00613 for (i = m+ir; i + 3 < tot; i += 4) { 00614 O2SCL_IX2(A,i,ic)*=alpha; 00615 O2SCL_IX2(A,i+1,ic)*=alpha; 00616 O2SCL_IX2(A,i+2,ic)*=alpha; 00617 O2SCL_IX2(A,i+3,ic)*=alpha; 00618 } 00619 00620 } 00621 00622 /** 00623 \brief Compute \f$ x = \alpha x \f$ for \ref householder_hv_sub() 00624 00625 Used in householder_hv_sub(). 00626 00627 \future Implement explicit loop unrolling 00628 */ 00629 template<class mat_t, class vec_t> 00630 void daxpy_hv_sub(const int N, const double alpha, const mat_t &X, 00631 vec_t &Y, const int ie) { 00632 00633 if (alpha == 0.0) { 00634 return; 00635 } 00636 00637 for(int i=ie+1;i<N;i++) { 00638 O2SCL_IX(Y,i)+=alpha*O2SCL_IX2(X,i,ie); 00639 } 00640 00641 /* 00642 const int m = N % 4; 00643 00644 for (i = 0; i < m; i++) { 00645 O2SCL_IX(X,i)*=alpha; 00646 } 00647 00648 for (i = m; i + 3 < N; i += 4) { 00649 O2SCL_IX(X,i)*=alpha; 00650 O2SCL_IX(X,i+1)*=alpha; 00651 O2SCL_IX(X,i+2)*=alpha; 00652 O2SCL_IX(X,i+3)*=alpha; 00653 } 00654 */ 00655 } 00656 00657 /** 00658 \brief Compute \f$ r = x \cdot y \f$ for \ref householder_hv_sub() 00659 00660 Used in householder_hv_sub(). 00661 00662 \future Implement explicit loop unrolling 00663 */ 00664 template<class mat_t, class vec_t> 00665 double ddot_hv_sub(const int N, const mat_t &X, const vec_t &Y, 00666 const int ie) { 00667 double r = 0.0; 00668 int i; 00669 00670 for(i=ie+1;i<N;i++) { 00671 r+=O2SCL_IX2(X,i,ie)*O2SCL_IX(Y,i); 00672 } 00673 return r; 00674 } 00675 00676 //@} 00677 00678 /// Compute \f$ y = \alpha \mathrm{op}(A) x + \beta y \f$. 00679 template<class mat_t> 00680 int dgemm(const enum O2CBLAS_ORDER Order, 00681 const enum O2CBLAS_TRANSPOSE TransA, 00682 const enum O2CBLAS_TRANSPOSE TransB, const int M, const int N, 00683 const int K, const double alpha, const mat_t &A, 00684 const mat_t &B, const double beta, mat_t &C) { 00685 00686 int i, j, k; 00687 int n1, n2; 00688 int TransF, TransG; 00689 00690 if (alpha == 0.0 && beta == 1.0) { 00691 return 0; 00692 } 00693 00694 /* 00695 This is a little more complicated than the original, which 00696 assigned the matrices A and B to variables *F and *G which then 00697 allowed some code duplication. We can't really do that here, 00698 since we don't have that kind of type info on A and B, so we 00699 just handle the two cases separately. 00700 */ 00701 00702 if (Order == O2cblasRowMajor) { 00703 00704 n1 = M; 00705 n2 = N; 00706 00707 /* form y := beta*y */ 00708 if (beta == 0.0) { 00709 for (i = 0; i < n1; i++) { 00710 for (j = 0; j < n2; j++) { 00711 O2SCL_IX2(C,i,j)=0.0; 00712 } 00713 } 00714 } else if (beta != 1.0) { 00715 for (i = 0; i < n1; i++) { 00716 for (j = 0; j < n2; j++) { 00717 O2SCL_IX2(C,i,j)*=beta; 00718 } 00719 } 00720 } 00721 00722 if (alpha == 0.0) { 00723 return 0; 00724 } 00725 00726 TransF = (TransA == O2cblasConjTrans) ? O2cblasTrans : TransA; 00727 TransG = (TransB == O2cblasConjTrans) ? O2cblasTrans : TransB; 00728 00729 if (TransF == O2cblasNoTrans && TransG == O2cblasNoTrans) { 00730 00731 /* form C := alpha*A*B + C */ 00732 00733 for (k = 0; k < K; k++) { 00734 for (i = 0; i < n1; i++) { 00735 const double temp = alpha * O2SCL_IX2(A,i,k); 00736 if (temp != 0.0) { 00737 for (j = 0; j < n2; j++) { 00738 O2SCL_IX2(C,i,j) += temp * O2SCL_IX2(B,k,j); 00739 } 00740 } 00741 } 00742 } 00743 00744 } else if (TransF == O2cblasNoTrans && TransG == O2cblasTrans) { 00745 00746 /* form C := alpha*A*B' + C */ 00747 00748 for (i = 0; i < n1; i++) { 00749 for (j = 0; j < n2; j++) { 00750 double temp = 0.0; 00751 for (k = 0; k < K; k++) { 00752 temp += O2SCL_IX2(A,i,k) * O2SCL_IX2(B,j,k); 00753 } 00754 O2SCL_IX2(C,i,j) += alpha * temp; 00755 } 00756 } 00757 00758 } else if (TransF == O2cblasTrans && TransG == O2cblasNoTrans) { 00759 00760 for (k = 0; k < K; k++) { 00761 for (i = 0; i < n1; i++) { 00762 const double temp = alpha * O2SCL_IX2(A,k,i); 00763 if (temp != 0.0) { 00764 for (j = 0; j < n2; j++) { 00765 O2SCL_IX2(C,i,j) += temp * O2SCL_IX2(B,k,j); 00766 } 00767 } 00768 } 00769 } 00770 00771 } else if (TransF == O2cblasTrans && TransG == O2cblasTrans) { 00772 00773 for (i = 0; i < n1; i++) { 00774 for (j = 0; j < n2; j++) { 00775 double temp = 0.0; 00776 for (k = 0; k < K; k++) { 00777 temp += O2SCL_IX2(A,k,i) * O2SCL_IX2(B,j,k); 00778 } 00779 O2SCL_IX2(C,i,j) += alpha * temp; 00780 } 00781 } 00782 00783 } else { 00784 O2SCL_ERR_RET("Unrecognized operation in dgemm().",o2scl::gsl_einval); 00785 } 00786 00787 } else { 00788 00789 n1 = N; 00790 n2 = M; 00791 00792 /* form y := beta*y */ 00793 if (beta == 0.0) { 00794 for (i = 0; i < n1; i++) { 00795 for (j = 0; j < n2; j++) { 00796 O2SCL_IX2(C,i,j)=0.0; 00797 } 00798 } 00799 } else if (beta != 1.0) { 00800 for (i = 0; i < n1; i++) { 00801 for (j = 0; j < n2; j++) { 00802 O2SCL_IX2(C,i,j)*=beta; 00803 } 00804 } 00805 } 00806 00807 if (alpha == 0.0) { 00808 return 0; 00809 } 00810 00811 TransF = (TransB == O2cblasConjTrans) ? O2cblasTrans : TransB; 00812 TransG = (TransA == O2cblasConjTrans) ? O2cblasTrans : TransA; 00813 00814 if (TransF == O2cblasNoTrans && TransG == O2cblasNoTrans) { 00815 00816 /* form C := alpha*A*B + C */ 00817 00818 for (k = 0; k < K; k++) { 00819 for (i = 0; i < n1; i++) { 00820 const double temp = alpha * O2SCL_IX2(B,i,k); 00821 if (temp != 0.0) { 00822 for (j = 0; j < n2; j++) { 00823 O2SCL_IX2(C,i,j) += temp * O2SCL_IX2(A,k,j); 00824 } 00825 } 00826 } 00827 } 00828 00829 } else if (TransF == O2cblasNoTrans && TransG == O2cblasTrans) { 00830 00831 /* form C := alpha*A*B' + C */ 00832 00833 for (i = 0; i < n1; i++) { 00834 for (j = 0; j < n2; j++) { 00835 double temp = 0.0; 00836 for (k = 0; k < K; k++) { 00837 temp += O2SCL_IX2(B,i,k) * O2SCL_IX2(A,j,k); 00838 } 00839 O2SCL_IX2(C,i,j) += alpha * temp; 00840 } 00841 } 00842 00843 } else if (TransF == O2cblasTrans && TransG == O2cblasNoTrans) { 00844 00845 for (k = 0; k < K; k++) { 00846 for (i = 0; i < n1; i++) { 00847 const double temp = alpha * O2SCL_IX2(B,k,i); 00848 if (temp != 0.0) { 00849 for (j = 0; j < n2; j++) { 00850 O2SCL_IX2(C,i,j) += temp * O2SCL_IX2(A,k,j); 00851 } 00852 } 00853 } 00854 } 00855 00856 } else if (TransF == O2cblasTrans && TransG == O2cblasTrans) { 00857 00858 for (i = 0; i < n1; i++) { 00859 for (j = 0; j < n2; j++) { 00860 double temp = 0.0; 00861 for (k = 0; k < K; k++) { 00862 temp += O2SCL_IX2(B,k,i) * O2SCL_IX2(A,j,k); 00863 } 00864 O2SCL_IX2(C,i,j) += alpha * temp; 00865 } 00866 } 00867 00868 } else { 00869 O2SCL_ERR_RET("Unrecognized operation in dgemm().",o2scl::gsl_einval); 00870 } 00871 } 00872 00873 return 0; 00874 } 00875 00876 #ifdef DOXYGENP 00877 } 00878 #endif 00879
Documentation generated with Doxygen and provided under the GNU Free Documentation License. See License Information for details.
Project hosting provided by
,
O2scl Sourceforge Project Page