00001 /* 00002 ------------------------------------------------------------------- 00003 00004 Copyright (C) 2006, 2007, 2008, 2009, Andrew W. Steiner 00005 00006 This file is part of O2scl. 00007 00008 O2scl is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation; either version 3 of the License, or 00011 (at your option) any later version. 00012 00013 O2scl is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License 00019 along with O2scl. If not, see <http://www.gnu.org/licenses/>. 00020 00021 ------------------------------------------------------------------- 00022 */ 00023 /* 00024 * Copyright (C) 1996, 1997, 1998, 1999, 2000 Gerard Jungman 00025 * Copyright (C) 2001, 2007 Brian Gough 00026 * 00027 * This program is free software; you can redistribute it and/or modify 00028 * it under the terms of the GNU General Public License as published by 00029 * the Free Software Foundation; either version 3 of the License, or (at 00030 * your option) any later version. 00031 * 00032 * This program is distributed in the hope that it will be useful, but 00033 * WITHOUT ANY WARRANTY; without even the implied warranty of 00034 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00035 * General Public License for more details. 00036 * 00037 * You should have received a copy of the GNU General Public License 00038 * along with this program; if not, write to the Free Software 00039 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 00040 * 02110-1301, USA. 00041 */ 00042 /** \file cblas_base.h 00043 \brief O2scl basic linear algebra function templates 00044 00045 \future Convert to size_t and add float and complex versions 00046 */ 00047 00048 #ifdef DOXYGENP 00049 namespace o2scl_cblas { 00050 #endif 00051 00052 /// Matrix order, either column-major or row-major 00053 enum O2CBLAS_ORDER {O2cblasRowMajor=101, O2cblasColMajor=102}; 00054 00055 /// Transpose operations 00056 enum O2CBLAS_TRANSPOSE {O2cblasNoTrans=111, O2cblasTrans=112, 00057 O2cblasConjTrans=113}; 00058 00059 /// Upper- or lower-triangular 00060 enum O2CBLAS_UPLO {O2cblasUpper=121, O2cblasLower=122}; 00061 00062 /// Unit or generic diagonal 00063 enum O2CBLAS_DIAG {O2cblasNonUnit=131, O2cblasUnit=132}; 00064 00065 /// Left or right sided operation 00066 enum O2CBLAS_SIDE {O2cblasLeft=141, O2cblasRight=142}; 00067 00068 /// \name Standard BLAS functions 00069 //@{ 00070 /// Compute \f$ y= \alpha x + y \f$ 00071 template<class vec_t, class vec2_t> void 00072 daxpy(const int N, const double alpha, const vec_t &X, 00073 vec2_t &Y) { 00074 00075 int i; 00076 00077 if (alpha == 0.0) { 00078 return; 00079 } 00080 00081 const int m = N % 4; 00082 00083 for (i = 0; i < m; i++) { 00084 O2SCL_IX(Y,i) += alpha * O2SCL_IX(X,i); 00085 } 00086 00087 for (i = m; i + 3 < N; i += 4) { 00088 O2SCL_IX(Y,i) += alpha * O2SCL_IX(X,i); 00089 O2SCL_IX(Y,i+1) += alpha * O2SCL_IX(X,i+1); 00090 O2SCL_IX(Y,i+2) += alpha * O2SCL_IX(X,i+2); 00091 O2SCL_IX(Y,i+3) += alpha * O2SCL_IX(X,i+3); 00092 } 00093 } 00094 00095 /// Compute \f$ r = x \cdot y \f$ 00096 template<class vec_t, class vec2_t> double 00097 ddot(const int N, const vec_t &X, const vec2_t &Y) { 00098 double r = 0.0; 00099 int i; 00100 00101 const int m = N % 4; 00102 00103 for (i = 0; i < m; i++) { 00104 r += O2SCL_IX(X,i) * O2SCL_IX(Y,i); 00105 } 00106 00107 for (i = m; i + 3 < N; i += 4) { 00108 r += O2SCL_IX(X,i) * O2SCL_IX(Y,i); 00109 r += O2SCL_IX(X,i+1) * O2SCL_IX(Y,i+1); 00110 r += O2SCL_IX(X,i+2) * O2SCL_IX(Y,i+2); 00111 r += O2SCL_IX(X,i+3) * O2SCL_IX(Y,i+3); 00112 } 00113 00114 return r; 00115 } 00116 00117 /// Compute \f$ x = \alpha x \f$ 00118 template<class vec_t> void 00119 dscal(const int N, const double alpha, vec_t &X) { 00120 int i; 00121 const int m = N % 4; 00122 00123 for (i = 0; i < m; i++) { 00124 O2SCL_IX(X,i)*=alpha; 00125 } 00126 00127 for (i = m; i + 3 < N; i += 4) { 00128 O2SCL_IX(X,i)*=alpha; 00129 O2SCL_IX(X,i+1)*=alpha; 00130 O2SCL_IX(X,i+2)*=alpha; 00131 O2SCL_IX(X,i+3)*=alpha; 00132 } 00133 } 00134 00135 /// Compute the squared norm of the vector \c X 00136 template<class vec_t> double dnrm2(const int N, const vec_t &X) { 00137 00138 double scale = 0.0; 00139 double ssq = 1.0; 00140 int i; 00141 00142 if (N <= 0) { 00143 return 0; 00144 } else if (N == 1) { 00145 return fabs(O2SCL_IX(X,0)); 00146 } 00147 00148 for (i = 0; i < N; i++) { 00149 const double x = O2SCL_IX(X,i); 00150 00151 if (x != 0.0) { 00152 const double ax = fabs(x); 00153 00154 if (scale < ax) { 00155 ssq = 1.0 + ssq * (scale / ax) * (scale / ax); 00156 scale = ax; 00157 } else { 00158 ssq += (ax / scale) * (ax / scale); 00159 } 00160 } 00161 00162 } 00163 00164 return scale * sqrt(ssq); 00165 } 00166 00167 /// Compute \f$ y = \alpha \mathrm{op}(A) x + \beta y \f$. 00168 template<class mat_t, class vec_t> 00169 int dgemv(const enum O2CBLAS_ORDER order, 00170 const enum O2CBLAS_TRANSPOSE TransA, 00171 const int M, const int N, const double alpha, const mat_t &A, 00172 const vec_t &X, const double beta, vec_t &Y) { 00173 00174 int i, j; 00175 int lenX, lenY; 00176 00177 const int Trans = (TransA != O2cblasConjTrans) ? TransA : O2cblasTrans; 00178 00179 if (M == 0 || N == 0) { 00180 return 0; 00181 } 00182 00183 if (alpha == 0.0 && beta == 1.0) { 00184 return 0; 00185 } 00186 00187 if (Trans == O2cblasNoTrans) { 00188 lenX = N; 00189 lenY = M; 00190 } else { 00191 lenX = M; 00192 lenY = N; 00193 } 00194 00195 /* form y := beta*y */ 00196 if (beta == 0.0) { 00197 int iy = 0; 00198 for (i = 0; i < lenY; i++) { 00199 O2SCL_IX(Y,iy) = 0.0; 00200 iy++; 00201 } 00202 } else if (beta != 1.0) { 00203 int iy = 0; 00204 for (i = 0; i < lenY; i++) { 00205 O2SCL_IX(Y,iy) *= beta; 00206 iy++; 00207 } 00208 } 00209 00210 if (alpha == 0.0) { 00211 return 0; 00212 } 00213 00214 if ((order == O2cblasRowMajor && Trans == O2cblasNoTrans) 00215 || (order == O2cblasColMajor && Trans == O2cblasTrans)) { 00216 00217 /* form y := alpha*A*x + y */ 00218 int iy = 0; 00219 for (i = 0; i < lenY; i++) { 00220 double temp = 0.0; 00221 int ix = 0; 00222 for (j = 0; j < lenX; j++) { 00223 temp += O2SCL_IX(X,ix) * O2SCL_IX2(A,i,j); 00224 ix++; 00225 } 00226 O2SCL_IX(Y,iy) += alpha * temp; 00227 iy++; 00228 } 00229 00230 } else if ((order == O2cblasRowMajor && Trans == O2cblasTrans) 00231 || (order == O2cblasColMajor && Trans == O2cblasNoTrans)) { 00232 00233 /* form y := alpha*A'*x + y */ 00234 int ix = 0; 00235 for (j = 0; j < lenX; j++) { 00236 const double temp = alpha * O2SCL_IX(X,ix); 00237 if (temp != 0.0) { 00238 int iy = 0; 00239 for (i = 0; i < lenY; i++) { 00240 O2SCL_IX(Y,iy) += temp * O2SCL_IX2(A,j,i); 00241 iy++; 00242 } 00243 } 00244 ix++; 00245 } 00246 00247 } else { 00248 O2SCL_ERR_RET("Unrecognized operation in dgemv().",o2scl::gsl_einval); 00249 } 00250 return 0; 00251 } 00252 00253 /// Compute \f$ x = \mathrm{op} (A)^{-1} x \f$ 00254 template<class mat_t, class vec_t> 00255 int dtrsv(const enum O2CBLAS_ORDER order, 00256 const enum O2CBLAS_UPLO Uplo, 00257 const enum O2CBLAS_TRANSPOSE TransA, 00258 const enum O2CBLAS_DIAG Diag, 00259 const int M, const int N, const mat_t &A, vec_t &X) { 00260 const int nonunit = (Diag == O2cblasNonUnit); 00261 int ix, jx; 00262 int i, j; 00263 const int Trans = (TransA != O2cblasConjTrans) ? TransA : O2cblasTrans; 00264 00265 if (N == 0) { 00266 return 0; 00267 } 00268 00269 /* form x := inv( A )*x */ 00270 00271 if ((order == O2cblasRowMajor && Trans == O2cblasNoTrans && 00272 Uplo == O2cblasUpper) 00273 || (order == O2cblasColMajor && Trans == O2cblasTrans && 00274 Uplo == O2cblasLower)) { 00275 00276 /* backsubstitution */ 00277 ix = (N - 1); 00278 if (nonunit) { 00279 O2SCL_IX(X,ix) = O2SCL_IX(X,ix) / O2SCL_IX2(A,N-1,N-1); 00280 } 00281 ix--; 00282 for (i = N - 1; i > 0 && i--;) { 00283 double tmp = O2SCL_IX(X,ix); 00284 jx = ix +1; 00285 for (j = i + 1; j < N; j++) { 00286 const double Aij = O2SCL_IX2(A,i,j); 00287 tmp -= Aij * O2SCL_IX(X,jx); 00288 jx++; 00289 } 00290 if (nonunit) { 00291 O2SCL_IX(X,ix) = tmp / O2SCL_IX2(A,i,i); 00292 } else { 00293 O2SCL_IX(X,ix) = tmp; 00294 } 00295 ix--; 00296 } 00297 00298 } else if ((order == O2cblasRowMajor && Trans == O2cblasNoTrans && 00299 Uplo == O2cblasLower) || 00300 (order == O2cblasColMajor && Trans == O2cblasTrans && 00301 Uplo == O2cblasUpper)) { 00302 00303 /* forward substitution */ 00304 ix = 0; 00305 if (nonunit) { 00306 O2SCL_IX(X,ix) = O2SCL_IX(X,ix) / O2SCL_IX2(A,0,0); 00307 } 00308 ix++; 00309 for (i = 1; i < N; i++) { 00310 double tmp = O2SCL_IX(X,ix); 00311 jx = 0; 00312 for (j = 0; j < i; j++) { 00313 const double Aij = O2SCL_IX2(A,i,j); 00314 tmp -= Aij * O2SCL_IX(X,jx); 00315 jx++; 00316 } 00317 if (nonunit) { 00318 O2SCL_IX(X,ix) = tmp / O2SCL_IX2(A,i,i); 00319 } else { 00320 O2SCL_IX(X,ix) = tmp; 00321 } 00322 ix++; 00323 } 00324 00325 } else if ((order == O2cblasRowMajor && Trans == O2cblasTrans && 00326 Uplo == O2cblasUpper) 00327 || (order == O2cblasColMajor && Trans == O2cblasNoTrans && 00328 Uplo == O2cblasLower)) { 00329 00330 /* form x := inv( A' )*x */ 00331 00332 /* forward substitution */ 00333 ix = 0; 00334 if (nonunit) { 00335 O2SCL_IX(X,ix) = O2SCL_IX(X,ix) / O2SCL_IX2(A,0,0); 00336 } 00337 ix++; 00338 for (i = 1; i < N; i++) { 00339 double tmp = O2SCL_IX(X,ix); 00340 jx = 0; 00341 for (j = 0; j < i; j++) { 00342 const double Aji = O2SCL_IX2(A,j,i); 00343 tmp -= Aji * O2SCL_IX(X,jx); 00344 jx++; 00345 } 00346 if (nonunit) { 00347 O2SCL_IX(X,ix) = tmp / O2SCL_IX2(A,i,i); 00348 } else { 00349 O2SCL_IX(X,ix) = tmp; 00350 } 00351 ix++; 00352 } 00353 00354 } else if ((order == O2cblasRowMajor && Trans == O2cblasTrans && 00355 Uplo == O2cblasLower) 00356 || (order == O2cblasColMajor && Trans == O2cblasNoTrans && 00357 Uplo == O2cblasUpper)) { 00358 00359 /* backsubstitution */ 00360 ix = (N - 1); 00361 if (nonunit) { 00362 O2SCL_IX(X,ix) = O2SCL_IX(X,ix) / O2SCL_IX2(A,N-1,N-1); 00363 } 00364 ix--; 00365 for (i = N - 1; i > 0 && i--;) { 00366 double tmp = O2SCL_IX(X,ix); 00367 jx = ix +1; 00368 for (j = i + 1; j < N; j++) { 00369 const double Aji = O2SCL_IX2(A,j,i); 00370 tmp -= Aji * O2SCL_IX(X,jx); 00371 jx++; 00372 } 00373 if (nonunit) { 00374 O2SCL_IX(X,ix) = tmp / O2SCL_IX2(A,i,i); 00375 } else { 00376 O2SCL_IX(X,ix) = tmp; 00377 } 00378 ix--; 00379 } 00380 00381 } else { 00382 O2SCL_ERR_RET("Unrecognized operation in dtrsv().",o2scl::gsl_einval); 00383 } 00384 return 0; 00385 } 00386 00387 //@} 00388 00389 /// \name Helper BLAS functions 00390 //@{ 00391 /** 00392 \brief Compute \f$ x = \alpha x \f$ beginning with index \c ie and 00393 ending with index \c N-1 00394 00395 Used in \ref householder_hv(). 00396 */ 00397 template<class vec_t, class vec2_t> 00398 void daxpy_subvec(const int N, const double alpha, const vec_t &X, 00399 vec2_t &Y, const int ie) { 00400 00401 int i; 00402 00403 if (alpha == 0.0) { 00404 return; 00405 } 00406 00407 const int m = (N-ie) % 4; 00408 00409 for (i = ie; i < ie+m; i++) { 00410 O2SCL_IX(Y,i) += alpha * O2SCL_IX(X,i); 00411 } 00412 00413 for (i = ie+m; i + 3 < N; i += 4) { 00414 O2SCL_IX(Y,i) += alpha * O2SCL_IX(X,i); 00415 O2SCL_IX(Y,i+1) += alpha * O2SCL_IX(X,i+1); 00416 O2SCL_IX(Y,i+2) += alpha * O2SCL_IX(X,i+2); 00417 O2SCL_IX(Y,i+3) += alpha * O2SCL_IX(X,i+3); 00418 } 00419 } 00420 00421 /** 00422 \brief Compute \f$ r = x \cdot y \f$ beginning with index \c ie and 00423 ending with index \c N-1 00424 00425 Used in \ref householder_hv(). 00426 */ 00427 template<class vec_t, class vec2_t> 00428 double ddot_subvec(const int N, const vec_t &X, const vec2_t &Y, 00429 const int ie) { 00430 double r = 0.0; 00431 int i; 00432 00433 const int m = (N-ie) % 4; 00434 00435 for (i = ie; i < ie+m; i++) { 00436 r += O2SCL_IX(X,i) * O2SCL_IX(Y,i); 00437 } 00438 00439 for (i = ie+m; i + 3 < N; i += 4) { 00440 r += O2SCL_IX(X,i) * O2SCL_IX(Y,i); 00441 r += O2SCL_IX(X,i+1) * O2SCL_IX(Y,i+1); 00442 r += O2SCL_IX(X,i+2) * O2SCL_IX(Y,i+2); 00443 r += O2SCL_IX(X,i+3) * O2SCL_IX(Y,i+3); 00444 } 00445 00446 return r; 00447 } 00448 00449 /** 00450 \brief Compute \f$ x = \alpha x \f$ beginning with index \c ie and 00451 ending with index \c N-1 00452 00453 Used in \ref householder_transform(). 00454 */ 00455 template<class vec_t> 00456 void dscal_subvec(const int N, const double alpha, vec_t &X, 00457 const int ie) { 00458 int i; 00459 const int m = (N-ie) % 4; 00460 00461 for (i = ie; i < ie+m; i++) { 00462 O2SCL_IX(X,i)*=alpha; 00463 } 00464 00465 for (i = ie+m; i + 3 < N; i += 4) { 00466 O2SCL_IX(X,i)*=alpha; 00467 O2SCL_IX(X,i+1)*=alpha; 00468 O2SCL_IX(X,i+2)*=alpha; 00469 O2SCL_IX(X,i+3)*=alpha; 00470 } 00471 } 00472 00473 /** 00474 \brief Compute the squared norm of the vector \c X beginning with 00475 index \c ie and ending with index \c N-1 00476 00477 Used in \ref householder_transform(). 00478 */ 00479 template<class vec_t> 00480 double dnrm2_subvec(const int N, const vec_t &X, const int ie) { 00481 00482 double scale = 0.0; 00483 double ssq = 1.0; 00484 int i; 00485 00486 if (N <= 0) { 00487 return 0; 00488 } else if (N == 1) { 00489 return fabs(O2SCL_IX(X,ie)); 00490 } 00491 00492 for (i = ie; i < N; i++) { 00493 const double x = O2SCL_IX(X,i); 00494 00495 if (x != 0.0) { 00496 const double ax = fabs(x); 00497 00498 if (scale < ax) { 00499 ssq = 1.0 + ssq * (scale / ax) * (scale / ax); 00500 scale = ax; 00501 } else { 00502 ssq += (ax / scale) * (ax / scale); 00503 } 00504 } 00505 00506 } 00507 00508 return scale * sqrt(ssq); 00509 } 00510 00511 /** 00512 \brief Compute the squared norm of the last \c N rows 00513 of a column of a matrix 00514 00515 Given matrix \c M, this computes the norm of the last \c N rows 00516 of the column with index \c ic, beginning with the element with 00517 index \c ir. If the matrix \c M has \c r rows, and \c c columns, 00518 then the parameter \c N should be \c r-ir. 00519 00520 Used in householder_transform_subcol(). 00521 */ 00522 template<class mat_t> 00523 double dnrm2_subcol(const mat_t &M, const size_t ir, const size_t ic, 00524 const size_t N) { 00525 00526 double scale = 0.0; 00527 double ssq = 1.0; 00528 size_t i; 00529 00530 if (N <= 0) { 00531 return 0; 00532 } else if (N == 1) { 00533 return fabs(O2SCL_IX2(M,ir,ic)); 00534 } 00535 00536 size_t tot=ir+N; 00537 for (i = ir; i < tot; i++) { 00538 const double x = O2SCL_IX2(M,i,ic); 00539 00540 if (x != 0.0) { 00541 const double ax = fabs(x); 00542 00543 if (scale < ax) { 00544 ssq = 1.0 + ssq * (scale / ax) * (scale / ax); 00545 scale = ax; 00546 } else { 00547 ssq += (ax / scale) * (ax / scale); 00548 } 00549 } 00550 00551 } 00552 00553 return scale * sqrt(ssq); 00554 } 00555 00556 /** 00557 \brief Compute \f$ x = \alpha x \f$ 00558 00559 Used in householder_transform_subcol(). 00560 */ 00561 template<class mat_t> 00562 void dscal_subcol(mat_t &A, const size_t ir, const size_t ic, 00563 const size_t n, const double alpha) { 00564 00565 size_t tot=ir+n; 00566 size_t i; 00567 const size_t m = (tot-ir) % 4; 00568 00569 for (i = ir; i < m+ir; i++) { 00570 O2SCL_IX2(A,i,ic)*=alpha; 00571 } 00572 00573 for (i = m+ir; i + 3 < tot; i += 4) { 00574 O2SCL_IX2(A,i,ic)*=alpha; 00575 O2SCL_IX2(A,i+1,ic)*=alpha; 00576 O2SCL_IX2(A,i+2,ic)*=alpha; 00577 O2SCL_IX2(A,i+3,ic)*=alpha; 00578 } 00579 00580 } 00581 00582 /** 00583 \brief Compute \f$ x = \alpha x \f$ for \ref householder_hv_sub() 00584 00585 Used in householder_hv_sub(). 00586 00587 \future Implement explicit loop unrolling 00588 */ 00589 template<class mat_t, class vec_t> 00590 void daxpy_hv_sub(const int N, const double alpha, const mat_t &X, 00591 vec_t &Y, const int ie) { 00592 00593 if (alpha == 0.0) { 00594 return; 00595 } 00596 00597 for(int i=ie+1;i<N;i++) { 00598 O2SCL_IX(Y,i)+=alpha*O2SCL_IX2(X,i,ie); 00599 } 00600 00601 /* 00602 const int m = N % 4; 00603 00604 for (i = 0; i < m; i++) { 00605 O2SCL_IX(X,i)*=alpha; 00606 } 00607 00608 for (i = m; i + 3 < N; i += 4) { 00609 O2SCL_IX(X,i)*=alpha; 00610 O2SCL_IX(X,i+1)*=alpha; 00611 O2SCL_IX(X,i+2)*=alpha; 00612 O2SCL_IX(X,i+3)*=alpha; 00613 } 00614 */ 00615 } 00616 00617 /** 00618 \brief Compute \f$ r = x \cdot y \f$ for \ref householder_hv_sub() 00619 00620 Used in householder_hv_sub(). 00621 00622 \future Implement explicit loop unrolling 00623 */ 00624 template<class mat_t, class vec_t> 00625 double ddot_hv_sub(const int N, const mat_t &X, const vec_t &Y, 00626 const int ie) { 00627 double r = 0.0; 00628 int i; 00629 00630 for(i=ie+1;i<N;i++) { 00631 r+=O2SCL_IX2(X,i,ie)*O2SCL_IX(Y,i); 00632 } 00633 return r; 00634 } 00635 00636 //@} 00637 00638 /// Compute \f$ y = \alpha \mathrm{op}(A) x + \beta y \f$. 00639 template<class mat_t> 00640 int dgemm(const enum O2CBLAS_ORDER Order, 00641 const enum O2CBLAS_TRANSPOSE TransA, 00642 const enum O2CBLAS_TRANSPOSE TransB, const int M, const int N, 00643 const int K, const double alpha, const mat_t &A, 00644 const mat_t &B, const double beta, mat_t &C) { 00645 00646 int i, j, k; 00647 int n1, n2; 00648 int ldf, ldg; 00649 int TransF, TransG; 00650 00651 if (alpha == 0.0 && beta == 1.0) { 00652 return 0; 00653 } 00654 00655 /* 00656 This is a little more complicated than the original, which 00657 assigned the matrices A and B to variables *F and *G which then 00658 allowed some code duplication. We can't really do that here, 00659 since we don't have that kind of type info on A and B, so we 00660 just handle the two cases separately. 00661 */ 00662 00663 if (Order == O2cblasRowMajor) { 00664 00665 n1 = M; 00666 n2 = N; 00667 00668 /* form y := beta*y */ 00669 if (beta == 0.0) { 00670 for (i = 0; i < n1; i++) { 00671 for (j = 0; j < n2; j++) { 00672 O2SCL_IX2(C,i,j)=0.0; 00673 } 00674 } 00675 } else if (beta != 1.0) { 00676 for (i = 0; i < n1; i++) { 00677 for (j = 0; j < n2; j++) { 00678 O2SCL_IX2(C,i,j)*=beta; 00679 } 00680 } 00681 } 00682 00683 if (alpha == 0.0) { 00684 return 0; 00685 } 00686 00687 TransF = (TransA == O2cblasConjTrans) ? O2cblasTrans : TransA; 00688 TransG = (TransB == O2cblasConjTrans) ? O2cblasTrans : TransB; 00689 00690 if (TransF == O2cblasNoTrans && TransG == O2cblasNoTrans) { 00691 00692 /* form C := alpha*A*B + C */ 00693 00694 for (k = 0; k < K; k++) { 00695 for (i = 0; i < n1; i++) { 00696 const double temp = alpha * O2SCL_IX2(A,i,k); 00697 if (temp != 0.0) { 00698 for (j = 0; j < n2; j++) { 00699 O2SCL_IX2(C,i,j) += temp * O2SCL_IX2(B,k,j); 00700 } 00701 } 00702 } 00703 } 00704 00705 } else if (TransF == O2cblasNoTrans && TransG == O2cblasTrans) { 00706 00707 /* form C := alpha*A*B' + C */ 00708 00709 for (i = 0; i < n1; i++) { 00710 for (j = 0; j < n2; j++) { 00711 double temp = 0.0; 00712 for (k = 0; k < K; k++) { 00713 temp += O2SCL_IX2(A,i,k) * O2SCL_IX2(B,j,k); 00714 } 00715 O2SCL_IX2(C,i,j) += alpha * temp; 00716 } 00717 } 00718 00719 } else if (TransF == O2cblasTrans && TransG == O2cblasNoTrans) { 00720 00721 for (k = 0; k < K; k++) { 00722 for (i = 0; i < n1; i++) { 00723 const double temp = alpha * O2SCL_IX2(A,k,i); 00724 if (temp != 0.0) { 00725 for (j = 0; j < n2; j++) { 00726 O2SCL_IX2(C,i,j) += temp * O2SCL_IX2(B,k,j); 00727 } 00728 } 00729 } 00730 } 00731 00732 } else if (TransF == O2cblasTrans && TransG == O2cblasTrans) { 00733 00734 for (i = 0; i < n1; i++) { 00735 for (j = 0; j < n2; j++) { 00736 double temp = 0.0; 00737 for (k = 0; k < K; k++) { 00738 temp += O2SCL_IX2(A,k,i) * O2SCL_IX2(B,j,k); 00739 } 00740 O2SCL_IX2(C,i,j) += alpha * temp; 00741 } 00742 } 00743 00744 } else { 00745 O2SCL_ERR_RET("Unrecognized operation in dgemm().",o2scl::gsl_einval); 00746 } 00747 00748 } else { 00749 00750 n1 = N; 00751 n2 = M; 00752 00753 /* form y := beta*y */ 00754 if (beta == 0.0) { 00755 for (i = 0; i < n1; i++) { 00756 for (j = 0; j < n2; j++) { 00757 O2SCL_IX2(C,i,j)=0.0; 00758 } 00759 } 00760 } else if (beta != 1.0) { 00761 for (i = 0; i < n1; i++) { 00762 for (j = 0; j < n2; j++) { 00763 O2SCL_IX2(C,i,j)*=beta; 00764 } 00765 } 00766 } 00767 00768 if (alpha == 0.0) { 00769 return 0; 00770 } 00771 00772 TransF = (TransB == O2cblasConjTrans) ? O2cblasTrans : TransB; 00773 TransG = (TransA == O2cblasConjTrans) ? O2cblasTrans : TransA; 00774 00775 if (TransF == O2cblasNoTrans && TransG == O2cblasNoTrans) { 00776 00777 /* form C := alpha*A*B + C */ 00778 00779 for (k = 0; k < K; k++) { 00780 for (i = 0; i < n1; i++) { 00781 const double temp = alpha * O2SCL_IX2(B,i,k); 00782 if (temp != 0.0) { 00783 for (j = 0; j < n2; j++) { 00784 O2SCL_IX2(C,i,j) += temp * O2SCL_IX2(A,k,j); 00785 } 00786 } 00787 } 00788 } 00789 00790 } else if (TransF == O2cblasNoTrans && TransG == O2cblasTrans) { 00791 00792 /* form C := alpha*A*B' + C */ 00793 00794 for (i = 0; i < n1; i++) { 00795 for (j = 0; j < n2; j++) { 00796 double temp = 0.0; 00797 for (k = 0; k < K; k++) { 00798 temp += O2SCL_IX2(B,i,k) * O2SCL_IX2(A,j,k); 00799 } 00800 O2SCL_IX2(C,i,j) += alpha * temp; 00801 } 00802 } 00803 00804 } else if (TransF == O2cblasTrans && TransG == O2cblasNoTrans) { 00805 00806 for (k = 0; k < K; k++) { 00807 for (i = 0; i < n1; i++) { 00808 const double temp = alpha * O2SCL_IX2(B,k,i); 00809 if (temp != 0.0) { 00810 for (j = 0; j < n2; j++) { 00811 O2SCL_IX2(C,i,j) += temp * O2SCL_IX2(A,k,j); 00812 } 00813 } 00814 } 00815 } 00816 00817 } else if (TransF == O2cblasTrans && TransG == O2cblasTrans) { 00818 00819 for (i = 0; i < n1; i++) { 00820 for (j = 0; j < n2; j++) { 00821 double temp = 0.0; 00822 for (k = 0; k < K; k++) { 00823 temp += O2SCL_IX2(B,k,i) * O2SCL_IX2(A,j,k); 00824 } 00825 O2SCL_IX2(C,i,j) += alpha * temp; 00826 } 00827 } 00828 00829 } else { 00830 O2SCL_ERR_RET("Unrecognized operation in dgemm().",o2scl::gsl_einval); 00831 } 00832 } 00833 00834 return 0; 00835 } 00836 00837 #ifdef DOXYGENP 00838 } 00839 #endif 00840
Documentation generated with Doxygen and provided under the GNU Free Documentation License. See License Information for details.
Project hosting provided by
,
O2scl Sourceforge Project Page