00001 /* 00002 ------------------------------------------------------------------- 00003 00004 Copyright (C) 2006, 2007, 2008, Andrew W. Steiner 00005 00006 This file is part of O2scl. 00007 00008 O2scl is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation; either version 3 of the License, or 00011 (at your option) any later version. 00012 00013 O2scl is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License 00019 along with O2scl. If not, see <http://www.gnu.org/licenses/>. 00020 00021 ------------------------------------------------------------------- 00022 */ 00023 /** \file cblas_base.h 00024 \brief O2scl basic linear algebra function templates 00025 00026 \todo Finish dgemm() 00027 \future Convert to size_t and add float and complex versions 00028 */ 00029 00030 #ifdef DOXYGENP 00031 namespace o2scl_cblas { 00032 #endif 00033 00034 /// Matrix order, either column-major or row-major 00035 enum O2CBLAS_ORDER {O2cblasRowMajor=101, O2cblasColMajor=102}; 00036 00037 /// Transpose operations 00038 enum O2CBLAS_TRANSPOSE {O2cblasNoTrans=111, O2cblasTrans=112, 00039 O2cblasConjTrans=113}; 00040 00041 /// Upper- or lower-triangular 00042 enum O2CBLAS_UPLO {O2cblasUpper=121, O2cblasLower=122}; 00043 00044 /// Unit or generic diagonal 00045 enum O2CBLAS_DIAG {O2cblasNonUnit=131, O2cblasUnit=132}; 00046 00047 /// Left or right sided operation 00048 enum O2CBLAS_SIDE {O2cblasLeft=141, O2cblasRight=142}; 00049 00050 /// \name Standard BLAS functions 00051 //@{ 00052 /// Compute \f$ y= \alpha x + y \f$ 00053 template<class vec_t, class vec2_t> void 00054 daxpy(const int N, const double alpha, const vec_t &X, 00055 vec2_t &Y) { 00056 00057 int i; 00058 00059 if (alpha == 0.0) { 00060 return; 00061 } 00062 00063 const int m = N % 4; 00064 00065 for (i = 0; i < m; i++) { 00066 O2SCL_IX(Y,i) += alpha * O2SCL_IX(X,i); 00067 } 00068 00069 for (i = m; i + 3 < N; i += 4) { 00070 O2SCL_IX(Y,i) += alpha * O2SCL_IX(X,i); 00071 O2SCL_IX(Y,i+1) += alpha * O2SCL_IX(X,i+1); 00072 O2SCL_IX(Y,i+2) += alpha * O2SCL_IX(X,i+2); 00073 O2SCL_IX(Y,i+3) += alpha * O2SCL_IX(X,i+3); 00074 } 00075 } 00076 00077 /// Compute \f$ r = x \cdot y \f$ 00078 template<class vec_t, class vec2_t> double 00079 ddot(const int N, const vec_t &X, const vec2_t &Y) { 00080 double r = 0.0; 00081 int i; 00082 00083 const int m = N % 4; 00084 00085 for (i = 0; i < m; i++) { 00086 r += O2SCL_IX(X,i) * O2SCL_IX(Y,i); 00087 } 00088 00089 for (i = m; i + 3 < N; i += 4) { 00090 r += O2SCL_IX(X,i) * O2SCL_IX(Y,i); 00091 r += O2SCL_IX(X,i+1) * O2SCL_IX(Y,i+1); 00092 r += O2SCL_IX(X,i+2) * O2SCL_IX(Y,i+2); 00093 r += O2SCL_IX(X,i+3) * O2SCL_IX(Y,i+3); 00094 } 00095 00096 return r; 00097 } 00098 00099 /// Compute \f$ x = \alpha x \f$ 00100 template<class vec_t> void 00101 dscal(const int N, const double alpha, vec_t &X) { 00102 int i; 00103 const int m = N % 4; 00104 00105 for (i = 0; i < m; i++) { 00106 O2SCL_IX(X,i)*=alpha; 00107 } 00108 00109 for (i = m; i + 3 < N; i += 4) { 00110 O2SCL_IX(X,i)*=alpha; 00111 O2SCL_IX(X,i+1)*=alpha; 00112 O2SCL_IX(X,i+2)*=alpha; 00113 O2SCL_IX(X,i+3)*=alpha; 00114 } 00115 } 00116 00117 /// Compute the squared norm of the vector \c X 00118 template<class vec_t> double dnrm2(const int N, const vec_t &X) { 00119 00120 double scale = 0.0; 00121 double ssq = 1.0; 00122 int i; 00123 00124 if (N <= 0) { 00125 return 0; 00126 } else if (N == 1) { 00127 return fabs(O2SCL_IX(X,0)); 00128 } 00129 00130 for (i = 0; i < N; i++) { 00131 const double x = O2SCL_IX(X,i); 00132 00133 if (x != 0.0) { 00134 const double ax = fabs(x); 00135 00136 if (scale < ax) { 00137 ssq = 1.0 + ssq * (scale / ax) * (scale / ax); 00138 scale = ax; 00139 } else { 00140 ssq += (ax / scale) * (ax / scale); 00141 } 00142 } 00143 00144 } 00145 00146 return scale * sqrt(ssq); 00147 } 00148 00149 /// Compute \f$ y = \alpha \mathrm{op}(A) x + \beta y \f$. 00150 template<class mat_t, class vec_t> 00151 int dgemv(const enum O2CBLAS_ORDER order, 00152 const enum O2CBLAS_TRANSPOSE TransA, 00153 const int M, const int N, const double alpha, const mat_t &A, 00154 const vec_t &X, const double beta, vec_t &Y) { 00155 00156 int i, j; 00157 int lenX, lenY; 00158 00159 const int Trans = (TransA != O2cblasConjTrans) ? TransA : O2cblasTrans; 00160 00161 if (M == 0 || N == 0) { 00162 return 0; 00163 } 00164 00165 if (alpha == 0.0 && beta == 1.0) { 00166 return 0; 00167 } 00168 00169 if (Trans == O2cblasNoTrans) { 00170 lenX = N; 00171 lenY = M; 00172 } else { 00173 lenX = M; 00174 lenY = N; 00175 } 00176 00177 /* form y := beta*y */ 00178 if (beta == 0.0) { 00179 int iy = 0; 00180 for (i = 0; i < lenY; i++) { 00181 O2SCL_IX(Y,iy) = 0.0; 00182 iy++; 00183 } 00184 } else if (beta != 1.0) { 00185 int iy = 0; 00186 for (i = 0; i < lenY; i++) { 00187 O2SCL_IX(Y,iy) *= beta; 00188 iy++; 00189 } 00190 } 00191 00192 if (alpha == 0.0) { 00193 return 0; 00194 } 00195 00196 if ((order == O2cblasRowMajor && Trans == O2cblasNoTrans) 00197 || (order == O2cblasColMajor && Trans == O2cblasTrans)) { 00198 00199 /* form y := alpha*A*x + y */ 00200 int iy = 0; 00201 for (i = 0; i < lenY; i++) { 00202 double temp = 0.0; 00203 int ix = 0; 00204 for (j = 0; j < lenX; j++) { 00205 temp += O2SCL_IX(X,ix) * O2SCL_IX2(A,i,j); 00206 ix++; 00207 } 00208 O2SCL_IX(Y,iy) += alpha * temp; 00209 iy++; 00210 } 00211 00212 } else if ((order == O2cblasRowMajor && Trans == O2cblasTrans) 00213 || (order == O2cblasColMajor && Trans == O2cblasNoTrans)) { 00214 00215 /* form y := alpha*A'*x + y */ 00216 int ix = 0; 00217 for (j = 0; j < lenX; j++) { 00218 const double temp = alpha * O2SCL_IX(X,ix); 00219 if (temp != 0.0) { 00220 int iy = 0; 00221 for (i = 0; i < lenY; i++) { 00222 O2SCL_IX(Y,iy) += temp * O2SCL_IX2(A,j,i); 00223 iy++; 00224 } 00225 } 00226 ix++; 00227 } 00228 00229 } else { 00230 set_err_ret("Unrecognized operation in dgemv().",o2scl::gsl_einval); 00231 } 00232 return 0; 00233 } 00234 00235 /// Compute \f$ x = \mathrm{op} (A)^{-1} x \f$ 00236 template<class mat_t, class vec_t> 00237 int dtrsv(const enum O2CBLAS_ORDER order, 00238 const enum O2CBLAS_UPLO Uplo, 00239 const enum O2CBLAS_TRANSPOSE TransA, 00240 const enum O2CBLAS_DIAG Diag, 00241 const int M, const int N, const mat_t &A, vec_t &X) { 00242 const int nonunit = (Diag == O2cblasNonUnit); 00243 int ix, jx; 00244 int i, j; 00245 const int Trans = (TransA != O2cblasConjTrans) ? TransA : O2cblasTrans; 00246 00247 if (N == 0) { 00248 return 0; 00249 } 00250 00251 /* form x := inv( A )*x */ 00252 00253 if ((order == O2cblasRowMajor && Trans == O2cblasNoTrans && 00254 Uplo == O2cblasUpper) 00255 || (order == O2cblasColMajor && Trans == O2cblasTrans && 00256 Uplo == O2cblasLower)) { 00257 00258 /* backsubstitution */ 00259 ix = (N - 1); 00260 if (nonunit) { 00261 O2SCL_IX(X,ix) = O2SCL_IX(X,ix) / O2SCL_IX2(A,N-1,N-1); 00262 } 00263 ix--; 00264 for (i = N - 1; i > 0 && i--;) { 00265 double tmp = O2SCL_IX(X,ix); 00266 jx = ix +1; 00267 for (j = i + 1; j < N; j++) { 00268 const double Aij = O2SCL_IX2(A,i,j); 00269 tmp -= Aij * O2SCL_IX(X,jx); 00270 jx++; 00271 } 00272 if (nonunit) { 00273 O2SCL_IX(X,ix) = tmp / O2SCL_IX2(A,i,i); 00274 } else { 00275 O2SCL_IX(X,ix) = tmp; 00276 } 00277 ix--; 00278 } 00279 00280 } else if ((order == O2cblasRowMajor && Trans == O2cblasNoTrans && 00281 Uplo == O2cblasLower) || 00282 (order == O2cblasColMajor && Trans == O2cblasTrans && 00283 Uplo == O2cblasUpper)) { 00284 00285 /* forward substitution */ 00286 ix = 0; 00287 if (nonunit) { 00288 O2SCL_IX(X,ix) = O2SCL_IX(X,ix) / O2SCL_IX2(A,0,0); 00289 } 00290 ix++; 00291 for (i = 1; i < N; i++) { 00292 double tmp = O2SCL_IX(X,ix); 00293 jx = 0; 00294 for (j = 0; j < i; j++) { 00295 const double Aij = O2SCL_IX2(A,i,j); 00296 tmp -= Aij * O2SCL_IX(X,jx); 00297 jx++; 00298 } 00299 if (nonunit) { 00300 O2SCL_IX(X,ix) = tmp / O2SCL_IX2(A,i,i); 00301 } else { 00302 O2SCL_IX(X,ix) = tmp; 00303 } 00304 ix++; 00305 } 00306 00307 } else if ((order == O2cblasRowMajor && Trans == O2cblasTrans && 00308 Uplo == O2cblasUpper) 00309 || (order == O2cblasColMajor && Trans == O2cblasNoTrans && 00310 Uplo == O2cblasLower)) { 00311 00312 /* form x := inv( A' )*x */ 00313 00314 /* forward substitution */ 00315 ix = 0; 00316 if (nonunit) { 00317 O2SCL_IX(X,ix) = O2SCL_IX(X,ix) / O2SCL_IX2(A,0,0); 00318 } 00319 ix++; 00320 for (i = 1; i < N; i++) { 00321 double tmp = O2SCL_IX(X,ix); 00322 jx = 0; 00323 for (j = 0; j < i; j++) { 00324 const double Aji = O2SCL_IX2(A,j,i); 00325 tmp -= Aji * O2SCL_IX(X,jx); 00326 jx++; 00327 } 00328 if (nonunit) { 00329 O2SCL_IX(X,ix) = tmp / O2SCL_IX2(A,i,i); 00330 } else { 00331 O2SCL_IX(X,ix) = tmp; 00332 } 00333 ix++; 00334 } 00335 00336 } else if ((order == O2cblasRowMajor && Trans == O2cblasTrans && 00337 Uplo == O2cblasLower) 00338 || (order == O2cblasColMajor && Trans == O2cblasNoTrans && 00339 Uplo == O2cblasUpper)) { 00340 00341 /* backsubstitution */ 00342 ix = (N - 1); 00343 if (nonunit) { 00344 O2SCL_IX(X,ix) = O2SCL_IX(X,ix) / O2SCL_IX2(A,N-1,N-1); 00345 } 00346 ix--; 00347 for (i = N - 1; i > 0 && i--;) { 00348 double tmp = O2SCL_IX(X,ix); 00349 jx = ix +1; 00350 for (j = i + 1; j < N; j++) { 00351 const double Aji = O2SCL_IX2(A,j,i); 00352 tmp -= Aji * O2SCL_IX(X,jx); 00353 jx++; 00354 } 00355 if (nonunit) { 00356 O2SCL_IX(X,ix) = tmp / O2SCL_IX2(A,i,i); 00357 } else { 00358 O2SCL_IX(X,ix) = tmp; 00359 } 00360 ix--; 00361 } 00362 00363 } else { 00364 set_err_ret("Unrecognized operation in dtrsv().",o2scl::gsl_einval); 00365 } 00366 return 0; 00367 } 00368 00369 //@} 00370 00371 /// \name Helper BLAS functions 00372 //@{ 00373 /** 00374 \brief Compute \f$ x = \alpha x \f$ beginning with index \c ie and 00375 ending with index \c N-1 00376 00377 Used in \ref householder_hv(). 00378 */ 00379 template<class vec_t, class vec2_t> 00380 void daxpy_subvec(const int N, const double alpha, const vec_t &X, 00381 vec2_t &Y, const int ie) { 00382 00383 int i; 00384 00385 if (alpha == 0.0) { 00386 return; 00387 } 00388 00389 const int m = (N-ie) % 4; 00390 00391 for (i = ie; i < ie+m; i++) { 00392 O2SCL_IX(Y,i) += alpha * O2SCL_IX(X,i); 00393 } 00394 00395 for (i = ie+m; i + 3 < N; i += 4) { 00396 O2SCL_IX(Y,i) += alpha * O2SCL_IX(X,i); 00397 O2SCL_IX(Y,i+1) += alpha * O2SCL_IX(X,i+1); 00398 O2SCL_IX(Y,i+2) += alpha * O2SCL_IX(X,i+2); 00399 O2SCL_IX(Y,i+3) += alpha * O2SCL_IX(X,i+3); 00400 } 00401 } 00402 00403 /** 00404 \brief Compute \f$ r = x \cdot y \f$ beginning with index \c ie and 00405 ending with index \c N-1 00406 00407 Used in \ref householder_hv(). 00408 */ 00409 template<class vec_t, class vec2_t> 00410 double ddot_subvec(const int N, const vec_t &X, const vec2_t &Y, 00411 const int ie) { 00412 double r = 0.0; 00413 int i; 00414 00415 const int m = (N-ie) % 4; 00416 00417 for (i = ie; i < ie+m; i++) { 00418 r += O2SCL_IX(X,i) * O2SCL_IX(Y,i); 00419 } 00420 00421 for (i = ie+m; i + 3 < N; i += 4) { 00422 r += O2SCL_IX(X,i) * O2SCL_IX(Y,i); 00423 r += O2SCL_IX(X,i+1) * O2SCL_IX(Y,i+1); 00424 r += O2SCL_IX(X,i+2) * O2SCL_IX(Y,i+2); 00425 r += O2SCL_IX(X,i+3) * O2SCL_IX(Y,i+3); 00426 } 00427 00428 return r; 00429 } 00430 00431 /** 00432 \brief Compute \f$ x = \alpha x \f$ beginning with index \c ie and 00433 ending with index \c N-1 00434 00435 Used in \ref householder_transform(). 00436 */ 00437 template<class vec_t> 00438 void dscal_subvec(const int N, const double alpha, vec_t &X, 00439 const int ie) { 00440 int i; 00441 const int m = (N-ie) % 4; 00442 00443 for (i = ie; i < ie+m; i++) { 00444 O2SCL_IX(X,i)*=alpha; 00445 } 00446 00447 for (i = ie+m; i + 3 < N; i += 4) { 00448 O2SCL_IX(X,i)*=alpha; 00449 O2SCL_IX(X,i+1)*=alpha; 00450 O2SCL_IX(X,i+2)*=alpha; 00451 O2SCL_IX(X,i+3)*=alpha; 00452 } 00453 } 00454 00455 /** 00456 \brief Compute the squared norm of the vector \c X beginning with 00457 index \c ie and ending with index \c N-1 00458 00459 Used in \ref householder_transform(). 00460 */ 00461 template<class vec_t> 00462 double dnrm2_subvec(const int N, const vec_t &X, const int ie) { 00463 00464 double scale = 0.0; 00465 double ssq = 1.0; 00466 int i; 00467 00468 if (N <= 0) { 00469 return 0; 00470 } else if (N == 1) { 00471 return fabs(O2SCL_IX(X,ie)); 00472 } 00473 00474 for (i = ie; i < N; i++) { 00475 const double x = O2SCL_IX(X,i); 00476 00477 if (x != 0.0) { 00478 const double ax = fabs(x); 00479 00480 if (scale < ax) { 00481 ssq = 1.0 + ssq * (scale / ax) * (scale / ax); 00482 scale = ax; 00483 } else { 00484 ssq += (ax / scale) * (ax / scale); 00485 } 00486 } 00487 00488 } 00489 00490 return scale * sqrt(ssq); 00491 } 00492 00493 /** 00494 \brief Compute the squared norm of the last \c N rows 00495 of a column of a matrix 00496 00497 Given matrix \c M, this computes the norm of the last \c N rows 00498 of the column with index \c ic, beginning with the element with 00499 index \c ir. If the matrix \c M has \c r rows, and \c c columns, 00500 then the parameter \c N should be \c r-ir. 00501 00502 Used in householder_transform_subcol(). 00503 */ 00504 template<class mat_t> 00505 double dnrm2_subcol(const mat_t &M, const size_t ir, const size_t ic, 00506 const size_t N) { 00507 00508 double scale = 0.0; 00509 double ssq = 1.0; 00510 size_t i; 00511 00512 if (N <= 0) { 00513 return 0; 00514 } else if (N == 1) { 00515 return fabs(O2SCL_IX2(M,ir,ic)); 00516 } 00517 00518 for (i = 0; i < N; i++) { 00519 const double x = O2SCL_IX2(M,ir+i,ic); 00520 00521 if (x != 0.0) { 00522 const double ax = fabs(x); 00523 00524 if (scale < ax) { 00525 ssq = 1.0 + ssq * (scale / ax) * (scale / ax); 00526 scale = ax; 00527 } else { 00528 ssq += (ax / scale) * (ax / scale); 00529 } 00530 } 00531 00532 } 00533 00534 return scale * sqrt(ssq); 00535 } 00536 00537 /** 00538 \brief Compute \f$ x = \alpha x \f$ 00539 00540 Used in householder_transform_subcol(). 00541 00542 \future Implement explicit loop unrolling 00543 */ 00544 template<class mat_t> 00545 void dscal_subcol(mat_t &A, const size_t ir, const size_t ic, 00546 const size_t n, const double alpha) { 00547 for (size_t i = 0; i < n; i++) { 00548 O2SCL_IX2(A,ir+i,ic)*=alpha; 00549 } 00550 } 00551 00552 /** 00553 \brief Compute \f$ x = \alpha x \f$ for \ref householder_hv_sub() 00554 00555 Used in householder_hv_sub(). 00556 00557 \future Implement explicit loop unrolling 00558 */ 00559 template<class mat_t, class vec_t> 00560 void daxpy_hv_sub(const int N, const double alpha, const mat_t &X, 00561 vec_t &Y, const int ie) { 00562 00563 if (alpha == 0.0) { 00564 return; 00565 } 00566 00567 for(int i=ie+1;i<N;i++) { 00568 O2SCL_IX(Y,i)+=alpha*O2SCL_IX2(X,i,ie); 00569 } 00570 } 00571 00572 /** 00573 \brief Compute \f$ r = x \cdot y \f$ for \ref householder_hv_sub() 00574 00575 Used in householder_hv_sub(). 00576 00577 \future Implement explicit loop unrolling 00578 */ 00579 template<class mat_t, class vec_t> 00580 double ddot_hv_sub(const int N, const mat_t &X, const vec_t &Y, 00581 const int ie) { 00582 double r = 0.0; 00583 int i; 00584 00585 for(i=ie+1;i<N;i++) { 00586 r+=O2SCL_IX2(X,i,ie)*O2SCL_IX(Y,i); 00587 } 00588 return r; 00589 } 00590 00591 //@} 00592 00593 /// Compute \f$ y = \alpha \mathrm{op}(A) x + \beta y \f$. 00594 template<class mat_t, class vec_t> 00595 int dgemm(const enum O2CBLAS_ORDER Order, 00596 const enum O2CBLAS_TRANSPOSE TransA, 00597 const enum O2CBLAS_TRANSPOSE TransB, const int M, const int N, 00598 const int K, const double alpha, const mat_t &A, 00599 const mat_t &B, const double beta, mat_t &C) { 00600 00601 int i, j, k; 00602 int n1, n2; 00603 int ldf, ldg; 00604 int TransF, TransG; 00605 00606 if (alpha == 0.0 && beta == 1.0) { 00607 return 0; 00608 } 00609 00610 /* 00611 This is a little more complicated than the original, which 00612 assigned the matrices A and B to variables *F and *G which then 00613 allowed some code duplication. We can't really do that here, 00614 since we don't have that kind of type info on A and B, so we 00615 just handle the two cases separately. 00616 */ 00617 00618 if (Order == O2cblasRowMajor) { 00619 00620 n1 = M; 00621 n2 = N; 00622 00623 /* form y := beta*y */ 00624 if (beta == 0.0) { 00625 for (i = 0; i < n1; i++) { 00626 for (j = 0; j < n2; j++) { 00627 O2SCL_IX2(C,i,j)=0.0; 00628 } 00629 } 00630 } else if (beta != 1.0) { 00631 for (i = 0; i < n1; i++) { 00632 for (j = 0; j < n2; j++) { 00633 O2SCL_IX2(C,i,j)*=beta; 00634 } 00635 } 00636 } 00637 00638 if (alpha == 0.0) { 00639 return 0; 00640 } 00641 00642 TransF = (TransA == O2cblasConjTrans) ? O2cblasTrans : TransA; 00643 TransG = (TransB == O2cblasConjTrans) ? O2cblasTrans : TransB; 00644 00645 if (TransF == O2cblasNoTrans && TransG == O2cblasNoTrans) { 00646 00647 /* form C := alpha*A*B + C */ 00648 00649 for (k = 0; k < K; k++) { 00650 for (i = 0; i < n1; i++) { 00651 const double temp = alpha * O2SCL_IX2(A,i,k); 00652 if (temp != 0.0) { 00653 for (j = 0; j < n2; j++) { 00654 O2SCL_IX2(C,i,j) += temp * O2SCL_IX2(B,k,j); 00655 } 00656 } 00657 } 00658 } 00659 00660 } else if (TransF == O2cblasNoTrans && TransG == O2cblasTrans) { 00661 00662 /* form C := alpha*A*B' + C */ 00663 00664 for (i = 0; i < n1; i++) { 00665 for (j = 0; j < n2; j++) { 00666 double temp = 0.0; 00667 for (k = 0; k < K; k++) { 00668 temp += O2SCL_IX2(A,i,k) * O2SCL_IX2(B,j,k); 00669 } 00670 O2SCL_IX2(C,i,j) += alpha * temp; 00671 } 00672 } 00673 00674 } else if (TransF == O2cblasTrans && TransG == O2cblasNoTrans) { 00675 00676 for (k = 0; k < K; k++) { 00677 for (i = 0; i < n1; i++) { 00678 const double temp = alpha * O2SCL_IX2(A,k,i); 00679 if (temp != 0.0) { 00680 for (j = 0; j < n2; j++) { 00681 O2SCL_IX2(C,i,j) += temp * O2SCL_IX2(B,k,j); 00682 } 00683 } 00684 } 00685 } 00686 00687 } else if (TransF == O2cblasTrans && TransG == O2cblasTrans) { 00688 00689 for (i = 0; i < n1; i++) { 00690 for (j = 0; j < n2; j++) { 00691 double temp = 0.0; 00692 for (k = 0; k < K; k++) { 00693 temp += O2SCL_IX2(A,k,i) + O2SCL_IX2(B,j,k); 00694 } 00695 O2SCL_IX2(C,i,j) += alpha * temp; 00696 } 00697 } 00698 00699 } else { 00700 set_err_ret("Unrecognized operation in dgemm().",o2scl::gsl_einval); 00701 } 00702 00703 } else { 00704 00705 n1 = N; 00706 n2 = M; 00707 00708 /* form y := beta*y */ 00709 if (beta == 0.0) { 00710 for (i = 0; i < n1; i++) { 00711 for (j = 0; j < n2; j++) { 00712 O2SCL_IX2(C,i,j)=0.0; 00713 } 00714 } 00715 } else if (beta != 1.0) { 00716 for (i = 0; i < n1; i++) { 00717 for (j = 0; j < n2; j++) { 00718 O2SCL_IX2(C,i,j)*=beta; 00719 } 00720 } 00721 } 00722 00723 if (alpha == 0.0) { 00724 return 0; 00725 } 00726 00727 TransF = (TransB == O2cblasConjTrans) ? O2cblasTrans : TransB; 00728 TransG = (TransA == O2cblasConjTrans) ? O2cblasTrans : TransA; 00729 00730 if (TransF == O2cblasNoTrans && TransG == O2cblasNoTrans) { 00731 00732 /* form C := alpha*A*B + C */ 00733 00734 for (k = 0; k < K; k++) { 00735 for (i = 0; i < n1; i++) { 00736 const double temp = alpha * O2SCL_IX2(B,i,k); 00737 if (temp != 0.0) { 00738 for (j = 0; j < n2; j++) { 00739 O2SCL_IX2(C,i,j) += temp * O2SCL_IX2(A,k,j); 00740 } 00741 } 00742 } 00743 } 00744 00745 } else if (TransF == O2cblasNoTrans && TransG == O2cblasTrans) { 00746 00747 /* form C := alpha*A*B' + C */ 00748 00749 for (i = 0; i < n1; i++) { 00750 for (j = 0; j < n2; j++) { 00751 double temp = 0.0; 00752 for (k = 0; k < K; k++) { 00753 temp += O2SCL_IX2(B,i,k) * O2SCL_IX2(A,j,k); 00754 } 00755 O2SCL_IX2(C,i,j) += alpha * temp; 00756 } 00757 } 00758 00759 } else if (TransF == O2cblasTrans && TransG == O2cblasNoTrans) { 00760 00761 for (k = 0; k < K; k++) { 00762 for (i = 0; i < n1; i++) { 00763 const double temp = alpha * O2SCL_IX2(B,k,i); 00764 if (temp != 0.0) { 00765 for (j = 0; j < n2; j++) { 00766 O2SCL_IX2(C,i,j) += temp * O2SCL_IX2(A,k,j); 00767 } 00768 } 00769 } 00770 } 00771 00772 } else if (TransF == O2cblasTrans && TransG == O2cblasTrans) { 00773 00774 for (i = 0; i < n1; i++) { 00775 for (j = 0; j < n2; j++) { 00776 double temp = 0.0; 00777 for (k = 0; k < K; k++) { 00778 temp += O2SCL_IX2(B,k,i) * O2SCL_IX2(A,j,k); 00779 } 00780 O2SCL_IX2(C,i,j) += alpha * temp; 00781 } 00782 } 00783 00784 } else { 00785 set_err_ret("Unrecognized operation in dgemm().",o2scl::gsl_einval); 00786 } 00787 } 00788 00789 return 0; 00790 } 00791 00792 #ifdef DOXYGENP 00793 } 00794 #endif 00795
Documentation generated with Doxygen and provided under the GNU Free Documentation License. See License Information for details.
Project hosting provided by
,
O2scl Sourceforge Project Page