Actual source code: dsgsvd.c

slepc-3.18.2 2023-01-26
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: #include <slepc/private/dsimpl.h>
 11: #include <slepcblaslapack.h>

 13: typedef struct {
 14:   PetscInt m;              /* number of columns */
 15:   PetscInt p;              /* number of rows of B */
 16:   PetscInt tm;             /* number of rows of X after truncating */
 17:   PetscInt tp;             /* number of rows of V after truncating */
 18: } DS_GSVD;

 20: PetscErrorCode DSAllocate_GSVD(DS ds,PetscInt ld)
 21: {
 22:   DSAllocateMat_Private(ds,DS_MAT_A);
 23:   DSAllocateMat_Private(ds,DS_MAT_B);
 24:   DSAllocateMat_Private(ds,DS_MAT_X);
 25:   DSAllocateMat_Private(ds,DS_MAT_U);
 26:   DSAllocateMat_Private(ds,DS_MAT_V);
 27:   DSAllocateMat_Private(ds,DS_MAT_T);
 28:   DSAllocateMat_Private(ds,DS_MAT_D);
 29:   PetscFree(ds->perm);
 30:   PetscMalloc1(ld,&ds->perm);
 31:   return 0;
 32: }

 34: /*
 35:   In compact form, A is either in form (a) or (b):

 37:                      (a)                                            (b)
 38:     lower bidiagonal with upper arrow (n=m+1)         square upper bidiagonal with upper arrow (n=m)
 39:      0       l           k                 m-1
 40:     -----------------------------------------         0     l           k                   m-1
 41:     |*                   .                  |        -----------------------------------------
 42:     |  *                 .                  |        |*                 .                    |
 43:     |    *               .                  |        |  *               .                    |
 44:     |      *             .                  |        |    *             .                    |
 45:   l |. . . . o           o                  |      l |. . . o           o                    |
 46:     |          o         o                  |        |        o         o                    |
 47:     |            o       o                  |        |          o       o                    |
 48:     |              o     o                  |        |            o     o                    |
 49:     |                o   o                  |        |              o   o                    |
 50:     |                  o o                  |        |                o o                    |
 51:   k |. . . . . . . . . . o                  |      k |. . . . . . . . . o x                  |
 52:     |                    x x                |        |                    x x                |
 53:     |                      x x              |        |                      x x              |
 54:     |                        x x            |        |                        x x            |
 55:     |                          x x          |        |                          x x          |
 56:     |                            x x        |        |                            x x        |
 57:     |                              x x      |        |                              x x      |
 58:     |                                x x    |        |                                x x    |
 59:     |                                  x x  |        |                                  x x  |
 60:     |                                    x x|        |                                    x x|
 61: n-1 |                                      x|    n-1 |                                      x|
 62:     -----------------------------------------        -----------------------------------------

 64:   and B is square bidiagonal with upper arrow (p=m)

 66:      0       l           k                 m-1
 67:     -----------------------------------------
 68:     |*                   .                  |
 69:     |  *                 .                  |
 70:     |    *               .                  |
 71:     |      *             .                  |
 72:   l |. . . . o           o                  |
 73:     |          o         o                  |
 74:     |            o       o                  |
 75:     |              o     o                  |
 76:     |                o   o                  |
 77:     |                  o o                  |
 78:   k |. . . . . . . . . . o x                |
 79:     |                      x x              |
 80:     |                        x x            |
 81:     |                          x x          |
 82:     |                            x x        |
 83:     |                              x x      |
 84:     |                                x x    |
 85:     |                                  x x  |
 86:     |                                    x x|
 87: p-1 |                                      x|
 88:      ----------------------------------------
 89: */
 90: PetscErrorCode DSView_GSVD(DS ds,PetscViewer viewer)
 91: {
 92:   DS_GSVD           *ctx = (DS_GSVD*)ds->data;
 93:   PetscViewerFormat format;
 94:   PetscInt          i,j,r,k=ds->k,n=ds->n,m=ctx->m,p=ctx->p,rowsa,rowsb,colsa,colsb;
 95:   PetscReal         *T,*S,value;

 97:   PetscViewerGetFormat(viewer,&format);
 98:   if (format == PETSC_VIEWER_ASCII_INFO) return 0;
 99:   if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
100:     PetscViewerASCIIPrintf(viewer,"number of columns: %" PetscInt_FMT "\n",m);
101:     PetscViewerASCIIPrintf(viewer,"number of rows of B: %" PetscInt_FMT "\n",p);
102:     return 0;
103:   }
105:   if (ds->compact) {
106:     DSGetArrayReal(ds,DS_MAT_T,&T);
107:     DSGetArrayReal(ds,DS_MAT_D,&S);
108:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
109:     rowsa = n;
110:     colsa = ds->extrarow? m+1: m;
111:     rowsb = p;
112:     colsb = ds->extrarow? m+1: m;
113:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
114:       PetscViewerASCIIPrintf(viewer,"%% Size = %" PetscInt_FMT " %" PetscInt_FMT "\n",rowsa,colsa);
115:       PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",3);\n",2*ds->n);
116:       PetscViewerASCIIPrintf(viewer,"zzz = [\n");
117:       for (i=0;i<PetscMin(rowsa,colsa);i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",i+1,i+1,(double)T[i]);
118:       for (i=0;i<k;i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",i+1,k+1,(double)T[i+ds->ld]);
119:       if (n>m) { /* A lower bidiagonal */
120:         for (i=k;i<rowsa-1;i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",i+2,i+1,(double)T[i+ds->ld]);
121:       } else { /* A (square) upper bidiagonal */
122:         for (i=k;i<colsa-1;i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",i+1,i+2,(double)T[i+ds->ld]);
123:       }
124:       PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]);
125:       PetscViewerASCIIPrintf(viewer,"%% Size = %" PetscInt_FMT " %" PetscInt_FMT "\n",rowsb,colsb);
126:       PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",3);\n",2*ds->n);
127:       PetscViewerASCIIPrintf(viewer,"zzz = [\n");
128:       for (i=0;i<rowsb;i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",i+1,i+1,(double)S[i]);
129:       for (i=0;i<colsb-1;i++) {
130:         r = PetscMax(i+2,ds->k+1);
131:         PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",i+1,r,(double)T[i+2*ds->ld]);
132:       }
133:       PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_D]);
134:     } else {
135:       PetscViewerASCIIPrintf(viewer,"Matrix %s =\n",DSMatName[DS_MAT_T]);
136:       for (i=0;i<rowsa;i++) {
137:         for (j=0;j<colsa;j++) {
138:           if (i==j) value = T[i];
139:           else if (i<ds->k && j==ds->k) value = T[i+ds->ld];
140:           else if (n>m && i==j+1 && i>ds->k) value = T[j+ds->ld];
141:           else if (n<=m && i+1==j && i>=ds->k) value = T[i+ds->ld];
142:           else value = 0.0;
143:           PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value);
144:         }
145:         PetscViewerASCIIPrintf(viewer,"\n");
146:       }
147:       PetscViewerASCIIPrintf(viewer,"Matrix %s =\n",DSMatName[DS_MAT_D]);
148:       for (i=0;i<rowsb;i++) {
149:         for (j=0;j<colsb;j++) {
150:           if (i==j) value = S[i];
151:           else if (i<ds->k && j==ds->k) value = T[PetscMin(i,j)+2*ds->ld];
152:           else if (i+1==j && i>=ds->k) value = T[i+2*ds->ld];
153:           else value = 0.0;
154:           PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value);
155:         }
156:         PetscViewerASCIIPrintf(viewer,"\n");
157:       }
158:     }
159:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
160:     PetscViewerFlush(viewer);
161:     DSRestoreArrayReal(ds,DS_MAT_T,&T);
162:     DSRestoreArrayReal(ds,DS_MAT_D,&S);
163:   } else {
164:     DSViewMat(ds,viewer,DS_MAT_A);
165:     DSViewMat(ds,viewer,DS_MAT_B);
166:   }
167:   if (ds->state>DS_STATE_INTERMEDIATE) {
168:     DSViewMat(ds,viewer,DS_MAT_X);
169:     DSViewMat(ds,viewer,DS_MAT_U);
170:     DSViewMat(ds,viewer,DS_MAT_V);
171:   }
172:   return 0;
173: }

175: PetscErrorCode DSVectors_GSVD(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
176: {
177:   switch (mat) {
178:     case DS_MAT_U:
179:     case DS_MAT_V:
180:       if (rnorm) *rnorm = 0.0;
181:       break;
182:     case DS_MAT_X:
183:       break;
184:     default:
185:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
186:   }
187:   return 0;
188: }

190: PetscErrorCode DSSort_GSVD(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
191: {
192:   DS_GSVD        *ctx = (DS_GSVD*)ds->data;
193:   PetscInt       t,l,ld=ds->ld,i,*perm,*perm2;
194:   PetscReal      *T=NULL,*D=NULL,*eig;
195:   PetscScalar    *A=NULL,*B=NULL;
196:   PetscBool      compact=ds->compact;

198:   if (!ds->sc) return 0;
200:   l = ds->l;
201:   t = ds->t;
202:   perm = ds->perm;
203:   PetscMalloc2(t,&eig,t,&perm2);
204:   if (compact) {
205:     DSGetArrayReal(ds,DS_MAT_T,&T);
206:     DSGetArrayReal(ds,DS_MAT_D,&D);
207:     for (i=0;i<t;i++) eig[i] = (D[i]==0)?PETSC_INFINITY:T[i]/D[i];
208:   } else {
209:     MatDenseGetArray(ds->omat[DS_MAT_A],&A);
210:     MatDenseGetArray(ds->omat[DS_MAT_B],&B);
211:     for (i=0;i<t;i++) eig[i] = (B[i+i*ld]==0)?PETSC_INFINITY:PetscRealPart(A[i+i*ld])/PetscRealPart(B[i*(1+ld)]);
212:   }
213:   DSSortEigenvaluesReal_Private(ds,eig,perm);
214:   PetscArraycpy(perm2,perm,t);
215:   for (i=l;i<t;i++) wr[i] = eig[perm[i]];
216:   if (compact) {
217:     PetscArraycpy(eig,T,t);
218:     for (i=l;i<t;i++) T[i] = eig[perm[i]];
219:     PetscArraycpy(eig,D,t);
220:     for (i=l;i<t;i++) D[i] = eig[perm[i]];
221:     DSRestoreArrayReal(ds,DS_MAT_T,&T);
222:     DSRestoreArrayReal(ds,DS_MAT_D,&D);
223:   } else {
224:     for (i=l;i<t;i++) eig[i] = PetscRealPart(A[i*(1+ld)]);
225:     for (i=l;i<t;i++) A[i*(1+ld)] = eig[perm[i]];
226:     for (i=l;i<t;i++) eig[i] = PetscRealPart(B[i*(1+ld)]);
227:     for (i=l;i<t;i++) B[i*(1+ld)] = eig[perm[i]];
228:     MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
229:     MatDenseRestoreArray(ds->omat[DS_MAT_B],&B);
230:   }
231:   DSPermuteColumns_Private(ds,l,t,ds->n,DS_MAT_U,perm2);
232:   PetscArraycpy(perm2,perm,t);
233:   DSPermuteColumns_Private(ds,l,t,ctx->m,DS_MAT_X,perm2);
234:   DSPermuteColumns_Private(ds,l,t,ctx->p,DS_MAT_V,perm);
235:   PetscFree2(eig,perm2);
236:   return 0;
237: }

239: PetscErrorCode DSUpdateExtraRow_GSVD(DS ds)
240: {
241:   DS_GSVD           *ctx = (DS_GSVD*)ds->data;
242:   PetscInt          i;
243:   PetscBLASInt      n=0,m=0,ld=0;
244:   const PetscScalar *U,*V;
245:   PetscReal         *T,*e,*f,alpha,beta,betah;

249:   PetscBLASIntCast(ds->n,&n);
250:   PetscBLASIntCast(ctx->m,&m);
251:   PetscBLASIntCast(ds->ld,&ld);
252:   DSGetArrayReal(ds,DS_MAT_T,&T);
253:   e = T+ld;
254:   f = T+2*ld;
255:   MatDenseGetArrayRead(ds->omat[DS_MAT_U],&U);
256:   MatDenseGetArrayRead(ds->omat[DS_MAT_V],&V);
257:   if (n<=m) {   /* upper variant, A is square upper bidiagonal */
258:     beta  = e[m-1];   /* in compact, we assume all entries are zero except the last one */
259:     betah = f[m-1];
260:     for (i=0;i<m;i++) {
261:       e[i] = PetscRealPart(beta*U[m-1+i*ld]);
262:       f[i] = PetscRealPart(betah*V[m-1+i*ld]);
263:     }
264:   } else {   /* lower variant, A is (m+1)xm lower bidiagonal */
265:     alpha = T[m];
266:     betah = f[m-1];
267:     for (i=0;i<m;i++) {
268:       e[i] = PetscRealPart(alpha*U[m+i*ld]);
269:       f[i] = PetscRealPart(betah*V[m-1+i*ld]);
270:     }
271:     T[m] = PetscRealPart(alpha*U[m+m*ld]);
272:   }
273:   ds->k = m;
274:   MatDenseRestoreArrayRead(ds->omat[DS_MAT_U],&U);
275:   MatDenseRestoreArrayRead(ds->omat[DS_MAT_V],&V);
276:   DSRestoreArrayReal(ds,DS_MAT_T,&T);
277:   return 0;
278: }

280: PetscErrorCode DSTruncate_GSVD(DS ds,PetscInt n,PetscBool trim)
281: {
282:   DS_GSVD     *ctx = (DS_GSVD*)ds->data;
283:   PetscScalar *U;
284:   PetscReal   *T;
285:   PetscInt    i,m=ctx->m,ld=ds->ld;
286:   PetscBool   lower=(ds->n>ctx->m)?PETSC_TRUE:PETSC_FALSE;

289:   if (trim) {
290:     ds->l   = 0;
291:     ds->k   = 0;
292:     ds->n   = lower? n+1: n;
293:     ctx->m  = n;
294:     ctx->p  = n;
295:     ds->t   = ds->n;   /* truncated length equal to the new dimension */
296:     ctx->tm = ctx->m;  /* must also keep the previous dimension of X */
297:     ctx->tp = ctx->p;  /* must also keep the previous dimension of V */
298:   } else {
299:     if (lower) {
300:       /* move value of diagonal element of arrow (alpha) */
301:       DSGetArrayReal(ds,DS_MAT_T,&T);
302:       T[n] = T[m];
303:       DSRestoreArrayReal(ds,DS_MAT_T,&T);
304:       /* copy last column of U so that it updates the next initial vector of U1 */
305:       MatDenseGetArray(ds->omat[DS_MAT_U],&U);
306:       for (i=0;i<=m;i++) U[i+n*ld] = U[i+m*ld];
307:       MatDenseRestoreArray(ds->omat[DS_MAT_U],&U);
308:     }
309:     ds->k   = (ds->extrarow)? n: 0;
310:     ds->t   = ds->n;   /* truncated length equal to previous dimension */
311:     ctx->tm = ctx->m;  /* must also keep the previous dimension of X */
312:     ctx->tp = ctx->p;  /* must also keep the previous dimension of V */
313:     ds->n   = lower? n+1: n;
314:     ctx->m  = n;
315:     ctx->p  = n;
316:   }
317:   return 0;
318: }

320: static PetscErrorCode DSSwitchFormat_GSVD(DS ds)
321: {
322:   DS_GSVD        *ctx = (DS_GSVD*)ds->data;
323:   PetscReal      *T,*D;
324:   PetscScalar    *A,*B;
325:   PetscInt       i,n=ds->n,k=ds->k,ld=ds->ld,m=ctx->m;

328:   /* switch from compact (arrow) to dense storage */
329:   /* bidiagonal associated to B is stored in D and T+2*ld */
330:   MatDenseGetArrayWrite(ds->omat[DS_MAT_A],&A);
331:   MatDenseGetArrayWrite(ds->omat[DS_MAT_B],&B);
332:   DSGetArrayReal(ds,DS_MAT_T,&T);
333:   DSGetArrayReal(ds,DS_MAT_D,&D);
334:   PetscArrayzero(A,ld*ld);
335:   PetscArrayzero(B,ld*ld);
336:   for (i=0;i<k;i++) {
337:     A[i+i*ld] = T[i];
338:     A[i+k*ld] = T[i+ld];
339:     B[i+i*ld] = D[i];
340:     B[i+k*ld] = T[i+2*ld];
341:   }
342:   /* B is upper bidiagonal */
343:   B[k+k*ld] = D[k];
344:   for (i=k+1;i<m;i++) {
345:     B[i+i*ld]   = D[i];
346:     B[i-1+i*ld] = T[i-1+2*ld];
347:   }
348:   /* A can be upper (square) or lower bidiagonal */
349:   for (i=k;i<m;i++) A[i+i*ld] = T[i];
350:   if (n>m) for (i=k;i<m;i++) A[i+1+i*ld] = T[i+ld];
351:   else for (i=k+1;i<m;i++) A[i-1+i*ld] = T[i-1+ld];
352:   MatDenseRestoreArrayWrite(ds->omat[DS_MAT_A],&A);
353:   MatDenseRestoreArrayWrite(ds->omat[DS_MAT_B],&B);
354:   DSRestoreArrayReal(ds,DS_MAT_T,&T);
355:   DSRestoreArrayReal(ds,DS_MAT_D,&D);
356:   return 0;
357: }

359: /*
360:   Compact format is used when [A;B] has orthonormal columns.
361:   In this case R=I and the GSVD of (A,B) is the CS decomposition
362: */
363: PetscErrorCode DSSolve_GSVD(DS ds,PetscScalar *wr,PetscScalar *wi)
364: {
365:   DS_GSVD        *ctx = (DS_GSVD*)ds->data;
366:   PetscInt       i,j;
367:   PetscBLASInt   n1,m1,info,lc = 0,n = 0,m = 0,p = 0,p1,l,k,q,ld,off,lwork,r;
368:   PetscScalar    *A,*B,*X,*U,*V,sone=1.0,smone=-1.0;
369:   PetscReal      *alpha,*beta,*T,*D;
370: #if !defined(SLEPC_MISSING_LAPACK_GGSVD3)
371:   PetscScalar    a,dummy;
372:   PetscReal      rdummy;
373:   PetscBLASInt   idummy;
374: #endif

377:   PetscBLASIntCast(ds->n,&m);
378:   PetscBLASIntCast(ctx->m,&n);
379:   PetscBLASIntCast(ctx->p,&p);
380:   PetscBLASIntCast(ds->l,&lc);
382:   /* In compact storage B is always nxn and A can be either nxn or (n+1)xn */
384:   PetscBLASIntCast(ds->ld,&ld);
385:   n1 = n-lc;     /* n1 = size of leading block, excl. locked + size of trailing block */
386:   m1 = m-lc;
387:   p1 = p-lc;
388:   off = lc+lc*ld;
389:   MatDenseGetArray(ds->omat[DS_MAT_A],&A);
390:   MatDenseGetArray(ds->omat[DS_MAT_B],&B);
391:   MatDenseGetArray(ds->omat[DS_MAT_X],&X);
392:   MatDenseGetArray(ds->omat[DS_MAT_U],&U);
393:   MatDenseGetArray(ds->omat[DS_MAT_V],&V);
394:   PetscArrayzero(X,ld*ld);
395:   for (i=0;i<lc;i++) X[i+i*ld] = 1.0;
396:   PetscArrayzero(U,ld*ld);
397:   for (i=0;i<lc;i++) U[i+i*ld] = 1.0;
398:   PetscArrayzero(V,ld*ld);
399:   for (i=0;i<lc;i++) V[i+i*ld] = 1.0;
400:   if (ds->compact) DSSwitchFormat_GSVD(ds);

402: #if !defined(SLEPC_MISSING_LAPACK_GGSVD3)
403:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
404:   /* workspace query and memory allocation */
405:   lwork = -1;
406: #if !defined (PETSC_USE_COMPLEX)
407:   PetscCallBLAS("LAPACKggsvd3",LAPACKggsvd3_("U","V","Q",&m1,&n1,&p1,&k,&l,&dummy,&ld,&dummy,&ld,&rdummy,&rdummy,&dummy,&ld,&dummy,&ld,&dummy,&ld,&a,&lwork,&idummy,&info));
408:   PetscBLASIntCast((PetscInt)a,&lwork);
409: #else
410:   PetscCallBLAS("LAPACKggsvd3",LAPACKggsvd3_("U","V","Q",&m1,&n1,&p1,&k,&l,&dummy,&ld,&dummy,&ld,&rdummy,&rdummy,&dummy,&ld,&dummy,&ld,&dummy,&ld,&a,&lwork,&rdummy,&idummy,&info));
411:   PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
412: #endif

414: #if !defined (PETSC_USE_COMPLEX)
415:   DSAllocateWork_Private(ds,lwork,2*ds->ld,ds->ld);
416:   alpha = ds->rwork;
417:   beta  = ds->rwork+ds->ld;
418:   PetscCallBLAS("LAPACKggsvd3",LAPACKggsvd3_("U","V","Q",&m1,&n1,&p1,&k,&l,A+off,&ld,B+off,&ld,alpha,beta,U+off,&ld,V+off,&ld,X+off,&ld,ds->work,&lwork,ds->iwork,&info));
419: #else
420:   DSAllocateWork_Private(ds,lwork,4*ds->ld,ds->ld);
421:   alpha = ds->rwork+2*ds->ld;
422:   beta  = ds->rwork+3*ds->ld;
423:   PetscCallBLAS("LAPACKggsvd3",LAPACKggsvd3_("U","V","Q",&m1,&n1,&p1,&k,&l,A+off,&ld,B+off,&ld,alpha,beta,U+off,&ld,V+off,&ld,X+off,&ld,ds->work,&lwork,ds->rwork,ds->iwork,&info));
424: #endif
425:   PetscFPTrapPop();
426:   SlepcCheckLapackInfo("ggsvd3",info);

428: #else  /* defined(SLEPC_MISSING_LAPACK_GGSVD3) */

430:   lwork = PetscMax(PetscMax(3*n,m),p)+n;
431:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
432: #if !defined (PETSC_USE_COMPLEX)
433:   DSAllocateWork_Private(ds,lwork,2*ds->ld,ds->ld);
434:   alpha = ds->rwork;
435:   beta  = ds->rwork+ds->ld;
436:   PetscCallBLAS("LAPACKggsvd",LAPACKggsvd_("U","V","Q",&m1,&n1,&p1,&k,&l,A+off,&ld,B+off,&ld,alpha,beta,U+off,&ld,V+off,&ld,X+off,&ld,ds->work,ds->iwork,&info));
437: #else
438:   DSAllocateWork_Private(ds,lwork,4*ds->ld,ds->ld);
439:   alpha = ds->rwork+2*ds->ld;
440:   beta  = ds->rwork+3*ds->ld;
441:   PetscCallBLAS("LAPACKggsvd",LAPACKggsvd_("U","V","Q",&m1,&n1,&p1,&k,&l,A+off,&ld,B+off,&ld,alpha,beta,U+off,&ld,V+off,&ld,X+off,&ld,ds->work,ds->rwork,ds->iwork,&info));
442: #endif
443:   PetscFPTrapPop();
444:   SlepcCheckLapackInfo("ggsvd",info);

446: #endif

449:   if (ds->compact) {
450:     DSGetArrayReal(ds,DS_MAT_T,&T);
451:     DSGetArrayReal(ds,DS_MAT_D,&D);
452:     /* R is the identity matrix (except the sign) */
453:     for (i=lc;i<n;i++) {
454:       if (PetscRealPart(A[i+i*ld])<0.0) { /* scale column i */
455:         for (j=lc;j<n;j++) X[j+i*ld] = -X[j+i*ld];
456:       }
457:     }
458:     PetscArrayzero(T+ld,m-1);
459:     PetscArrayzero(T+2*ld,n-1);
460:     for (i=lc;i<n;i++) {
461:       T[i] = alpha[i-lc];
462:       D[i] = beta[i-lc];
463:       if (D[i]==0.0) wr[i] = PETSC_INFINITY;
464:       else wr[i] = T[i]/D[i];
465:     }
466:     ds->t = n;
467:     DSRestoreArrayReal(ds,DS_MAT_D,&D);
468:     DSRestoreArrayReal(ds,DS_MAT_T,&T);
469:   } else {
470:     /* X = X*inv(R) */
471:     q = PetscMin(m,n);
472:     PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n,&q,&sone,A,&ld,X,&ld));
473:     if (m<n) {
474:       r = n-m;
475:       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&r,&m,&sone,X,&ld,A,&ld,&smone,X+m*ld,&ld));
476:       PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n,&r,&sone,B+m*ld,&ld,X+m*ld,&ld));
477:     }
478:     if (k>0) {
479:       for (i=k;i<PetscMin(m,k+l);i++) {
480:         PetscArraycpy(X+(i-k)*ld,X+i*ld,ld);
481:         PetscArraycpy(U+(i-k)*ld,U+i*ld,ld);
482:       }
483:     }
484:     /* singular values */
485:     PetscArrayzero(A,ld*ld);
486:     PetscArrayzero(B,ld*ld);
487:     for (j=k;j<PetscMin(m,k+l);j++) {
488:       A[(j-k)*(1+ld)] = alpha[j];
489:       B[(j-k)*(1+ld)] = beta[j];
490:       wr[j-k] = alpha[j]/beta[j];
491:     }
492:     ds->t = PetscMin(m,k+l)-k; /* set number of computed values */
493:   }
494:   MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
495:   MatDenseRestoreArray(ds->omat[DS_MAT_B],&B);
496:   MatDenseRestoreArray(ds->omat[DS_MAT_X],&X);
497:   MatDenseRestoreArray(ds->omat[DS_MAT_U],&U);
498:   MatDenseRestoreArray(ds->omat[DS_MAT_V],&V);
499:   return 0;
500: }

502: PetscErrorCode DSCond_GSVD(DS ds,PetscReal *cond)
503: {
504:   DS_GSVD           *ctx = (DS_GSVD*)ds->data;
505:   PetscBLASInt      lwork,lrwork=0,info,m,n,p,ld;
506:   PetscScalar       *A,*work;
507:   const PetscScalar *M;
508:   PetscReal         *sigma,conda,condb;
509: #if defined(PETSC_USE_COMPLEX)
510:   PetscReal         *rwork;
511: #endif

513:   PetscBLASIntCast(ds->n,&m);
514:   PetscBLASIntCast(ctx->m,&n);
515:   PetscBLASIntCast(ctx->p,&p);
516:   PetscBLASIntCast(ds->ld,&ld);
517:   lwork = 5*n;
518: #if defined(PETSC_USE_COMPLEX)
519:   lrwork = 5*n;
520: #endif
521:   DSAllocateWork_Private(ds,ld*n+lwork,n+lrwork,0);
522:   A     = ds->work;
523:   work  = ds->work+ld*n;
524:   sigma = ds->rwork;
525: #if defined(PETSC_USE_COMPLEX)
526:   rwork = ds->rwork+n;
527: #endif
528:   if (ds->compact) DSSwitchFormat_GSVD(ds);

530:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
531:   MatDenseGetArrayRead(ds->omat[DS_MAT_A],&M);
532:   PetscArraycpy(A,M,ld*n);
533:   MatDenseRestoreArrayRead(ds->omat[DS_MAT_A],&M);
534: #if defined(PETSC_USE_COMPLEX)
535:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&m,&n,A,&ld,sigma,NULL,&ld,NULL,&ld,work,&lwork,rwork,&info));
536: #else
537:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&m,&n,A,&ld,sigma,NULL,&ld,NULL,&ld,work,&lwork,&info));
538: #endif
539:   SlepcCheckLapackInfo("gesvd",info);
540:   conda = sigma[0]/sigma[PetscMin(m,n)-1];

542:   MatDenseGetArrayRead(ds->omat[DS_MAT_B],&M);
543:   PetscArraycpy(A,M,ld*n);
544:   MatDenseRestoreArrayRead(ds->omat[DS_MAT_B],&M);
545: #if defined(PETSC_USE_COMPLEX)
546:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&p,&n,A,&ld,sigma,NULL,&ld,NULL,&ld,work,&lwork,rwork,&info));
547: #else
548:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&p,&n,A,&ld,sigma,NULL,&ld,NULL,&ld,work,&lwork,&info));
549: #endif
550:   SlepcCheckLapackInfo("gesvd",info);
551:   condb = sigma[0]/sigma[PetscMin(p,n)-1];
552:   PetscFPTrapPop();

554:   *cond = PetscMax(conda,condb);
555:   return 0;
556: }

558: #if !defined(PETSC_HAVE_MPIUNI)
559: PetscErrorCode DSSynchronize_GSVD(DS ds,PetscScalar eigr[],PetscScalar eigi[])
560: {
561:   DS_GSVD        *ctx = (DS_GSVD*)ds->data;
562:   PetscInt       ld=ds->ld,l=ds->l,k=0,kr=0;
563:   PetscMPIInt    m=ctx->m,rank,off=0,size,n,ldn,ld3;
564:   PetscScalar    *A,*U,*V,*X;
565:   PetscReal      *T;

567:   if (ds->compact) kr = 3*ld;
568:   else k = 2*(m-l)*ld;
569:   if (ds->state>DS_STATE_RAW) k += 3*(m-l)*ld;
570:   if (eigr) k += m-l;
571:   DSAllocateWork_Private(ds,k+kr,0,0);
572:   PetscMPIIntCast(k*sizeof(PetscScalar)+kr*sizeof(PetscReal),&size);
573:   PetscMPIIntCast(m-l,&n);
574:   PetscMPIIntCast(ld*(m-l),&ldn);
575:   PetscMPIIntCast(3*ld,&ld3);
576:   if (ds->compact) DSGetArrayReal(ds,DS_MAT_T,&T);
577:   else MatDenseGetArray(ds->omat[DS_MAT_A],&A);
578:   if (ds->state>DS_STATE_RAW) {
579:     MatDenseGetArray(ds->omat[DS_MAT_U],&U);
580:     MatDenseGetArray(ds->omat[DS_MAT_V],&V);
581:     MatDenseGetArray(ds->omat[DS_MAT_X],&X);
582:   }
583:   MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
584:   if (!rank) {
585:     if (ds->compact) MPI_Pack(T,ld3,MPIU_REAL,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
586:     else MPI_Pack(A+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
587:     if (ds->state>DS_STATE_RAW) {
588:       MPI_Pack(U+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
589:       MPI_Pack(V+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
590:       MPI_Pack(X+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
591:     }
592:     if (eigr) MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
593:   }
594:   MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
595:   if (rank) {
596:     if (ds->compact) MPI_Unpack(ds->work,size,&off,T,ld3,MPIU_REAL,PetscObjectComm((PetscObject)ds));
597:     else MPI_Unpack(ds->work,size,&off,A+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
598:     if (ds->state>DS_STATE_RAW) {
599:       MPI_Unpack(ds->work,size,&off,U+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
600:       MPI_Unpack(ds->work,size,&off,V+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
601:       MPI_Unpack(ds->work,size,&off,X+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
602:     }
603:     if (eigr) MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
604:   }
605:   if (ds->compact) DSRestoreArrayReal(ds,DS_MAT_T,&T);
606:   else MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
607:   if (ds->state>DS_STATE_RAW) {
608:     MatDenseRestoreArray(ds->omat[DS_MAT_U],&U);
609:     MatDenseRestoreArray(ds->omat[DS_MAT_V],&V);
610:     MatDenseRestoreArray(ds->omat[DS_MAT_X],&X);
611:   }
612:   return 0;
613: }
614: #endif

616: PetscErrorCode DSMatGetSize_GSVD(DS ds,DSMatType t,PetscInt *rows,PetscInt *cols)
617: {
618:   DS_GSVD *ctx = (DS_GSVD*)ds->data;

621:   switch (t) {
622:     case DS_MAT_A:
623:       *rows = ds->n;
624:       *cols = ds->extrarow? ctx->m+1: ctx->m;
625:       break;
626:     case DS_MAT_B:
627:       *rows = ctx->p;
628:       *cols = ds->extrarow? ctx->m+1: ctx->m;
629:       break;
630:     case DS_MAT_T:
631:       *rows = ds->n;
632:       *cols = PetscDefined(USE_COMPLEX)? 2: 3;
633:       break;
634:     case DS_MAT_D:
635:       *rows = ctx->p;
636:       *cols = 1;
637:       break;
638:     case DS_MAT_U:
639:       *rows = ds->state==DS_STATE_TRUNCATED? ds->t: ds->n;
640:       *cols = ds->n;
641:       break;
642:     case DS_MAT_V:
643:       *rows = ds->state==DS_STATE_TRUNCATED? ctx->tp: ctx->p;
644:       *cols = ctx->p;
645:       break;
646:     case DS_MAT_X:
647:       *rows = ds->state==DS_STATE_TRUNCATED? ctx->tm: ctx->m;
648:       *cols = ctx->m;
649:       break;
650:     default:
651:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid t parameter");
652:   }
653:   return 0;
654: }

656: static PetscErrorCode DSGSVDSetDimensions_GSVD(DS ds,PetscInt m,PetscInt p)
657: {
658:   DS_GSVD *ctx = (DS_GSVD*)ds->data;

660:   DSCheckAlloc(ds,1);
661:   if (m==PETSC_DECIDE || m==PETSC_DEFAULT) {
662:     ctx->m = ds->ld;
663:   } else {
665:     ctx->m = m;
666:   }
667:   if (p==PETSC_DECIDE || p==PETSC_DEFAULT) {
668:     ctx->p = ds->n;
669:   } else {
671:     ctx->p = p;
672:   }
673:   return 0;
674: }

676: /*@
677:    DSGSVDSetDimensions - Sets the number of columns and rows for a DSGSVD.

679:    Logically Collective on ds

681:    Input Parameters:
682: +  ds - the direct solver context
683: .  m  - the number of columns
684: -  p  - the number of rows for the second matrix (B)

686:    Notes:
687:    This call is complementary to DSSetDimensions(), to provide two dimensions
688:    that are specific to this DS type. The number of rows for the first matrix (A)
689:    is set by DSSetDimensions().

691:    Level: intermediate

693: .seealso: DSGSVDGetDimensions(), DSSetDimensions()
694: @*/
695: PetscErrorCode DSGSVDSetDimensions(DS ds,PetscInt m,PetscInt p)
696: {
700:   PetscTryMethod(ds,"DSGSVDSetDimensions_C",(DS,PetscInt,PetscInt),(ds,m,p));
701:   return 0;
702: }

704: static PetscErrorCode DSGSVDGetDimensions_GSVD(DS ds,PetscInt *m,PetscInt *p)
705: {
706:   DS_GSVD *ctx = (DS_GSVD*)ds->data;

708:   if (m) *m = ctx->m;
709:   if (p) *p = ctx->p;
710:   return 0;
711: }

713: /*@
714:    DSGSVDGetDimensions - Returns the number of columns and rows for a DSGSVD.

716:    Not collective

718:    Input Parameter:
719: .  ds - the direct solver context

721:    Output Parameters:
722: +  m - the number of columns
723: -  p - the number of rows for the second matrix (B)

725:    Level: intermediate

727: .seealso: DSGSVDSetDimensions()
728: @*/
729: PetscErrorCode DSGSVDGetDimensions(DS ds,PetscInt *m,PetscInt *p)
730: {
732:   PetscUseMethod(ds,"DSGSVDGetDimensions_C",(DS,PetscInt*,PetscInt*),(ds,m,p));
733:   return 0;
734: }

736: PetscErrorCode DSDestroy_GSVD(DS ds)
737: {
738:   PetscFree(ds->data);
739:   PetscObjectComposeFunction((PetscObject)ds,"DSGSVDSetDimensions_C",NULL);
740:   PetscObjectComposeFunction((PetscObject)ds,"DSGSVDGetDimensions_C",NULL);
741:   return 0;
742: }

744: /*MC
745:    DSGSVD - Dense Generalized Singular Value Decomposition.

747:    Level: beginner

749:    Notes:
750:    The problem is expressed as A*X = U*C, B*X = V*S, where A and B are
751:    matrices with the same number of columns, m, U and V are orthogonal
752:    (unitary), and X is an mxm invertible matrix. The DS object does not
753:    expose matrices C and S, instead the singular values sigma, which are
754:    the ratios c_i/s_i, are returned in the arguments of DSSolve().
755:    Note that the number of columns of the returned X, U, V may be smaller
756:    in the case that some c_i or s_i are zero.

758:    The number of rows of A (and U) is the value n passed with DSSetDimensions().
759:    The number of columns m and the number of rows of B (and V) must be
760:    set via DSGSVDSetDimensions().

762:    Internally, LAPACK's representation is used, U'*A*Q = C*[0 R], V'*B*Q = S*[0 R],
763:    where X = Q*inv(R) is computed at the end of DSSolve().

765:    If the compact storage format is selected, then a simplified problem is
766:    solved, where A and B are bidiagonal (possibly with an arrow), and [A;B]
767:    is assumed to have orthonormal columns. We consider two cases: (1) A and B
768:    are square mxm upper bidiagonal, and (2) A is lower bidiagonal with m+1
769:    rows and B is square upper bidiagonal. In these cases, R=I so it
770:    corresponds to the CS decomposition. The first matrix is stored in two
771:    diagonals of DS_MAT_T, while the second matrix is stored in DS_MAT_D
772:    and the remaining diagonal of DS_MAT_T.

774:    Allowed arguments of DSVectors() are DS_MAT_U, DS_MAT_V and DS_MAT_X.

776:    Used DS matrices:
777: +  DS_MAT_A - first problem matrix
778: .  DS_MAT_B - second problem matrix
779: .  DS_MAT_T - first upper bidiagonal matrix (if compact storage is selected)
780: -  DS_MAT_D - second upper bidiagonal matrix (if compact storage is selected)

782:    Implemented methods:
783: .  0 - Lapack (_ggsvd3 if available, or _ggsvd)

785: .seealso: DSCreate(), DSSetType(), DSType, DSGSVDSetDimensions()
786: M*/
787: SLEPC_EXTERN PetscErrorCode DSCreate_GSVD(DS ds)
788: {
789:   DS_GSVD        *ctx;

791:   PetscNew(&ctx);
792:   ds->data = (void*)ctx;

794:   ds->ops->allocate      = DSAllocate_GSVD;
795:   ds->ops->view          = DSView_GSVD;
796:   ds->ops->vectors       = DSVectors_GSVD;
797:   ds->ops->sort          = DSSort_GSVD;
798:   ds->ops->solve[0]      = DSSolve_GSVD;
799: #if !defined(PETSC_HAVE_MPIUNI)
800:   ds->ops->synchronize   = DSSynchronize_GSVD;
801: #endif
802:   ds->ops->truncate      = DSTruncate_GSVD;
803:   ds->ops->update        = DSUpdateExtraRow_GSVD;
804:   ds->ops->cond          = DSCond_GSVD;
805:   ds->ops->matgetsize    = DSMatGetSize_GSVD;
806:   ds->ops->destroy       = DSDestroy_GSVD;
807:   PetscObjectComposeFunction((PetscObject)ds,"DSGSVDSetDimensions_C",DSGSVDSetDimensions_GSVD);
808:   PetscObjectComposeFunction((PetscObject)ds,"DSGSVDGetDimensions_C",DSGSVDGetDimensions_GSVD);
809:   return 0;
810: }