可帮助理解的一些文章:
梯度,雅克比矩阵和海森矩阵
海森矩阵和牛顿法
如何通俗易懂地讲解牛顿迭代法?
最优化方法:牛顿迭代法和拟牛顿迭代法
经典举例:
#include <stdio.h> #include <math.h> #include <stdlib.h> #define MAXN 7 typedef struct { //n*m矩阵 int m,n; double data[MAXN][MAXN]; }mat; mat CreateMatrix(int p,int q); double arry2matrix(mat *a,mat *b,mat *r,double *x,double *y,double *beta,int m,int n); mat trans(mat a); mat mul(mat a,mat b); double *SolveMatrix(mat c,mat d); void arryans(double *beta,mat c,mat d); int main(){ double x[]={0.038, 0.194, 0.425, 0.626, 1.253, 2.500, 3.740}; double y[]={0.050, 0.127, 0.094, 0.2122, 0.2729, 0.2665, 0.3317}; mat r = {0,0,0}; mat a = {0,0,0}; mat b = {0,0,0}; mat c = {0,0,0}; mat d = {0,0,0}; double beta[2] = {0.9,0.2}; double rss = 0; int T; for(T=0;T<10;++T){ rss = arry2matrix(&a,&b,&r,x,y,beta,7,2); printf("%d %f %f %f\n",T,beta[0],beta[1],rss); c = mul(b,a); d = mul(b,r); arryans(beta,c,d); } system("pause"); } // Input x[],y[],m,n // Output a,b,r[] double arry2matrix(mat *a,mat *b,mat *r,double *x,double *y,double *beta,int m,int n){ int i; double rss = 0; (*a).m = m; // 雅克比矩阵a (*a).n = n; (*b).m = n; // a的转置矩阵b (*b).n = m; (*r).m = m; (*r).n = 1; for(i=0;i<m;++i){ (*a).data[i][0] = -x[i]/(beta[1]+x[i]); (*a).data[i][1] = beta[0]*x[i]/(beta[1]+x[i])/(beta[1]+x[i]); (*b).data[0][i] = (*a).data[i][0]; (*b).data[1][i] = (*a).data[i][1]; (*r).data[i][0] = y[i]-beta[0]*x[i]/(beta[1]+x[i]); rss += (*r).data[i][0]*(*r).data[i][0]; } return rss; } /******************************************************************** *Fuction: *Input: a,b *Output: c ********************************************************************/ mat mul(mat a,mat b){ int i,j,k; mat c = {0,0,0}; c.m = a.m; c.n = b.n; for(i=0;i<c.m;++i){ for(j=0;j<c.n;++j){ c.data[i][j] = 0; for(k=0;k<a.n;k++){ c.data[i][j] += a.data[i][k]*b.data[k][j]; } } } return c; } /******************************************************************** *Fuction: *Input: c,d,beta *Output: new_beta ********************************************************************/ void arryans(double *beta,mat c,mat d){ //解一个n阶的线性方程组 int i, j, k, mi; double mx, tmp; double beta_tmp[2]; beta_tmp[0] = beta[0]; beta_tmp[1] = beta[1]; for (i = 0; i < c.m-1; i++) { mi = i; mx = fabs(c.data[i][i]); for (j = i + 1; j < c.m; j++) //找主元素 if (fabs(c.data[j][i]) > mx) { mi = j; //记录矩阵下三角一列中绝对值最大值的行号 mx = fabs(c.data[j][i]); //记录矩阵下三角一列中绝对值最大值 } if (i < mi) { //交换两行 tmp = d.data[i][0]; d.data[i][0] = d.data[mi][0]; d.data[mi][0] = tmp; for (j = i; j < c.m; j++) { tmp = c.data[i][j]; c.data[i][j] = c.data[mi][j]; c.data[mi][j] = tmp; } } //---高斯消元---// for (j = i + 1; j < c.m; j++) { //第i行,第i+1、i+2、…、n-1、n列 tmp = -c.data[j][i] / c.data[i][i]; //第j行除以第i行 d.data[j][0] += d.data[i][0] * tmp; for (k = i; k < c.m; k++) //将第i行的tmp倍加到第j行 c.data[j][k] += c.data[i][k] * tmp; } } beta[c.m - 1] = d.data[c.m - 1][0] / c.data[c.m - 1][c.m - 1]; //求解方程 for (i = c.m - 2; i >= 0; i--) { beta[i] = d.data[i][0]; for (j = i + 1; j < c.m; j++) beta[i] -= c.data[i][j] * beta[j]; beta[i] /= c.data[i][i]; } beta[0] = beta_tmp[0]-beta[0]; beta[1] = beta_tmp[1]-beta[1]; } #include <cstdio> #include <cstring> #include <cmath> #include <iostream> using namespace std; int n; double x[10],y[10],beta[10]; struct matrix{ double a[10][10],tmp[10][10]; int n,m; void clear(){ // 清零 memset(a,0,sizeof(a)); memset(tmp,0,sizeof(tmp)); } void cpy(matrix &b){ // 复制矩阵 n=b.n; m=b.m; for (int i=1;i<=n;i++) for (int j=1;j<=m;j++) a[i][j]=b.a[i][j]; } void mul(matrix &b){ for (int i=0;i<=n;i++) for (int j=0;j<=b.m;j++) tmp[i][j]=0; for (int i=0;i<=n;i++) for (int k=0;k<=m;k++) if (a[i][k]){ for (int j=0;j<=b.m;j++) tmp[i][j]+=a[i][k]*b.a[k][j]; a[i][k]=0; } for (int i=0;i<=n;i++) for (int j=0;j<=b.m;j++) a[i][j]=tmp[i][j]; m=b.m; } void getinv(){ for (int i=1;i<=n;i++) for (int j=1;j<=n;j++) a[i][n+j]=0; for (int i=1;i<=n;i++) a[i][n+i]=1; for (int i=1;i<=n;i++){ int po;double maxi=0; for (int j=i;j<=n;j++){ if (fabs(a[j][i])>maxi){ maxi=fabs(a[j][i]);po=j; } } for (int j=1;j<=2*n;j++){ double t=a[i][j];a[i][j]=a[po][j];a[po][j]=t; } if (fabs(maxi)==0) continue; for (int j=i+1;j<=n;j++){ double tim=a[j][i]/a[i][i]; for (int k=i;k<=2*n;k++) a[j][k]-=a[i][k]*tim; } } for (int i=n;i>=1;i--){ for (int j=i+1;j<=n;j++){ for (int k=n+1;k<=2*n;k++) a[i][k]-=a[i][j]*a[j][k]; a[i][j]=0; } for (int j=n+1;j<=2*n;j++) a[i][j]/=a[i][i]; a[i][i]=1; } for (int i=1;i<=n;i++) for (int j=1;j<=n;j++) a[i][j]=a[i][j+n]; for (int i=1;i<=n;i++) for (int j=1;j<=n;j++) a[i][j+n]=0; } void trav(){ for (int i=1;i<=m;i++) for (int j=1;j<=n;j++) tmp[i][j]=a[j][i],a[j][i]=0; for (int i=1;i<=m;i++) for (int j=1;j<=n;j++) a[i][j]=tmp[i][j]; swap(n,m); } }a,b,c,d; int main(){ n = 7; double x[]={0,0.038, 0.194, 0.425, 0.626, 1.253, 2.500, 3.740}; double y[]={0,0.050, 0.127, 0.094, 0.2122, 0.2729, 0.2665, 0.3317}; double r = 0; double rss = 0; beta[1]=0.9;beta[2]=0.2; for (int i=1;i<=n;i++){ r = y[i]-beta[1]*x[i]/(beta[2]+x[i]); rss += r*r; } printf("0 %f %f %f\n",beta[1],beta[2],rss); for (int T=1;T<=9;T++){ a.clear();b.clear();c.clear();d.clear(); a.n=n;a.m=2; for (int i=1;i<=n;i++){ a.a[i][1]=-x[i]/(beta[2]+x[i]); a.a[i][2]=beta[1]*x[i]/(beta[2]+x[i])/(beta[2]+x[i]); } b.cpy(a);c.cpy(a); // 残差矩阵 d.n=n;d.m=1; for (int i=1;i<=n;i++){ d.a[i][1]=y[i]-beta[1]*x[i]/(beta[2]+x[i]); } a.trav(); // a转置 a.mul(b); // a.getinv(); c.trav(); // a.mul(c); a.mul(d); // beta[1]=beta[1]-a.a[1][1];beta[2]=beta[2]-a.a[2][1]; double rss=0; for (int i=1;i<=n;i++) rss+=(y[i]-beta[1]*x[i]/(beta[2]+x[i]))*(y[i]-beta[1]*x[i]/(beta[2]+x[i])); printf("%d %f %f %f\n",T,beta[1],beta[2],rss); } system("pause"); }运行结果与C语言运行结果一致。
可参考博客:
高斯-牛顿迭代
详解高斯牛顿迭代法原理和代码
