% Neural Network - fully connected network trained with backpropagation on a 1-D regression target

clear
% ---- Hyper-parameters ----------------------------------------------------
lr=0.0000000034; % learning rate (multiplies the gradient summed over all samples)
%lr=0.00000115; % learning rate

mom=1; % momentum: factor applied to the previous update step
iDim=1; % input dimension

%y=100+sin(2.*pi/12.*x);

% ---- Activation functions ------------------------------------------------
% Each activation is paired with its derivative, both evaluated element-wise
% on the pre-activation value.
% Logistic sigmoid 1/(1+e^(-x)): increasing, 0.5 at x=0, range (0,1).
% (It was previously written with exp(x), i.e. the mirrored function, for
% which s.*(1-s) is the derivative with the wrong sign.)
sigmoid=@(x) 1./(1+exp(-x));
dsigmoid=@(x) sigmoid(x).*(1-sigmoid(x));
relu=@(x) max(x,0);
drelu=@(x) x>0; % logical mask: 1 where the unit is active, 0 elsewhere
% Lookup table: row 1 = sigmoid, row 2 = ReLU; column 1 = function, column 2 = derivative
activF={sigmoid dsigmoid;relu drelu}; % cell of potential activation functions


epochmax=20000; % number of training epochs per network
N=25; % number of neurons in each hidden layer
% Train one network per depth L and plot its fit and error curve.
% Relies on lr, mom, iDim, activF, epochmax and N being defined beforehand.
for L=1:5 % number of hidden layers (each one N neurons wide)
    TN=L; % test number, selects the subplot column in figure 1
    bestEpoch=1;
    %dimV=[iDim N 1 ]; %dimension of each layer, from input to output
    dimV=cat(2,iDim,ones(1,L)*N,1);
    % Build one [inputs outputs] row per weight matrix from consecutive
    % entries of dimV.
    dimPerLay=repelem(dimV,2) ;dimPerLay([1,end])=[];
    dimPerLay=reshape(dimPerLay,2,[])';
    nLay=size(dimPerLay,1); % number of weight layers
    xDiam=50; %span of x coordinates
    % Row of activF used to produce each layer's output; entry 1 belongs to
    % the input layer and is never read. The table is all ReLU (row 2):
    % the former lookup activF{activFidx,1} indexed with the whole vector,
    % which always resolved to its first entry, so every layer already ran
    % ReLU. That behaviour is kept because the targets lie near 100, outside
    % the (0,1) range of a sigmoid output.
    activFidx=ones(1,size(dimV,2))*2;

    % Start every run from a clean state so nothing from the previous,
    % differently sized network can leak into this one.
    w=cell(1,nLay); dw=cell(1,nLay);
    b=cell(1,nLay); db=cell(1,nLay);
    phi=cell(1,nLay); delta=cell(1,nLay);
    yp=cell(1,nLay+1);
    E=zeros(1,epochmax);

    numSample=1000;
    x=(rand(numSample,1)-0.5).*xDiam;
    %x=linspace(-0.5*xDiam,0.5*xDiam,numSample)';
    for i=1:nLay
        w{i}=(rand(dimPerLay(i,:))-0.5)*1; % uniform in [-0.5,0.5)
        dw{i}=0; % previous weight step (momentum memory)
        b{i}=(rand(1,dimV(i+1))-0.5)*xDiam; % biases spread over the x span
        %b{i}=linspace(0,50,dimV(i+1));
        db{i}=0; % previous bias step (momentum memory)
    end
    yp{1}=x; % layer-1 "output" is the input itself
    y=100+cos(pi./12.*x)+sin(pi./5.*x); % regression target

    for epoch=1:epochmax
        for i=1:nLay % forward propagate
            v=yp{i}*w{i}+b{i}; % bias row is expanded across all samples
            act=activF{activFidx(i+1),1};
            dact=activF{activFidx(i+1),2};
            %yp{i+1}=relu(v); %yp{i+1}=I;
            yp{i+1}=act(v); %yp{i+1}=I;
            %phi{i}=drelu(v);
            phi{i}=dact(v); % activation derivative, reused in back-propagation
        end
        e=y-yp{end};
        bSize=size(e,1);
        E(epoch)=sum(0.5*(e).^2)/bSize; % square error

        % Remember the best epoch BEFORE touching the parameters, so the
        % stored weights/biases are the ones that produced bestyp.
        if E(epoch)<=E(bestEpoch)
            bestw=w; bestb=b; bestyp=yp; bestEpoch=epoch;
        end

        % BP
        %delta{size(dimPerLay,1)}=sum(E)/bSize;
        % The output delta is the raw error; phi of the last layer is not
        % applied here.
        delta{nLay}=e;
        for i=nLay-1:-1:1
            delta{i}=phi{i}.*(delta{i+1}*w{i+1}');
        end
        % weight update
        for i=nLay:-1:1
            %d=mom*dw{i}+lr.*yp{i}'*delta{i};
            gd=yp{i}'*delta{i}; % gradient summed over the batch
            % Keep the momentum term only for weights whose previous step
            % and current gradient do not have opposite signs.
            dp=(dw{i}.*gd)>=0;% dot product
            %dp(dp==0)=-0.5;
            %         if epoch>2 & E(epoch)>E(epoch-1)
            %             dp=-ones(size(dp));
            %         end
            d=mom*dw{i}.*(dp)+lr.*gd;
            w{i}=w{i}+d;
            dw{i}=d;
            %gdb=sum(delta{i});
            %dp=(db{i}.*gdb)>=0;% dot product
            %dp(dp==0)=0.2;
            %d=mom*db{i}.*dp+sum(lr.*delta{i});
            d=mom*db{i}+sum(lr.*delta{i}); % bias step: plain momentum, no sign gating
            b{i}=b{i}+d;
            db{i}=d;
        end
        if mod(epoch,500) ==0
            % Progress view: current fit and best fit so far on the left,
            % error of the last 500 epochs on the right.
            figure(2)
            subplot(1,2,1)
            plot(x',y','o',x',yp{end}','*',x',bestyp{end}','.')
            subplot(1,2,2)
            plot(epoch-500+1:epoch,E(epoch-500+1:epoch))

            %lr=lr*0.95;
        end
        %ydelta(epoch)=sum(delta{end});
        %delta{3}
    end
    figure(1)
    % Top row of the 2x5 grid shows the fit, bottom row the error curve.
    SPdist=mod(TN-1,5)+2*5*floor((TN-1)/5)+1;
    SPerror=SPdist+5;
    subplot(2,5,SPdist)
    plot(x',y','o',x',yp{end}','*',x',bestyp{end}','.')
    title(cat(2,'Distribution L=',num2str(L)))

    subplot(2,5,SPerror)
    plot(3000:epochmax,E(3000:epochmax)) % skip the first epochs of the error curve
    title(cat(2,'Error L=',num2str(L)))
    % Intentionally unsuppressed: print best/final error and their ratio.
    [min(E) E(end)]
    E(end)/min(E)
end
% figure (3) ; plot(sort(w{1}))
% figure (4) ; plot(sort(w{2}))
% figure (5) ; plot(sort(w{3}))