#include <iostream>
#include <string>
#include <vector>
#include <math.h>
#include "LogisticRegression.h"
using namespace std; LogisticRegression::LogisticRegression(
int size, // N
int in, // n_in
int out // n_out
N = size;
n_in = in;
n_out = out; // initialize W, b
// W[n_out][n_in], b[n_out]
W = new double*[n_out];
for(int i=0; i<n_out; i++)
W[i] = new double[n_in];
b = new double[n_out]; for(int i=0; i<n_out; i++)
for(int j=0; j<n_in; j++)
W[i][j] = 0;
b[i] = 0;
} LogisticRegression::~LogisticRegression()
for(int i=0; i<n_out; i++)
delete[] W[i];
delete[] W;
delete[] b;
} void LogisticRegression::train (
int *x, // the input from input nodes in training set
int *y, // the output from output nodes in training set
double lr // the learning rate
// the probability of P(y|x)
double *p_y_given_x = new double[n_out];
// the tmp variable which is not necessary being an array
double *dy = new double[n_out]; // step 1: calculate the output of softmax given input
for(int i=0; i<n_out; i++)
// initialize
p_y_given_x[i] = 0;
for(int j=0; j<n_in; j++)
// the weight of networks
p_y_given_x[i] += W[i][j] * x[j];
// the bias
p_y_given_x[i] += b[i];
// the softmax value
softmax(p_y_given_x); // step 2: update the weight of networks
// w_new = w_old + learningRate * differential (导数)
// = w_old + learningRate * x (1{y_i=y} - p_yi_given_x)
// = w_old + learningRate * x * (y - p_y_given_x)
for(int i=0; i<n_out; i++)
dy[i] = y[i] - p_y_given_x[i];
for(int j=0; j<n_in; j++)
W[i][j] += lr * dy[i] * x[j] / N;
b[i] += lr * dy[i] / N;
delete[] p_y_given_x;
delete[] dy;
} void LogisticRegression::softmax (double *x)
double max = 0.0;
double sum = 0.0; // step1: get the max in the X vector
for(int i=0; i<n_out; i++)
if(max < x[i])
max = x[i];
// step 2: normalization and softmax
// normalize -- 'x[i]-max', it's not necessary in traditional LR.
// I wonder why it appears here?
for(int i=0; i<n_out; i++)
x[i] = exp(x[i] - max);
sum += x[i];
for(int i=0; i<n_out; i++)
x[i] /= sum;
} void LogisticRegression::predict(
int *x, // the input from input nodes in testing set
double *y // the calculated softmax probability
// get the softmax output value given the current networks
for(int i=0; i<n_out; i++)
y[i] = 0;
for(int j=0; j<n_in; j++)
y[i] += W[i][j] * x[j];
y[i] += b[i];
} softmax(y);
