Implementing a Complete CNN Neural Network in C++ with OpenCV

The Header File

Apart from NeuralNetwork, which manages the layers, the rest should be self-explanatory.

#pragma once
#include <iostream>
#include <opencv2/opencv.hpp>
#include <vector>
#include <memory>
#include <algorithm>
#include <random>
using namespace cv;
using namespace std;

// Print a matrix's size
void printMat(const string& name, const cv::Size& size);

// Base layer class
class Layer {
public:
	virtual void forward(const Mat& input, Mat& output) = 0;
	virtual void backward(const Mat& grad_output, Mat& grad_input) = 0;
	virtual void updateWeights(float learning_rate) = 0;

	virtual void updateWeightsAdam(float learning_rate, int t, float beta1 = 0.9, float beta2 = 0.999, float eps = 1e-8) = 0;
	virtual ~Layer() = default; // the base class holds no state; each concrete layer owns its own Adam buffers
};

// Convolutional layer
class ConvLayer : public Layer {
public:
	ConvLayer(int filters, int kernel_size, int stride, int padding, int type);
	void forward(const Mat& input, Mat& output) override;
	void backward(const Mat& grad_output, Mat& grad_input) override;
	void updateWeights(float learning_rate) override;

	void updateWeightsAdam(float learning_rate, int t, float beta1 = 0.9, float beta2 = 0.999, float eps = 1e-8) override;

private:
	vector<Mat> kernels;
	Mat grad_kernel;
	Mat input_cache;
	int kernel_size;
	int filters;			  // number of kernels
	int stride;
	int padding;
	int type;
	Mat biases, grad_biases;
	// Adam parameters
	Mat m_weights, v_weights; // first- and second-moment estimates - weights
	Mat m_biases, v_biases;   // first- and second-moment estimates - biases
};

// ReLU activation layer
class ReLULayer : public Layer {
public:
	void forward(const Mat& input, Mat& output) override;
	void backward(const Mat& grad_output, Mat& grad_input) override;
	void updateWeights(float learning_rate) override;

	void updateWeightsAdam(float learning_rate, int t, float beta1 = 0.9, float beta2 = 0.999, float eps = 1e-8) override;

private:
	Mat input_cache;
	// Adam parameters (unused: this layer has no trainable parameters)
	Mat m_weights, v_weights; // first- and second-moment estimates - weights
	Mat m_biases, v_biases;   // first- and second-moment estimates - biases
};

// Pooling layer
class PoolingLayer : public Layer {
public:
	PoolingLayer(int pool_size, int stride, int padding);
	void forward(const Mat& input, Mat& output) override;
	void backward(const Mat& grad_output, Mat& grad_input) override;
	void updateWeights(float learning_rate) override;

	void updateWeightsAdam(float learning_rate, int t, float beta1 = 0.9, float beta2 = 0.999, float eps = 1e-8) override;

private:
	Mat input_cache;
	int pool_size;
	int stride;
	int padding = 0;
	// Adam parameters (unused: this layer has no trainable parameters)
	Mat m_weights, v_weights; // first- and second-moment estimates - weights
	Mat m_biases, v_biases;   // first- and second-moment estimates - biases
};

// Fully connected layer
class FullyConnectedLayer : public Layer {
public:
	FullyConnectedLayer(int input_size, int output_size);
	void forward(const Mat& input, Mat& output) override;
	void backward(const Mat& grad_output, Mat& grad_input) override;
	void updateWeights(float learning_rate) override;

	void updateWeightsAdam(float learning_rate, int t, float beta1 = 0.9, float beta2 = 0.999, float eps = 1e-8) override;

private:
	Mat weights;
	Mat biases;
	Mat grad_weights;
	Mat grad_biases;
	Mat input_cache;
	// Adam parameters
	Mat m_weights, v_weights; // first- and second-moment estimates - weights
	Mat m_biases, v_biases;   // first- and second-moment estimates - biases
};

class SoftmaxLayer : public Layer {
public:
	void forward(const Mat& input, Mat& output) override;
	void backward(const Mat& grad_output, Mat& grad_input) override;
	void updateWeights(float learning_rate) override;

	void updateWeightsAdam(float learning_rate, int t, float beta1 = 0.9, float beta2 = 0.999, float eps = 1e-8) override;
private:
	Mat input_cache;
};

// Neural network class
class NeuralNetwork {
public:
	NeuralNetwork();
	void addLayer(shared_ptr<Layer> layer);
	void forward(const Mat& input, Mat& output);
	void backward(const Mat& loss_grad);
	void updateWeights(float learning_rate);
	void train(const Mat& inputs, const Mat& labels, int epochs, float learning_rate);

	void train(const std::vector<cv::Mat>& images, const cv::Mat& labels, int epochs, float learning_rate);
private:
	vector<shared_ptr<Layer>> layers;
	vector<Mat> layerOutputs;
};

Implementation File and Detailed Walkthrough

Convolutional Layer

ConvLayer::ConvLayer(int filters, int kernel_size, int stride, int padding, int type)
	: filters(filters), kernel_size(kernel_size), stride(stride), padding(padding), type(type) {

	// Initialize the kernels. Each kernel has as many channels as the input; each kernel produces one feature map, so the output has as many channels as there are kernels.
	for (int f = 0; f < filters; ++f) {
		Mat one_kernel = Mat::zeros(kernel_size, kernel_size, type);
		initializeWeightsXavier(one_kernel);
		kernels.push_back(one_kernel);
	}

	// Initialize the biases
	biases = Mat::zeros(1, filters, type);
	initializeWeights(biases);

	// Initialize the gradient matrices
	grad_kernel = Mat::zeros(kernel_size, kernel_size, type);
	grad_biases = Mat::zeros(1, filters, type);

	// Initialize the matrices used by the Adam optimizer
	m_weights = Mat::zeros(kernel_size, kernel_size, type);
	v_weights = Mat::zeros(kernel_size, kernel_size, type);
	m_biases = Mat::zeros(1, filters, type);
	v_biases = Mat::zeros(1, filters, type);
}


void ConvLayer::forward(const Mat& input, Mat& output) {
	input.copyTo(input_cache); // cached for backpropagation
	// Pad every channel
	Mat input_padded;
	copyMakeBorder(input, input_padded, padding, padding, padding, padding, BORDER_CONSTANT, 0);
	// Convolve channel by channel
	std::vector<Mat> input_channels(input_padded.channels(), Mat(input_padded.rows, input_padded.cols, CV_32F)); // single-channel views of the input
	std::vector<Mat> output_channels(filters); // one output feature map per filter
	split(input_padded, input_channels);
    // Precompute the final output size
	int output_rows = (input.rows - kernel_size + 2 * padding) / stride + 1;
	int output_cols = (input.cols - kernel_size + 2 * padding) / stride + 1;
    
	for (int f = 0; f < filters; ++f) {
		Mat temp_map = Mat::zeros(output_rows, output_cols, CV_32F); // accumulates this filter's feature map
		Mat temp = Mat::zeros(input_padded.rows, input_padded.cols, CV_32F); // temporary buffer for the filter2D output
		std::vector<Mat> one_kernel_channels(kernels[f].channels(), Mat::zeros(kernels[f].rows, kernels[f].cols, CV_32F));
		split(kernels[f], one_kernel_channels);
		for (int c = 0; c < one_kernel_channels.size(); ++c) {
			filter2D(input_channels[c], temp, -1, one_kernel_channels[c]); // This is cross-correlation; since the kernels are learned it is equivalent to convolution. TensorFlow does the same.
			temp = temp(Rect(padding, padding, output_cols, output_rows)); // crop to the output size
			temp_map += temp;
		}
		output_channels[f] = temp_map;
	}
	cv::merge(output_channels, output);
	input_padded.release();
	input_channels.shrink_to_fit();
	output_channels.shrink_to_fit();
    
    
    // Many of these copies could actually be avoided by using references.
}
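
As a quick sanity check of the output-size formula used above (output = (input - kernel_size + 2 * padding) / stride + 1), here is a minimal usage sketch; it assumes the ConvLayer class and the initialization helpers the post omits compile as shown:

// Sketch: a 28x28 single-channel CV_32F input, eight 5x5 kernels, stride 1, no padding
// gives (28 - 5 + 0) / 1 + 1 = 24, i.e. a 24x24 feature map with 8 channels.
ConvLayer conv(8, 5, 1, 0, CV_32F);
Mat input(28, 28, CV_32F);
randu(input, Scalar(0.f), Scalar(1.f));   // random test image
Mat features;
conv.forward(input, features);
cout << features.rows << "x" << features.cols << "x" << features.channels() << endl; // expected: 24x24x8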



void ConvLayer::backward(const Mat& grad_output, Mat& grad_input) {
	// Re-create the padded input from the forward pass
	Mat input_padded;
	copyMakeBorder(input_cache, input_padded, padding, padding, padding, padding, BORDER_CONSTANT, 0);

	// Split grad_output and input_padded into their channels
	std::vector<Mat> grad_output_channels;
	split(grad_output, grad_output_channels);

	std::vector<Mat> input_padded_channels;
	split(input_padded, input_padded_channels);

	// Initialize grad_input and grad_kernel
	grad_input = Mat::zeros(input_cache.size(), input_cache.type());

	std::vector<Mat> grad_input_channels;
	split(grad_input, grad_input_channels);

	grad_kernel = Mat::zeros(kernels[0].size(), kernels[0].type());
	std::vector<Mat> grad_kernel_channels;
	split(grad_kernel, grad_kernel_channels);

	// Compute the gradients, one filter (kernel) at a time
	for (int f = 0; f < filters; ++f) {
		// Split the current filter into its channels
		std::vector<Mat> kernel_channels;
		split(kernels[f], kernel_channels);

		// Temporary matrices for the input and kernel gradients
		std::vector<Mat> temp_grad_kernel_channels(kernel_channels.size(), Mat::zeros(kernel_channels[0].size(), kernel_channels[0].type()));
		std::vector<Mat> temp_grad_input_channels(input_padded_channels.size(), Mat::zeros(input_padded_channels[0].size(), input_padded_channels[0].type()));

		for (int c = 0; c < input_padded_channels.size(); ++c) {
			// Manually compute the gradient w.r.t. the kernel: correlate the padded input with the rotated output gradient
			Mat rotated_kernel;
			flip(grad_output_channels[f], rotated_kernel, -1); // rotate 180 degrees

			for (int i = 0; i <= input_padded_channels[c].rows - rotated_kernel.rows; ++i) {
				for (int j = 0; j <= input_padded_channels[c].cols - rotated_kernel.cols; ++j) {
					Rect roi(j, i, rotated_kernel.cols, rotated_kernel.rows);
					temp_grad_kernel_channels[c].at<float>(i, j) = sum(input_padded_channels[c](roi).mul(rotated_kernel))[0];
				}
			}

			// Gradient w.r.t. the input
			Mat flipped_kernel;
			flip(kernel_channels[c], flipped_kernel, -1); // rotate 180 degrees
			filter2D(grad_output_channels[f], temp_grad_input_channels[c], -1, flipped_kernel);
		}

		// Accumulate the gradients over all channels
		for (int c = 0; c < input_padded_channels.size(); ++c) {
			grad_input_channels[c] += temp_grad_input_channels[c];
			grad_kernel_channels[c] += temp_grad_kernel_channels[c];
		}
	}

	for (int f = 0; f < filters; ++f) {
		// Bias gradient
		grad_biases.at<float>(0, f) = cv::sum(grad_output_channels[f])[0];
	}



	// Merge the channels back into grad_kernel and grad_input
	cv::merge(grad_kernel_channels, grad_kernel);
	cv::merge(grad_input_channels, grad_input);

	// Release memory that is no longer needed
	input_padded.release();
	grad_output_channels.shrink_to_fit();
	grad_kernel_channels.shrink_to_fit();
	grad_input_channels.shrink_to_fit();
	input_padded_channels.shrink_to_fit();
	input_cache.release();
    
    
    // As before, many of these copies could be replaced with references.
}

// Plain gradient-descent weight update
void ConvLayer::updateWeights(float learning_rate) {
	for (int i = 0; i < filters; ++i) {
		kernels[i] -= learning_rate * grad_kernel;
	}
}

// Adam weight update
void ConvLayer::updateWeightsAdam(float learning_rate, int t, float beta1, float beta2, float eps) {
	// Update the first- and second-moment estimates
	for (int i = 0; i < filters; ++i) {
		m_weights = beta1 * m_weights + (1 - beta1) * grad_kernel;
		v_weights = beta2 * v_weights + (1 - beta2) * grad_kernel.mul(grad_kernel);

		// Bias correction
		Mat mhat_w, vhat_w;
		mhat_w = m_weights / (1 - pow(beta1, t));
		vhat_w = v_weights / (1 - pow(beta2, t));

		// Update the weights
		Mat sqrt_vhat_w;
		cv::sqrt(vhat_w, sqrt_vhat_w);
		kernels[i] -= learning_rate * mhat_w / (sqrt_vhat_w + eps);
	}

	m_biases = beta1 * m_biases + (1 - beta1) * grad_biases;
	v_biases = beta2 * v_biases + (1 - beta2) * grad_biases.mul(grad_biases);

	// Bias correction
	Mat mhat_b, vhat_b;
	mhat_b = m_biases / (1 - pow(beta1, t));
	vhat_b = v_biases / (1 - pow(beta2, t));

	// Update the biases
	Mat sqrt_vhat_b;
	cv::sqrt(vhat_b, sqrt_vhat_b);
	biases -= learning_rate * mhat_b / (sqrt_vhat_b + eps);
}
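
For reference, both updateWeightsAdam implementations follow the standard Adam update rules; for a parameter \theta with gradient g_t at step t and learning rate \eta:

	m_t = \beta_1 m_{t-1} + (1-\beta_1)\, g_t
	v_t = \beta_2 v_{t-1} + (1-\beta_2)\, g_t^2
	\hat{m}_t = m_t / (1-\beta_1^t), \quad \hat{v}_t = v_t / (1-\beta_2^t)
	\theta \leftarrow \theta - \eta\, \hat{m}_t / (\sqrt{\hat{v}_t} + \epsilon)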

ReLU Activation Layer

// ReLULayer implementation
void ReLULayer::forward(const Mat& input, Mat& output) {
	input.copyTo(input_cache);
	max(input, 0, output);
}

void ReLULayer::backward(const Mat& grad_output, Mat& grad_input) {
	grad_output.copyTo(grad_input);
	grad_input.setTo(0, input_cache <= 0);
	input_cache.release();
}


// The ReLU layer has no weights, so there is nothing to update
void ReLULayer::updateWeights(float learning_rate) { }


void ReLULayer::updateWeightsAdam(float learning_rate, int t, float beta1, float beta2, float eps) {
	// The ReLU layer has no weights or biases, so there is nothing to update
}

Pooling Layer

// Max-pooling layer
PoolingLayer::PoolingLayer(int pool_size, int stride, int padding)
	: pool_size(pool_size), stride(stride), padding(padding) {}

void PoolingLayer::forward(const Mat& input, Mat& output) {
	input.copyTo(input_cache); // Cache the input; without it, backpropagation could not locate each window's maximum

	int out_rows = (input.rows - pool_size + 2 * padding) / stride + 1;
	int out_cols = (input.cols - pool_size + 2 * padding) / stride + 1;

	std::vector<cv::Mat> input_channels(input.channels());
	std::vector<cv::Mat> output_channels(input.channels());

	cv::split(input, input_channels);

	for (int c = 0; c < input.channels(); ++c) {
		output_channels[c] = Mat(out_rows, out_cols, input_channels[c].type());

		for (int i = 0; i < out_rows; ++i) {
			for (int j = 0; j < out_cols; ++j) {
				int start_i = i * stride - padding;
				int start_j = j * stride - padding;
				int end_i = std::min(start_i + pool_size, input.rows);
				int end_j = std::min(start_j + pool_size, input.cols);
				start_i = std::max(start_i, 0);  // clamp so the window does not start below 0
				start_j = std::max(start_j, 0);

				Rect roi(start_j, start_i, end_j - start_j, end_i - start_i);
				Mat subMat = input_channels[c](roi);

				double minVal, maxVal;
				cv::minMaxLoc(subMat, &minVal, &maxVal);
				output_channels[c].at<float>(i, j) = static_cast<float>(maxVal);
			}
		}
	}

	cv::merge(output_channels, output);

	input_channels.shrink_to_fit();
	output_channels.shrink_to_fit();
}
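
A tiny worked example of the forward pass (a sketch that assumes the class above): with a 2x2 window and stride 2, each output cell holds the maximum of one non-overlapping window.

// Sketch: 4x4 single-channel input, pool_size 2, stride 2, no padding -> 2x2 output
float data[16] = { 1,  2,  5,  6,
                   3,  4,  7,  8,
                   9, 10, 13, 14,
                  11, 12, 15, 16 };
Mat input(4, 4, CV_32F, data);
PoolingLayer pool(2, 2, 0);
Mat pooled;
pool.forward(input, pooled);   // pooled = [ 4  8 ; 12 16 ]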


void PoolingLayer::backward(const Mat& grad_output, Mat& grad_input) {
	grad_input = Mat::zeros(input_cache.size(), input_cache.type());

	std::vector<cv::Mat> input_channels;
	std::vector<cv::Mat> grad_input_channels(input_cache.channels());
	std::vector<cv::Mat> grad_output_channels;

	cv::split(input_cache, input_channels);
	cv::split(grad_output, grad_output_channels);

	for (int c = 0; c < input_cache.channels(); ++c) {
		Mat& grad_input_channel = grad_input_channels[c];
		grad_input_channel = Mat::zeros(input_channels[c].size(), input_channels[c].type());

		for (int i = 0; i < grad_output.rows; ++i) {
			for (int j = 0; j < grad_output.cols; ++j) {
				int start_i = i * stride - padding;
				int start_j = j * stride - padding;
				int end_i = std::min(start_i + pool_size, input_channels[c].rows);
				int end_j = std::min(start_j + pool_size, input_channels[c].cols);
				start_i = std::max(start_i, 0);
				start_j = std::max(start_j, 0);

				Rect roi(start_j, start_i, end_j - start_j, end_i - start_i);
				Mat subMat = input_channels[c](roi);

				double maxVal;
				Point maxLoc;
				cv::minMaxLoc(subMat, nullptr, &maxVal, nullptr, &maxLoc);

				int grad_input_row = maxLoc.y + start_i;
				int grad_input_col = maxLoc.x + start_j;
				grad_input_channel.at<float>(grad_input_row, grad_input_col) += grad_output_channels[c].at<float>(i, j);
			}
		}
	}

	cv::merge(grad_input_channels, grad_input);

	input_cache.release();
	input_channels.shrink_to_fit();
	grad_input_channels.shrink_to_fit();
	grad_output_channels.shrink_to_fit();
}



void PoolingLayer::updateWeights(float learning_rate) {
	// The pooling layer has no weights, so there is nothing to update
}


void PoolingLayer::updateWeightsAdam(float learning_rate, int t, float beta1, float beta2, float eps) {
	// The pooling layer has no weights or biases, so there is nothing to update
}

Fully Connected Layer

// FullyConnectedLayer implementation
// I did not implement batching, so the input is always flattened into a single row
FullyConnectedLayer::FullyConnectedLayer(int input_size, int output_size) {
	weights = Mat::zeros(output_size, input_size, CV_32F);
	biases = Mat::zeros(1, output_size, CV_32F);
	initializeWeightsXavier(weights);
	initializeWeightsXavier(biases);
}


void FullyConnectedLayer::forward(const Mat& input, Mat& output) {
	input.copyTo(input_cache);
	// Flatten the input and make sure it is CV_32F
	Mat flat_input = input_cache.reshape(1, 1); // flatten to a single row
	flat_input.convertTo(flat_input, CV_32F);

	try {
		output = flat_input * weights.t(); // matrix multiplication
		output = output + biases;
		DebugPrint("->FullyConnectedLayer forward output size", output.size());
	}
	catch (const cv::Exception& e) {
		cout << "OpenCV error: " << e.what() << endl;
	}
	catch (const std::exception& e) {
		cout << "Standard error: " << e.what() << endl;
	}
	catch (...) {
		cout << "Unknown error occurred during matrix multiplication." << endl;
	}
	flat_input.release();
}


void FullyConnectedLayer::backward(const Mat& grad_output, Mat& grad_input) {
	int input_cache_channels = input_cache.channels();
	int input_cache_rows = input_cache.rows;
	input_cache.convertTo(input_cache, CV_32F);
	// Compute grad_weights
	grad_weights = grad_output.t() * input_cache.reshape(1, 1); // matrix multiplication

	// Compute grad_biases
	grad_output.copyTo(grad_biases);
	grad_input = grad_output * weights; // matrix multiplication
	grad_input = grad_input.reshape(input_cache_channels, input_cache_rows);
	input_cache.release();
}


void FullyConnectedLayer::updateWeights(float learning_rate) {
	Mat lr_times_grad_weights = learning_rate * grad_weights;
	weights -= lr_times_grad_weights;
	biases -= learning_rate * grad_biases;
}


void FullyConnectedLayer::updateWeightsAdam(float learning_rate, int t, float beta1, float beta2, float eps) {
	// Initialize m and v on the first call
	if (m_weights.empty()) {
		m_weights = Mat::zeros(grad_weights.size(), grad_weights.type());
		v_weights = Mat::zeros(grad_weights.size(), grad_weights.type());
		m_biases = Mat::zeros(grad_biases.size(), grad_biases.type());
		v_biases = Mat::zeros(grad_biases.size(), grad_biases.type());
	}

	// Update the first- and second-moment estimates
	m_weights = beta1 * m_weights + (1 - beta1) * grad_weights;
	v_weights = beta2 * v_weights + (1 - beta2) * grad_weights.mul(grad_weights);
	m_biases = beta1 * m_biases + (1 - beta1) * grad_biases;
	v_biases = beta2 * v_biases + (1 - beta2) * grad_biases.mul(grad_biases);

	// Bias correction
	Mat mhat_w, vhat_w, mhat_b, vhat_b;
	cv::divide(m_weights, (1 - pow(beta1, t)), mhat_w);
	cv::divide(v_weights, (1 - pow(beta2, t)), vhat_w);
	cv::divide(m_biases, (1 - pow(beta1, t)), mhat_b);
	cv::divide(v_biases, (1 - pow(beta2, t)), vhat_b);

	// Update the weights and biases
	Mat sqrt_vhat_w, sqrt_vhat_b;
	cv::sqrt(vhat_w, sqrt_vhat_w);
	cv::sqrt(vhat_b, sqrt_vhat_b);
	//cout << weights.rows << " " << weights.cols << endl;
	// Update weights row by row (each row of weights holds the weights feeding one output neuron)
	for (int i = 0; i < weights.rows; ++i) {
		weights.row(i) -= learning_rate * mhat_w.row(i) / (sqrt_vhat_w.row(i) + eps);
	}

	biases -= learning_rate * mhat_b / (sqrt_vhat_b + eps);
}

The NeuralNetwork Class (Layer Manager)

// NeuralNetwork implementation
NeuralNetwork::NeuralNetwork() {
	// Pre-allocate layerOutputs; it holds one more entry than layers so that slot 0 can store the network input
	layerOutputs.resize(layers.size() + 1);
}

void NeuralNetwork::addLayer(shared_ptr<Layer> layer) {
	layers.push_back(layer);
	// Resize layerOutputs whenever a layer is added
	layerOutputs.resize(layers.size() + 1);
}

void NeuralNetwork::forward(const Mat& input, Mat& output) {

	// The first layer's input is the network's input
	input.copyTo(layerOutputs[0]);

	// Call every layer's forward function
	for (size_t i = 0; i < layers.size(); ++i) {
		//cout << "NeuralNetwork forward switching!" << endl;
		layers[i]->forward(layerOutputs[i], layerOutputs[i + 1]);
		//cout << "NeuralNetwork forward switch finished!" << endl;
	}

	// The last layer's output is the network's output
	output = layerOutputs.back();

}

void NeuralNetwork::backward(const Mat& loss_grad) {
	Mat current_grad = loss_grad;
	Mat next_grad;
	for (auto it = layers.rbegin(); it != layers.rend(); ++it) {
		(*it)->backward(current_grad, next_grad);
		current_grad = next_grad;
	}
}


void NeuralNetwork::updateWeights(float learning_rate) {
	// Call every layer's update routine
	for (const auto& layer : layers) {
		//  layer->updateWeights(learning_rate);
		// Note: the Adam time step is fixed at 1 here; a complete implementation would increment it after each update.
		layer->updateWeightsAdam(learning_rate, 1, 0.9, 0.999, 1e-8);
	}
}


void NeuralNetwork::train(const std::vector<cv::Mat>& images, const cv::Mat& labels, int epochs, float learning_rate) {
	// Generate an index vector that will be shuffled for random order in each epoch.
	std::vector<int> indices(images.size());
	for (int i = 0; i < indices.size(); ++i) {
		indices[i] = i; // Initialize with consecutive integers.
	}

	for (int epoch = 0; epoch < epochs; ++epoch) {
		double totalLoss = 0.0;
		double frontLoss = 0.0;
		// Shuffle the indices at the beginning of each epoch.
		cv::randShuffle(indices);

		for (size_t i = 0; i < indices.size(); ++i) {
			int idx = indices[i]; // Get the next index in the shuffled order.

			// Forward pass
			cv::Mat output;
			forward(images[idx], output);
			//cout << output << endl; // print the single-sample output

			// Extract one-hot encoded label
			cv::Mat currentLabel = labels.row(idx);

			// Compute loss
			double loss_value = crossEntropyLossV2(output, currentLabel);
			totalLoss += loss_value;

			// Backward pass
			cv::Mat loss_grad = crossEntropyLossGradV2(output, currentLabel);
			//cout << "start backword" << endl;
			backward(loss_grad);

			// Update weights
			updateWeights(learning_rate);


			//system("pause");
		}
		// Print average loss over the epoch
		std::cout << "Epoch " << epoch + 1 << "/" << epochs << ", Loss: " << totalLoss / images.size() << std::endl;
		//system("pause");
	}
}
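
Putting it all together, here is a minimal usage sketch. The layer sizes are my own illustrative choices and the two loader functions are hypothetical placeholders; everything else uses the classes defined above (plus the helper functions the post omits):

// A small LeNet-style network for 28x28 single-channel CV_32F images
NeuralNetwork net;
net.addLayer(make_shared<ConvLayer>(8, 5, 1, 0, CV_32F));    // 28x28x1 -> 24x24x8
net.addLayer(make_shared<ReLULayer>());
net.addLayer(make_shared<PoolingLayer>(2, 2, 0));            // 24x24x8 -> 12x12x8
net.addLayer(make_shared<FullyConnectedLayer>(12 * 12 * 8, 10));
net.addLayer(make_shared<SoftmaxLayer>());

std::vector<Mat> images = loadTrainingImages();   // hypothetical loader: 28x28 CV_32F Mats
Mat labels = loadOneHotLabels();                  // hypothetical loader: N x 10 one-hot CV_32F Mat
net.train(images, labels, 10, 0.001f);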

Unfinished Features

  • Model saving
  • CUDA acceleration
  • Mini-batch training

I haven't included some of the helper functions; you've got to work some things out yourself, right?
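
That said, if you want a starting point, here is one possible shape for two of the omitted helpers. This is my own sketch of what they could look like, not the original code:

// Possible Xavier/Glorot uniform initialization (hypothetical implementation)
void initializeWeightsXavier(Mat& m) {
	float fan_in  = static_cast<float>(m.cols);
	float fan_out = static_cast<float>(m.rows);
	float limit = std::sqrt(6.0f / (fan_in + fan_out));
	randu(m, Scalar::all(-limit), Scalar::all(limit)); // uniform in [-limit, limit]
}

// Possible single-sample cross-entropy loss for a one-hot label row (hypothetical implementation)
double crossEntropyLossV2(const Mat& prediction, const Mat& label) {
	Mat clipped = prediction + 1e-12; // avoid log(0)
	Mat log_pred;
	log(clipped, log_pred);
	return -sum(label.mul(log_pred))[0];
}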

Showcase