KerasとMPSで同じ計算をする(1)
↑この続き。今回はMetal Performance Shaders側、およびその比較結果。
(2)MPS側
Kerasで保存した係数をmacOS側で読み込んでMetal Performance Shadersで同じNN構成で計算する。
// Input structure: the two joint angles fed to the network
// (same test values as the Keras side: 0.1 and 0.2).
typedef struct
{
float angle0;
float angle1;
} Inputs_t;
// Output structure: the three Q values produced by the network,
// matching Keras's 3-element output vector.
typedef struct
{
float q0;
float q1;
float q2;
} MPSResult_t;
{
    // --- Instance state for the MPS inference pipeline ---
    id <MTLDevice> device;              // Metal device
    id <MTLCommandQueue> commandQueue;  // queue the inference command buffers are submitted to
    MPSCNNNeuronReLU *relu;             // shared ReLU neuron filter passed to both layers
    MPSImage *srcImage;                 // network input image (NUM_INPUT x 1, 1 channel)
    MPSImage *h1Image;                  // hidden-layer activations (1 x 1, NUM_HIDDEN1 channels)
    MPSImage *finalImage;               // network output image (1 x 1, NUM_OUTPUT channels)
    SlimMPSCNNFullyConnected *h1;       // input -> hidden fully-connected layer
    SlimMPSCNNFullyConnected *h2;       // hidden -> output layer (was missing; it is assigned in -makeLayers and encoded in -inferenceForInputs:)
    float *angles;                      // host-side input buffer (NUM_INPUT floats)
    float *qResults;                    // host-side output buffer (NUM_OUTPUT floats)
    MTLRegion srcImageRegion;           // region used to upload the inputs
    MTLRegion filnalImageRegion;        // region used to read back the output (misspelled name kept: other methods reference it)
}
-(void)setupNN
{
    // Network dimensions: 2 inputs -> 4 hidden units -> 3 outputs,
    // mirroring the Keras model whose coefficients are loaded later.
    NUM_INPUT = 2;
    NUM_HIDDEN1 = 4;
    NUM_OUTPUT = 3;

    // Host-side staging buffers for the input angles and the output Q values.
    angles = calloc(NUM_INPUT, sizeof(float));
    qResults = calloc(NUM_OUTPUT, sizeof(float));

    // Texture regions used when uploading inputs and reading back outputs.
    srcImageRegion = MTLRegionMake2D(0, 0, NUM_INPUT, 1);
    filnalImageRegion = MTLRegionMake2D(0, 0, 1, 1);

    // Metal context.
    device = MTLCreateSystemDefaultDevice();
    commandQueue = [device newCommandQueue];

    // Image descriptors: the input is a (NUM_INPUT x 1) single-channel image;
    // hidden and output layers are 1x1 images with one feature channel per unit.
    MPSImageDescriptor *inputDescriptor = [MPSImageDescriptor imageDescriptorWithChannelFormat:MPSImageFeatureChannelFormatFloat32 width:NUM_INPUT height:1 featureChannels:1];
    MPSImageDescriptor *hiddenDescriptor = [MPSImageDescriptor imageDescriptorWithChannelFormat:MPSImageFeatureChannelFormatFloat32 width:1 height:1 featureChannels:NUM_HIDDEN1];
    MPSImageDescriptor *outputDescriptor = [MPSImageDescriptor imageDescriptorWithChannelFormat:MPSImageFeatureChannelFormatFloat32 width:1 height:1 featureChannels:NUM_OUTPUT];

    srcImage = [[MPSImage alloc] initWithDevice:device imageDescriptor:inputDescriptor];
    h1Image = [[MPSImage alloc] initWithDevice:device imageDescriptor:hiddenDescriptor];
    finalImage = [[MPSImage alloc] initWithDevice:device imageDescriptor:outputDescriptor];

    // ReLU neuron filter (a = 0 gives plain max(0, x)).
    relu = [[MPSCNNNeuronReLU alloc] initWithDevice:device a:0];
}
-(void)makeLayers
{
    // Layer 1: the (NUM_INPUT x 1) single-channel input image is consumed as
    // one kernel-wide window, producing NUM_HIDDEN1 feature channels.
    h1 = [[SlimMPSCNNFullyConnected alloc] initWithKernelWidth:NUM_INPUT
                                                  kernelHeight:1
                                          inputFeatureChannels:1
                                         outputFeatureChannels:NUM_HIDDEN1
                                                  neuronFilter:relu
                                                        device:device
                                        kernelParamsBinaryName:@"1"];
    // Layer 2: hidden -> output, operating on the 1x1 multi-channel image.
    // NOTE(review): the same ReLU filter is applied to the output layer as
    // well; the results match Keras here, but confirm the Keras model also
    // uses ReLU (not linear) activation on its final Dense layer.
    h2 = [[SlimMPSCNNFullyConnected alloc] initWithKernelWidth:1
                                                  kernelHeight:1
                                          inputFeatureChannels:NUM_HIDDEN1
                                         outputFeatureChannels:NUM_OUTPUT
                                                  neuronFilter:relu
                                                        device:device
                                        kernelParamsBinaryName:@"2"];
}
-(void)checkNN
{
    // Same test input as the Keras side, so the two results can be compared.
    Inputs_t inputs = { .angle0 = 0.1, .angle1 = 0.2 };
    MPSResult_t r = [self inferenceForInputs:inputs];
    NSLog(@"q0=%f,q1=%f,q2=%f",r.q0,r.q1,r.q2);
}
// Runs one forward pass on the GPU and blocks until the result is available.
// The caller's thread waits on a semaphore that is signaled from the command
// buffer's completion handler.
-(MPSResult_t)inferenceForInputs:(Inputs_t)inputs
{
    __block MPSResult_t mpsResult;
    dispatch_semaphore_t semaphore = dispatch_semaphore_create(0);
    dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_BACKGROUND, 0), ^{
        // Upload the two input angles into the (NUM_INPUT x 1) source texture.
        self->angles[0] = inputs.angle0;
        self->angles[1] = inputs.angle1;
        [self->srcImage.texture replaceRegion:self->srcImageRegion
                                  mipmapLevel:0
                                        slice:0
                                    withBytes:self->angles
                                  bytesPerRow:sizeof(float)*4 // stride of one source row; only one row is uploaded, so only width*4 bytes are actually read
                                bytesPerImage:0];
        @autoreleasepool{
            id <MTLCommandBuffer> commandBuffer = [self->commandQueue commandBuffer];
            // Encode both fully-connected layers: src -> hidden -> final.
            [self->h1 encodeToCommandBuffer:commandBuffer sourceImage:self->srcImage destinationImage:self->h1Image];
            [self->h2 encodeToCommandBuffer:commandBuffer sourceImage:self->h1Image destinationImage:self->finalImage];
            [commandBuffer addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
                // finalImage has 3 feature channels, which MPS stores in an
                // RGBA float texture, so getBytes copies one full 4-float
                // texel (16 bytes). Read into a local 4-float buffer first:
                // the previous code copied straight into qResults, which was
                // calloc'd for only 3 floats, overflowing it by one float.
                float texel[4] = {0.0f, 0.0f, 0.0f, 0.0f};
                [self->finalImage.texture getBytes:texel
                                       bytesPerRow:sizeof(float)*4
                                        fromRegion:self->filnalImageRegion
                                       mipmapLevel:0];
                self->qResults[0] = texel[0];
                self->qResults[1] = texel[1];
                self->qResults[2] = texel[2];
                mpsResult.q0 = self->qResults[0];
                mpsResult.q1 = self->qResults[1];
                mpsResult.q2 = self->qResults[2];
                // Wake the caller only after mpsResult is fully written.
                dispatch_semaphore_signal(semaphore);
            }];
            [commandBuffer commit];
        }
    });
    // Block until the GPU completion handler has filled mpsResult.
    dispatch_semaphore_wait(semaphore, DISPATCH_TIME_FOREVER);
    return mpsResult;
}
SlimMPSCNNFullyConnectedクラスはAPPLEのサンプルコードを参考にしてる。というかほとんどそのまま。
実装部分はこんな感じで↓
/// Designated initializer: loads the Keras-exported weight/bias binaries named
/// "<STR_WEIGHTS>_<name>.dat" / "<STR_BIAS>_<name>.dat" from BaseFolder and
/// configures the fully-connected layer (expressed as a 1-step convolution).
/// Returns nil if either coefficient file is missing or truncated.
- (instancetype)initWithKernelWidth:(NSUInteger)kernelWidth
                       kernelHeight:(NSUInteger)kernelHeight
               inputFeatureChannels:(NSUInteger)inputFeatureChannels
              outputFeatureChannels:(NSUInteger)outputFeatureChannels
                       neuronFilter:(MPSCNNNeuron *)neuronFilter
                             device:(id<MTLDevice>)device
             kernelParamsBinaryName:(NSString *)kernelParamsBinaryName
{
    // Weight count for a fully-connected layer expressed as a convolution:
    // inChannels * kH * kW * outChannels; one bias per output channel.
    NSUInteger nofWeight = inputFeatureChannels*kernelHeight*kernelWidth*outputFeatureChannels;
    NSUInteger nofBias = outputFeatureChannels;
    float *weightP = calloc(nofWeight, sizeof(float));
    float *biasP = calloc(nofBias, sizeof(float));

    // Build the file paths for the coefficient binaries exported from Keras.
    NSString *wbdataFolder = [NSString stringWithFormat:@"%@",BaseFolder];
    NSString *fileName_w = [NSString stringWithFormat:@"%@_%@.dat",STR_WEIGHTS , kernelParamsBinaryName];
    NSString *fileName_b = [NSString stringWithFormat:@"%@_%@.dat",STR_BIAS, kernelParamsBinaryName];
    NSString *filePath_w = [NSString stringWithFormat:@"%@%@",wbdataFolder,fileName_w];
    NSString *filePath_b = [NSString stringWithFormat:@"%@%@",wbdataFolder,fileName_b];
    NSData *wData = [[NSData alloc] initWithContentsOfFile:filePath_w];
    NSData *bData = [[NSData alloc] initWithContentsOfFile:filePath_b];

    // Fail loudly on a missing or truncated file. The original code skipped
    // this check, so a missing file silently produced an all-zero layer
    // (getBytes on a nil NSData is a no-op against the calloc'd buffers).
    if (wData.length < nofWeight*sizeof(float) || bData.length < nofBias*sizeof(float)) {
        NSLog(@"SlimMPSCNNFullyConnected: coefficient file missing or truncated: %@ / %@", filePath_w, filePath_b);
        free(weightP);
        free(biasP);
        return nil;
    }
    [wData getBytes:weightP length:nofWeight*sizeof(float)];
    [bData getBytes:biasP length:nofBias*sizeof(float)];

    MPSCNNConvolutionDescriptor *convDesc = [MPSCNNConvolutionDescriptor
                cnnConvolutionDescriptorWithKernelWidth:kernelWidth
                                           kernelHeight:kernelHeight
                                   inputFeatureChannels:inputFeatureChannels
                                  outputFeatureChannels:outputFeatureChannels
                                           neuronFilter:neuronFilter];
    // MPS copies the weights/biases during init, so the buffers can be freed
    // immediately afterwards.
    self = [super initWithDevice:device
           convolutionDescriptor:convDesc
                   kernelWeights:weightP
                       biasTerms:biasP
                           flags:MPSCNNConvolutionFlagsNone];
    if (self) {
        self.destinationFeatureChannelOffset = 0;
    }
    free(weightP);
    free(biasP);
    return self;
}
(3)結果比較
もちろん入力は同じ。
resutl=[[0.69752 0.10237998 1.01858 ]]
↑Keras側の実行結果
q0=0.697416,q1=0.102600,q2=1.018707
↑MPS側の結果
いつものことながら微妙に違うが、GPU側の浮動小数点演算(積和の丸めや演算順序の違い)による誤差と思われる範囲。
とにかくこれでKerasの学習で得た係数をMPSで使い、同じ計算をすることができた。