KerasとMPSで同じ計算をする(2)
KerasとMPSで同じ計算をする(1)
↑この続き。今回はMetal Performance Shaders側、およびその比較結果。
(2)MPS側
Kerasで保存した係数をmacOS側で読み込んでMetal Performance Shadersで同じNN構成で計算する。
実装部分はこんな感じで↓
(3)結果比較
もちろん入力は同じ。
resutl=[[0.69752 0.10237998 1.01858 ]]
↑Keras側の実行結果
q0=0.697416,q1=0.102600,q2=1.018707
↑MPS側の結果
いつものことながら、微妙に違うけどまあこんなもんかな。
とにかくこれでKerasの学習で得た係数をMPSで使い、同じ計算をすることができた。
↑この続き。今回はMetal Performance Shaders側、およびその比較結果。
(2)MPS側
Kerasで保存した係数をmacOS側で読み込んでMetal Performance Shadersで同じNN構成で計算する。
// Input struct: the two angles fed to the network.
typedef struct {
    float angle0;
    float angle1;
} Inputs_t;

// Output struct: the three values read back from the final layer.
typedef struct {
    float q0;
    float q1;
    float q2;
} MPSResult_t;

{
    // Instance variables
    id <MTLDevice> device;
    id <MTLCommandQueue> commandQueue;
    MPSCNNNeuronReLU *relu;
    MPSImage *srcImage;
    MPSImage *h1Image;
    MPSImage *finalImage;
    SlimMPSCNNFullyConnected *h1;
    // Second (output) layer. It is created in -makeLayers and encoded in
    // -inferenceForInputs:, so it must be declared here alongside h1.
    SlimMPSCNNFullyConnected *h2;
    float *angles;   // input staging buffer
    float *qResults; // output staging buffer
    MTLRegion srcImageRegion;
    MTLRegion filnalImageRegion;
}

// Allocates the CPU staging buffers and creates the Metal device, command
// queue, the three MPSImages (input, hidden, output) and the ReLU neuron.
// NUM_INPUT / NUM_HIDDEN1 / NUM_OUTPUT are declared outside this snippet.
-(void)setupNN
{
    NUM_INPUT = 2;
    NUM_HIDDEN1 = 4;
    NUM_OUTPUT = 3;

    angles = calloc(NUM_INPUT, sizeof(float));

    // The read-back in -inferenceForInputs: copies one whole texel of
    // finalImage. MPSImage packs feature channels four to a texel, so with
    // 3 output channels the texel still holds 4 floats (16 bytes). Round the
    // buffer up to a multiple of four floats so that copy cannot overrun it.
    size_t resultSlots = (size_t)((NUM_OUTPUT + 3) / 4) * 4;
    qResults = calloc(resultSlots, sizeof(float));

    srcImageRegion = MTLRegionMake2D(0, 0, NUM_INPUT, 1);
    filnalImageRegion = MTLRegionMake2D(0, 0, 1, 1);

    // Input side: NUM_INPUT x 1 image with a single feature channel.
    MPSImageDescriptor *sid =
        [MPSImageDescriptor imageDescriptorWithChannelFormat:MPSImageFeatureChannelFormatFloat32
                                                       width:NUM_INPUT
                                                      height:1
                                             featureChannels:1];
    // Hidden layer: 1 x 1 image with NUM_HIDDEN1 feature channels.
    MPSImageDescriptor *h1id =
        [MPSImageDescriptor imageDescriptorWithChannelFormat:MPSImageFeatureChannelFormatFloat32
                                                       width:1
                                                      height:1
                                             featureChannels:NUM_HIDDEN1];
    // Output side: 1 x 1 image with NUM_OUTPUT feature channels.
    MPSImageDescriptor *did =
        [MPSImageDescriptor imageDescriptorWithChannelFormat:MPSImageFeatureChannelFormatFloat32
                                                       width:1
                                                      height:1
                                             featureChannels:NUM_OUTPUT];

    device = MTLCreateSystemDefaultDevice();
    commandQueue = [device newCommandQueue];

    // Initialize MPSImage from descriptors
    srcImage = [[MPSImage alloc] initWithDevice:device imageDescriptor:sid];
    h1Image = [[MPSImage alloc] initWithDevice:device imageDescriptor:h1id];
    finalImage = [[MPSImage alloc] initWithDevice:device imageDescriptor:did];

    relu = [[MPSCNNNeuronReLU alloc] initWithDevice:device a:0];
}

// Builds the two fully connected layers. The coefficient files are selected
// by the kernelParamsBinaryName suffix ("1" for h1, "2" for h2).
// NOTE(review): ReLU is also applied to the output layer h2 - confirm this
// matches the activation used by the Keras model.
-(void)makeLayers
{
    h1 = [[SlimMPSCNNFullyConnected alloc] initWithKernelWidth:NUM_INPUT
                                                  kernelHeight:1
                                          inputFeatureChannels:1
                                         outputFeatureChannels:NUM_HIDDEN1
                                                  neuronFilter:relu
                                                        device:device
                                        kernelParamsBinaryName:@"1"];

    h2 = [[SlimMPSCNNFullyConnected alloc] initWithKernelWidth:1
                                                  kernelHeight:1
                                          inputFeatureChannels:NUM_HIDDEN1
                                         outputFeatureChannels:NUM_OUTPUT
                                                  neuronFilter:relu
                                                        device:device
                                        kernelParamsBinaryName:@"2"];
}

// Runs one inference with fixed inputs and logs the three outputs.
-(void)checkNN
{
    Inputs_t inputs;
    inputs.angle0 = 0.1;
    inputs.angle1 = 0.2;
    // Same inputs as on the Keras side.

    MPSResult_t r = [self inferenceForInputs:inputs];
    NSLog(@"q0=%f,q1=%f,q2=%f",r.q0,r.q1,r.q2);
}

// Uploads the inputs to srcImage, encodes h1 then h2 on a command buffer, and
// returns the three values read back from finalImage.
// The GPU work is submitted from a global queue, but the calling thread
// blocks on a semaphore until the command buffer's completion handler runs.
-(MPSResult_t)inferenceForInputs:(Inputs_t)inputs
{
    __block MPSResult_t mpsResult;
    dispatch_semaphore_t semaphore = dispatch_semaphore_create(0);

    dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_BACKGROUND, 0), ^{
        self->angles[0] = inputs.angle0;
        self->angles[1] = inputs.angle1;

        // Copy the input row into the source texture.
        [self->srcImage.texture replaceRegion:self->srcImageRegion
                                  mipmapLevel:0
                                        slice:0
                                    withBytes:self->angles
                                  bytesPerRow:sizeof(float)*4
                                bytesPerImage:0];

        @autoreleasepool{
            id <MTLCommandBuffer> commandBuffer = [self->commandQueue commandBuffer];

            [self->h1 encodeToCommandBuffer:commandBuffer
                                sourceImage:self->srcImage
                           destinationImage:self->h1Image];
            [self->h2 encodeToCommandBuffer:commandBuffer
                                sourceImage:self->h1Image
                           destinationImage:self->finalImage];

            [commandBuffer addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
                // Reads one texel (4 floats) from the 1x1 output image;
                // qResults is sized in -setupNN to hold all of it.
                [self->finalImage.texture getBytes:&self->qResults[0]
                                       bytesPerRow:sizeof(float)*4
                                        fromRegion:self->filnalImageRegion
                                       mipmapLevel:0];
                mpsResult.q0 = self->qResults[0];
                mpsResult.q1 = self->qResults[1];
                mpsResult.q2 = self->qResults[2];
                dispatch_semaphore_signal(semaphore);
            }];

            [commandBuffer commit];
        }
    });

    dispatch_semaphore_wait(semaphore, DISPATCH_TIME_FOREVER);
    return mpsResult;
}
SlimMPSCNNFullyConnectedクラスはAPPLEのサンプルコードを参考にしてる。というかほとんどそのまま。
実装部分はこんな感じで↓
/// Creates a fully connected layer whose weights and biases are loaded from
/// two binary files of raw floats.
///
/// The files are looked up as
/// `<BaseFolder><STR_WEIGHTS>_<kernelParamsBinaryName>.dat` and
/// `<BaseFolder><STR_BIAS>_<kernelParamsBinaryName>.dat`
/// (BaseFolder, STR_WEIGHTS and STR_BIAS are defined outside this snippet).
///
/// @param kernelWidth            Kernel width passed to the convolution descriptor.
/// @param kernelHeight           Kernel height passed to the convolution descriptor.
/// @param inputFeatureChannels   Number of input feature channels.
/// @param outputFeatureChannels  Number of output feature channels; also the bias count.
/// @param neuronFilter           Activation applied by the layer.
/// @param device                 Metal device the kernel is created for.
/// @param kernelParamsBinaryName Suffix that selects the coefficient files.
/// @return The initialized layer, or nil if the temporary coefficient buffers
///         could not be allocated or the superclass initializer fails.
///
/// A missing or short coefficient file does not abort initialization (the
/// remaining coefficients stay zero, as before), but it is now logged.
/// NOTE(review): the weight file is assumed to already be in the element
/// order MPS expects - confirm against the code that exports it from Keras.
- (instancetype)initWithKernelWidth:(NSUInteger)kernelWidth
                       kernelHeight:(NSUInteger)kernelHeight
               inputFeatureChannels:(NSUInteger)inputFeatureChannels
              outputFeatureChannels:(NSUInteger)outputFeatureChannels
                       neuronFilter:(MPSCNNNeuron *)neuronFilter
                             device:(id<MTLDevice>)device
             kernelParamsBinaryName:(NSString *)kernelParamsBinaryName
{
    NSUInteger nofWeight = inputFeatureChannels*kernelHeight*kernelWidth*outputFeatureChannels;
    NSUInteger nofBias = outputFeatureChannels;
    NSUInteger weightBytes = nofWeight*sizeof(float);
    NSUInteger biasBytes = nofBias*sizeof(float);

    // Zero-filled scratch buffers; MPS copies from them during super init,
    // so they are freed before returning.
    float *weightP = calloc(nofWeight, sizeof(float));
    float *biasP = calloc(nofBias, sizeof(float));
    if (weightP == NULL || biasP == NULL) {
        free(weightP);
        free(biasP);
        return nil;
    }

    NSString *wbdataFolder = [NSString stringWithFormat:@"%@",BaseFolder];
    NSString *fileName_w = [NSString stringWithFormat:@"%@_%@.dat",STR_WEIGHTS , kernelParamsBinaryName];
    NSString *fileName_b = [NSString stringWithFormat:@"%@_%@.dat",STR_BIAS, kernelParamsBinaryName];
    NSString *filePath_w = [NSString stringWithFormat:@"%@%@",wbdataFolder,fileName_w];
    NSString *filePath_b = [NSString stringWithFormat:@"%@%@",wbdataFolder,fileName_b];

    NSData *wData = [[NSData alloc] initWithContentsOfFile:filePath_w];
    NSData *bData = [[NSData alloc] initWithContentsOfFile:filePath_b];

    // Surface missing or truncated files instead of silently running the
    // layer with zero or partial coefficients.
    if (wData.length < weightBytes) {
        NSLog(@"SlimMPSCNNFullyConnected: weight file %@ has %lu bytes, expected %lu",
              filePath_w, (unsigned long)wData.length, (unsigned long)weightBytes);
    }
    if (bData.length < biasBytes) {
        NSLog(@"SlimMPSCNNFullyConnected: bias file %@ has %lu bytes, expected %lu",
              filePath_b, (unsigned long)bData.length, (unsigned long)biasBytes);
    }

    // Never ask NSData for more bytes than it holds; with nil data this is a no-op.
    [wData getBytes:weightP length:MIN(wData.length, weightBytes)];
    [bData getBytes:biasP length:MIN(bData.length, biasBytes)];

    // Coefficient check
    //for (int ite=0;ite<nofWeight;ite++) {
    //    NSLog(@"weight%@:%f",kernelParamsBinaryName,weightP[ite]);
    //}
    //for (int ite=0;ite<nofBias;ite++) {
    //    NSLog(@"bias%@:%f",kernelParamsBinaryName,biasP[ite]);
    //}

    MPSCNNConvolutionDescriptor *convDesc =
        [MPSCNNConvolutionDescriptor cnnConvolutionDescriptorWithKernelWidth:kernelWidth
                                                                kernelHeight:kernelHeight
                                                        inputFeatureChannels:inputFeatureChannels
                                                       outputFeatureChannels:outputFeatureChannels
                                                                neuronFilter:neuronFilter];

    self = [super initWithDevice:device
           convolutionDescriptor:convDesc
                   kernelWeights:weightP
                       biasTerms:biasP
                           flags:MPSCNNConvolutionFlagsNone];
    self.destinationFeatureChannelOffset = 0;

    free(weightP);
    free(biasP);

    return self;
}
(3)結果比較
もちろん入力は同じ。
resutl=[[0.69752 0.10237998 1.01858 ]]
↑Keras側の実行結果
q0=0.697416,q1=0.102600,q2=1.018707
↑MPS側の結果
いつものことながら、微妙に違うけどまあこんなもんかな。
とにかくこれでKerasの学習で得た係数をMPSで使い、同じ計算をすることができた。
続き → MPSでsoftmax関数を使う
スポンサーサイト
<< MPSでsoftmax関数を使う TopPage KerasとMPSで同じ計算をする(1) >>
トラックバック
トラックバックURL
https://ringsbell.blog.fc2.com/tb.php/1187-1e02d696
https://ringsbell.blog.fc2.com/tb.php/1187-1e02d696