% MAPS 2021 conference paper. The citation key is the ACM DOI; keep it stable so existing citations resolve.
@inproceedings{10.1145/3460945.3464955,
  author    = {Patabandi, Tharindu R. and Venkat, Anand and Kulkarni, Abhishek and Ratnalikar, Pushkar and Hall, Mary and Gottschlich, Justin},
  title     = {Predictive Data Locality Optimization for Higher-Order Tensor Computations},
  booktitle = {Proceedings of the 5th {ACM} {SIGPLAN} International Symposium on Machine Programming},
  series    = {MAPS 2021},
  year      = {2021},
  pages     = {43--52},
  numpages  = {10},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Virtual, Canada},
  isbn      = {9781450384674},
  doi       = {10.1145/3460945.3464955},
  url       = {https://doi.org/10.1145/3460945.3464955},
  keywords  = {Convolutional neural networks, Loop transformations, Machine learning, Compilers},
  abstract  = {Automating locality optimization is still an open problem for compiler writers. Compiler-based approaches, guided by analytical cost models have achieved some success in matching high performance libraries on a restricted set of computations such as general matrix multiply (GEMM). On the other hand, library-based approaches may present some open scalability concerns. Recent developments in convolutional neural networks has seen an explosion of models, each with differing combinations of parameters. Manually tuning each of these configurations can take many development months. Further, these operations are called multiple times during machine learning training, which necessitates highly optimized implementations. 2D convolutional operators are unique in that they consist of 7-deep loop nests with different loops carrying reuse for different tensors, making the problem of identifying an optimal loop ordering hard. We devise a machine learning-based compiler which learns a regression model, correlating performance with the loop order. We integrate this model with other traditional compiler analysis for transformations such as loop unrolling and vectorization, relying on the MultiLevel Intermediate Representation (MLIR) compiler framework. We achieve an average speedup of 1.67x and 1.41x against oneDNN for 2D convolution forward and weight update kernels respectively. We are also at 0.88x and 0.96x the performance of oneDNN's best performing implementation which applies additional data layout transformations.},
}