@comment{
  Conference paper: PipeMoE, published in the IEEE INFOCOM 2023 proceedings.
  The funding acknowledgement, publisher copyright line and conference
  name/dates are stored in dedicated fields (funding, copyright, eventtitle,
  eventdate) instead of a printed note. The citation key is a repository
  export identifier and is kept as-is so existing citations keep resolving.
}
@inproceedings{f239608d6f874ef99c93c20712d05bf5,
  author     = {Shi, Shaohuai and Pan, Xinglin and Chu, Xiaowen and Li, Bo},
  title      = {{PipeMoE}: Accelerating Mixture-of-Experts through Adaptive Pipelining},
  booktitle  = {{INFOCOM} 2023 - {IEEE} Conference on Computer Communications},
  series     = {Proceedings - {IEEE} {INFOCOM}},
  publisher  = {{IEEE}},
  address    = {United States},
  year       = 2023,
  month      = may,
  day        = {17},
  doi        = {10.1109/INFOCOM53939.2023.10228874},
  eventtitle = {42nd {IEEE} International Conference on Computer Communications, {INFOCOM} 2023},
  eventdate  = {2023-05-17/2023-05-20},
  language   = {English},
  keywords   = {Communication-Efficient Training, Distributed Deep Learning, Mixture-of-Experts, Pipelining},
  abstract   = {Large models have attracted much attention in the AI area. The sparsely activated mixture-of-experts (MoE) technique pushes the model size to a trillion-level with a sub-linear increase of computations as an MoE layer can be equipped with many separate experts, but only one or two experts need to be trained for each input data. However, the feature of dynamically activating experts of MoE introduces extensive communications in distributed training. In this work, we propose PipeMoE to adaptively pipeline the communications and computations in MoE to maximally hide the communication time. Specifically, we first identify the root reason why a higher pipeline degree does not always achieve better performance in training MoE models. Then we formulate an optimization problem that aims to minimize the training iteration time. To solve this problem, we build performance models for computation and communication tasks in MoE and develop an optimal solution to determine the pipeline degree such that the iteration time is minimal. We conduct extensive experiments with 174 typical MoE layers and two real-world NLP models on a 64-GPU cluster. Experimental results show that our PipeMoE almost always chooses the best pipeline degree and outperforms state-of-the-art MoE training systems by 5%-77% in training time.},
  funding    = {The research was supported in part by a RGC RIF grant under the contract R6021-20, and RGC GRF grants under the contracts 16209120 and 16200221.},
  copyright  = {Publisher Copyright: {\textcopyright} 2023 IEEE.},
}