Abstract
In a unified resource scheduling architecture, containers within the same microservice often encounter temporal and spatial performance imbalances when deployed in large-scale shared clusters. As a result, the commonly employed load-balancing approach often leads to substantial resource wastage as applications are frequently over-provisioned to meet service level agreements (SLAs).
In this paper, we utilize an alternative approach by leveraging load imbalance. The central concept involves the dynamic load shifting across microservice containers with a focus on imbalance awareness. However, achieving seamless integration between load shifting and resource scaling, while accommodating the demands of partial connection between upstream and downstream containers, remains a challenge. To address this challenge, we introduce Imbres—a new microservice system that optimizes load shifting, connection management, and resource scaling in tandem. One significant advantage of Imbres lies in its rapid responsiveness, relying solely on online gradients of latency, eliminating the need for offline profiling. Evaluation using real microservice benchmarks reveals that Imbres reduces resource usage by up to 1.5× and decreases SLA violation probability by up to 5.2×, comparing to state-of-the-art systems.
Materials
PDF available here
Cite this work
@inbook{10.1145/3676641.3716255, author = {Luo, Shutian and Liao, Jianxiong and Lin, Chenyu and Xu, Huanle and Zhou, Zhi and Xu, Chengzhong}, title = {Embracing Imbalance: Dynamic Load Shifting among Microservice Containers in Shared Clusters}, year = {2025}, isbn = {9798400710797}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, url = {https://doi.org/10.1145/3676641.3716255}, abstract = {In a unified resource scheduling architecture, containers within the same microservice often encounter temporal and spatial performance imbalance when deployed in large-scale shared clusters. As a result, the commonly employed load-balancing approach often leads to substantial resource wastage as applications are frequently over-provisioned to meet service level agreements (SLAs).In this paper, we utilize an alternative approach by leveraging load imbalance. The central concept involves the dynamic load shifting across microservice containers with a focus on imbalance awareness. However, achieving seamless integration between load shifting and resource scaling, while accommodating the demands of partial connection between upstream and downstream containers, remains a challenge. To address this challenge, we introduce Imbres-a new microservice system that optimizes load shifting, connection management, and resource scaling in tandem. One significant advantage of Imbres lies in its rapid responsiveness, relying solely on online gradients of latency, eliminating the need for offline profiling. Evaluation using real microservice benchmarks reveals that Imbres reduces resource allocation by up to 62\% and decreases SLA violation probability by up to 82\%, compared to state-of-the-art systems.}, booktitle = {Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2}, pages = {309–324}, numpages = {16} }