% Conference paper: IBCircuit (Proceedings of the 42nd ICML, 2025).
% - {IBCircuit} in the title and {ICML} in the booktitle are brace-protected so
%   that styles which apply sentence casing keep their capitalisation.
% - The funding / acknowledgement text is stored in the annote field, which the
%   standard styles do not print; the note field carries only the conference
%   dates, since the conference name is already given by booktitle.
% - Authors use the "Last, First" form; the names themselves are unchanged.
@inproceedings{e95379f0bfdf4de6a2f2c8a0d7eefb43,
  author    = {Bian, Tian and Niu, Yifan and Yuan, Chaohao and Piao, Chengzhi and Wu, Bingzhe and Huang, Long-Kai and Rong, Yu and Xu, Tingyang and Cheng, Hong and Li, Jia},
  title     = {{IBCircuit}: Towards Holistic Circuit Discovery with Information Bottleneck},
  booktitle = {Proceedings of the 42nd International Conference on Machine Learning, {ICML} 2025},
  series    = {Proceedings of the International Conference on Machine Learning},
  publisher = {ML Research Press},
  year      = {2025},
  month     = jun,
  day       = {18},
  note      = {Conference held 13--19 July 2025},
  url       = {https://icml.cc/Conferences/2025},
  language  = {English},
  keywords  = {Circuit Discovery},
  abstract  = {Circuit discovery has recently attracted attention as a potential research direction to explain the non-trivial behaviors of language models. It aims to find the computational subgraphs, also known as circuits, within the model that are responsible for solving specific tasks. However, most existing studies overlook the holistic nature of these circuits and require designing specific corrupted activations for different tasks, which is inaccurate and inefficient. In this work, we propose an end-to-end approach based on the principle of Information Bottleneck, called IBCircuit, to holistically identify informative circuits. In contrast to traditional causal interventions, IBCircuit is an optimization framework for holistic circuit discovery and can be applied to any given task without tediously corrupted activation design. In both the Indirect Object Identification (IOI) and Greater-Than tasks, IBCircuit identifies more faithful and minimal circuits in terms of critical node components and edge components compared to recent related work.},
  annote    = {This research is supported by grants from the Research Grants Council of the Hong Kong Special Administrative Region, China (No. CUHK 14217622). This work was supported by Damo Academy (Hupan Laboratory) through the Damo Academy (Hupan Laboratory) Innovative Research Program. The authors would like to express their gratitude to the reviewers for their feedback, which has improved the clarity and contribution of the paper.},
}