% Conference paper entry; the url field points to its OpenReview forum page.
@inproceedings{sun2023principle,
  author    = {Sun, Zhiqing and Shen, Yikang and Zhou, Qinhong and Zhang, Hongxin and Chen, Zhenfang and Cox, David and Yang, Yiming and Gan, Chuang},
  title     = {Principle-Driven Self-Alignment of Language Models from Scratch with Minimal Human Supervision},
  booktitle = {Thirty-seventh Conference on Neural Information Processing Systems},
  year      = {2023},
  url       = {https://openreview.net/forum?id=p40XRfBX96},
}
NeurIPS (Spotlight)
DIFUSCO: Graph-based Diffusion Solvers for Combinatorial Optimization
Zhiqing Sun and Yiming Yang
In Thirty-seventh Conference on Neural Information Processing Systems, 2023
% Conference paper entry; the acronym in the title is braced so styles that
% re-case titles leave it untouched.
@inproceedings{sun2023difusco,
  author    = {Sun, Zhiqing and Yang, Yiming},
  title     = {{DIFUSCO}: Graph-based Diffusion Solvers for Combinatorial Optimization},
  booktitle = {Thirty-seventh Conference on Neural Information Processing Systems},
  year      = {2023},
  url       = {https://openreview.net/forum?id=JV8Ff0lgVV},
}
ICML SODS Workshop (Oral)
Accelerating Diffusion-based Combinatorial Optimization Solvers by Progressive Distillation
Junwei Huang, Zhiqing Sun, and Yiming Yang
In ICML 2023 Workshop: Sampling and Optimization in Discrete Space, 2023
% Workshop paper entry; the url field points to its OpenReview forum page.
@inproceedings{huang2023accelerating,
  author    = {Huang, Junwei and Sun, Zhiqing and Yang, Yiming},
  title     = {Accelerating Diffusion-based Combinatorial Optimization Solvers by Progressive Distillation},
  booktitle = {ICML 2023 Workshop: Sampling and Optimization in Discrete Space},
  year      = {2023},
  url       = {https://openreview.net/forum?id=AbMj31okE4},
}
ICML
A Neural PDE Solver with Temporal Stencil Modeling
Zhiqing Sun, Yiming Yang, and Shinjae Yoo
In Proceedings of the 40th International Conference on Machine Learning, 2023
Numerical simulation of non-linear partial differential equations plays a crucial role in modeling physical science and engineering phenomena, such as weather, climate, and aerodynamics. Recent Machine Learning (ML) models trained on low-resolution spatio-temporal signals have shown new promises in capturing important dynamics in high-resolution signals, under the condition that the models can effectively recover the missing details. However, this study shows that significant information is often lost in the low-resolution down-sampled features. To address such issues, we propose a new approach, namely Temporal Stencil Modeling (TSM), which combines the strengths of advanced time-series sequence modeling (with the HiPPO features) and state-of-the-art neural PDE solvers (with learnable stencil modeling). TSM aims to recover the lost information from the PDE trajectories and can be regarded as a temporal generalization of classic finite volume methods such as WENO. Our experimental results show that TSM achieves the new state-of-the-art simulation accuracy for 2-D incompressible Navier-Stokes turbulent flows: it significantly outperforms the previously reported best results by 19.9% in terms of the highly-correlated duration time, and reduces the inference latency into 80%. We also show a strong generalization ability of the proposed method to various out-of-distribution turbulent flow settings, as well as lower resolution or 1-D / 3-D settings. Our code is available at https://github.com/Edward-Sun/TSM-PDE .
% PMLR proceedings entry: series, volume, editors, publisher and page range
% are all recorded alongside the proceedings url.
@inproceedings{pmlr-v202-sun23o,
  author    = {Sun, Zhiqing and Yang, Yiming and Yoo, Shinjae},
  title     = {A Neural {PDE} Solver with Temporal Stencil Modeling},
  booktitle = {Proceedings of the 40th International Conference on Machine Learning},
  editor    = {Krause, Andreas and Brunskill, Emma and Cho, Kyunghyun and Engelhardt, Barbara and Sabato, Sivan and Scarlett, Jonathan},
  series    = {Proceedings of Machine Learning Research},
  volume    = {202},
  pages     = {33135--33155},
  publisher = {PMLR},
  year      = {2023},
  url       = {https://proceedings.mlr.press/v202/sun23o.html},
}
ICLR
Recitation-Augmented Language Models
Zhiqing Sun, Xuezhi Wang, Yi Tay, Yiming Yang, and Denny Zhou
In The Eleventh International Conference on Learning Representations, 2023
% Conference paper entry; no url or page range is recorded for it.
@inproceedings{sun2023recitation,
  author    = {Sun, Zhiqing and Wang, Xuezhi and Tay, Yi and Yang, Yiming and Zhou, Denny},
  title     = {Recitation-Augmented Language Models},
  booktitle = {The Eleventh International Conference on Learning Representations},
  year      = {2023},
}
2022
Preprint
BLOOM: A 176B-Parameter Open-Access Multilingual Language Model
Teven Le Scao, Angela Fan, Christopher Akiki, Ellie Pavlick, Suzana Ilić, Daniel Hesslow, Roman Castagné, Alexandra Sasha Luccioni, François Yvon, Matthias Gallé, and others
% arXiv preprint entry. The first author's family name is the two-word
% "Le Scao", so both words go before the comma in "Last, First" form;
% otherwise the name sorts and abbreviates under "Scao". The acronym and the
% model size in the title are braced so re-casing styles keep their capitals.
% The citation key is unchanged so existing citations still resolve.
@article{scao2022bloom,
  author  = {Le Scao, Teven and Fan, Angela and Akiki, Christopher and Pavlick, Ellie and Ili{\'c}, Suzana and Hesslow, Daniel and Castagn{\'e}, Roman and Luccioni, Alexandra Sasha and Yvon, Fran{\c{c}}ois and Gall{\'e}, Matthias and others},
  title   = {{BLOOM}: A {176B}-Parameter Open-Access Multilingual Language Model},
  journal = {arXiv preprint arXiv:2211.05100},
  year    = {2022},
}
NeurIPS
DIMES: A Differentiable Meta Solver for Combinatorial Optimization Problems
Ruizhong Qiu*, Zhiqing Sun*, and Yiming Yang
Advances in Neural Information Processing Systems, 2022
% Title stored in Title Case: a style can lower-case a title but cannot
% restore capitals that were dropped at entry time. The citation key is
% unchanged.
@inproceedings{sun2021sparse,
  author    = {Sun, Zhiqing and Yang, Yiming and Yoo, Shinjae},
  title     = {Sparse Attention with Learning to Hash},
  booktitle = {International Conference on Learning Representations},
  year      = {2022},
}
ACL (Demo)
PromptSource: An Integrated Development Environment and Repository for Natural Language Prompts
Stephen Bach, Victor Sanh, Zheng Xin Yong, Albert Webson, Colin Raffel, Nihal V Nayak, Abheesht Sharma, Taewoon Kim, M Saiful Bari, Thibault Févry, and others
In Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics: System Demonstrations, 2022
% The camel-case system name in the title is braced; without the braces,
% styles that sentence-case titles lower-case its inner capital.
@inproceedings{bach2022promptsource,
  author    = {Bach, Stephen and Sanh, Victor and Yong, Zheng Xin and Webson, Albert and Raffel, Colin and Nayak, Nihal V and Sharma, Abheesht and Kim, Taewoon and Bari, M Saiful and F{\'e}vry, Thibault and others},
  title     = {{PromptSource}: An Integrated Development Environment and Repository for Natural Language Prompts},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics: System Demonstrations},
  pages     = {93--104},
  year      = {2022},
}
2021
ICCV
Rethinking Transformer-based Set Prediction for Object Detection
Zhiqing Sun*, Shengcao Cao*, Yiming Yang, and Kris M Kitani
In Proceedings of the IEEE/CVF International Conference on Computer Vision, 2021
% Workshop paper entry with a page range; no url is recorded for it.
@inproceedings{hung2021hierarchical,
  author    = {Hung, Alex Ling Yu and Sun, Zhiqing and Chen, Wanwen and Galeotti, John},
  title     = {Hierarchical Probabilistic Ultrasound Image Inpainting via Variational Inference},
  booktitle = {MICCAI Workshop on Deep Generative Models},
  pages     = {83--92},
  year      = {2021},
}
2020
ACL
MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices
% Both model names in the title are braced; without the braces, styles that
% sentence-case titles render them as "Mobilebert" and "bert".
@inproceedings{sun2020mobilebert,
  author    = {Sun, Zhiqing and Yu, Hongkun and Song, Xiaodan and Liu, Renjie and Yang, Yiming and Zhou, Denny},
  title     = {{MobileBERT}: a Compact Task-Agnostic {BERT} for Resource-Limited Devices},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  pages     = {2158--2170},
  year      = {2020},
}
ACL
A Re-evaluation of Knowledge Graph Completion Methods
Zhiqing Sun*, Shikhar Vashishth*, Soumya Sanyal*, Partha Talukdar, and Yiming Yang
In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, 2020
% The acronym in the title is braced so re-casing styles do not turn it into
% "em". PMLR is the publisher of these proceedings, so it is recorded in the
% publisher field rather than organization; booktitle, series and volume
% follow the usual PMLR proceedings layout. The citation key is unchanged.
@inproceedings{sun2020approach,
  author    = {Sun, Zhiqing and Yang, Yiming},
  title     = {An {EM} Approach to Non-autoregressive Conditional Sequence Generation},
  booktitle = {Proceedings of the 37th International Conference on Machine Learning},
  series    = {Proceedings of Machine Learning Research},
  volume    = {119},
  pages     = {9249--9258},
  publisher = {PMLR},
  year      = {2020},
}
ICLR
Dynamically Pruned Message Passing Networks for Large-scale Knowledge Graph Reasoning
Xiaoran Xu, Wei Feng, Yunsheng Jiang, Xiaohui Xie, Zhiqing Sun, and Zhi-Hong Deng
In International Conference on Learning Representations, 2020
% Conference paper entry; no url or page range is recorded for it.
@inproceedings{xu2019dynamically,
  author    = {Xu, Xiaoran and Feng, Wei and Jiang, Yunsheng and Xie, Xiaohui and Sun, Zhiqing and Deng, Zhi-Hong},
  title     = {Dynamically Pruned Message Passing Networks for Large-scale Knowledge Graph Reasoning},
  booktitle = {International Conference on Learning Representations},
  year      = {2020},
}
ICLR DeepDiffeq Workshop
Understanding and Improving Transformer From a Multi-Particle Dynamic System Point of View
Yiping Lu, Zhuohan Li, Di He, Zhiqing Sun, Bin Dong, Tao Qin, Liwei Wang, and Tie-yan Liu
In ICLR 2020 Workshop on Integration of Deep Neural Models and Differential Equations, 2020
% The title carries no trailing period: bibliography styles add their own
% terminating punctuation, so a period stored in the field prints twice.
@inproceedings{lu2020understanding,
  author    = {Lu, Yiping and Li, Zhuohan and He, Di and Sun, Zhiqing and Dong, Bin and Qin, Tao and Wang, Liwei and Liu, Tie-yan},
  title     = {Understanding and Improving Transformer From a Multi-Particle Dynamic System Point of View},
  booktitle = {ICLR 2020 Workshop on Integration of Deep Neural Models and Differential Equations},
  year      = {2020},
}
2019
NeurIPS
Fast Structured Decoding for Sequence Models
Zhiqing Sun*, Zhuohan Li*, Haoqing Wang, Di He, Zi Lin, and Zhihong Deng
Advances in Neural Information Processing Systems, 2019
% The title is stored in Title Case with the camel-case method name braced;
% the earlier lower-cased form had lost the inner capitals of the name.
@inproceedings{sun2019divgraphpointer,
  author    = {Sun, Zhiqing and Tang, Jian and Du, Pan and Deng, Zhi-Hong and Nie, Jian-Yun},
  title     = {{DivGraphPointer}: A Graph Pointer Network for Extracting Diverse Keyphrases},
  booktitle = {Proceedings of the 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages     = {755--764},
  year      = {2019},
}
ICLR
RotatE: Knowledge Graph Embedding by Relational Rotation in Complex Space
Zhiqing Sun, Zhi-Hong Deng, Jian-Yun Nie, and Jian Tang
In International Conference on Learning Representations, 2019
% The model name in the title ends in a capital letter, so it is braced;
% without the braces, styles that sentence-case titles print it as "Rotate".
@inproceedings{sun2018rotate,
  author    = {Sun, Zhiqing and Deng, Zhi-Hong and Nie, Jian-Yun and Tang, Jian},
  title     = {{RotatE}: Knowledge Graph Embedding by Relational Rotation in Complex Space},
  booktitle = {International Conference on Learning Representations},
  year      = {2019},
}
2018
EMNLP
Unsupervised Neural Word Segmentation for Chinese via Segmental Language Modeling
Zhiqing Sun and Zhi-Hong Deng
In Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, 2018
% The language name in the title is a proper noun, so it is braced to keep
% its capital under styles that sentence-case titles.
@inproceedings{sun2018unsupervised,
  author    = {Sun, Zhiqing and Deng, Zhi-Hong},
  title     = {Unsupervised Neural Word Segmentation for {Chinese} via Segmental Language Modeling},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  pages     = {4915--4920},
  year      = {2018},
}