Publications
2025
-
MusicSem: A Dataset of Music Descriptions on Reddit Capturing Musical Semantics. Rebecca Salganik, Teng Tu, Fei-Yueh Chen, and 8 more authors. 2025. MusicSem is a 32k-pair language–audio dataset mined from organic Reddit discussions, designed to capture how listeners naturally describe music. The captions are organized into five semantic categories—descriptive, atmospheric, situational, metadata-related, and contextual—so models can be trained and evaluated for sensitivity to nuanced, human-centered intent in text-to-music tasks and cross-modal retrieval.
@comment{salganik2025musicsem: the conference program link lives in `url`
  (it was previously stored in `journal`); `dataset` is a non-standard field
  that bibliography styles ignore -- presumably read by the site template,
  confirm before renaming it.}
@inproceedings{salganik2025musicsem,
  author    = {Salganik, Rebecca and Tu, Teng and Chen, Fei-Yueh and Liu, Xiaohao and Lu, Kaifeng and Luvisia, Ethan and Duan, Zhiyao and Salha-Galvan, Guillaume and Kahng, Anson and Ma, Yunshan and Kang, Jian},
  title     = {{MusicSem}: A Dataset of Music Descriptions on {Reddit} Capturing Musical Semantics},
  booktitle = {Demo, 26th International Society for Music Information Retrieval Conference},
  year      = {2025},
  url       = {https://ismir2025program.ismir.net/lbd_388.html},
  dataset   = {https://huggingface.co/datasets/Rsalga/MusicSem},
}