| @misc{christop2025clonevalopenvoicecloning, | |
| author = {Christop, Iwona and Kuczy{\'n}ski, Tomasz and Kubis, Marek}, | |
| title = {{ClonEval: An Open Voice Cloning Benchmark}}, | |
| year = {2025}, | |
| eprint = {2504.20581}, | |
| archivePrefix = {arXiv}, | |
| primaryClass = {cs.CL}, | |
| url = {https://arxiv.org/abs/2504.20581}, | |
| } | |
| @article{crema-d, | |
| author = {Cao, Houwei and Cooper, David G. and Keutmann, Michael K. and Gur, Ruben C. and Nenkova, Ani and Verma, Ragini}, | |
| title = {{CREMA-D: Crowd-Sourced Emotional Multimodal Actors Dataset}}, | |
| journal = {IEEE Transactions on Affective Computing}, | |
| year = {2014}, | |
| volume = {5}, | |
| number = {4}, | |
| pages = {377--390}, | |
| doi = {10.1109/TAFFC.2014.2336244}, | |
| } | |
| @inproceedings{librispeech2015, | |
| author = {Panayotov, Vassil and Chen, Guoguo and Povey, Daniel and Khudanpur, Sanjeev}, | |
| title = {{Librispeech: An ASR corpus based on public domain audio books}}, | |
| booktitle = {2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, | |
| year = {2015}, | |
| pages = {5206--5210}, | |
| keywords = {Resource description framework;Genomics;Bioinformatics;Blogs;Information services;Electronic publishing;Speech Recognition;Corpus;LibriVox}, | |
| doi = {10.1109/ICASSP.2015.7178964}, | |
| } | |
| @article{ravdess, | |
| author = {Livingstone, Steven R. and Russo, Frank A.}, | |
| title = {{The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English}}, | |
| journal = {PLOS ONE}, | |
| publisher = {Public Library of Science}, | |
| year = {2018}, | |
| month = may, | |
| volume = {13}, | |
| number = {5}, | |
| pages = {1--35}, | |
| doi = {10.1371/journal.pone.0196391}, | |
| url = {https://doi.org/10.1371/journal.pone.0196391}, | |
| } | |
| @incollection{savee, | |
| author = {Haq, S. and Jackson, P. J. B.}, | |
| title = {{Multimodal Emotion Recognition}}, | |
| booktitle = {{Machine Audition: Principles, Algorithms and Systems}}, | |
| editor = {Wang, W.}, | |
| publisher = {IGI Global}, | |
| address = {Hershey PA}, | |
| year = {2010}, | |
| month = aug, | |
| pages = {398--423}, | |
| } | |
| @misc{tess, | |
| author = {Pichora-Fuller, M. Kathleen and Dupuis, Kate}, | |
| title = {{Toronto emotional speech set (TESS)}}, | |
| publisher = {Borealis}, | |
| version = {DRAFT VERSION}, | |
| year = {2020}, | |
| doi = {10.5683/SP2/E8H2MF}, | |
| url = {https://doi.org/10.5683/SP2/E8H2MF}, | |
| } | |