Two master's theses in collaboration between the Audiocommunication Group and IRCAM
aimed at a parametric synthesis of crowd noises, more precisely
of many people speaking simultaneously (Grimaldi, 2016; Knörzer, 2017).
Using a concatenative approach, the resulting synthesis system
can be used to dynamically change the affective state of the
virtual crowd. The resulting algorithm was applied in
user studies in virtual acoustic environments.
Recordings
The corpus of speech was gathered in two group sessions,
each with five persons, in the anechoic chamber at TU Berlin.
For each speaker, the recording was annotated into
regions of different valence and arousal and then
automatically segmented into syllables.
Features
Synthesis
The following example synthesizes a crowd
with a valence of -90 and an arousal of 80,
which can be categorized as frustrated, annoyed
or upset.
No virtual acoustic environment is used,
and the result is rather direct:
@inproceedings{grimaldi2017parametric,
author = "Grimaldi, Vincent and Böhm, Christoph and Weinzierl, Stefan and von Coler, Henrik",
title = "Parametric Synthesis of Crowd Noises in Virtual Acoustic Environments",
booktitle = "Proceedings of the 142nd Audio Engineering Society Convention",
year = "2017",
organization = "Audio Engineering Society",
address = "Berlin, Germany"
}
@mastersthesis{grimaldi2016parametric,
author = "Grimaldi, Vincent",
title = "Parametric crowd synthesis for virtual acoustic environments",
school = "IRCAM",
year = "2016"
}
@article{schwarz2006concatenative,
author = "Schwarz, Diemo",
title = "Concatenative sound synthesis: The early years",
journal = "Journal of New Music Research",
volume = "35",
number = "1",
pages = "3--22",
publisher = "Taylor \& Francis",
year = "2006"
}
@inproceedings{Schwarz2006realtimecorpus,
author = "Schwarz, Diemo and Beller, Grégory and Verbrugghe, Bruno and Britton, Sam",
title = "Real-Time Corpus-Based Concatenative Synthesis with {CataRT}",
booktitle = "Proceedings of the 9th International Conference on Digital Audio Effects (DAFx-06)",
year = "2006"
}
@inproceedings{schwarz2000,
author = "Schwarz, Diemo",
title = "A System for Data-Driven Concatenative Sound Synthesis",
booktitle = "Proceedings of the COST G-6 Conference on Digital Audio Effects (DAFx-00)",
year = "2000",
address = "Verona, Italy"
}
@inproceedings{charpentier1986diphone,
author = "Charpentier, F. and Stella, M.",
title = "Diphone synthesis using an overlap-add technique for speech waveforms concatenation",
booktitle = "ICASSP '86. IEEE International Conference on Acoustics, Speech, and Signal Processing",
year = "1986",
volume = "11",
pages = "2015--2018",
month = apr,
doi = "10.1109/ICASSP.1986.1168657",
keywords = "Speech synthesis;Signal synthesis;Linear predictive coding;Control system synthesis;Speech analysis;Filter bank;Signal analysis;Databases;Telecommunication control;Speech coding"
}