This data is transcribed speech data in Amharic, Swahili, and Wolof.
This repository is a result of the ALFFA project http://alffa.imag.fr
A summary of these resources and of ASR performance, as well as a description of the ALFFA project, has been published in the following paper:
Collecting Resources in Sub-Saharan African Languages for Automatic Speech Recognition: a Case Study of Wolof. Elodie Gauthier, Laurent Besacier, Sylvie Voisin, Michael Melese and Uriel Pascal Elingui. To appear at LREC 2016.
So far, the ASR directory contains Kaldi recipes for 4 languages: Amharic, Swahili, Hausa, and Wolof.
% Reference for the Amharic speech corpus (INTERSPEECH 2005 paper).
% "Amharic" is braced so sentence-casing styles keep its capital letter.
@inproceedings{Abate2005,
  author    = {Abate, Solomon Teferra and Menzel, Wolfgang and Tafila, Bairu},
  title     = {An {Amharic} Speech Corpus for Large Vocabulary Continuous Speech Recognition},
  booktitle = {INTERSPEECH-2005},
  year      = {2005},
}
% Reference for the Swahili resources (SLTU 2012 workshop paper, HAL record hal-00954048).
% Proper nouns are protected as whole words; x-international-audience is a
% non-standard field kept from the original record and ignored by standard styles.
@inproceedings{gelas:hal-00954048,
  author    = {Gelas, Hadrien and Besacier, Laurent and Pellegrino, Fran{\c{c}}ois},
  title     = {Developments of {Swahili} Resources for an Automatic Speech Recognition System},
  booktitle = {{SLTU} - Workshop on Spoken Language Technologies for Under-Resourced Languages},
  year      = {2012},
  address   = {Cape Town, South Africa},
  x-international-audience = {yes},
  url       = {http://hal.inria.fr/hal-00954048},
}
% Reference for the Wolof resources and the ALFFA project summary (LREC 2016 paper).
% LREC is a conference, so this is an inproceedings entry with the venue in
% booktitle rather than an article with a journal field.
@inproceedings{gauthier2016collect,
  author    = {Gauthier, Elodie and Besacier, Laurent and Voisin, Sylvie and Melese, Michael and Elingui, Uriel Pascal},
  title     = {Collecting Resources in {Sub-Saharan} {African} Languages for Automatic Speech Recognition: a Case Study of {Wolof}},
  booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC} 2016)},
  year      = {2016},
}