I am making an audio player using XAudio2. We transmit data in packets of 640 bytes with a sampling frequency of 8000 Hz and a sampling depth of 16 bits. We use SlimDX to access XAudio2.
But when playing sound, we notice that the sound quality is poor. This is, for example, a 3 kHz sine curve captured by Audacity. 
I stripped the audio player down to the bare basics, but the sound quality is still bad. Is this a bug in XAudio2, SlimDX or my code, or is it just an artifact of the conversion from 8 kHz to 44.1 kHz? The latter seems unreasonable, since we also create PCM WAV files from the same data, and those play back fine in Windows Media Player.
Below is a basic implementation that generates a broken sine.
/// <summary>
/// Minimal XAudio2 (SlimDX) test window: on a button click it starts streaming a
/// generated 3 kHz sine tone as 8 kHz / 16-bit / mono PCM, re-queuing one packet
/// every time the voice starts playing the previous one.
/// </summary>
public partial class MainWindow : Window
{
    /// <summary>Number of bytes handed to the source voice per submitted packet.</summary>
    private const int PacketBytes = 640;

    /// <summary>Maximum frequency ratio passed to the <see cref="SourceVoice"/> constructor.</summary>
    private const float MaxFrequencyRatio = 8;

    private XAudio2 device = new XAudio2();
    private WaveFormatExtensible format = new WaveFormatExtensible();
    private SourceVoice sourceVoice = null;
    private MasteringVoice masteringVoice = null;
    private readonly Guid KSDATAFORMAT_SUBTYPE_PCM = new Guid("00000001-0000-0010-8000-00aa00389b71");

    // Signalled from BufferEnd. The BufferStart handler no longer waits on it (see there);
    // the event is kept in case other parts of this partial class use it.
    private AutoResetEvent BufferReady = new AutoResetEvent(false);
    private PlayBufferPool PlayBuffers = new PlayBufferPool();

    /// <summary>
    /// Builds the window and describes the stream format: mono, 16 bits per sample,
    /// 8000 samples per second, PCM sub-format.
    /// </summary>
    public MainWindow()
    {
        InitializeComponent();
        Closing += OnClosing;

        format.Channels = 1;
        format.BitsPerSample = 16;
        format.FormatTag = WaveFormatTag.Extensible;
        format.BlockAlignment = (short)(format.Channels * (format.BitsPerSample / 8));
        format.SamplesPerSecond = 8000;
        format.AverageBytesPerSecond = format.SamplesPerSecond * format.BlockAlignment;
        format.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
        // NOTE(review): an extensible format normally also carries a valid-bits-per-sample
        // value and a channel mask; neither is set here - confirm whether SlimDX fills them in.
    }

    /// <summary>
    /// Stops playback and releases the voices and the play buffers.
    /// Safe to call when playback was never started.
    /// </summary>
    private void OnClosing(object sender, CancelEventArgs cancelEventArgs)
    {
        // The voices only exist after the button has been clicked.
        if (sourceVoice != null)
        {
            sourceVoice.Stop();
            sourceVoice.Dispose();
            sourceVoice = null;
        }

        if (masteringVoice != null)
        {
            masteringVoice.Dispose();
            masteringVoice = null;
        }

        // Released after the source voice so no queued packet still points into the buffers.
        PlayBuffers.Dispose();
    }

    /// <summary>
    /// Creates the mastering and source voices, queues the first packet and starts playback.
    /// Further clicks while a voice exists are ignored.
    /// </summary>
    private void button_Click(object sender, RoutedEventArgs e)
    {
        // A second click used to create another pair of voices without releasing the first.
        if (sourceVoice != null)
        {
            return;
        }

        masteringVoice = new MasteringVoice(device);

        PlayBuffer buffer = PlayBuffers.NextBuffer();
        GenerateSine(buffer.Buffer);
        buffer.AudioBuffer.AudioBytes = PacketBytes;

        sourceVoice = new SourceVoice(device, format, VoiceFlags.None, MaxFrequencyRatio);
        sourceVoice.BufferStart += new EventHandler<ContextEventArgs>(sourceVoice_BufferStart);
        sourceVoice.BufferEnd += new EventHandler<ContextEventArgs>(sourceVoice_BufferEnd);
        sourceVoice.SubmitSourceBuffer(buffer.AudioBuffer);
        sourceVoice.Start();
    }

    /// <summary>Signals that the voice has finished with a packet.</summary>
    private void sourceVoice_BufferEnd(object sender, ContextEventArgs e)
    {
        BufferReady.Set();
    }

    /// <summary>
    /// Queues the next packet as soon as the voice starts playing the current one.
    /// </summary>
    private void sourceVoice_BufferStart(object sender, ContextEventArgs e)
    {
        // No wait on BufferReady here. XAudio2 voice callbacks must not block
        // (assuming SlimDX raises this event straight from the native callback - confirm),
        // and the old WaitOne(1000) always ran into its full timeout on the first packet,
        // because no BufferEnd had been raised yet. The wait was also unnecessary: with two
        // pool buffers and one packet queued per BufferStart, the buffer returned by
        // NextBuffer() is never the one currently playing.
        PlayBuffer nextBuffer = PlayBuffers.NextBuffer();
        nextBuffer.DataStream.Position = 0;
        nextBuffer.AudioBuffer.AudioBytes = PacketBytes;
        GenerateSine(nextBuffer.Buffer);
        sourceVoice.SubmitSourceBuffer(nextBuffer.AudioBuffer);
    }

    /// <summary>
    /// Fills the whole of <paramref name="buffer"/> with a 3 kHz sine sampled at 8 kHz,
    /// at a quarter of full scale, as 16-bit little-endian samples.
    /// </summary>
    /// <param name="buffer">Destination; a trailing odd byte, if any, is left untouched.</param>
    /// <remarks>
    /// Every call starts at phase zero. With 320-sample packets (640 bytes) the tone joins up
    /// seamlessly only when the frequency is a multiple of 25 Hz (8000 / 320); 3000 Hz is.
    /// </remarks>
    private void GenerateSine(byte[] buffer)
    {
        const double sampleRate = 8000.0;
        const double frequency = 3000.0;
        double amplitude = 0.25 * short.MaxValue;

        int sampleCount = buffer.Length / 2;
        for (int n = 0; n < sampleCount; n++)
        {
            short sample = (short)(amplitude * Math.Sin((2 * Math.PI * n * frequency) / sampleRate));

            // Written byte by byte instead of allocating a one-element array per sample.
            buffer[2 * n] = (byte)(sample & 0xFF);
            buffer[2 * n + 1] = (byte)((sample >> 8) & 0xFF);
        }
    }
}

/// <summary>
/// A pinned managed byte array together with the SlimDX <see cref="DataStream"/> and
/// <see cref="AudioBuffer"/> that expose it to XAudio2.
/// </summary>
public class PlayBuffer : IDisposable
{
    // Room for four 640-byte packets. At 8 kHz / 16-bit mono (the format MainWindow
    // configures) one 640-byte packet is 320 samples = 40 ms.
    private const int BufferBytes = 640 * 4;

    private IntPtr BufferPtr;
    private GCHandle BufferHandle;

    /// <summary>Allocates and pins the backing array and wraps it for XAudio2.</summary>
    public PlayBuffer()
    {
        Index = 0;
        Buffer = new byte[BufferBytes];
        BufferHandle = GCHandle.Alloc(this.Buffer, GCHandleType.Pinned);

        // Use the pointer as returned: converting through ToInt32() fails in a 64-bit process.
        BufferPtr = BufferHandle.AddrOfPinnedObject();

        DataStream = new DataStream(BufferPtr, BufferBytes, true, false);
        AudioBuffer = new AudioBuffer();
        AudioBuffer.AudioData = DataStream;
    }

    /// <summary>Creates a buffer that remembers its position in a pool.</summary>
    /// <param name="index">Index stored in <see cref="Index"/>.</param>
    public PlayBuffer(int index)
        : this()
    {
        Index = index;
    }

    /// <summary>Releases the pin if the buffer was never disposed.</summary>
    ~PlayBuffer()
    {
        Dispose(false);
    }

    protected int Index { get; private set; }

    /// <summary>The pinned sample storage.</summary>
    public byte[] Buffer { get; private set; }

    /// <summary>Stream over <see cref="Buffer"/>.</summary>
    public DataStream DataStream { get; private set; }

    /// <summary>XAudio2 buffer descriptor whose data is <see cref="DataStream"/>.</summary>
    public AudioBuffer AudioBuffer { get; private set; }

    /// <summary>Releases the SlimDX wrappers and unpins the array. May be called repeatedly.</summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>Releases the resources held by this buffer.</summary>
    /// <param name="disposing">
    /// <c>true</c> when called from <see cref="Dispose()"/>; <c>false</c> when called from the
    /// finalizer, in which case other managed objects must not be touched.
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
        if (disposing)
        {
            if (AudioBuffer != null)
            {
                AudioBuffer.Dispose();
                AudioBuffer = null;
            }

            if (DataStream != null)
            {
                DataStream.Dispose();
                DataStream = null;
            }
        }

        // Without this the array stays pinned for the lifetime of the process.
        if (BufferHandle.IsAllocated)
        {
            BufferHandle.Free();
        }

        BufferPtr = IntPtr.Zero;
    }
}

/// <summary>
/// Fixed pool of two <see cref="PlayBuffer"/> instances handed out in round-robin order.
/// </summary>
public class PlayBufferPool : IDisposable
{
    private int _currentIndex = -1;
    private PlayBuffer[] _buffers = new PlayBuffer[2];

    /// <summary>Creates the pool and all of its buffers.</summary>
    public PlayBufferPool()
    {
        for (int i = 0; i < Buffers.Length; i++)
        {
            Buffers[i] = new PlayBuffer(i);
        }
    }

    // No finalizer: the pool holds only managed references, and each PlayBuffer
    // cleans up after itself if it is never disposed.

    protected int CurrentIndex
    {
        get { return _currentIndex; }
        set { _currentIndex = value; }
    }

    protected PlayBuffer[] Buffers
    {
        get { return _buffers; }
        set { _buffers = value; }
    }

    /// <summary>Disposes every buffer and clears its slot. May be called repeatedly.</summary>
    public void Dispose()
    {
        if (Buffers == null)
        {
            return;
        }

        for (int i = 0; i < Buffers.Length; i++)
        {
            if (Buffers[i] == null)
            {
                continue;
            }

            Buffers[i].Dispose();
            Buffers[i] = null;
        }
    }

    /// <summary>Advances to the next slot, wrapping around, and returns its buffer.</summary>
    /// <returns>The buffer in the new current slot; <c>null</c> once the pool is disposed.</returns>
    public PlayBuffer NextBuffer()
    {
        CurrentIndex = (CurrentIndex + 1) % Buffers.Length;
        return Buffers[CurrentIndex];
    }
}
Additional information:
The player is used to play recorded voice with various compression schemes, such as A-law, μ-law or TrueSpeech. The data is sent in small packets, decoded and passed to this player. That is why we use such a low sampling rate and such small buffers. However, there is nothing wrong with our data, since writing the same data to a WAV file gives perfect playback in WMP or VLC.
Edit: We have now “solved” this by rewriting the player with NAudio. I would still be interested in any insight into what is going on here. Is it our approach with the PlayBuffers, or is it simply a bug/limitation in DirectX or the wrappers? I tried using SharpDX instead of SlimDX, but that did not change anything.