Веб-сайт самохостера Lotigara

summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- assets/opensb/binds/opensb.binds 14
-rw-r--r-- source/application/StarMainApplication_sdl.cpp 4
-rw-r--r-- source/client/StarClientApplication.cpp 8
-rw-r--r-- source/frontend/StarVoice.cpp 279
-rw-r--r-- source/frontend/StarVoice.hpp 76
5 files changed, 322 insertions, 59 deletions
diff --git a/assets/opensb/binds/opensb.binds b/assets/opensb/binds/opensb.binds
index b8a13c2..2f84e37 100644
--- a/assets/opensb/binds/opensb.binds
+++ b/assets/opensb/binds/opensb.binds
@@ -1,7 +1,8 @@
{
"opensb": {
"groups": {
- "camera": { "name": "Camera" }
+ "camera": { "name": "Camera" },
+ "voice": { "name": "Voice" }
},
"name": "Open^#ebd74a;Starbound",
"binds": {
@@ -21,13 +22,10 @@
"group" : "camera",
"name": "Zoom Out"
},
- "test": {
- "default": [{
- "type": "key",
- "value": "C",
- "mods": ["LShift"]
- }],
- "name": "Test Bind"
+ "pushToTalk": {
+ "default": [],
+ "group" : "voice",
+ "name": "Push To Talk"
}
}
}
diff --git a/source/application/StarMainApplication_sdl.cpp b/source/application/StarMainApplication_sdl.cpp
index 86b9a10..1685cc0 100644
--- a/source/application/StarMainApplication_sdl.cpp
+++ b/source/application/StarMainApplication_sdl.cpp
@@ -344,8 +344,10 @@ public:
SDL_AudioSpec obtained = {};
m_sdlAudioInputDevice = SDL_OpenAudioDevice(name, 1, &desired, &obtained, 0);
- if (m_sdlAudioInputDevice)
+ if (m_sdlAudioInputDevice) {
Logger::info("Opened audio input device '{}'", SDL_GetAudioDeviceName(m_sdlAudioInputDevice, 1));
+ SDL_PauseAudioDevice(m_sdlAudioInputDevice, 0);
+ }
else
Logger::info("Failed to open audio input device: {}", SDL_GetError());
diff --git a/source/client/StarClientApplication.cpp b/source/client/StarClientApplication.cpp
index f1c7595..8093582 100644
--- a/source/client/StarClientApplication.cpp
+++ b/source/client/StarClientApplication.cpp
@@ -375,6 +375,14 @@ void ClientApplication::update() {
else if (m_state > MainAppState::Title)
updateRunning();
+ { // testing
+ m_voice->setLocalSpeaker(0);
+ m_voice->setInput(m_input->bindHeld("opensb", "pushToTalk"));
+ DataStreamBuffer data;
+ if (m_voice->send(data, 5000))
+ m_voice->receive(m_voice->speaker(0), std::string_view(data.ptr(), data.size()));
+ }
+
m_guiContext->cleanup();
m_edgeKeyEvents.clear();
m_input->reset();
diff --git a/source/frontend/StarVoice.cpp b/source/frontend/StarVoice.cpp
index e5cb299..fcfaa35 100644
--- a/source/frontend/StarVoice.cpp
+++ b/source/frontend/StarVoice.cpp
@@ -3,9 +3,9 @@
#include "StarApplicationController.hpp"
#include "StarTime.hpp"
#include "StarRoot.hpp"
+#include "StarLogging.hpp"
#include "opus/include/opus.h"
-#include <queue>
#include "SDL.h"
constexpr int VOICE_SAMPLE_RATE = 48000;
@@ -59,45 +59,10 @@ float getAudioLoudness(int16_t* data, size_t samples) {
return highest;
}
-struct VoiceAudioChunk {
- std::unique_ptr<int16_t[]> data;
- size_t remaining;
- size_t offset = 0;
-
- VoiceAudioChunk(int16_t* ptr, size_t size) {
- data.reset(ptr);
- remaining = size;
- offset = 0;
- }
-
- inline size_t takeSamples(std::vector<int16_t>& out, size_t count) {
- size_t toRead = std::min<size_t>(count, remaining);
- int16_t* start = data.get() + offset;
- out.insert(out.end(), start, start + toRead);
- offset += toRead;
- remaining -= toRead;
- return toRead;
- }
-
- //this one's unsafe
- inline int16_t takeSample() {
- --remaining;
- return *(data.get() + offset++);
- }
-
- inline bool exhausted() {
- return remaining == 0;
- }
-};
-
struct VoiceAudioStream {
// TODO: This should really be a ring buffer instead.
std::queue<VoiceAudioChunk> chunks{};
size_t samples = 0;
- atomic<bool> muted = false;
- atomic<bool> playing = false;
- atomic<float> decibelLevel = 0.0f;
- atomic<Array<float, 2>> channelVolumes = Array<float, 2>::filled(1.0f);
Mutex mutex;
@@ -225,8 +190,8 @@ void Voice::save() const {
}
// Arms a deferred save. A zero m_nextSaveTime means "nothing scheduled"; in that case the deadline is
// set to Time::monotonicMilliseconds() + 2000. If a deadline is already set it is left untouched, so
// repeated calls do not push the save further into the future.
// (This change switches the bookkeeping from a double in seconds to an integer in milliseconds.)
void Voice::scheduleSave() {
- if (nextSaveTime == 0.0)
- nextSaveTime = Time::monotonicTime() + 2.0;
+ if (!m_nextSaveTime)
+ m_nextSaveTime = Time::monotonicMilliseconds() + 2000;
}
Voice::SpeakerPtr Voice::setLocalSpeaker(SpeakerId speakerId) {
@@ -248,19 +213,130 @@ Voice::SpeakerPtr Voice::speaker(SpeakerId speakerId) {
}
}
-void Voice::getAudioData(uint8_t* stream, int len) {
// Input-capture callback; the header describes it as "Called when receiving input audio data from SDL,
// on its own thread." `stream` holds `len` bytes that are read as 16-bit samples (len / 2 of them).
// Steps: (1) gate on encoder / push-to-talk deadline / encoded backlog, (2) measure loudness,
// (3) queue a copy of the raw samples while input is active, otherwise drop what is queued,
// (4) Opus-encode every complete VOICE_FRAME_SIZE frame and append it to m_encodedChunks.
+void Voice::readAudioData(uint8_t* stream, int len) {
+ auto now = Time::monotonicMilliseconds();
// && binds tighter than ||: return when there is no encoder, or when in push-to-talk mode and the
// deadline written by setInput() has passed.
+ if (!m_encoder || m_inputMode == VoiceInputMode::PushToTalk && now > m_lastInputTime)
+ return;
+
+ // Stop encoding if 2048 bytes have been encoded and not taken by the game thread yet
// NOTE(review): m_encodedChunksLength is read here without m_captureMutex, although it is written
// under that mutex further down and in send() — confirm the unsynchronised read is acceptable.
+ if (m_encodedChunksLength > 2048)
+ return;
+
+ size_t samples = len / 2;
+ float decibels = getAudioLoudness((int16_t*)stream, samples);
// NOTE(review): m_clientSpeaker is dereferenced without a null check — confirm it is always set
// before the input device can deliver data.
+ m_clientSpeaker->decibelLevel = decibels;
+
+ bool active = true;
+
+ if (m_inputMode == VoiceInputMode::VoiceActivity) {
+ bool aboveThreshold = decibels > m_threshold;
+ if (aboveThreshold)
+ m_lastThresholdTime = now;
// Stay active until 50 (Time::monotonicMilliseconds units) after the last above-threshold buffer.
+ active = now - m_lastThresholdTime < 50;
+ }
+
+ if (active) {
+ m_capturedChunksFrames += samples / m_deviceChannels;
// NOTE(review): the copy is allocated with malloc() (result unchecked) but VoiceAudioChunk keeps it
// in a std::unique_ptr<int16_t[]>, whose default deleter releases with delete[] — allocator mismatch.
+ auto data = (opus_int16*)malloc(len);
+ memcpy(data, stream, len);
+ m_capturedChunks.emplace(data, samples);
+ }
+ else { // Clear out any residual data so they don't manifest at the start of the next encode, whenever that is
+ while (!m_capturedChunks.empty())
+ m_capturedChunks.pop();
+
+ m_capturedChunksFrames = 0;
+ }
+
+ std::vector<opus_int16> takenSamples;
// Encode one frame per iteration for as long as at least VOICE_FRAME_SIZE frames are queued.
+ while (m_capturedChunksFrames >= VOICE_FRAME_SIZE) {
+ takenSamples.clear();
+ size_t samplesToTake = VOICE_FRAME_SIZE * (size_t)m_deviceChannels;
+ takenSamples.reserve(samplesToTake);
+
// Gather samplesToTake samples from the front of the queue, discarding chunks once they are used up.
+ while (!m_capturedChunks.empty()) {
+ auto& front = m_capturedChunks.front();
+ if (front.exhausted())
+ m_capturedChunks.pop();
+ else if ((samplesToTake -= front.takeSamples(takenSamples, samplesToTake)) == 0)
+ break;
+ }
+ m_capturedChunksFrames -= VOICE_FRAME_SIZE;
+
+ ByteArray encodedData(VOICE_MAX_PACKET_SIZE, 0);
// NOTE(review): `vol` is never used; the loop below reads m_inputVolume directly.
+ float vol = m_inputVolume;
+ if (m_inputVolume != 1.0f) {
+ for (size_t i = 0; i != takenSamples.size(); ++i)
// NOTE(review): the scaled value is converted straight back to opus_int16 with no clamping.
+ takenSamples[i] *= m_inputVolume;
+ }
+
+
// NOTE(review): this condition is true for ANY non-zero result, negative values included. A negative
// (error) result therefore reaches encodedData.resize(size), and the `else if (size < 0)` branch
// below only runs when size == 0, so it can never log. Testing `size < 0` first would fix that.
+ if (opus_int32 size = opus_encode(m_encoder.get(), takenSamples.data(), VOICE_FRAME_SIZE, (unsigned char*)encodedData.ptr(), VOICE_MAX_PACKET_SIZE)) {
// Presumably a 1-byte result marks a packet that need not be transmitted (DTX) — confirm against the Opus docs.
+ if (size == 1)
+ continue;
+ encodedData.resize(size);
// m_captureMutex guards m_encodedChunks / m_encodedChunksLength against send(), which takes the same lock.
+ MutexLocker lock(m_captureMutex);
// NOTE(review): the trailing comment on the next line looks stale — the ByteArray is simply moved into the vector.
+ m_encodedChunks.emplace_back(move(encodedData)); // reset takes ownership of data buffer
+ m_encodedChunksLength += size;
+ Logger::info("Voice: encoded Opus chunk {} bytes big", size);
+ }
+ else if (size < 0) {
+ Logger::error("Voice: Opus encode error {}", opus_strerror(size));
+ }
+ }
}
-void Voice::mix(int16_t* buffer, size_t frames, unsigned channels) {
// Mixes the buffered audio of all active speakers into `buffer`.
// Every loop below runs `samples` times over individual int16 values and picks the channel volume with
// i % 2, i.e. the data is treated as interleaved two-channel audio. The `channels` argument is not read.
// NOTE(review): the header declares the second parameter as `frames`, this definition as `samples` —
// confirm which unit callers actually pass.
+void Voice::mix(int16_t* buffer, size_t samples, unsigned channels) {
// Function-local statics, so the allocations persist between calls.
// NOTE(review): they are shared by every call — confirm mix() is only ever invoked from one thread.
+ static std::vector<int16_t> finalMixBuffer{};
+ static std::vector<int32_t> voiceMixBuffer{};
+ finalMixBuffer.resize(samples);
+ voiceMixBuffer.resize(samples);
// 32-bit accumulator, zeroed on every call, so several speakers can be summed before clamping to 16 bits.
+ int32_t* mixBuf = (int32_t*)memset(voiceMixBuffer.data(), 0, samples * sizeof(int32_t));
+ //read into buffer now
+ bool mix = false;
+ {
+ MutexLocker lock(m_activeSpeakersMutex);
+ auto it = m_activeSpeakers.begin();
+ while (it != m_activeSpeakers.end()) {
+ SpeakerPtr const& speaker = *it;
+ VoiceAudioStream* audio = speaker->audioStream.get();
+ MutexLocker audioLock(audio->mutex);
+ if (!audio->empty()) {
+ if (!speaker->muted) {
+ mix = true;
+ auto channelVolumes = speaker->channelVolumes.load();
// NOTE(review): getSample() is called `samples` times after only an empty() check; how it behaves
// when the stream runs dry part-way is not visible here.
+ for (size_t i = 0; i != samples; ++i)
+ mixBuf[i] += (int32_t)(audio->getSample()) * channelVolumes[i % 2];
+ }
+ else {
// Muted speakers are still drained, their samples are just not added to the mix.
+ for (size_t i = 0; i != samples; ++i)
+ audio->getSample();
+ }
+ ++it;
+ }
+ else {
// Nothing buffered: mark the speaker as not playing and remove it from the active set.
+ speaker->playing = false;
+ it = m_activeSpeakers.erase(it);
+ }
+ }
+ }
+ if (mix) {
+ int16_t* finBuf = finalMixBuffer.data();
+
+ float vol = m_outputVolume;
// Apply the output volume, clamp to the int16 range, then let SDL add the result onto `buffer`.
+ for (size_t i = 0; i != samples; ++i)
+ finBuf[i] = (int16_t)std::clamp<int>(mixBuf[i] * vol, INT16_MIN, INT16_MAX);
+ SDL_MixAudioFormat((Uint8*)buffer, (Uint8*)finBuf, AUDIO_S16, samples * sizeof(int16_t), SDL_MIX_MAXVOLUME);
+ }
}
// Periodic update. If a positional attenuation function is supplied, each non-null speaker's two
// channel volumes are recomputed by calling it with channel index 0 and 1, the speaker's position and
// 1.0f. Afterwards the deferred-save deadline is checked.
// (A diff hunk boundary falls inside this function, so not every line of it is shown.)
void Voice::update(PositionalAttenuationFunction positionalAttenuationFunction) {
if (positionalAttenuationFunction) {
for (auto& entry : m_speakers) {
if (SpeakerPtr& speaker = entry.second) {
- speaker->audioStream->channelVolumes = {
+ speaker->channelVolumes = {
positionalAttenuationFunction(0, speaker->position, 1.0f),
positionalAttenuationFunction(1, speaker->position, 1.0f)
};
@@ -268,9 +344,8 @@ void Voice::update(PositionalAttenuationFunction positionalAttenuationFunction)
}
}
- auto now = Time::monotonicTime();
- if (now > nextSaveTime) {
- nextSaveTime = 0.0;
// NOTE(review): scheduleSave() uses 0 to mean "nothing scheduled", but this comparison is also true
// when m_nextSaveTime is 0, so save() appears to run on every update() call unless save() guards
// against that itself (not visible here). The removed lines had the same shape.
+ if (Time::monotonicMilliseconds() > m_nextSaveTime) {
+ m_nextSaveTime = 0;
save();
}
}
@@ -285,6 +360,97 @@ void Voice::setDeviceName(Maybe<String> deviceName) {
openDevice();
}
// Serialises pending encoded voice data into `out` and returns 1, or returns 0 when there is nothing to send.
// Layout written (little-endian): uint16 VOICE_VERSION, then per chunk a uint32 byte length followed
// by the chunk bytes.
// `budget` is a byte allowance counted against chunk payload sizes only: the chunk that uses it up is
// still written in full, and any chunks after it are discarded with the local vector.
// Note that the version field is written before the early return, so `out` is modified even on 0.
+int Voice::send(DataStreamBuffer& out, size_t budget) {
+ out.setByteOrder(ByteOrder::LittleEndian);
+ out.write<uint16_t>(VOICE_VERSION);
+ MutexLocker captureLock(m_captureMutex);
+
// NOTE(review): this tests m_capturedChunks (the raw capture queue, which readAudioData() modifies
// without holding m_captureMutex), not m_encodedChunks, which is what gets sent below — looks like
// it was meant to be m_encodedChunks.empty(); confirm.
+ if (!m_encoder || m_capturedChunks.empty())
+ return 0;
+
// Take the whole pending list under the lock, then release it before doing the writes.
+ std::vector<ByteArray> encodedChunks = move(m_encodedChunks);
// NOTE(review): encodedChunksLength is never used afterwards.
+ size_t encodedChunksLength = m_encodedChunksLength;
+ m_encodedChunksLength = 0;
+ captureLock.unlock();
+
+ for (auto& chunk : encodedChunks) {
+ out.write<uint32_t>(chunk.size());
+ out.writeBytes(chunk);
// Saturating subtraction: budget never wraps below zero; stop once it reaches zero.
+ if ((budget -= min<size_t>(budget, chunk.size())) == 0)
+ break;
+ }
+
+ m_lastSentTime = Time::monotonicMilliseconds();
+ return 1;
+}
+
// Decodes a voice payload (the format produced by send()) for `speaker` and queues the audio for playback.
// Returns false when the speaker is null, the view is empty, the payload's version is newer than
// VOICE_VERSION, or a StarException is caught; returns true otherwise.
// Per packet: pick the mono or stereo decoder from the packet header, decode to 16-bit PCM, convert
// with SDL to 2 channels at 44100 Hz, record loudness, hand the buffer to the speaker's audio stream,
// and try to start playback.
+bool Voice::receive(SpeakerPtr speaker, std::string_view view) {
+ if (!speaker || view.empty())
+ return false;
+
+ try {
+ DataStreamExternalBuffer reader(view.data(), view.size());
+ reader.setByteOrder(ByteOrder::LittleEndian);
+
+ if (reader.read<uint16_t>() > VOICE_VERSION)
+ return false;
+
+ uint32_t opusLength = 0;
+ while (!reader.atEnd()) {
+ reader >> opusLength;
// NOTE(review): opusLength comes from the wire and is not checked against the bytes remaining before
// opusData is used; whether seek() throws on overrun is not visible here — confirm.
+ auto opusData = (unsigned char*)reader.ptr() + reader.pos();
+ reader.seek(opusLength, IOSeek::Relative);
+
+ int channels = opus_packet_get_nb_channels(opusData);
// Packets with an invalid header are skipped rather than treated as fatal.
+ if (channels == OPUS_INVALID_PACKET)
+ continue;
+
+ bool mono = channels == 1;
+ OpusDecoder* decoder = mono ? speaker->decoderMono.get() : speaker->decoderStereo.get();
+ int samples = opus_decoder_get_nb_samples(decoder, opusData, opusLength);
+ if (samples < 0)
+ throw VoiceException(strf("Decoder error: {}", opus_strerror(samples)), false);
+
+ size_t decodeBufferSize = samples * sizeof(opus_int16) * (size_t)channels;
+ opus_int16* decodeBuffer = (opus_int16*)malloc(decodeBufferSize);
+
// NOTE(review): decodeBufferSize is a byte count, but it is passed in the position of opus_decode's
// frame_size argument, which the Opus API documents as samples per channel — `samples` looks like
// the intended value; confirm.
+ int decodedSamples = opus_decode(decoder, opusData, opusLength, decodeBuffer, decodeBufferSize, 0);
+ if (decodedSamples < 0) {
+ free(decodeBuffer);
// NOTE(review): the message formats `samples`, but the failing value here is `decodedSamples`.
+ throw VoiceException(strf("Decoder error: {}", opus_strerror(samples)), false);
+ }
+
// Builds a converter from `channels`-channel S16 at VOICE_SAMPLE_RATE to 2-channel S16 at 44100 Hz.
// The return value of SDL_BuildAudioCVT is not checked.
+ static auto getCVT = [](int channels) -> SDL_AudioCVT {
+ SDL_AudioCVT cvt;
+ SDL_BuildAudioCVT(&cvt, AUDIO_S16SYS, channels, VOICE_SAMPLE_RATE, AUDIO_S16, 2, 44100);
+ return cvt;
+ };
+
+ //TODO: This isn't the best way to resample to 44100 hz because SDL_ConvertAudio is not for streamed audio.
// NOTE(review): these statics are shared and have len/buf overwritten on every call — confirm
// receive() is never run concurrently.
+ static SDL_AudioCVT monoCVT = getCVT(1);
+ static SDL_AudioCVT stereoCVT = getCVT(2);
+ SDL_AudioCVT& cvt = mono ? monoCVT : stereoCVT;
+ cvt.len = decodedSamples * sizeof(opus_int16) * (size_t)channels;
// Grow the buffer to len * len_mult, the working size SDL_ConvertAudio expects for in-place conversion.
// The realloc result is not checked.
+ cvt.buf = (Uint8*)realloc(decodeBuffer, (size_t)(cvt.len * cvt.len_mult));
+ SDL_ConvertAudio(&cvt);
+
+ size_t reSamples = (size_t)cvt.len_cvt / 2;
+ speaker->decibelLevel = getAudioLoudness((int16_t*)cvt.buf, reSamples);
// Shrink to the converted size and pass the pointer to the audio stream.
+ speaker->audioStream->take((opus_int16*)realloc(cvt.buf, cvt.len_cvt), reSamples);
+ playSpeaker(speaker, channels);
+ }
+ return true;
+ }
// Presumably VoiceException derives from StarException so the throws above land here — confirm.
+ catch (StarException const& e) {
+ Logger::error("Voice: Error receiving voice data for speaker #{} ('{}'): {}", speaker->speakerId, speaker->name, e.what());
+ return false;
+ }
+}
+
// Records the push-to-talk state. While `input` is true the deadline m_lastInputTime is pushed to
// Time::monotonicMilliseconds() + 1000; when false it is reset to 0. readAudioData() stops capturing
// in PushToTalk mode once the current time is past this deadline.
+void Voice::setInput(bool input) {
+ m_lastInputTime = input ? Time::monotonicMilliseconds() + 1000 : 0;
+}
+
OpusDecoder* Voice::createDecoder(int channels) {
int error;
OpusDecoder* decoder = opus_decoder_create(VOICE_SAMPLE_RATE, channels, &error);
@@ -312,9 +478,17 @@ void Voice::resetEncoder() {
// (Re)opens the audio input device: closes any current device, then asks the application controller
// to open one at VOICE_SAMPLE_RATE with encoderChannels() channels, passing `this` as userdata and a
// callback that forwards captured data to readAudioData().
// The device name is passed as nullptr when m_deviceName is unset (presumably selecting the default
// device — confirm). The channel count is also cached in m_deviceChannels for readAudioData().
void Voice::openDevice() {
closeDevice();
- m_applicationController->openAudioInputDevice(m_deviceName ? m_deviceName->utf8Ptr() : nullptr, VOICE_SAMPLE_RATE, encoderChannels(), this, [](void* userdata, uint8_t* stream, int len) {
- ((Voice*)(userdata))->getAudioData(stream, len);
- });
+
+
+ m_applicationController->openAudioInputDevice(
+ m_deviceName ? m_deviceName->utf8Ptr() : nullptr,
+ VOICE_SAMPLE_RATE,
+ m_deviceChannels = encoderChannels(),
+ this,
+ [](void* userdata, uint8_t* stream, int len) {
+ ((Voice*)(userdata))->readAudioData(stream, len);
+ }
+ );
// NOTE(review): set unconditionally — any result of openAudioInputDevice is not inspected here, so
// the flag may be true even when opening failed; confirm.
m_deviceOpen = true;
}
@@ -328,4 +502,15 @@ void Voice::closeDevice() {
m_deviceOpen = false;
}
// Starts playback for `speaker` once enough audio is buffered.
// Returns false if the speaker is already playing or its stream holds fewer than
// minimumPlaySamples * channels samples; otherwise marks it playing, adds it to m_activeSpeakers
// (under m_activeSpeakersMutex) and returns true.
+bool Voice::playSpeaker(SpeakerPtr const& speaker, int channels) {
+ unsigned int minSamples = speaker->minimumPlaySamples * channels;
// NOTE(review): audioStream->samples is read without taking the stream's mutex, which mix() holds
// while consuming samples — confirm this is safe.
+ if (speaker->playing || speaker->audioStream->samples < minSamples)
+ return false;
+
+ speaker->playing = true;
+ MutexLocker lock(m_activeSpeakersMutex);
+ m_activeSpeakers.insert(speaker);
+ return true;
+}
+
} \ No newline at end of file
diff --git a/source/frontend/StarVoice.hpp b/source/frontend/StarVoice.hpp
index 269adb4..e7ecd80 100644
--- a/source/frontend/StarVoice.hpp
+++ b/source/frontend/StarVoice.hpp
@@ -6,8 +6,11 @@
#include "StarGameTypes.hpp"
#include "StarMaybe.hpp"
#include "StarThread.hpp"
+#include "StarDataStreamDevices.hpp"
#include "StarApplicationController.hpp"
+#include <queue>
+
struct OpusDecoder;
typedef std::unique_ptr<OpusDecoder, void(*)(OpusDecoder*)> OpusDecoderPtr;
struct OpusEncoder;
@@ -27,6 +30,36 @@ STAR_CLASS(Voice);
STAR_CLASS(VoiceAudioStream);
STAR_CLASS(ApplicationController);
// An owned buffer of 16-bit samples with a read cursor.
// `offset` is the index of the next unread sample and `remaining` is how many are left.
+struct VoiceAudioChunk {
// std::unique_ptr<int16_t[]> releases the buffer with delete[].
// NOTE(review): readAudioData() passes malloc()ed memory to this type — allocator mismatch; confirm.
+ std::unique_ptr<int16_t[]> data;
+ size_t remaining;
+ size_t offset = 0;
+
// Takes ownership of `ptr`, which must point to at least `size` samples.
+ VoiceAudioChunk(int16_t* ptr, size_t size) {
+ data.reset(ptr);
+ remaining = size;
+ offset = 0;
+ }
+
// Appends up to `count` unread samples to `out`, advances the cursor, and returns how many were copied
// (fewer than `count` when the chunk runs out).
+ inline size_t takeSamples(std::vector<int16_t>& out, size_t count) {
+ size_t toRead = std::min<size_t>(count, remaining);
+ int16_t* start = data.get() + offset;
+ out.insert(out.end(), start, start + toRead);
+ offset += toRead;
+ remaining -= toRead;
+ return toRead;
+ }
+
// Returns the next sample with no bounds check: calling it on an exhausted chunk wraps `remaining`
// (it is a size_t) and reads past the end, so callers must check exhausted() first.
+ //this one's unsafe
+ inline int16_t takeSample() {
+ --remaining;
+ return *(data.get() + offset++);
+ }
+
// True once every sample has been consumed.
+ inline bool exhausted() { return remaining == 0; }
+};
+
+
class Voice {
public:
// Individual speakers are represented by their connection ID.
@@ -45,6 +78,13 @@ public:
VoiceAudioStreamPtr audioStream;
Mutex mutex;
+ atomic<bool> muted = false;
+ atomic<bool> playing = false;
+ atomic<float> decibelLevel = 0.0f;
+ atomic<Array<float, 2>> channelVolumes = Array<float, 2>::filled(1.0f);
+
+ unsigned int minimumPlaySamples = 4096;
+
Speaker(SpeakerId speakerId);
};
@@ -77,7 +117,7 @@ public:
SpeakerPtr speaker(SpeakerId speakerId);
// Called when receiving input audio data from SDL, on its own thread.
- void getAudioData(uint8_t* stream, int len);
+ void readAudioData(uint8_t* stream, int len);
// Called to mix voice audio with the game.
void mix(int16_t* buffer, size_t frames, unsigned channels);
@@ -87,6 +127,12 @@ public:
void setDeviceName(Maybe<String> device);
+ int send(DataStreamBuffer& out, size_t budget);
+ bool receive(SpeakerPtr speaker, std::string_view view);
+
+ // Must be called every frame with input state, expires after 1s.
+ void setInput(bool input = true);
+
inline int encoderChannels() const {
return m_channelMode == VoiceChannelMode::Mono ? 1 : 2;
}
@@ -99,10 +145,13 @@ private:
void openDevice();
void closeDevice();
+ bool playSpeaker(SpeakerPtr const& speaker, int channels);
+
SpeakerId m_speakerId = 0;
SpeakerPtr m_clientSpeaker;
HashMap<SpeakerId, SpeakerPtr> m_speakers;
+ Mutex m_activeSpeakersMutex;
HashSet<SpeakerPtr> m_activeSpeakers;
OpusEncoderPtr m_encoder;
@@ -110,10 +159,15 @@ private:
float m_outputVolume = 1.0f;
float m_inputVolume = 1.0f;
float m_threshold = -50.0f;
-
+
+ int64_t m_lastSentTime = 0;
+ int64_t m_lastInputTime = 0;
+ int64_t m_lastThresholdTime = 0;
+ int64_t m_nextSaveTime = 0;
bool m_enabled = true;
bool m_inputEnabled = true;
+ int m_deviceChannels = 1;
bool m_deviceOpen = false;
Maybe<String> m_deviceName;
VoiceInputMode m_inputMode;
@@ -121,7 +175,23 @@ private:
ApplicationControllerPtr m_applicationController;
- double nextSaveTime = 0.0f;
// Owning wrapper for one encoded packet: a byte buffer plus its length.
// The std::unique_ptr<unsigned char[]> releases the buffer with delete[].
// NOTE(review): nothing in the visible code uses this type — m_encodedChunks below stores ByteArray —
// so it may be left over from an earlier iteration; confirm before relying on it.
+ struct EncodedChunk {
+ std::unique_ptr<unsigned char[]> data;
+ size_t size;
+
// Takes ownership of `_data`; `len` is stored as the buffer's size.
+ EncodedChunk(unsigned char* _data, size_t len) {
+ data.reset(_data);
+ size = len;
+ }
+ };
+
+ std::vector<ByteArray> m_encodedChunks;
+ size_t m_encodedChunksLength = 0;
+
+ std::queue<VoiceAudioChunk> m_capturedChunks;
+ size_t m_capturedChunksFrames = 0;
+
+ Mutex m_captureMutex;
};
}