Skip to content

Commit 3120753

Browse files
iceAndFireisFailed and yuhongxiao
authored
feat: audio speech streaming (#56)
Co-authored-by: yuhongxiao <[email protected]>
1 parent 0ec7aac commit 3120753

File tree

5 files changed

+79
-0
lines changed

5 files changed

+79
-0
lines changed

core/src/main/java/ai/z/openapi/api/audio/AudioApi.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,20 @@ public interface AudioApi {
3939
@POST("audio/speech")
4040
Single<ResponseBody> audioSpeech(@Body AudioSpeechRequest request);
4141

42+
/**
43+
* Text-to-Speech (TTS) conversion using GLM-4-Voice. Converts text input into
44+
* natural-sounding speech audio with emotion and tone control. Supports multiple
45+
* voices, languages, speed adjustment, and various audio formats. Features advanced
46+
* voice synthesis with customizable emotional expressions and dialects.
47+
* @param request TTS parameters including text, voice selection, emotion, speed,
48+
* tone, and output format
49+
* @return Streamed high-quality generated audio in the specified format with natural
50+
* prosody
51+
*/
52+
@Streaming
53+
@POST("audio/speech")
54+
Call<ResponseBody> audioSpeechStreaming(@Body AudioSpeechRequest request);
55+
4256
/**
4357
* Voice cloning and customization using advanced neural models Creates custom voice
4458
* models from provided audio samples with high fidelity Enables personalized speech

core/src/main/java/ai/z/openapi/service/audio/AudioService.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,13 @@ public interface AudioService {
1212
*/
1313
AudioSpeechResponse createSpeech(AudioSpeechRequest request);
1414

15+
/**
16+
* Creates speech from text using text-to-speech, returning the result as a stream.
17+
* @param request the speech generation request
18+
* @return AudioSpeechStreamingResponse containing the generated speech stream
19+
*/
20+
AudioSpeechStreamingResponse createStreamingSpeechStreaming(AudioSpeechRequest request);
21+
1522
/**
1623
* Creates customized speech with specific voice characteristics.
1724
* @param request the speech customization request

core/src/main/java/ai/z/openapi/service/audio/AudioServiceImpl.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import ai.z.openapi.utils.FlowableRequestSupplier;
77
import ai.z.openapi.utils.RequestSupplier;
88
import com.fasterxml.jackson.databind.ObjectMapper;
9+
import com.fasterxml.jackson.databind.node.ObjectNode;
910
import io.reactivex.rxjava3.core.Single;
1011
import lombok.extern.slf4j.Slf4j;
1112
import okhttp3.MediaType;
@@ -57,6 +58,13 @@ public AudioSpeechResponse createSpeech(AudioSpeechRequest request) {
5758
return this.zAiClient.executeRequest(request, supplier, AudioSpeechResponse.class);
5859
}
5960

61+
@Override
62+
public AudioSpeechStreamingResponse createStreamingSpeechStreaming(AudioSpeechRequest request) {
63+
validateSpeechParams(request);
64+
FlowableRequestSupplier<AudioSpeechRequest, retrofit2.Call<ResponseBody>> supplier = audioApi::audioSpeechStreaming;
65+
return this.zAiClient.streamRequest(request, supplier, AudioSpeechStreamingResponse.class, ObjectNode.class);
66+
}
67+
6068
@Override
6169
public AudioCustomizationResponse createCustomSpeech(AudioCustomizationRequest request) {
6270
validateCustomSpeechParams(request);
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package ai.z.openapi.service.audio;
2+
3+
import ai.z.openapi.core.model.ClientResponse;
4+
import ai.z.openapi.core.model.FlowableClientResponse;
5+
import ai.z.openapi.service.model.ChatError;
6+
import java.io.File;
7+
8+
import com.fasterxml.jackson.databind.node.ObjectNode;
9+
import io.reactivex.rxjava3.core.Flowable;
10+
import lombok.Data;
11+
12+
@Data
13+
public class AudioSpeechStreamingResponse implements FlowableClientResponse<ObjectNode> {
14+
15+
private int code;
16+
17+
private String msg;
18+
19+
private boolean success;
20+
21+
private ObjectNode data;
22+
23+
private ChatError error;
24+
25+
private Flowable<ObjectNode> flowable;
26+
27+
}

core/src/test/java/ai/z/openapi/service/audio/AudioServiceTest.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,29 @@ void shouldGenerateSpeechFromTextSuccessfully() throws JsonProcessingException {
8282
logger.info("Text-to-speech response: {}", mapper.writeValueAsString(response));
8383
}
8484

85+
@Test
86+
@DisplayName("Should generate speech streaming from text successfully")
87+
@EnabledIfEnvironmentVariable(named = "ZAI_API_KEY", matches = "^[^.]+\\.[^.]+$")
88+
void testAudioSpeechStreaming() {
89+
String requestId = String.format(REQUEST_ID_TEMPLATE, System.currentTimeMillis());
90+
AudioSpeechRequest audioSpeechRequest = AudioSpeechRequest.builder()
91+
.model(Constants.ModelTTS)
92+
.encodeFormat("base64")
93+
.input("Hello, this is a test for text-to-speech functionality.")
94+
.voice("female")
95+
.speed(1.0f)
96+
.volume(1.0f)
97+
.stream(Boolean.TRUE)
98+
.responseFormat("wav")
99+
.requestId(requestId)
100+
.build();
101+
AudioSpeechStreamingResponse audioSpeechStreamingApiResponse = audioService
102+
.createStreamingSpeechStreaming(audioSpeechRequest);
103+
audioSpeechStreamingApiResponse.getFlowable()
104+
.doOnNext(speechPro -> logger.info("speechPro: {}", speechPro.toString()))
105+
.blockingSubscribe();
106+
}
107+
85108
@Test
86109
@DisplayName("Should generate custom speech with voice cloning successfully")
87110
@EnabledIfEnvironmentVariable(named = "ZAI_API_KEY", matches = "^[^.]+\\.[^.]+$")

0 commit comments

Comments
 (0)