如何在 Android 中更改 IBM Watson 语音转文本(Speech to Text)API 的说话人标签?
How can I change the speaker label in the IBM Watson Speech to Text API on Android?
美好的一天!
我在开发 Android 应用程序时需要帮助。是否可以将说话人标签(speaker label)的默认名称更改为自定义名称,例如人名?我想生成类似下面这样的对话,作为示例:
Jhon: Hi
Marie: Hello
Jhon: Good Day To you marie..
Marie:......
请帮帮我:是否可以更改说话人标签的默认名称?我需要你们的帮助。
我是这样创建的
/**
 * Builds the recognition options used for a streaming Speech to Text request.
 *
 * <p>The options request interim results, word timestamps, speaker labels, word confidence,
 * smart formatting and up to three alternatives, using the {@code en-US_BroadbandModel} model
 * and OPUS-encoded audio.
 *
 * @param captureStream the audio stream to send for recognition
 * @return the configured recognize options
 */
private RecognizeOptions getRecognizeOptions(InputStream captureStream) {
  return new RecognizeOptions.Builder()
      .audio(captureStream)
      .contentType(ContentType.OPUS.toString())
      .model("en-US_BroadbandModel")
      .interimResults(true)
      // NOTE(review): unit of this timeout is defined by the SDK (presumably seconds) - confirm.
      .inactivityTimeout(2000)
      // Timestamps are set once; the original chain set this same flag twice.
      .timestamps(true)
      .speakerLabels(true)
      .maxAlternatives(3)
      .smartFormatting(true)
      .wordConfidence(true)
      .build();
}
这是.speakerLabels的方法
/**
 * Combines word timestamps and speaker labels from speech recognition results into
 * per-speaker utterances.
 *
 * <p>Tokens are keyed by their start time. Word timestamps contribute the word, speaker labels
 * contribute the speaker id; once a speaker label is final, the collected utterances are
 * printed as JSON and the finalized tokens are discarded.
 */
public class SpeakerLabelsDiarization {

  /**
   * Converts a time offset to the {@code Double} form used for map keys and comparisons.
   *
   * <p>The conversion goes through the decimal text of the number rather than a numeric
   * widening: widening a {@code Float} such as {@code 0.03f} produces
   * {@code 0.029999999329447746}, which would never equal the {@code Double} {@code 0.03}
   * stored for the matching word timestamp.
   *
   * @param seconds the offset as provided by the result object
   * @return the offset as a {@code Double} with the same decimal representation
   */
  private static Double toSeconds(Number seconds) {
    return Double.valueOf(seconds.toString());
  }

  /** A single recognized token: a time span plus, once known, its word and speaker. */
  public static class RecoToken {
    private Double startTime;
    private Double endTime;
    private Long speaker;
    private String word;
    // Initialised so that checking the flag never unboxes null.
    private Boolean spLabelIsFinal = Boolean.FALSE;

    /**
     * Instantiates a new reco token from a word timestamp (speaker not yet known).
     *
     * @param speechTimestamp the speech timestamp
     */
    RecoToken(SpeechTimestamp speechTimestamp) {
      startTime = speechTimestamp.getStartTime();
      endTime = speechTimestamp.getEndTime();
      word = speechTimestamp.getWord();
    }

    /**
     * Instantiates a new reco token from a speaker label (word not yet known).
     *
     * @param speakerLabel the speaker label
     */
    RecoToken(SpeakerLabelsResult speakerLabel) {
      startTime = toSeconds(speakerLabel.getFrom());
      endTime = toSeconds(speakerLabel.getTo());
      speaker = speakerLabel.getSpeaker();
    }

    /**
     * Updates the word of this token.
     *
     * @param speechTimestamp the speech timestamp
     */
    public void updateFrom(SpeechTimestamp speechTimestamp) {
      word = speechTimestamp.getWord();
    }

    /**
     * Updates the speaker of this token.
     *
     * @param speakerLabel the speaker label
     */
    public void updateFrom(SpeakerLabelsResult speakerLabel) {
      speaker = speakerLabel.getSpeaker();
    }
  }

  /**
   * A run of consecutive words attributed to one speaker.
   */
  public static class Utterance {
    private Integer speaker;
    private String transcript = "";

    /**
     * Instantiates a new utterance.
     *
     * @param speaker the speaker
     * @param transcript the transcript
     */
    public Utterance(final Integer speaker, final String transcript) {
      this.speaker = speaker;
      this.transcript = transcript;
    }
  }

  /**
   * Collection of tokens keyed by start time, in insertion order.
   */
  public static class RecoTokens {
    private Map<Double, RecoToken> recoTokenMap;

    /**
     * Instantiates a new, empty token collection.
     */
    public RecoTokens() {
      recoTokenMap = new LinkedHashMap<Double, RecoToken>();
    }

    /**
     * Adds the word timestamps of every final result, then every speaker label, found in the
     * given recognition results.
     *
     * @param speechResults the speech results
     */
    public void add(SpeechRecognitionResults speechResults) {
      if (speechResults.getResults() != null) {
        for (int i = 0; i < speechResults.getResults().size(); i++) {
          SpeechRecognitionResult transcript = speechResults.getResults().get(i);
          if (!Boolean.TRUE.equals(transcript.isFinalResults())) {
            continue;
          }
          // Only the first alternative is used; skip results that carry none.
          if (transcript.getAlternatives() == null || transcript.getAlternatives().isEmpty()) {
            continue;
          }
          SpeechRecognitionAlternative speechAlternative = transcript.getAlternatives().get(0);
          if (speechAlternative.getTimestamps() == null) {
            continue;
          }
          for (int ts = 0; ts < speechAlternative.getTimestamps().size(); ts++) {
            add(speechAlternative.getTimestamps().get(ts));
          }
        }
      }
      if (speechResults.getSpeakerLabels() != null) {
        for (int i = 0; i < speechResults.getSpeakerLabels().size(); i++) {
          add(speechResults.getSpeakerLabels().get(i));
        }
      }
    }

    /**
     * Adds a word timestamp, creating the token for its start time or updating the existing one.
     *
     * @param speechTimestamp the speech timestamp
     */
    public void add(SpeechTimestamp speechTimestamp) {
      RecoToken recoToken = recoTokenMap.get(speechTimestamp.getStartTime());
      if (recoToken == null) {
        recoToken = new RecoToken(speechTimestamp);
        recoTokenMap.put(speechTimestamp.getStartTime(), recoToken);
      } else {
        recoToken.updateFrom(speechTimestamp);
      }
    }

    /**
     * Adds a speaker label, creating the token for its start time or updating the existing one.
     * When the label is final, tokens up to it are marked final, the utterances are reported
     * and the finalized tokens are removed.
     *
     * @param speakerLabel the speaker label
     */
    public void add(SpeakerLabelsResult speakerLabel) {
      // Look up with the same Double key form that word timestamps are stored under; a raw
      // Float key would never match an entry of this map.
      Double key = toSeconds(speakerLabel.getFrom());
      RecoToken recoToken = recoTokenMap.get(key);
      if (recoToken == null) {
        recoToken = new RecoToken(speakerLabel);
        recoTokenMap.put(key, recoToken);
      } else {
        recoToken.updateFrom(speakerLabel);
      }
      if (Boolean.TRUE.equals(speakerLabel.isFinalResults())) {
        markTokensBeforeAsFinal(speakerLabel.getFrom());
        report();
        cleanFinal();
      }
    }

    /**
     * Marks every token starting at or before the given offset as final.
     *
     * @param from the start offset of the final speaker label
     */
    private void markTokensBeforeAsFinal(Float from) {
      // Operates on the field; a local map of the same name here would hide it and mark nothing.
      double limit = toSeconds(from);
      for (RecoToken rt : recoTokenMap.values()) {
        if (rt.startTime != null && rt.startTime <= limit) {
          rt.spLabelIsFinal = Boolean.TRUE;
        }
      }
    }

    /**
     * Groups the current tokens into utterances (one per run of the same speaker) and prints
     * them as JSON to standard output. Tokens that do not yet have both a speaker and a word
     * are left out, and no empty utterance is emitted.
     */
    public void report() {
      List<Utterance> utterances = new ArrayList<Utterance>();
      Utterance currentUtterance = null;
      for (RecoToken rt : recoTokenMap.values()) {
        if (rt.speaker == null || rt.word == null) {
          // Incomplete token: it cannot be attributed or has nothing to print yet.
          continue;
        }
        int speakerId = Math.toIntExact(rt.speaker);
        if (currentUtterance == null || currentUtterance.speaker.intValue() != speakerId) {
          currentUtterance = new Utterance(speakerId, "");
          utterances.add(currentUtterance);
        }
        currentUtterance.transcript = currentUtterance.transcript + rt.word + " ";
      }
      String result = GsonSingleton.getGson().toJson(utterances);
      System.out.println(result);
    }

    /**
     * Removes every token that has been marked final.
     */
    private void cleanFinal() {
      // removeIf deletes through the view's iterator, so the map is not modified behind the
      // back of an active iteration.
      recoTokenMap.values().removeIf(rt -> Boolean.TRUE.equals(rt.spLabelIsFinal));
    }
  }

  private static CountDownLatch lock = new CountDownLatch(1);
}
目前的输出是这样的:
speaker 0: Hi
speaker 1: Hello
speaker 0: Good Day To you marie..
speaker 1:......
我想这样输出
Jhon: Hi
Marie: Hello
Jhon: Good Day To you marie..
Marie:......
我的问题是:使用 IBM Watson 语音转文本 API 能否实现这样的输出?我阅读了他们的文档,其中没有提到如何更改标签,所以我只想确认一下这是否可行。
API 和文档中没有任何内容表明可以使用服务本身修改输出中的标签。 https://cloud.ibm.com/docs/services/speech-to-text/output.html#speaker_labels
美好的一天!
我在开发 Android 应用程序时需要帮助。是否可以将说话人标签(speaker label)的默认名称更改为自定义名称,例如人名?我想生成类似下面这样的对话,作为示例:
Jhon: Hi
Marie: Hello
Jhon: Good Day To you marie..
Marie:......
请帮帮我:是否可以更改说话人标签的默认名称?我需要你们的帮助。我是这样创建的:
/**
 * Builds the recognition options used for a streaming Speech to Text request.
 *
 * <p>The options request interim results, word timestamps, speaker labels, word confidence,
 * smart formatting and up to three alternatives, using the {@code en-US_BroadbandModel} model
 * and OPUS-encoded audio.
 *
 * @param captureStream the audio stream to send for recognition
 * @return the configured recognize options
 */
private RecognizeOptions getRecognizeOptions(InputStream captureStream) {
  return new RecognizeOptions.Builder()
      .audio(captureStream)
      .contentType(ContentType.OPUS.toString())
      .model("en-US_BroadbandModel")
      .interimResults(true)
      // NOTE(review): unit of this timeout is defined by the SDK (presumably seconds) - confirm.
      .inactivityTimeout(2000)
      // Timestamps are set once; the original chain set this same flag twice.
      .timestamps(true)
      .speakerLabels(true)
      .maxAlternatives(3)
      .smartFormatting(true)
      .wordConfidence(true)
      .build();
}
这是.speakerLabels的方法
/**
 * Combines word timestamps and speaker labels from speech recognition results into
 * per-speaker utterances.
 *
 * <p>Tokens are keyed by their start time. Word timestamps contribute the word, speaker labels
 * contribute the speaker id; once a speaker label is final, the collected utterances are
 * printed as JSON and the finalized tokens are discarded.
 */
public class SpeakerLabelsDiarization {

  /**
   * Converts a time offset to the {@code Double} form used for map keys and comparisons.
   *
   * <p>The conversion goes through the decimal text of the number rather than a numeric
   * widening: widening a {@code Float} such as {@code 0.03f} produces
   * {@code 0.029999999329447746}, which would never equal the {@code Double} {@code 0.03}
   * stored for the matching word timestamp.
   *
   * @param seconds the offset as provided by the result object
   * @return the offset as a {@code Double} with the same decimal representation
   */
  private static Double toSeconds(Number seconds) {
    return Double.valueOf(seconds.toString());
  }

  /** A single recognized token: a time span plus, once known, its word and speaker. */
  public static class RecoToken {
    private Double startTime;
    private Double endTime;
    private Long speaker;
    private String word;
    // Initialised so that checking the flag never unboxes null.
    private Boolean spLabelIsFinal = Boolean.FALSE;

    /**
     * Instantiates a new reco token from a word timestamp (speaker not yet known).
     *
     * @param speechTimestamp the speech timestamp
     */
    RecoToken(SpeechTimestamp speechTimestamp) {
      startTime = speechTimestamp.getStartTime();
      endTime = speechTimestamp.getEndTime();
      word = speechTimestamp.getWord();
    }

    /**
     * Instantiates a new reco token from a speaker label (word not yet known).
     *
     * @param speakerLabel the speaker label
     */
    RecoToken(SpeakerLabelsResult speakerLabel) {
      startTime = toSeconds(speakerLabel.getFrom());
      endTime = toSeconds(speakerLabel.getTo());
      speaker = speakerLabel.getSpeaker();
    }

    /**
     * Updates the word of this token.
     *
     * @param speechTimestamp the speech timestamp
     */
    public void updateFrom(SpeechTimestamp speechTimestamp) {
      word = speechTimestamp.getWord();
    }

    /**
     * Updates the speaker of this token.
     *
     * @param speakerLabel the speaker label
     */
    public void updateFrom(SpeakerLabelsResult speakerLabel) {
      speaker = speakerLabel.getSpeaker();
    }
  }

  /**
   * A run of consecutive words attributed to one speaker.
   */
  public static class Utterance {
    private Integer speaker;
    private String transcript = "";

    /**
     * Instantiates a new utterance.
     *
     * @param speaker the speaker
     * @param transcript the transcript
     */
    public Utterance(final Integer speaker, final String transcript) {
      this.speaker = speaker;
      this.transcript = transcript;
    }
  }

  /**
   * Collection of tokens keyed by start time, in insertion order.
   */
  public static class RecoTokens {
    private Map<Double, RecoToken> recoTokenMap;

    /**
     * Instantiates a new, empty token collection.
     */
    public RecoTokens() {
      recoTokenMap = new LinkedHashMap<Double, RecoToken>();
    }

    /**
     * Adds the word timestamps of every final result, then every speaker label, found in the
     * given recognition results.
     *
     * @param speechResults the speech results
     */
    public void add(SpeechRecognitionResults speechResults) {
      if (speechResults.getResults() != null) {
        for (int i = 0; i < speechResults.getResults().size(); i++) {
          SpeechRecognitionResult transcript = speechResults.getResults().get(i);
          if (!Boolean.TRUE.equals(transcript.isFinalResults())) {
            continue;
          }
          // Only the first alternative is used; skip results that carry none.
          if (transcript.getAlternatives() == null || transcript.getAlternatives().isEmpty()) {
            continue;
          }
          SpeechRecognitionAlternative speechAlternative = transcript.getAlternatives().get(0);
          if (speechAlternative.getTimestamps() == null) {
            continue;
          }
          for (int ts = 0; ts < speechAlternative.getTimestamps().size(); ts++) {
            add(speechAlternative.getTimestamps().get(ts));
          }
        }
      }
      if (speechResults.getSpeakerLabels() != null) {
        for (int i = 0; i < speechResults.getSpeakerLabels().size(); i++) {
          add(speechResults.getSpeakerLabels().get(i));
        }
      }
    }

    /**
     * Adds a word timestamp, creating the token for its start time or updating the existing one.
     *
     * @param speechTimestamp the speech timestamp
     */
    public void add(SpeechTimestamp speechTimestamp) {
      RecoToken recoToken = recoTokenMap.get(speechTimestamp.getStartTime());
      if (recoToken == null) {
        recoToken = new RecoToken(speechTimestamp);
        recoTokenMap.put(speechTimestamp.getStartTime(), recoToken);
      } else {
        recoToken.updateFrom(speechTimestamp);
      }
    }

    /**
     * Adds a speaker label, creating the token for its start time or updating the existing one.
     * When the label is final, tokens up to it are marked final, the utterances are reported
     * and the finalized tokens are removed.
     *
     * @param speakerLabel the speaker label
     */
    public void add(SpeakerLabelsResult speakerLabel) {
      // Look up with the same Double key form that word timestamps are stored under; a raw
      // Float key would never match an entry of this map.
      Double key = toSeconds(speakerLabel.getFrom());
      RecoToken recoToken = recoTokenMap.get(key);
      if (recoToken == null) {
        recoToken = new RecoToken(speakerLabel);
        recoTokenMap.put(key, recoToken);
      } else {
        recoToken.updateFrom(speakerLabel);
      }
      if (Boolean.TRUE.equals(speakerLabel.isFinalResults())) {
        markTokensBeforeAsFinal(speakerLabel.getFrom());
        report();
        cleanFinal();
      }
    }

    /**
     * Marks every token starting at or before the given offset as final.
     *
     * @param from the start offset of the final speaker label
     */
    private void markTokensBeforeAsFinal(Float from) {
      // Operates on the field; a local map of the same name here would hide it and mark nothing.
      double limit = toSeconds(from);
      for (RecoToken rt : recoTokenMap.values()) {
        if (rt.startTime != null && rt.startTime <= limit) {
          rt.spLabelIsFinal = Boolean.TRUE;
        }
      }
    }

    /**
     * Groups the current tokens into utterances (one per run of the same speaker) and prints
     * them as JSON to standard output. Tokens that do not yet have both a speaker and a word
     * are left out, and no empty utterance is emitted.
     */
    public void report() {
      List<Utterance> utterances = new ArrayList<Utterance>();
      Utterance currentUtterance = null;
      for (RecoToken rt : recoTokenMap.values()) {
        if (rt.speaker == null || rt.word == null) {
          // Incomplete token: it cannot be attributed or has nothing to print yet.
          continue;
        }
        int speakerId = Math.toIntExact(rt.speaker);
        if (currentUtterance == null || currentUtterance.speaker.intValue() != speakerId) {
          currentUtterance = new Utterance(speakerId, "");
          utterances.add(currentUtterance);
        }
        currentUtterance.transcript = currentUtterance.transcript + rt.word + " ";
      }
      String result = GsonSingleton.getGson().toJson(utterances);
      System.out.println(result);
    }

    /**
     * Removes every token that has been marked final.
     */
    private void cleanFinal() {
      // removeIf deletes through the view's iterator, so the map is not modified behind the
      // back of an active iteration.
      recoTokenMap.values().removeIf(rt -> Boolean.TRUE.equals(rt.spLabelIsFinal));
    }
  }

  private static CountDownLatch lock = new CountDownLatch(1);
}
目前的输出是这样的:
speaker 0: Hi
speaker 1: Hello
speaker 0: Good Day To you marie..
speaker 1:......
我想这样输出
Jhon: Hi
Marie: Hello
Jhon: Good Day To you marie..
Marie:......
我的问题是:使用 IBM Watson 语音转文本 API 能否实现这样的输出?我阅读了他们的文档,其中没有提到如何更改标签,所以我只想确认一下这是否可行。
API 和文档中没有任何内容表明可以使用服务本身修改输出中的标签。 https://cloud.ibm.com/docs/services/speech-to-text/output.html#speaker_labels