在每个 RTP 数据包中捕获 8kHz、16 位线性样本和 4 帧 20ms 音频

Question

我需要编写简单的 Java 客户端程序来捕获实时音频流。

要求

RTP 音频数据包。
8kHz，16 位线性样本（线性 PCM）。
每个 RTP 数据包中将发送 4 帧 20ms 音频。

经过一番搜索，我在互联网上找到了捕获音频的示例代码，但它会播放哔声。

代码

import java.io.ByteArrayInputStream;
import java.net.DatagramPacket;
import java.net.DatagramSocket;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.SourceDataLine;

public class Server {

    AudioInputStream audioInputStream;
    static AudioInputStream ais;
    static AudioFormat format;
    static boolean status = true;
    static int port = 31007;
    static int sampleRate = 44100;

    static DataLine.Info dataLineInfo;
    static SourceDataLine sourceDataLine;

    public static void main(String args[]) throws Exception 
    {
        System.out.println("Server started at port:"+port);

        @SuppressWarnings("resource")
        DatagramSocket serverSocket = new DatagramSocket(port);

        /**
         * Formula for lag = (byte_size/sample_rate)*2
         * Byte size 9728 will produce ~ 0.45 seconds of lag. Voice slightly broken.
         * Byte size 1400 will produce ~ 0.06 seconds of lag. Voice extremely broken.
         * Byte size 4000 will produce ~ 0.18 seconds of lag. Voice slightly more broken then 9728.
         */

        byte[] receiveData = new byte[4096];

        format = new AudioFormat(sampleRate, 16, 2, true, false);
        dataLineInfo = new DataLine.Info(SourceDataLine.class, format);
        sourceDataLine = (SourceDataLine) AudioSystem.getLine(dataLineInfo);
        sourceDataLine.open(format);
        sourceDataLine.start();

        //FloatControl volumeControl = (FloatControl) sourceDataLine.getControl(FloatControl.Type.MASTER_GAIN);
        //volumeControl.setValue(1.00f);

        DatagramPacket receivePacket = new DatagramPacket(receiveData, receiveData.length);

        ByteArrayInputStream baiss = new ByteArrayInputStream(receivePacket.getData());

        while (status == true) 
        {
            System.out.println("Reciving Packets");
            serverSocket.receive(receivePacket);
            ais = new AudioInputStream(baiss, format, receivePacket.getLength());
            toSpeaker(receivePacket.getData());
        }
        sourceDataLine.drain();
        sourceDataLine.close();
    }

    public static void toSpeaker(byte soundbytes[]) {
        try 
        {
            System.out.println("At the speaker");
            sourceDataLine.write(soundbytes, 0, soundbytes.length);
        } catch (Exception e) {
            System.out.println("Not working in speakers...");
            e.printStackTrace();
        }
    }
}

我想我找不到合适的格式来捕获以给定格式发送的数据包？

任何人都可以帮助我找到合适的 AudioFormat 来捕获此音频流或任何指向相同的 link 对我有帮助...谢谢... :)

回答

float sampleRate = 8000;
int sampleSizeInBits = 16;
int channels = 1;
boolean signed = true;
boolean bigEndian = true;
AudioFormat(sampleRate, sampleSizeInBits, channels, signed, bigEndian);

UDP + RTP 数据包格式

同时从数据中缓冲负 12 个字节，因为它包含 RTP header 信息。

receivePacket = new DatagramPacket(receiveData, receiveData.length);
byte[] packet = new byte[receivePacket.getLength() - 12];
serverSocket.receive(receivePacket);
packet = Arrays.copyOfRange(receivePacket.getData(), 12, receivePacket.getLength());

希望这对你以后有所帮助，如果不对请随时指正谢谢..

Answer 1

你可以试试这个基于数据报套接字的客户端和服务器的实现。它使用单声道 8000Hz 16bit 签名的大端音频格式。服务器运行在端口号 9786 上，而客户端使用端口号 8786。我想代码很容易理解。

服务器:

import java.io.*;
import java.net.*;
import javax.sound.sampled.*;

public class Server {

ByteArrayOutputStream byteOutputStream;
AudioFormat adFormat;
TargetDataLine targetDataLine;
AudioInputStream InputStream;
SourceDataLine sourceLine;

private AudioFormat getAudioFormat() {
    float sampleRate = 8000.0F;
    int sampleSizeInBits = 16;
    int channels = 1;
    boolean signed = true;
    boolean bigEndian = true;
    return new AudioFormat(sampleRate, sampleSizeInBits, channels, signed, bigEndian);
}

public static void main(String args[]) {
    new Server().runVOIP();
}

public void runVOIP() {
    try {
        DatagramSocket serverSocket = new DatagramSocket(9786);
        byte[] receiveData = new byte[4096];
        while (true) {
            DatagramPacket receivePacket = new DatagramPacket(receiveData, receiveData.length);
            serverSocket.receive(receivePacket);
            System.out.println("RECEIVED: " + receivePacket.getAddress().getHostAddress() + " " + receivePacket.getPort());
            try {
                byte audioData[] = receivePacket.getData();
                InputStream byteInputStream = new ByteArrayInputStream(audioData);
                AudioFormat adFormat = getAudioFormat();
                InputStream = new AudioInputStream(byteInputStream, adFormat, audioData.length / adFormat.getFrameSize());
                DataLine.Info dataLineInfo = new DataLine.Info(SourceDataLine.class, adFormat);
                sourceLine = (SourceDataLine) AudioSystem.getLine(dataLineInfo);
                sourceLine.open(adFormat);
                sourceLine.start();
                Thread playThread = new Thread(new PlayThread());
                playThread.start();
            } catch (Exception e) {
                System.out.println(e);
                System.exit(0);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

class PlayThread extends Thread {

    byte tempBuffer[] = new byte[4096];

    public void run() {
        try {
            int cnt;
            while ((cnt = InputStream.read(tempBuffer, 0, tempBuffer.length)) != -1) {
                if (cnt > 0) {
                    sourceLine.write(tempBuffer, 0, cnt);
                }
            }
        } catch (Exception e) {
            System.out.println(e);
            System.exit(0);
        }
    }
}
}

客户:

import java.io.*;
import java.net.*;
import javax.sound.sampled.*;

public class Client {

boolean stopaudioCapture = false;
ByteArrayOutputStream byteOutputStream;
AudioFormat adFormat;
TargetDataLine targetDataLine;
AudioInputStream InputStream;
SourceDataLine sourceLine;

public static void main(String args[]) {
    new Client();
}

public Client() {
    captureAudio();
}

private AudioFormat getAudioFormat() {
    float sampleRate = 8000.0F;
    int sampleSizeInBits = 16;
    int channels = 1;
    boolean signed = true;
    boolean bigEndian = true;
    return new AudioFormat(sampleRate, sampleSizeInBits, channels, signed, bigEndian);
}

private void captureAudio() {
    try {
        adFormat = getAudioFormat();
        DataLine.Info dataLineInfo = new DataLine.Info(TargetDataLine.class, adFormat);
        targetDataLine = (TargetDataLine) AudioSystem.getLine(dataLineInfo);
        targetDataLine.open(adFormat);
        targetDataLine.start();

        Thread captureThread = new Thread(new CaptureThread());
        captureThread.start();
    } catch (Exception e) {
        StackTraceElement stackEle[] = e.getStackTrace();
        for (StackTraceElement val : stackEle) {
            System.out.println(val);
        }
        System.exit(0);
    }
}

class CaptureThread extends Thread {

    byte tempBuffer[] = new byte[4096];

    @Override
    public void run() {
        stopaudioCapture = false;
        try {
            DatagramSocket clientSocket = new DatagramSocket(8786);
            InetAddress IPAddress = InetAddress.getByName("127.0.0.1");
            int cnt;
            while (!stopaudioCapture) {
                cnt = targetDataLine.read(tempBuffer, 0, tempBuffer.length);
                if (cnt > 0) {
                    DatagramPacket sendPacket = new DatagramPacket(tempBuffer, tempBuffer.length, IPAddress, 9786);
                    clientSocket.send(sendPacket);
                }
            }
        } catch (Exception e) {
            System.out.println("CaptureThread::run()" + e);
            System.exit(0);
        }
    }
}

}

在每个 RTP 数据包中捕获 8kHz、16 位线性样本和 4 帧 20ms 音频

Capture 8kHz, 16-bit Linear Samples with 4 frames of 20ms audio in each RTP Packet

java

audio

rtp

audio-streaming

javax.sound.sampled