Visualization of a stereo audio chunk or frame using OpenCV & PyAudio implemented in python

You are probably looking for code that can visualize audio chunk data. Visualization is crucial for purposes such as analysing and manipulating audio in the form of chunks. Here, I have used stereo audio as input and visualized the streams separately. Jump to full code.

Pre-requisite: basic knowledge of python

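Libraries used: the audio libraries PyAudio and Wave are used along with OpenCV and NumPy. If you haven’t installed these libraries, install them first. You can install PyAudio and Wave by executing the following commands (OpenCV and NumPy can be installed the same way, with pip install opencv-python numpy):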

pip install PyAudio
pip install Wave

The task consists of two consecutive parts: read the chunk, then display it. Follow the steps below to do so.

Step 1. import required libraries i.e. pyaudio, wave, numpy, opencv, and sys

import pyaudio
import wave
import sys
import numpy as np
import cv2
from numpy import *

Step 2. Initialize the visualizer image as a NumPy array of shape [height][width][3], filled with zeros.

# Canvas size in pixels; both sides are the same length.
height = width = 1000
# All-zero, 3-channel, 8-bit image that the audio bytes are later drawn onto.
vframe = np.zeros(shape=(height, width, 3), dtype=np.uint8)

Step 3. Check whether the audio file name (path) was passed as an argument; if not, exit with a usage message.

# Without a .wav path on the command line there is nothing to play:
# print the usage hint and exit with status -1.
if len(sys.argv) <= 1:
    print("Plays a wave file.\n\nUsage: %s filename.wav" % sys.argv[0])
    raise SystemExit(-1)

Step 4: open .wav file using wave from arguments and initialize PyAudio

# The first command-line argument is the path of the .wav file to read.
wav_path = sys.argv[1]
wf = wave.open(wav_path, 'rb')
# PyAudio instance from which the output stream is created.
p = pyaudio.PyAudio()

Step 5: open pyaudio stream

# Configure the output stream from the wave file's own parameters:
# sample width (converted to a PyAudio format), channel count and frame rate.
sample_format = p.get_format_from_width(wf.getsampwidth())
channel_count = wf.getnchannels()
frame_rate = wf.getframerate()
stream = p.open(format=sample_format,
                channels=channel_count,
                rate=frame_rate,
                output=True)

Step 6: Read the data in chunks until a keyboard interrupt occurs, printing the frame/chunk number for each chunk

# Number of audio frames requested from the wave file per read.
CHUNK = 1024
data = wf.readframes(CHUNK)
cnt = 0
try:
    # readframes() returns bytes, which never compare equal to the str '',
    # so the end of the file is detected by testing for an empty chunk.
    while data:
        stream.write(data)
        print(cnt)
        cnt = cnt + 1
        next_data = wf.readframes(CHUNK)
        if not next_data:
            # End of file: keep the last non-empty chunk in `data` so the
            # visualization step still has something to draw.
            break
        data = next_data
except KeyboardInterrupt:
    # Ctrl+C stops playback; `data` holds the chunk being played at that time.
    pass
# Total number of chunks written to the stream.
print(cnt)

Step 7: stop and close stream, terminate pyaudio instance on keyboard interrupt

# Stop the output stream, close it, then terminate the PyAudio instance.
stream.stop_stream()
stream.close()
p.terminate()

Step 8: visualize the data in that frame

# Draw the bytes of the current chunk, one audio frame per image column.
# The indexing uses a stride of 4 bytes per frame (this matches 16-bit stereo
# PCM; other formats would need a different stride). Each of the four bytes
# is drawn as a vertical line in its own lane of the image.
#compress factor: only every compress_factor-th frame is drawn
compress_factor = 1
bytes_per_frame = 4
# (baseline offset from the bottom of the image, colour) for bytes 0..3
lanes = (
    (700, (0, 255, 0)),
    (500, (0, 0, 255)),
    (200, (255, 0, 0)),
    (0, (0, 255, 255)),
)
curr_data_bit = 0
for i in range(int(width)):
    base = curr_data_bit * compress_factor * bytes_per_frame
    if base + bytes_per_frame > len(data):
        # The chunk is shorter than the canvas (e.g. last chunk of the file
        # or no data left): stop drawing instead of indexing out of range.
        break
    #printing line for separate stream in the image data
    for lane, (offset, colour) in enumerate(lanes):
        baseline = height - offset - 10
        # Indexing bytes yields an int in the range 0..255, used as line length.
        cv2.line(vframe, (i, baseline), (i, baseline - data[base + lane]), colour, 1)
    curr_data_bit = curr_data_bit + 1
    if curr_data_bit == CHUNK // compress_factor:
        # Chunk exhausted: mark the boundary with a full-height line and
        # continue with the next chunk from the file.
        cv2.line(vframe, (i, height - 10), (i, 0), (255, 255, 255), 1)
        curr_data_bit = 0
        data = wf.readframes(CHUNK)
# No more reads are needed, so release the wave file.
wf.close()
#image is shown in window named sound show
cv2.imshow('sound show', vframe)
while True:  #wait for press, exit on pressing q
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
#destroy all window
cv2.destroyAllWindows()

Combining together

The full source code is given below.

import pyaudio
import wave
import sys
import numpy as np
import cv2
from numpy import *

# Size of the visualizer image in pixels, and the all-zero 3-channel 8-bit
# image that the audio bytes are drawn onto.
height = 1000
width = 1000
vframe = np.zeros((height, width, 3), np.uint8)

# The path of the .wav file is the only required command-line argument.
if len(sys.argv) < 2:
    print("Plays a wave file.\n\nUsage: %s filename.wav" % sys.argv[0])
    sys.exit(-1)

wf = wave.open(sys.argv[1], 'rb')
p = pyaudio.PyAudio()

# Output stream configured from the file's sample width, channels and rate.
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True)

# Play the file chunk by chunk until it ends or Ctrl+C is pressed.
# CHUNK is the number of audio frames requested per read.
CHUNK = 1024
data = wf.readframes(CHUNK)
cnt = 0
try:
    # readframes() returns bytes, which never compare equal to the str '',
    # so the end of the file is detected by testing for an empty chunk.
    while data:
        stream.write(data)
        print(cnt)
        cnt = cnt + 1
        next_data = wf.readframes(CHUNK)
        if not next_data:
            # End of file: keep the last non-empty chunk for the display step.
            break
        data = next_data
except KeyboardInterrupt:
    # Ctrl+C stops playback; `data` holds the chunk being played at that time.
    pass
finally:
    # Always release the audio resources, even if playback failed.
    stream.stop_stream()
    stream.close()
    p.terminate()
# Total number of chunks written to the stream.
print(cnt)

#display
# One audio frame is drawn per image column. The indexing uses a stride of
# 4 bytes per frame (this matches 16-bit stereo PCM; other formats would need
# a different stride). Each of the four bytes gets its own lane of the image.
compress_factor = 1  # only every compress_factor-th frame is drawn
bytes_per_frame = 4
# (baseline offset from the bottom of the image, colour) for bytes 0..3
lanes = (
    (700, (0, 255, 0)),
    (500, (0, 0, 255)),
    (200, (255, 0, 0)),
    (0, (0, 255, 255)),
)
curr_data_bit = 0
for i in range(int(width)):
    base = curr_data_bit * compress_factor * bytes_per_frame
    if base + bytes_per_frame > len(data):
        # The chunk is shorter than the canvas (e.g. last chunk of the file
        # or no data left): stop drawing instead of indexing out of range.
        break
    #printing line for separate stream in the image data
    for lane, (offset, colour) in enumerate(lanes):
        baseline = height - offset - 10
        # Indexing bytes yields an int in the range 0..255, used as line length.
        cv2.line(vframe, (i, baseline), (i, baseline - data[base + lane]), colour, 1)
    curr_data_bit = curr_data_bit + 1
    if curr_data_bit == CHUNK // compress_factor:
        # Chunk exhausted: mark the boundary with a full-height line and
        # continue with the next chunk from the file.
        cv2.line(vframe, (i, height - 10), (i, 0), (255, 255, 255), 1)
        curr_data_bit = 0
        data = wf.readframes(CHUNK)
# No more reads are needed, so release the wave file.
wf.close()

#image is shown in window named sound show
cv2.imshow('sound show', vframe)
while True:  #wait for press, exit on pressing q
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
#destroy all window
cv2.destroyAllWindows()

Run the above code with the .wav filename as the first argument, as in the command given below:

python3 python_filename.py wav_filename.wav

Sample output

visual for the audio chunk
visualization of audio chunk data for a stereo audio input

Additional Resources

  1. sample stereo audio file
  2. Github repository

Share on Social Media
Mukuldeep Maiti

Mukuldeep Maiti

An unrecognized crazy being with random thoughts lost in unknown

You may also like...

Leave a Reply