Visualization of a stereo audio chunk or frame using OpenCV & PyAudio implemented in python
You are probably looking for code that can visualize the data in an audio chunk. Visualization is useful for purposes such as analyzing and manipulating audio in the form of chunks. Here, I use stereo audio as input and visualize its streams separately. Jump to the full code.
Pre-requisite: basic knowledge of python
Libraries used: the audio libraries PyAudio and wave are used along with OpenCV and NumPy. The wave module is part of the Python standard library, so it needs no installation. If you haven’t installed the other libraries yet, install them first by executing the following commands:
pip install PyAudio
pip install opencv-python numpy
The task consists of two consecutive parts: read the chunk, then display it. Follow the steps below.
Step 1. import required libraries i.e. pyaudio, wave, numpy, opencv, and sys
import pyaudio import wave import sys import numpy as np import cv2 from numpy import *
Step 2. Initialize the visualizer image as a NumPy array of shape (height, width, 3), filled with zeros.
# Canvas dimensions in pixels; the drawing step positions its traces
# relative to these values.
height = 1000
width = 1000

# All-zero (black) 3-channel 8-bit image that the traces are drawn onto.
canvas_shape = (height, width, 3)
vframe = np.zeros(canvas_shape, dtype=np.uint8)
Step 3. Check whether the audio file name (path) was passed as a command-line argument; if not, exit with a usage message.
# The .wav path must be supplied as the first command-line argument.
# Without it, print a short usage message and stop.
if len(sys.argv) < 2:
    usage = "Plays a wave file.\n\nUsage: %s filename.wav" % sys.argv[0]
    print(usage)
    sys.exit(-1)
Step 4: open .wav file using wave from arguments and initialize PyAudio
# Open the .wav file named by the first command-line argument for reading.
wav_path = sys.argv[1]
wf = wave.open(wav_path, 'rb')

# Create the PyAudio instance the output stream is opened from.
p = pyaudio.PyAudio()
Step 5: open pyaudio stream
# Configure the output stream from the wave file's own parameters
# (sample width, channel count and frame rate).
sample_format = p.get_format_from_width(wf.getsampwidth())
stream = p.open(
    format=sample_format,
    channels=wf.getnchannels(),
    rate=wf.getframerate(),
    output=True,
)
Step 6: Read and play the data chunk by chunk until a keyboard interrupt (Ctrl+C) occurs, printing the chunk number as it goes
# Number of frames read from the file and written to the stream per iteration.
CHUNK = 1024

data = wf.readframes(CHUNK)
cnt = 0  # number of chunks played so far
try:
    # readframes() returns bytes, so an empty result (end of file) is falsy.
    # Comparing against the str '' would never be equal in Python 3 and the
    # loop would spin forever once the file is exhausted.
    while data:
        stream.write(data)
        next_data = wf.readframes(CHUNK)
        print(cnt)
        cnt = cnt + 1
        if not next_data:
            # End of file: leave the last played chunk in `data` so the
            # visualization step still has something to draw.
            break
        data = next_data
except KeyboardInterrupt:
    # Ctrl+C stops playback; `data` holds the chunk current at that moment.
    pass
print(cnt)
Step 7: stop and close stream, terminate pyaudio instance on keyboard interrupt
# Playback is over: stop the output stream before closing it ...
stream.stop_stream()
stream.close()

# ... and then terminate the PyAudio instance.
p.terminate()
Step 8: visualize the data in that frame
# Draw the current chunk: one image column per frame, and one vertical bar
# per byte position within the frame.  Each bar rises from its own baseline
# by the byte's value (0-255).
# NOTE(review): the stride of 4 bytes per frame is hard-coded; this matches
# e.g. 16-bit stereo audio -- confirm before using other formats.
bytes_per_frame = 4

# Take every compress_factor-th frame (1 = every frame).
compress_factor = 1

# (baseline offset from the bottom edge, BGR colour) for byte 0..3 of a frame.
traces = (
    (700, (0, 255, 0)),
    (500, (0, 0, 255)),
    (200, (255, 0, 0)),
    (0, (0, 255, 255)),
)

curr_data_bit = 0  # index of the frame within the current chunk
for i in range(int(width)):
    base = curr_data_bit * compress_factor * bytes_per_frame
    if base + bytes_per_frame > len(data):
        # Short or empty chunk: stop instead of raising IndexError.
        break

    # Indexing a bytes object yields ints directly, so there is no need to
    # rebuild list(data) for every bar.
    for offset, (baseline, colour) in enumerate(traces):
        y_base = height - baseline - 10
        cv2.line(vframe, (i, y_base), (i, y_base - data[base + offset]), colour, 1)

    curr_data_bit = curr_data_bit + 1
    if curr_data_bit == 1024 // compress_factor:
        # Chunk exhausted: mark the boundary with a white line and continue
        # with the next chunk from the file.
        cv2.line(vframe, (i, height - 10), (i, 0), (255, 255, 255), 1)
        curr_data_bit = 0
        data = wf.readframes(CHUNK)

# Show the image in a window named 'sound show'.
cv2.imshow('sound show', vframe)

# Keep the window open until 'q' is pressed.
while True:
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Destroy all windows.
cv2.destroyAllWindows()
Combining together
The full source code is given below.
"""Play a .wav file and, once playback stops, plot the bytes of one chunk.

Usage: python3 python_filename.py wav_filename.wav

Playback runs until the file ends or Ctrl+C is pressed.  The chunk held at
that moment is then drawn with OpenCV; press 'q' in the window to quit.
"""
import sys
import wave

import cv2
import numpy as np
import pyaudio

# Step 2: all-zero (black) 3-channel image the traces are drawn onto.
height = 1000
width = 1000
vframe = np.zeros((height, width, 3), np.uint8)

# Step 3: the .wav path must be given as the first argument.
if len(sys.argv) < 2:
    print("Plays a wave file.\n\nUsage: %s filename.wav" % sys.argv[0])
    sys.exit(-1)

# Step 4: open the wave file and initialize PyAudio.
wf = wave.open(sys.argv[1], 'rb')
p = pyaudio.PyAudio()

# Step 5: open an output stream configured from the file's own parameters.
stream = p.open(
    format=p.get_format_from_width(wf.getsampwidth()),
    channels=wf.getnchannels(),
    rate=wf.getframerate(),
    output=True,
)

# Step 6: play chunk by chunk, printing the chunk number.
CHUNK = 1024  # frames read and written per iteration
data = wf.readframes(CHUNK)
cnt = 0
try:
    # readframes() returns bytes; an empty result means end of file.
    # Comparing against the str '' is never equal in Python 3 and would
    # make this loop run forever.
    while data:
        stream.write(data)
        next_data = wf.readframes(CHUNK)
        print(cnt)
        cnt = cnt + 1
        if not next_data:
            # End of file: keep the last played chunk for the visualization.
            break
        data = next_data
except KeyboardInterrupt:
    # Ctrl+C stops playback; `data` holds the chunk current at that moment.
    pass
print(cnt)

# Step 7: stop and close the stream, terminate PyAudio.
stream.stop_stream()
stream.close()
p.terminate()

# Step 8: display.  One image column per frame, one vertical bar per byte
# position within the frame, each rising from its own baseline by the
# byte's value (0-255).
# NOTE(review): the stride of 4 bytes per frame is hard-coded; this matches
# e.g. 16-bit stereo audio -- confirm before using other formats.
bytes_per_frame = 4
compress_factor = 1  # take every compress_factor-th frame

# (baseline offset from the bottom edge, BGR colour) for byte 0..3 of a frame.
traces = (
    (700, (0, 255, 0)),
    (500, (0, 0, 255)),
    (200, (255, 0, 0)),
    (0, (0, 255, 255)),
)

curr_data_bit = 0  # index of the frame within the current chunk
for i in range(int(width)):
    base = curr_data_bit * compress_factor * bytes_per_frame
    if base + bytes_per_frame > len(data):
        # Short or empty chunk: stop instead of raising IndexError.
        break

    # Indexing bytes yields ints directly; no list(data) copy is needed.
    for offset, (baseline, colour) in enumerate(traces):
        y_base = height - baseline - 10
        cv2.line(vframe, (i, y_base), (i, y_base - data[base + offset]), colour, 1)

    curr_data_bit = curr_data_bit + 1
    if curr_data_bit == CHUNK // compress_factor:
        # Chunk exhausted: mark the boundary with a white line and continue
        # with the next chunk from the file.
        cv2.line(vframe, (i, height - 10), (i, 0), (255, 255, 255), 1)
        curr_data_bit = 0
        data = wf.readframes(CHUNK)

# Show the image in a window named 'sound show'.
cv2.imshow('sound show', vframe)

# Keep the window open until 'q' is pressed.
while True:
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Destroy all windows and release the wave file.
cv2.destroyAllWindows()
wf.close()
Run the above code with the .wav filename as the first argument, as in the command below:
python3 python_filename.py wav_filename.wav
Sample output
