-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtoSpectrogram.py
More file actions
103 lines (81 loc) · 3.24 KB
/
toSpectrogram.py
File metadata and controls
103 lines (81 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import wave
import numpy as np
import librosa, librosa.display
import matplotlib.pyplot as plt
from os import path
# Initial figure size in inches (width, height). It is reassigned to (10, 10)
# later in this script before any figure is drawn.
FIG_SIZE = (15, 10)
# NOTE(review): DATA_NUM is not referenced anywhere in the visible script.
DATA_NUM = 30
# Parallel lists filled by the dataset walk: the source audio paths and their
# labels (the first character of each file name).
paths, abel_gubuns = [], []
def is_folderpath(folder_path):
    """Make sure the directory ``folder_path`` exists, creating parents as needed.

    The directory is created in a single call instead of checking for it first,
    so there is no window between the check and the creation in which another
    process could create it and make the call fail.

    Args:
        folder_path: Path of the directory that must exist afterwards.

    Raises:
        FileExistsError: If ``folder_path`` exists but is not a directory.
    """
    os.makedirs(folder_path, exist_ok=True)
for dirname, _, filenames in os.walk("dataset/wav"):
for filename in filenames:
# load audio file with Librosa
file_path = dirname + '/' + filename
subdir = dirname[-1]
paths.append(file_path)
sig, sr = librosa.load(file_path, sr=22050)
# 에너지 평균 구하기
sum = 0
for i in range(0, sig.shape[0]):
sum += sig[i] ** 2
mean = sum / sig.shape[0]
# 피크인덱스 찾기
for i in range(0, sig.shape[0]):
if (sig[i] ** 2 >= mean):
peekIndex = i
break;
START_LEN = 1102 # 0.05
END_LEN = 20948
if (peekIndex > 1102):
startPoint = peekIndex - START_LEN
endPoint = peekIndex + 22050
else:
startPoint = peekIndex
endPoint = peekIndex + END_LEN
# 단순 푸리에 변환 -> Specturm
fft = np.fft.fft(sig[startPoint:endPoint])
# 복소공간 값 절댓갑 취해서, magnitude 구하기
magnitude = np.abs(fft)
# Frequency 값 만들기
f = np.linspace(0, sr, len(magnitude))
# 푸리에 변환을 통과한 specturm은 대칭구조로 나와서 high frequency 부분 절반을 날려고 앞쪽 절반만 사용한다.
left_spectrum = magnitude[:int(len(magnitude) / 2)]
left_f = f[:int(len(magnitude) / 2)]
# STFT -> Spectrogram
hop_length = 512 # 전체 frame 수
n_fft = 2048 # frame 하나당 sample 수
# calculate duration hop length and window in seconds
hop_length_duration = float(hop_length) / sr
n_fft_duration = float(n_fft) / sr
# STFT
stft = librosa.stft(sig[startPoint:endPoint], n_fft=n_fft, hop_length=hop_length)
# 복소공간 값 절댓값 취하기
magnitude = np.abs(stft)
# magnitude > Decibels
log_spectrogram = librosa.amplitude_to_db(magnitude)
FIG_SIZE = (10, 10)
# display spectrogram
plt.figure(figsize=FIG_SIZE)
librosa.display.specshow(log_spectrogram, sr=sr, hop_length=hop_length, cmap='magma')
name_end1_pos = filename.find('_')
name_start2_pos = filename.rfind(('_'))+3
name_end2_pos = filename.rfind('.')
abel_gubuns.append(filename[0])
# save spectrogram image
filepath = 'dataset/image/' + subdir + '/' + filename[:name_end1_pos] + '_' + filename[name_start2_pos:name_end2_pos] + '.jpg'
print(filepath)
folder_path = "dataset/image/" + subdir
is_folderpath(folder_path)
plt.savefig(filepath)
plt.close()
# plt.show()
# Summarise the processed files: one row per audio file with its label.
pd.set_option('display.max_colwidth', 200)  # widen columns so long paths are shown in full
data_df = pd.DataFrame({'path': paths, 'label': abel_gubuns})
print('data_df shape:', data_df.shape)
# A bare `data_df.head()` is only rendered by a notebook; print it so the
# first five rows also appear when this file is run as a script.
print(data_df.head())