update(第三章):规范代码格式并更新配图

cnlinxi · Mar 20, 2022 · 39639c4 · 39639c4
1 parent d437c7e
commit 39639c4
Show file tree

Hide file tree

Showing 4 changed files with 37 additions and 35 deletions.
diff --git a/text_to_speech.log b/text_to_speech.log
@@ -1,4 +1,4 @@
-This is XeTeX, Version 3.141592653-2.6-0.999993 (TeX Live 2021) (preloaded format=xelatex 2022.3.2)  20 MAR 2022 22:04
+This is XeTeX, Version 3.141592653-2.6-0.999993 (TeX Live 2021) (preloaded format=xelatex 2022.3.2)  20 MAR 2022 22:08
 entering extended mode
  restricted \write18 enabled.
  file:line:error style messages enabled.
@@ -2164,9 +2164,9 @@ File: ./image//vocoder_nhv_harmonic_sample.png Graphic file (type bmp)
 <./image//vocoder_nhv_harmonic_sample.png>
  [55] [56]
 LaTeX Font Info:    Font shape `T1/ntxtlf/m/n' will be
-(Font)              scaled to size 7.3pt on input line 2084.
+(Font)              scaled to size 7.3pt on input line 2086.
 LaTeX Font Info:    Font shape `T1/ntxtlf/m/n' will be
-(Font)              scaled to size 5.5pt on input line 2084.
+(Font)              scaled to size 5.5pt on input line 2086.
  [57] [58] [59] [60] [61] [62] [63] [64] [65] [66]
 第 7{} 章 7.
 File: ./image//text_to_speech_knowledge.png Graphic file (type bmp)
@@ -2181,10 +2181,10 @@ Package logreq Info: Writing requests to 'text_to_speech.run.xml'.
 
  ) 
 Here is how much of TeX's memory you used:
- 47084 strings out of 476919
- 975794 string characters out of 5821841
+ 47085 strings out of 476919
+ 975806 string characters out of 5821841
  2252612 words of memory out of 5000000
- 66186 multiletter control sequences out of 15000+600000
+ 66187 multiletter control sequences out of 15000+600000
  458064 words of font info for 113 fonts, out of 8000000 for 9000
  1348 hyphenation exceptions out of 8191
  118i,12n,131p,1436b,1887s stack positions out of 5000i,500n,10000p,200000b,80000s

diff --git a/text_to_speech.pdf b/text_to_speech.pdf
diff --git a/text_to_speech.synctex.gz b/text_to_speech.synctex.gz
diff --git a/text_to_speech.tex b/text_to_speech.tex
@@ -884,35 +884,35 @@ \section{具体操作}
 \subsection{利用librosa读取音频}
 
 \begin{lstlisting}
-  from matplotlib import pyplot as plt
-  import numpy as np
-  import librosa
-
-  # 利用librosa读取音频
-  input_wav_path = r'test.wav'
-  y, sr = librosa.load(input_wav_path)
-  y_num = np.arange(len(y))
-  
-  # 截取前0.3s的音频
-  sample_signal = y[0:int(sr*0.3)]
-  sample_num = np.arange(len(sample_signal))
-  
-  plt.figure(figsize=(11, 7), dpi=500)
-  plt.subplot(211)
-  plt.plot(y_num/sr, y, color='black')
-  plt.plot(sample_num/sr, sample_signal, color='blue')
-  plt.xlabel('Time (sec)')
-  plt.ylabel('Amplitude')
-  plt.title('Waveform')
-  
-  plt.subplot(212)
-  plt.plot(sample_num/sr, sample_signal, color='blue')
-  plt.xlabel('Time (sec)')
-  plt.ylabel('Amplitude')
-  plt.title('0~0.3s waveform')
-  plt.tight_layout()
-  plt.savefig('waveform.png', dpi=500)
-  plt.show()
+from matplotlib import pyplot as plt
+import numpy as np
+import librosa
+
+# 利用librosa读取音频
+input_wav_path = r'test.wav'
+y, sr = librosa.load(input_wav_path)
+y_num = np.arange(len(y))
+
+# 截取前0.3s的音频
+sample_signal = y[0:int(sr*0.3)]
+sample_num = np.arange(len(sample_signal))
+
+plt.figure(figsize=(11, 7), dpi=500)
+plt.subplot(211)
+plt.plot(y_num/sr, y, color='black')
+plt.plot(sample_num/sr, sample_signal, color='blue')
+plt.xlabel('Time (sec)')
+plt.ylabel('Amplitude')
+plt.title('Waveform')
+
+plt.subplot(212)
+plt.plot(sample_num/sr, sample_signal, color='blue')
+plt.xlabel('Time (sec)')
+plt.ylabel('Amplitude')
+plt.title('0~0.3s waveform')
+plt.tight_layout()
+plt.savefig('waveform.png', dpi=500)
+plt.show()
 \end{lstlisting}
 
 \begin{figure}[htbp]
@@ -996,6 +996,8 @@ \subsection{提取梅尔频谱}
 \subsection{提取MFCC}
 
 \begin{lstlisting}
+from scipy.fftpack import dct
+
 num_ceps = 12
 mfcc = dct(mel_spec, type=2, axis=1, norm='ortho')[:, 1 : (num_ceps + 1)]
 plot_spectrogram(mfcc, 'mfcc.png')