Advertisement
Guest User

Untitled

a guest
Mar 21st, 2019
49
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 14.41 KB | None | 0 0
  1. SHELL := /bin/bash
  2.  
  3. # Use this file to override various settings
  4. -include /vagrant/options.aspire.Makefile
  5.  
  6. # Set to 'yes' if you want to do speaker ID for trs files
  7. DO_SPEAKER_ID?=no
  8. SID_THRESHOLD?=13
  9.  
  10. # Some audio produces no results (can't be segmented), or transcribes better
  11. # when segmented differently
  12. # Changing SEGMENTS to one of these values gives more flexibilty;
  13. # (see http://www-lium.univ-lemans.fr/diarization/doku.php/quick_start)
  14. #
  15. # show.seg : default - final segmentation with NCLR/CE clustering
  16. # show.i.seg : initial segmentation (entire audio)
  17. # show.pms.seg : sPeech/Music/Silence segmentation (don't use)
  18. # show.s.seg : GLR based segmentation, make small segments
  19. # show.l.seg : linear clustering (merge only side by side segments)
  20. # show.h.seg : hierarchical clustering
  21. # show.d.seg : viterbi decoding
  22. # show.adj.h.seg : boundaries adjusted
  23. # show.flt1.seg : filter spk segmentation according to pms segmentation
  24. # show.flt2.seg : filter spk segmentation according to pms segmentation
  25. # show.spl.seg : segments longer than 20 sec are split
  26. # show.spl10.seg : segments longer than 10 sec are split
  27. # show.g.seg : the gender and the bandwith are detected
  28. SEGMENTS ?= show.seg
  29.  
  30. # Where is Kaldi root directory?
  31. EESEN_ROOT ?= ~/eesen
  32. KALDI_ROOT ?=../kaldi
  33.  
  34. GRAPH_DIR?=$(EESEN_ROOT)/asr_egs/tedlium/v1/data/lang_phn_test
  35. MODEL_DIR?=$(EESEN_ROOT)/asr_egs/tedlium/v1/exp/train_phn_l5_c320
  36.  
  37. # How many processes to use for one transcription task
  38. # must be less than number of speakers, which for lium segmentation is often only 1
  39. njobs ?= 1
  40.  
  41. # How many threads to use in each process
  42. #nthreads ?= 1
  43.  
  44. # add Kaldi binaries to path
  45. PATH := utils:$(KALDI_ROOT)/src/bin:$(KALDI_ROOT)/tools/openfst/bin:$(KALDI_ROOT)/src/fstbin/:$(KALDI_ROOT)/src/gmmbin/:$(KALDI_ROOT)/src/featbin/:$(KALDI_ROOT)/src/lm/:$(KALDI_ROOT)/src/sgmmbin/:$(KALDI_ROOT)/src/sgmm2bin/:$(KALDI_ROOT)/src/fgmmbin/:$(KALDI_ROOT)/src/latbin/:$(KALDI_ROOT)/src/nnetbin:$(KALDI_ROOT)/src/nnet2bin/:$(KALDI_ROOT)/src/kwsbin:$(KALDI_ROOT)/src/ivectorbin:$(PATH)
  46. # add EESEN binaries to path
  47. PATH := $(EESEN_ROOT)/src/decoderbin:$(EESEN_ROOT)/src/featbin:$(EESEN_ROOT)/src/nnetbin:$(PATH)
  48. export train_cmd=run.pl
  49. export decode_cmd=run.pl
  50. export cuda_cmd=run.pl
  51. export mkgraph_cmd=run.pl
  52.  
  53. # optimum experimentally determined LM weight for TEDLIUM data set
  54. # (produces lowest WER)
  55. LM_SCALE?=8
  56.  
  57. # Find out where this Makefile is located (this is not really needed)
  58. where-am-i = $(lastword $(MAKEFILE_LIST))
  59. THIS_DIR := $(shell dirname $(call where-am-i))
  60.  
  61. # This ends up just being a folder name for output
  62. FINAL_PASS=eesen
  63.  
  64. #LD_LIBRARY_PATH=$(KALDI_ROOT)/tools/openfst/lib
  65.  
  66. .SECONDARY:
  67. .DELETE_ON_ERROR:
  68.  
  69. export
  70.  
  71. # Call this (once) before using the system
  72. .init: .kaldi #.lang .composed_lms
  73.  
  74. .kaldi:
  75. rm -f steps utils
  76. ln -fs $(KALDI_ROOT)/egs/wsj/s5/steps
  77. ln -fs $(KALDI_ROOT)/egs/wsj/s5/utils
  78. ln -fs $(KALDI_ROOT)/egs/sre08/v1/sid
  79. mkdir -p src-audio
  80.  
  81. build/audio/base/%.wav: src-audio/%.sph
  82. mkdir -p `dirname $@`
  83. #sox $^ build/audio/base/$*.wav rate -v $(sample_rate) #channels 1
  84. /data/ASR5/fmetze/kaldi-latest/tools/sph2pipe_v2.5/sph2pipe -f wav $^ | sox -t wav - -c 1 build/audio/base/$*.wav rate -v $(sample_rate)
  85.  
  86. build/audio/base/%.wav: src-audio/%.wav
  87. mkdir -p `dirname $@`
  88. sox $^ -c 1 -b 16 build/audio/base/$*.wav rate -v $(sample_rate)
  89.  
  90. build/audio/base/%.wav: src-audio/%.WAV
  91. mkdir -p `dirname $@`
  92. sox $^ -c 1 -b 16 build/audio/base/$*.wav rate -v $(sample_rate)
  93.  
  94. build/audio/base/%.wav: src-audio/%.mp3
  95. mkdir -p `dirname $@`
  96. # sox $^ -c 1 build/audio/base/$*.wav rate -v $(sample_rate)
  97. # lame --resample $(sample_rate) --decode $^ build/audio/base/$*.8k.wav
  98. # sox build/audio/base/$*.8k.wav -c 1 build/audio/base/$*.wav rate -v $(sample_rate)
  99. ffmpeg -loglevel warning -i $^ -f sox - | sox -t sox - -c 1 -b 16 -r $(sample_rate) $@
  100.  
  101. build/audio/base/%.wav: src-audio/%.MP3
  102. mkdir -p `dirname $@`
  103. sox $^ -c 1 build/audio/base/$*.wav rate -v $(sample_rate)
  104.  
  105. build/audio/base/%.wav: src-audio/%.ogg
  106. mkdir -p `dirname $@`
  107. sox $^ -c 1 build/audio/base/$*.wav rate -v $(sample_rate)
  108.  
  109. build/audio/base/%.wav: src-audio/%.mp2
  110. mkdir -p `dirname $@`
  111. sox $^ -c 1 build/audio/base/$*.wav rate -v $(sample_rate)
  112.  
  113. build/audio/base/%.wav: src-audio/%.wma
  114. mkdir -p `dirname $@`
  115. ffmpeg -loglevel warning -i $^ -f sox - | sox -t sox - -c 1 -b 16 -r $(sample_rate) $@
  116.  
  117. build/audio/base/%.wav: src-audio/%.WMA
  118. mkdir -p `dirname $@`
  119. ffmpeg -loglevel warning -i $^ -f sox - | sox -t sox - -c 1 -b 16 -r $(sample_rate) $@
  120.  
  121. build/audio/base/%.wav: src-audio/%.m4a
  122. mkdir -p `dirname $@`
  123. avconv -i $^ -ac 1 -ar $(sample_rate) -y $@
  124. # ffmpeg -i $^ -f sox - | sox -t sox - -c 1 -b 16 $@ rate -v $(sample_rate)
  125.  
  126. build/audio/base/%.wav: src-audio/%.mp4
  127. mkdir -p `dirname $@`
  128. # sox $^ -c 1 build/audio/base/$*.wav rate -v $(sample_rate)
  129. # avconv -i $^ -ac 1 -ar $(sample_rate) -y $@
  130. ffmpeg -i $^ -f sox - | sox -t sox - -c 1 -b 16 $@ rate -v $(sample_rate)
  131. echo "converted audio"
  132. date +%s%N | cut -b1-13
  133.  
  134. build/audio/base/%.wav: src-audio/%.MP4
  135. mkdir -p `dirname $@`
  136. # sox $^ -c 1 build/audio/base/$*.wav rate -v $(sample_rate)
  137. avconv -i $^ -ac 1 -ar $(sample_rate) -y $@
  138. echo "converted audio"
  139. date +%s%N | cut -b1-13
  140.  
  141. build/audio/base/%.wav: src-audio/%.flac
  142. mkdir -p `dirname $@`
  143. flac -s -d -c $^ | sox -t wav - -c 1 -b 16 build/audio/base/$*.wav rate -v $(sample_rate)
  144.  
  145. build/audio/base/%.wav: src-audio/%.amr
  146. mkdir -p `dirname $@`
  147. amrnb-decoder $^ $@.tmp.raw
  148. sox -s -b 16 -c 1 -r 8000 $@.tmp.raw -c 1 build/audio/base/$*.wav rate -v $(sample_rate)
  149. rm $@.tmp.raw
  150.  
  151. build/audio/base/%.wav: src-audio/%.mpg
  152. mkdir -p `dirname $@`
  153. avconv -i $^ -f sox - | sox -t sox - -c 1 -b 16 build/audio/base/$*.wav rate -v $(sample_rate)
  154. # ffmpeg -i $^ -f sox - | sox -t sox - -c 1 -b 16 build/audio/base/$*.wav rate -v $(sample_rate)
  155.  
  156. # Speaker diarization
  157. build/diarization/%/$(SEGMENTS): build/audio/base/%.wav
  158. rm -rf `dirname $@`
  159. mkdir -p `dirname $@`
  160. echo "$* 1 0 1000000000 U U U 1" > `dirname $@`/show.uem.seg;
  161. ./scripts/diarization.sh $^ `dirname $@`/show.uem.seg $(SEGMENTS);
  162. echo "diarization complete"
  163. date +%s%N | cut -b1-13
  164.  
  165. #build/audio/segmented/%: build/diarization/%/show.seg
  166. build/audio/segmented/%: build/diarization/%/$(SEGMENTS)
  167. rm -rf $@
  168. mkdir -p $@
  169. cat $^ | cut -f 3,4,8 -d " " | \
  170. while read LINE ; do \
  171. start=`echo $$LINE | cut -f 1 -d " " | perl -npe '$$_=$$_/100.0'`; \
  172. len=`echo $$LINE | cut -f 2 -d " " | perl -npe '$$_=$$_/100.0'`; \
  173. sp_id=`echo $$LINE | cut -f 3 -d " "`; \
  174. timeformatted=`echo "$$start $$len" | perl -ne '@t=split(); $$start=$$t[0]; $$len=$$t[1]; $$end=$$start+$$len; printf("%08.3f-%08.3f\n", $$start,$$end);'` ; \
  175. if [ $${sp_id} == 'A' ]; then \
  176. sox build/audio/base/$*.wav -c 1 $@/$*_$${timeformatted}_$${sp_id}.wav trim $$start $$len remix 1; \
  177. elif [ $${sp_id} == 'B' ]; then \
  178. sox build/audio/base/$*.wav -c 1 $@/$*_$${timeformatted}_$${sp_id}.wav trim $$start $$len remix 2; \
  179. else \
  180. sox build/audio/base/$*.wav --norm $@/$*_$${timeformatted}_$${sp_id}.wav trim $$start $$len; \
  181. fi \
  182. done
  183.  
  184. build/trans/%/wav.scp: build/audio/segmented/%
  185. mkdir -p `dirname $@`
  186. /bin/ls $</*.wav | \
  187. perl -npe 'chomp; $$orig=$$_; s/.*\/(.*)_(\d+\.\d+-\d+\.\d+)_(.*)\.wav/\1-\3---\2/; $$_=$$_ . " $$orig\n";' | LC_ALL=C sort > $@
  188.  
  189. build/trans/%/utt2spk: build/trans/%/wav.scp
  190. cat $^ | perl -npe 's/\s+.*//; s/((.*)---.*)/\1 \2/' > $@
  191.  
  192. build/trans/%/spk2utt: build/trans/%/utt2spk
  193. utils/utt2spk_to_spk2utt.pl $^ > $@
  194.  
  195.  
  196. # FBANK calculation
  197. # example target:
  198. # make build/trans/myvideo/fbank
  199. # note the % pattern matches e.g. myvideo
  200. build/trans/%/fbank: build/trans/%/spk2utt
  201. rm -rf $@ build/trans/$*/cmvn.scp
  202. steps/$(fbank).sh --fbank-config conf/fbank.$(sample_rate).conf --cmd "$$train_cmd" --nj $(njobs) \
  203. build/trans/$* build/trans/$*/exp/make_fbank $@ || exit 1
  204. steps/compute_cmvn_stats.sh build/trans/$* build/trans/$*/exp/make_fbank $@ || exit 1;
  205.  
  206. # Hires MFCC calculation
  207. build/trans/%/mfcc_hires: build/trans/%/spk2utt
  208. rm -rf $@ build/trans/$*/cmvn.scp
  209. steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --cmd "$$train_cmd" --nj $(njobs) \
  210. build/trans/$* build/trans/$*/exp/make_mfcc_hires $@ || exit 1
  211. steps/compute_cmvn_stats.sh build/trans/$* build/trans/$*/exp/make_mfcc_hires $@ || exit 1;
  212.  
  213. # i-vectors for adapted Aspire model and training
  214. # this should not be hardcoded in this way, but well ...
  215. aspireivectordir=/data/ASR5/sdalmia_1/spring2018/kaldi/kaldi/egs/aspire/s5_orig
  216. build/trans/%/ivectors: build/trans/%/mfcc_hires
  217. steps/online/nnet2/extract_ivectors.sh --cmd "$$train_cmd" --nj $(njobs) --silence-weight 0.00001 \
  218. --sub-speaker-frames 6000 --max-count 75 \
  219. build/trans/$* "$$aspireivectordir"/data/lang \
  220. "$$aspireivectordir"/exp/tdnn_7b_chain_online/ivector_extractor build/trans/$*/exp/make_ivectors || exit 1;
  221.  
  222.  
  223. # Decode
  224. # example target
  225. # make build/trans/myvideo/eesen/decode/log
  226. build/trans/%/$(FINAL_PASS)/decode/log: build/trans/%/spk2utt build/trans/%/fbank
  227. rm -rf build/trans/$*/$(FINAL_PASS) && mkdir -p build/trans/$*/$(FINAL_PASS)
  228. # (cd build/trans/$*/eesen; for f in $(MODEL_DIR)/*; do ln -s $$f; done)
  229. ifeq ($(MODELS),aspire.kaldi)
  230. local/decode_nnet3.sh --cmd "$$decode_cmd" --nj $(njobs) \
  231. --acwt $(ACWT) --post_decode_acwt 10.0 --beam $(BEAM) \
  232. --lattice_beam 6.0 --max-active 7000 --skip_scoring true \
  233. $(GRAPH_DIR) build/trans/$* `dirname $@` $(MODEL_DIR) || exit 1;
  234. else ifeq ($(MODELS),tedlium.eesen)
  235. $(EESEN_ROOT)/local/decode_ctc_lat_tf.sh --mdl $(MODEL_DIR) --scoredir build/trans/$* \
  236. --cmd "$$decode_cmd" --nj $(njobs) --beam $(BEAM) \
  237. --lattice_beam 8.0 --max-active 5000 --skip_scoring true \
  238. --acwt $(ACWT) --temperature 1.25 \
  239. $(GRAPH_DIR) build/trans/$* `dirname $@` || exit 1;
  240. endif
  241.  
  242. # scoring can happen here now, get_ctm_conf.sh only scores if -f build/trans/$*/stm
  243. # produces confidence scores
  244. # e.g. make build/trans/myvideo/eesen/decode/.ctm
  245. # % = build/trans/myvideo/eesen
  246. %/decode/.ctm: %/decode/log
  247. # local/get_ctm.sh `dirname $*` $*/graph $*/decode
  248. # local/get_ctm_conf.sh `dirname $*` $*/graph $*/decode
  249. ifeq ($(MODELS),aspire.kaldi)
  250. local/lattice_to_ctm_aspire.sh `dirname $*` $(GRAPH_DIR) $*/decode
  251. else
  252. local/get_ctm_conf.sh `dirname $*` $(GRAPH_DIR) $*/decode
  253. endif
  254. touch -m $@
  255.  
  256. # % = myvideo/eesen
  257. # e.g. make build/trans/myvideo/eesen.segmented.splitw2.ctm
  258. build/trans/%.segmented.splitw2.ctm: build/trans/%/decode/.ctm
  259. # cat build/trans/$*/decode/score_$(LM_SCALE)/`dirname $*`.ctm | perl -npe 's/(.*)-(S\d+)---(\S+)/\1_\3_\2/' > $@
  260. cat build/trans/$*/decode/score_$(LM_SCALE)/`dirname $*`.ctm | perl -npe 's/(.*)-(\w+)---(\S+)/\1_\3_\2/' > $@
  261.  
  262.  
  263. #build/trans/myvideo/eesen.segmented.splitw2.ctm -> build/trans/myvideo/eesen.segmented.with-compounds.ctm
  264. %.with-compounds.ctm: %.splitw2.ctm
  265. cat $*.splitw2.ctm > $@
  266.  
  267. #build/trans/myvideo/eesen.segmented.with-compounds.ctm -> build/trans/myvideo/eesen.segmented.ctm
  268. %.segmented.ctm: %.segmented.with-compounds.ctm
  269. cat $^ > $@
  270.  
  271. %.ctm: %.segmented.ctm
  272. cat $^ | python scripts/unsegment-ctm.py | LC_ALL=C sort -k 1,1 -k 3,3n -k 4,4n > $@
  273.  
  274. %.with-compounds.ctm: %.segmented.with-compounds.ctm
  275. cat $^ | python scripts/unsegment-ctm.py | LC_ALL=C sort -k 1,1 -k 3,3n -k 4,4n > $@
  276.  
  277. %.hyp: %.segmented.ctm
  278. cat $^ | python scripts/segmented-ctm-to-hyp.py > $@
  279.  
  280. ifeq "yes" "$(DO_SPEAKER_ID)"
  281. build/trans/%/$(FINAL_PASS).trs: build/trans/%/$(FINAL_PASS).hyp build/trans/%/sid-result.txt
  282. cat build/trans/$*/$(FINAL_PASS).hyp | python scripts/hyp2trs.py --sid build/trans/$*/sid-result.txt > $@
  283. else
  284. build/trans/%/$(FINAL_PASS).trs: build/trans/%/$(FINAL_PASS).hyp
  285. cat $^ | python scripts/hyp2trs.py > $@
  286. endif
  287.  
  288. %.sbv: %.hyp
  289. cat $^ | python scripts/hyp2sbv.py > $@
  290.  
  291. %.txt: %.hyp
  292. cat $^ | perl -npe 'use locale; s/ \(\S+\)/\./; $$_= ucfirst();' > $@
  293.  
  294. %.srt: %.ctm
  295. cat $^ | python scripts/ctm2srt.py > $@
  296.  
  297. %.labels: %.ctm
  298. cat $^ | python scripts/ctm2labels.py > $@
  299.  
  300. build/output/%.trs: build/trans/%/$(FINAL_PASS).trs
  301. mkdir -p `dirname $@`
  302. cp $^ $@
  303.  
  304. build/output/%.ctm: build/trans/%/$(FINAL_PASS).ctm
  305. mkdir -p `dirname $@`
  306. cp $^ $@
  307.  
  308. build/output/%.txt: build/trans/%/$(FINAL_PASS).txt
  309. mkdir -p `dirname $@`
  310. cp $^ $@
  311.  
  312. build/output/%.with-compounds.ctm: build/trans/%/$(FINAL_PASS).with-compounds.ctm
  313. mkdir -p `dirname $@`
  314. cp $^ $@
  315.  
  316. build/output/%.sbv: build/trans/%/$(FINAL_PASS).sbv
  317. mkdir -p `dirname $@`
  318. cp $^ $@
  319.  
  320. build/output/%.srt: build/trans/%/$(FINAL_PASS).ctm
  321. mkdir -p `dirname $@`
  322. cp $^ $@
  323.  
  324. build/output/%.ali: build/trans/%/$(FINAL_PASS).txt
  325. mkdir -p `dirname $@`
  326. ./run_align.sh --GRAPH_DIR $(GRAPH_DIR) --MODEL_DIR $(MODEL_DIR) $*
  327.  
  328. ### Speaker ID stuff
  329. # i-vectors for each speaker in our audio file
  330. build/trans/%/ivectors: build/trans/%/mfcc
  331. sid/extract_ivectors.sh --cmd "$$decode_cmd" --nj $(njobs) \
  332. $(THIS_DIR)/kaldi-data/extractor_2048_top500 build/trans/$* $@
  333.  
  334. # a cross product of train and test speakers
  335. build/trans/%/sid-trials.txt: build/trans/%/ivectors
  336. cut -f 1 -d " " $(THIS_DIR)/kaldi-data/ivectors_train_top500/spk_ivector.scp | \
  337. while read a; do \
  338. cut -f 1 -d " " build/trans/$*/ivectors/spk_ivector.scp | \
  339. while read b; do \
  340. echo "$$a $$b"; \
  341. done ; \
  342. done > $@
  343.  
  344. # similarity scores
  345. build/trans/%/sid-scores.txt: build/trans/%/sid-trials.txt
  346. ivector-plda-scoring \
  347. "ivector-copy-plda --smoothing=0.0 $(THIS_DIR)/kaldi-data/ivectors_train_top500/plda - |" \
  348. "ark:ivector-subtract-global-mean scp:$(THIS_DIR)/kaldi-data/ivectors_train_top500/spk_ivector.scp ark:- |" \
  349. "ark:ivector-subtract-global-mean scp:build/trans/$*/ivectors/spk_ivector.scp ark:- |" \
  350. build/trans/$*/sid-trials.txt $@
  351.  
  352. # pick speakers above the threshold
  353. build/trans/%/sid-result.txt: build/trans/%/sid-scores.txt
  354. cat build/trans/$*/sid-scores.txt | sort -u -k 2,2 -k 3,3nr | sort -u -k2,2 | \
  355. awk 'int($$3)>=$(SID_THRESHOLD)' | perl -npe 's/(\S+) \S+-(S\d+) \S+/\2 \1/; s/-/ /g' > $@
  356.  
  357.  
  358. # Meta-target that deletes all files created during processing a file. Call e.g. 'make .etteytlus2013.clean
  359. .%.clean:
  360. rm -rf build/audio/base/$*.wav build/audio/segmented/$* build/diarization/$* build/trans/$*
  361. # rm -rf build/audio/base/$*.wav build/audio/segmented/$* build/diarization/$* build/trans/$* #src-audio/$*.wav
  362.  
  363. # Also deletes the output files
  364. .%.cleanest: .%.clean
  365. rm -rf build/output/$*.{trs,txt,ctm,with-compounds.ctm,sbv,ali,labels}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement