2020/11/13更新方法:

1.訓練YOLOv3模型時,使用下列指令,將訓練過程保存到trainRecord.log中

./darknet detector train cfg/voc.data cfg/yolov3.cfg darknet53.conv.74 -gpus 0 | tee -a trainRecord.log

2.建立三個.py檔案(extract_log.py、train_iou_visualization.py、train_loss_visualization.py)

extract_log.py:將trainRecord.log的文字內容進行轉換處理,並分別存成:train_log_loss.txt和train_log_iou.txt

#!/usr/bin/python
#coding=utf-8
#該文件用於提取訓練log,去除不可解析的log後使log文件格式化,生成新的log文件供可視化工具繪圖
import inspect
import os
import random
import sys
def extract_log(log_file, new_log_file, key_word):
    """Write the lines of ``log_file`` that contain ``key_word`` to ``new_log_file``.

    Lines containing 'Syncing' (multi-GPU synchronisation messages) or 'nan'
    (results of a division by zero) are dropped first, so the output can be
    parsed by the plotting scripts.

    Args:
        log_file: Path of the raw training log to read.
        new_log_file: Path of the filtered log to create; an existing file is
            overwritten.
        key_word: Substring a line must contain to be kept.
    """
    # Both files are closed by the ``with`` statement, even if an error occurs,
    # so no explicit close() calls are needed.
    with open(log_file, 'r') as source, open(new_log_file, 'w') as train_log:
        for line in source:
            # Drop multi-GPU sync lines and lines with 'nan' values.
            if 'Syncing' in line or 'nan' in line:
                continue
            if key_word in line:
                train_log.write(line)

# Split the raw training log into one file per curve: lines containing
# 'images' feed the loss plot, lines containing 'IOU' feed the IOU plot.
for target_file, marker in (('train_log_loss.txt', 'images'),
                            ('train_log_iou.txt', 'IOU')):
    extract_log('trainRecord.log', target_file, marker)

train_loss_visualization.py:視覺化loss

#!/usr/bin/python
#coding=utf-8

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Settings: adjust these to match your own train_log_loss.txt.
lines = 400000  # number of lines in train_log_loss.txt
start_ite = 100  # smallest iteration number in train_log_loss.txt
end_ite = 520516  # largest iteration number in train_log_loss.txt
step = 5000  # row stride; controls how dense the plotted curve is
igore = 0  # number of initial iterations (not rows) to skip when the early loss is very large


y_ticks = [3, 500, 1000, 1500, 2000, 2500]  # y-axis tick values; change freely
data_path = 'train_log_loss.txt'  # path of the filtered loss log

####----------------- only the settings above normally need changing
names = ['loss', 'avg', 'rate', 'seconds', 'images']

# Rows to skip among the first `lines` rows: everything that falls inside the
# ignored iterations, and every row that is not the 10th of each `step` rows.
rows_per_iteration = lines * 1.0 / ((end_ite - start_ite) * 1.0)
skip_rows = [x for x in range(lines)
             if (x < rows_per_iteration * igore or x % step != 9)]

try:
    # pandas >= 1.3: 'warn' reports and skips malformed rows, which is what
    # error_bad_lines=False did with its default warn_bad_lines=True.
    result = pd.read_csv(data_path, skiprows=skip_rows, on_bad_lines='warn',
                         names=names)
except TypeError:
    # Older pandas does not know on_bad_lines; use the legacy keyword.
    result = pd.read_csv(data_path, skiprows=skip_rows, error_bad_lines=False,
                         names=names)

# Each field is text such as "1: 1452.9" or " 1452.9 avg" (presumably the
# darknet log format -- verify against your log); the second space-separated
# token is the number.
for name in names:
    result[name] = pd.to_numeric(result[name].str.split(' ').str.get(1))
print(result['loss'].values)

###----------- build the x-axis values (iteration numbers)
x_num = len(result['loss'].values)
if x_num == 0:
    # Without this guard the code below fails with a division by zero.
    raise SystemExit('No rows left in %s after filtering; check lines, step and igore.' % data_path)
tmp = (end_ite - start_ite - igore) / (x_num * 1.0)
x = [i * tmp + start_ite + igore for i in range(x_num)]
#print(x)
print('total = %d\n' % x_num)
print('start = %d, end = %d\n' % (x[0], x[-1]))
###----------

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# Plot the 'avg' column so the data matches the 'avg_loss' legend label
# (the original plotted the raw 'loss' column under that label).
ax.plot(x, result['avg'].values, label='avg_loss')
#ax.plot(x, result['loss'].values, label='loss')
plt.yticks(y_ticks)  # comment this out to let matplotlib choose the y ticks
plt.grid()
ax.legend(loc='best')
ax.set_title('The loss curves')
ax.set_xlabel('batches')
fig.savefig('loss')

train_iou_visualization.py:視覺化iou

#!/usr/bin/python
#coding=utf-8

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Settings: adjust these to match your own train_log_iou.txt.
lines = 5374  # number of lines in train_log_iou.txt
step = 5000  # row stride; controls how dense the plotted curve is
start_ite = 100  # first iteration covered by the log
end_ite = 10000  # last iteration covered by the log
igore = 100  # number of initial iterations (not rows) to skip
data_path = 'train_log_iou.txt'  # path of the filtered IOU log

names = ['Region Avg IOU', 'Class', 'Obj', 'No Obj', '.5_Recall', '.7_Recall', 'count']

# Rows to skip among the first `lines` rows: everything that falls inside the
# ignored iterations, and every row that is not the first of each `step` rows.
rows_per_iteration = lines * 1.0 / ((end_ite - start_ite) * 1.0)
skip_rows = [x for x in range(lines)
             if (x < rows_per_iteration * igore or x % step != 0)]

try:
    # pandas >= 1.3: 'warn' reports and skips malformed rows, which is what
    # error_bad_lines=False did with its default warn_bad_lines=True.
    result = pd.read_csv(data_path, skiprows=skip_rows, on_bad_lines='warn',
                         names=names)
except TypeError:
    # Older pandas does not know on_bad_lines; use the legacy keyword.
    result = pd.read_csv(data_path, skiprows=skip_rows, error_bad_lines=False,
                         names=names)

# Each field is text of the form "<label>: <value>"; keep the value part.
for name in names:
    result[name] = pd.to_numeric(result[name].str.split(': ').str.get(1))

####-------------- build the x-axis values (iteration numbers)
x_num = len(result['Region Avg IOU'].values)
if x_num == 0:
    # Without this guard the code below fails with a division by zero.
    raise SystemExit('No rows left in %s after filtering; check lines, step and igore.' % data_path)
tmp = (end_ite - start_ite - igore) / (x_num * 1.0)
x = [i * tmp + start_ite + igore for i in range(x_num)]
#print(x)
print('total = %d\n' % x_num)
print('start = %d, end = %d\n' % (x[0], x[-1]))
####-------------


fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(x, result['Region Avg IOU'].values, label='Region Avg IOU')
#ax.plot(result['Avg Recall'].values, label='Avg Recall')
plt.grid()
ax.legend(loc='best')
ax.set_title('The Region Avg IOU curves')
ax.set_xlabel('batches')
fig.savefig('iou')

3.成果

loss曲線

IOU曲線

參考資料

YOLO-V3可視化訓練過程中的參數,繪製loss、IOU、avg Recall等的曲線圖

--

--