# split_data.py (1.4 KB)

import os
import random
import shutil
from pathlib import Path

import numpy as np
from sklearn.model_selection import train_test_split

random.seed(0)
  5. val_size = 0.1
  6. test_size = 0.2
  7. postfix = 'jpg'
  8. imgpath = 'VOCdevkit/JPEGImages'
  9. txtpath = 'VOCdevkit/txt'
  10. os.makedirs('images/train', exist_ok=True)
  11. os.makedirs('images/val', exist_ok=True)
  12. os.makedirs('images/test', exist_ok=True)
  13. os.makedirs('labels/train', exist_ok=True)
  14. os.makedirs('labels/val', exist_ok=True)
  15. os.makedirs('labels/test', exist_ok=True)
  16. listdir = np.array([i for i in os.listdir(txtpath) if 'txt' in i])
  17. random.shuffle(listdir)
  18. train, val, test = listdir[:int(len(listdir) * (1 - val_size - test_size))], listdir[int(len(listdir) * (1 - val_size - test_size)):int(len(listdir) * (1 - test_size))], listdir[int(len(listdir) * (1 - test_size)):]
  19. print(f'train set size:{len(train)} val set size:{len(val)} test set size:{len(test)}')
  20. for i in train:
  21. shutil.copy('{}/{}.{}'.format(imgpath, i[:-4], postfix), 'images/train/{}.{}'.format(i[:-4], postfix))
  22. shutil.copy('{}/{}'.format(txtpath, i), 'labels/train/{}'.format(i))
  23. for i in val:
  24. shutil.copy('{}/{}.{}'.format(imgpath, i[:-4], postfix), 'images/val/{}.{}'.format(i[:-4], postfix))
  25. shutil.copy('{}/{}'.format(txtpath, i), 'labels/val/{}'.format(i))
  26. for i in test:
  27. shutil.copy('{}/{}.{}'.format(imgpath, i[:-4], postfix), 'images/test/{}.{}'.format(i[:-4], postfix))
  28. shutil.copy('{}/{}'.format(txtpath, i), 'labels/test/{}'.format(i))