Skip to content

Instantly share code, notes, and snippets.

@noamsgl
Last active June 22, 2022 15:45
Show Gist options
  • Save noamsgl/19af16a3f3aaa3b314faa4fef29a7fa9 to your computer and use it in GitHub Desktop.
Save noamsgl/19af16a3f3aaa3b314faa4fef29a7fa9 to your computer and use it in GitHub Desktop.
Preprocessing steps for Blood Cells Detection Dataset
import os, sys, random
import xml.etree.ElementTree as ET
from glob import glob
import pandas as pd
from shutil import copyfile
annotations = glob('BCCD_Dataset/BCCD/Annotations/*.xml')
df = []
for file in annotations:
#filename = file.split('/')[-1].split('.')[0] + '.jpg'
#filename = str(cnt) + '.jpg'
filename = file.split('\\')[-1]
filename =filename.split('.')[0] + '.jpg'
row = []
parsedXML = ET.parse(file)
cell_id = 0
for node in parsedXML.getroot().iter('object'):
blood_cells = node.find('name').text
xmin = int(node.find('bndbox/xmin').text)
xmax = int(node.find('bndbox/xmax').text)
ymin = int(node.find('bndbox/ymin').text)
ymax = int(node.find('bndbox/ymax').text)
row = [filename, cell_id, blood_cells, xmin, xmax, ymin, ymax]
df.append(row)
cell_id += 1
data = pd.DataFrame(df, columns=['filename', 'cell_id', 'cell_type', 'xmin', 'xmax', 'ymin', 'ymax'])
data['image_id'] = data['filename'].apply(lambda x: int(x[-7:-4]))
data[['filename', 'image_id', 'cell_id', 'cell_type', 'xmin', 'xmax', 'ymin', 'ymax']].to_csv('bccd.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment