Weisen Pan 4ec0a23e73 Edge Federated Learning for Improved Training Efficiency
Change-Id: Ic4e43992e1674946cb69e0221659b0261259196c
2024-09-18 18:39:43 -07:00

2.4 KiB

import os
import pickle
from glob import glob

import pandas as pd
import torch
from PIL import Image
from sklearn.model_selection import train_test_split
from torch import nn
from torch.utils.data import Dataset
# torchvision ships no `apply_transformations` module; alias `transforms`
# so the name the file originally bound still resolves.
from torchvision import transforms as apply_transformations
# Load the metadata table for the skin-lesion dataset and preview its first
# rows as a quick sanity check that the CSV was read.
info_mapdata = pd.read_csv('dataset_hub/skin_dataset/data/skin_info_map.csv')
print(info_mapdata.head())
# Mapping lesion abbreviations (values of the `dx` column) to their full names.
# NOTE(review): only the 'df' entry is present in the visible source; any other
# abbreviation looked up through `lesion_labels.get` yields None -- confirm
# whether further entries were intended.
lesion_labels = {
    'df': 'Dermatofibroma',
}
# Combine images from both dataset parts into one dictionary keyed by the
# file name without its extension (which is what `image_id` is mapped against).
# NOTE(review): the `for` clause of this comprehension is absent from the
# visible source. The glob pattern below (any sub-directory of the data
# folder, *.jpg files) is a reconstruction -- confirm the real image location
# and extension.
image_paths = {
    os.path.splitext(os.path.basename(img))[0]: img
    for img in glob(os.path.join('dataset_hub/skin_dataset/data', '*', '*.jpg'))
}
# Mapping the image paths and cell types to the DataFrame.
# Both lookups go through `dict.get`, so an `image_id` or `dx` value with no
# entry produces a missing value instead of raising.
info_mapdata['image_path'] = info_mapdata['image_id'].map(image_paths.get)
info_mapdata['cell_type'] = info_mapdata['dx'].map(lesion_labels.get)
# Integer-encode the cell type. `Categorical.codes` is the pandas attribute
# holding the per-row category index; rows whose cell type is missing get -1.
info_mapdata['label'] = pd.Categorical(info_mapdata['cell_type']).codes
# Display the count of each cell type and their encoded labels
print(info_mapdata['cell_type'].value_counts())
print(info_mapdata['label'].value_counts())
# Custom Dataset class for PyTorch
class SkinDataset(Dataset):
    """Map-style dataset backed by a DataFrame of image paths and labels.

    NOTE(review): the visible source retained only the two attribute
    assignments, ``__len__`` and the transformation branch. The ``__init__``
    signature and the image/label loading in ``__getitem__`` are
    reconstructed from the ``image_path`` and ``label`` columns this file
    adds to the metadata frame -- confirm against the original code.
    """

    def __init__(self, dataframe, apply_transformation=None):
        """Store the backing frame and the optional per-image callable.

        Args:
            dataframe: Table with one row per sample.
            apply_transformation: Optional callable applied to each loaded
                image; skipped when falsy.
        """
        self.dataframe = dataframe
        self.apply_transformation = apply_transformation

    def __len__(self):
        """Return the number of rows in the backing frame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        NOTE(review): presumes ``image_path`` holds a readable file path and
        ``label`` an integer class index -- verify against the caller.
        """
        # Positional lookup, so it does not depend on the frame's index labels.
        row = self.dataframe.iloc[idx]
        img = Image.open(row['image_path'])

        if self.apply_transformation:
            img = self.apply_transformation(img)

        label = torch.tensor(int(row['label']))
        return img, label

# Splitting the data into train and test sets
# NOTE(review): the `train_test_split` call is absent from the visible source,
# which left `train_data` / `test_data` undefined at this point. The 80/20
# ratio and the fixed seed below are placeholders -- confirm the intended
# split parameters.
train_data, test_data = train_test_split(
    info_mapdata, test_size=0.2, random_state=42
)
# Drop the shuffled original index so each split is numbered 0..n-1.
train_data = train_data.reset_index(drop=True)
test_data = test_data.reset_index(drop=True)
41# Save the train and test data to pickle files