SegVol is a universal and interactive model for volumetric medical image segmentation. It accepts point, box, and text prompts and outputs volumetric segmentation masks. Trained on 90k unlabeled Computed Tomography (CT) volumes and 6k labeled CTs, this foundation model supports the segmentation of over 200 anatomical categories.
Test script
from transformers import AutoModel, AutoTokenizer
import torch
import os
# get device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# load model
clip_tokenizer = AutoTokenizer.from_pretrained("BAAI/SegVol")
model = AutoModel.from_pretrained("BAAI/SegVol", trust_remote_code=True, test_mode=True)
model.model.text_encoder.tokenizer = clip_tokenizer
model.eval()
model.to(device)
print('model load done')
# set case path
ct_path = 'path/to/Case_image_00001_0000.nii.gz'
gt_path = 'path/to/Case_label_00001.nii.gz'
# set categories, corresponding to the unique values (1, 2, 3, 4, ...) in the ground truth mask
categories = ["liver", "kidney", "spleen", "pancreas"]
# generate npy data format
ct_npy, gt_npy = model.processor.preprocess_ct_gt(ct_path, gt_path, category=categories)
# IF you have downloaded our 25 processed datasets, you can skip to here with the processed ct_npy, gt_npy files
# go through zoom_transform to generate zoomout & zoomin views
data_item = model.processor.zoom_transform(ct_npy, gt_npy)
# add batch dim manually
data_item['image'], data_item['label'], data_item['zoom_out_image'], data_item['zoom_out_label'] = \
    data_item['image'].unsqueeze(0).to(device), data_item['label'].unsqueeze(0).to(device), data_item['zoom_out_image'].unsqueeze(0).to(device), data_item['zoom_out_label'].unsqueeze(0).to(device)
# take liver as the example
cls_idx = 0
# text prompt
text_prompt = [categories[cls_idx]]
# point prompt
point_prompt, point_prompt_map = model.processor.point_prompt_b(data_item['zoom_out_label'][0][cls_idx], device=device) # inputs w/o batch dim, outputs w/ batch dim
# bbox prompt
bbox_prompt, bbox_prompt_map = model.processor.bbox_prompt_b(data_item['zoom_out_label'][0][cls_idx], device=device) # inputs w/o batch dim, outputs w/ batch dim
print('prompt done')
# segvol test forward
# use_zoom: use zoom-out-zoom-in
# point_prompt_group: use point prompt
# bbox_prompt_group: use bbox prompt
# text_prompt: use text prompt
logits_mask = model.forward_test(image=data_item['image'],
                                 zoomed_image=data_item['zoom_out_image'],
                                 # point_prompt_group=[point_prompt, point_prompt_map],
                                 bbox_prompt_group=[bbox_prompt, bbox_prompt_map],
                                 text_prompt=text_prompt,
                                 use_zoom=True
                                 )
# cal dice score
dice = model.processor.dice_score(logits_mask[0][0], data_item['label'][0][cls_idx], device)
print(dice)
# save prediction as nii.gz file
save_path = './Case_preds_00001.nii.gz'
model.processor.save_preds(ct_path, save_path, logits_mask[0][0],
                           start_coord=data_item['foreground_start_coord'],
                           end_coord=data_item['foreground_end_coord'])
print('done')
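The point_prompt_group argument is commented out in the forward_test call above; to segment from a point prompt instead of a bounding box, pass it in its place. A minimal variant of the same call, reusing the prompts prepared earlier (a sketch of one prompt combination, not the only valid one):
# point prompt instead of bbox prompt
logits_mask = model.forward_test(image=data_item['image'],
                                 zoomed_image=data_item['zoom_out_image'],
                                 point_prompt_group=[point_prompt, point_prompt_map],
                                 text_prompt=text_prompt,
                                 use_zoom=True
                                 )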
Training script
from transformers import AutoModel, AutoTokenizer
import torch
import os
# get device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# load model
clip_tokenizer = AutoTokenizer.from_pretrained("BAAI/SegVol")
model = AutoModel.from_pretrained("BAAI/SegVol", trust_remote_code=True, test_mode=False)
model.model.text_encoder.tokenizer = clip_tokenizer
model.train()
model.to(device)
print('model load done')
# set case path
ct_path = 'path/to/Case_image_00001_0000.nii.gz'
gt_path = 'path/to/Case_label_00001.nii.gz'
# set categories, corresponding to the unique values (1, 2, 3, 4, ...) in the ground truth mask
categories = ["liver", "kidney", "spleen", "pancreas"]
# generate npy data format
ct_npy, gt_npy = model.processor.preprocess_ct_gt(ct_path, gt_path, category=categories)
# IF you have downloaded our 25 processed datasets, you can skip to here with the processed ct_npy, gt_npy files
# go through train transform
data_item = model.processor.train_transform(ct_npy, gt_npy)
# training example
image, gt3D = data_item["image"].unsqueeze(0).to(device), data_item["label"].unsqueeze(0).to(device) # add batch dim manually
loss_step_avg = 0
for cls_idx in range(len(categories)):
    # optimizer.zero_grad()
    organs_cls = categories[cls_idx]
    labels_cls = gt3D[:, cls_idx]
    loss = model.forward_train(image, train_organs=organs_cls, train_labels=labels_cls)
    loss_step_avg += loss.item()
    loss.backward()
    # optimizer.step()
loss_step_avg /= len(categories)
print(f'AVG loss {loss_step_avg}')
# save ckpt
model.save_pretrained('./ckpt')
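The optimizer calls are commented out above because the snippet processes a single case. For an actual training run you would create an optimizer and wrap the per-category loop in epochs; a minimal sketch, assuming AdamW with an illustrative learning rate and epoch count (these choices are assumptions, not the released training recipe):
from torch import optim

optimizer = optim.AdamW(model.parameters(), lr=1e-5)  # hypothetical optimizer and learning rate
num_epochs = 10  # illustrative
for epoch in range(num_epochs):
    loss_step_avg = 0
    for cls_idx in range(len(categories)):
        optimizer.zero_grad()
        loss = model.forward_train(image,
                                   train_organs=categories[cls_idx],
                                   train_labels=gt3D[:, cls_idx])
        loss.backward()
        optimizer.step()
        loss_step_avg += loss.item()
    loss_step_avg /= len(categories)
    print(f'epoch {epoch} AVG loss {loss_step_avg}')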
Start with M3D-Seg dataset
We have released 25 open-source datasets (M3D-Seg) for training SegVol, and the preprocessed data have been uploaded to ModelScope and HuggingFace. You can use the following script to easily load cases and insert them into the Test script and Training script above.
import json, os
M3D_Seg_path = 'path/to/M3D-Seg'
# select a dataset
dataset_code = '0000'
# load json dict
json_path = os.path.join(M3D_Seg_path, dataset_code, dataset_code + '.json')
with open(json_path, 'r') as f:
    dataset_dict = json.load(f)
# get a case
ct_path = os.path.join(M3D_Seg_path, dataset_dict['train'][0]['image'])
gt_path = os.path.join(M3D_Seg_path, dataset_dict['train'][0]['label'])
# get categories
categories_dict = dataset_dict['labels']
categories = [x for _, x in categories_dict.items() if x != "background"]
# load npy data format
ct_npy, gt_npy = model.processor.load_uniseg_case(ct_path, gt_path)
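The returned ct_npy and gt_npy are already in the processed format, so they can stand in for the preprocess_ct_gt output in the scripts above. For example, continuing down the test path (assuming the model is loaded as in the Test script):
# continue exactly as in the Test script, skipping preprocess_ct_gt
data_item = model.processor.zoom_transform(ct_npy, gt_npy)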