from PIL import Image
import requests
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("BAAI/Emu2-Chat")
model = AutoModelForCausalLM.from_pretrained(
"BAAI/Emu2-Chat",
torch_dtype=torch.bfloat16,
low_cpu_mem_usage=True,
trust_remote_code=True).to('cuda').eval()
# `[<IMG_PLH>]` is the image placeholder which will be replaced by image embeddings. # the number of `[<IMG_PLH>]` should be equal to the number of input images
query = '[<IMG_PLH>]Describe the image in details:'
image = Image.open(requests.get('https://github.com/baaivision/Emu/Emu2/examples/blue_black_1_top_left.jpg?raw=true',stream=True).raw).convert('RGB')
inputs = model.build_input_ids(
text=[query],
tokenizer=tokenizer,
image=[image]
)
with torch.no_grad():
outputs = model.generate(
input_ids=inputs["input_ids"],
attention_mask=inputs["attention_mask"],
image=inputs["image"].to(torch.bfloat16),
max_new_tokens=64,
length_penalty=-1)
output_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)
Interleaved image and text
from PIL import Image
import requests
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("BAAI/Emu2-Chat")
model = AutoModelForCausalLM.from_pretrained(
"BAAI/Emu2-Chat",
torch_dtype=torch.bfloat16,
low_cpu_mem_usage=True,
trust_remote_code=True).to('cuda').eval()
# `[<IMG_PLH>]` is the image placeholder which will be replaced by image embeddings. # the number of `[<IMG_PLH>]` should be equal to the number of input images
query = "[<IMG_PLH>][red, white, 3, bottom left].[<IMG_PLH>][yellow, white, 2, top left].[<IMG_PLH>][green, black, 4, bottom right][<IMG_PLH>]"
images = [
Image.open(requests.get('https://github.com/baaivision/Emu/Emu2/examples/red_white_3_bottom_left.jpg?raw=true',stream=True).raw).convert('RGB'),
Image.open(requests.get('https://github.com/baaivision/Emu/Emu2/examples/yellow_white_2_top_right.jpg?raw=true',stream=True).raw).convert('RGB'),
Image.open(requests.get('https://github.com/baaivision/Emu/Emu2/examples/green_black_4_bottom_right.jpg?raw=true',stream=True).raw).convert('RGB'),
Image.open(requests.get('https://github.com/baaivision/Emu/Emu2/examples/blue_black_1_top_left.jpg?raw=true',stream=True).raw).convert('RGB'),
]
inputs = model.build_input_ids(
text=[query],
tokenizer=tokenizer,
image=images
)
with torch.no_grad():
outputs = model.generate(
input_ids=inputs["input_ids"],
attention_mask=inputs["attention_mask"],
image=inputs["image"].to(torch.bfloat16),
max_new_tokens=64,
length_penalty=-1)
output_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)
Multi GPU
from PIL import Image
import requests
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from accelerate import init_empty_weights, infer_auto_device_map, load_checkpoint_and_dispatch
tokenizer = AutoTokenizer.from_pretrained("BAAI/Emu2-Chat")
with init_empty_weights():
model = AutoModelForCausalLM.from_pretrained(
"BAAI/Emu2-Chat",
torch_dtype=torch.bfloat16,
low_cpu_mem_usage=True,
trust_remote_code=True)
device_map = infer_auto_device_map(model, max_memory={0:'38GiB',1:'38GiB',}, no_split_module_classes=['Block','LlamaDecoderLayer'])
# input and output logits should be on same device
device_map["model.decoder.lm.lm_head"] = 0
model = load_checkpoint_and_dispatch(
model,
'local/path/to/hf/version/Emu2-Chat/model',
device_map=device_map).eval()
# `[<IMG_PLH>]` is the image placeholder which will be replaced by image embeddings. # the number of `[<IMG_PLH>]` should be equal to the number of input images
query = '[<IMG_PLH>]Describe the image in details:'
image = Image.open(requests.get('https://github.com/baaivision/Emu/Emu2/examples/blue_black_1_top_left.jpg?raw=true',stream=True).raw).convert('RGB')
inputs = model.build_input_ids(
text=[query],
tokenizer=tokenizer,
image=[image]
)
with torch.no_grad():
outputs = model.generate(
input_ids=inputs["input_ids"],
attention_mask=inputs["attention_mask"],
image=inputs["image"].to(torch.bfloat16),
max_new_tokens=64,
length_penalty=-1)
output_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)
Interleaved image and text
from PIL import Image
import requests
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from accelerate import init_empty_weights, infer_auto_device_map, load_checkpoint_and_dispatch
tokenizer = AutoTokenizer.from_pretrained("BAAI/Emu2-Chat")
with init_empty_weights():
model = AutoModelForCausalLM.from_pretrained(
"BAAI/Emu2-Chat",
torch_dtype=torch.bfloat16,
low_cpu_mem_usage=True,
trust_remote_code=True)
device_map = infer_auto_device_map(model, max_memory={0:'38GiB',1:'38GiB',}, no_split_module_classes=['Block','LlamaDecoderLayer'])
# input and output logits should be on same device
device_map["model.decoder.lm.lm_head"] = 0
model = load_checkpoint_and_dispatch(
model,
'local/path/to/hf/version/Emu2-Chat/model',
device_map=device_map).eval()
# `[<IMG_PLH>]` is the image placeholder which will be replaced by image embeddings. # the number of `[<IMG_PLH>]` should be equal to the number of input images
query = "[<IMG_PLH>][red, white, 3, bottom left].[<IMG_PLH>][yellow, white, 2, top left].[<IMG_PLH>][green, black, 4, bottom right][<IMG_PLH>]"
images = [
Image.open(requests.get('https://github.com/baaivision/Emu/Emu2/examples/red_white_3_bottom_left.jpg?raw=true',stream=True).raw).convert('RGB'),
Image.open(requests.get('https://github.com/baaivision/Emu/Emu2/examples/yellow_white_2_top_right.jpg?raw=true',stream=True).raw).convert('RGB'),
Image.open(requests.get('https://github.com/baaivision/Emu/Emu2/examples/green_black_4_bottom_right.jpg?raw=true',stream=True).raw).convert('RGB'),
Image.open(requests.get('https://github.com/baaivision/Emu/Emu2/examples/blue_black_1_top_left.jpg?raw=true',stream=True).raw).convert('RGB'),
]
inputs = model.build_input_ids(
text=[query],
tokenizer=tokenizer,
image=images
)
with torch.no_grad():
outputs = model.generate(
input_ids=inputs["input_ids"],
attention_mask=inputs["attention_mask"],
image=inputs["image"].to(torch.bfloat16),
max_new_tokens=64,
length_penalty=-1)
output_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)
from PIL import Image
import requests
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("BAAI/Emu2-Chat")
model = AutoModelForCausalLM.from_pretrained(
"BAAI/Emu2-Chat",
load_in_4bit=True,
trust_remote_code=True,
bnb_4bit_compute_dtype=torch.float16).eval()
query = '[<IMG_PLH>]Describe the image in details:'
image = Image.open(requests.get('https://github.com/baaivision/Emu/Emu2/examples/blue_black_1_top_left.jpg?raw=true',stream=True).raw).convert('RGB')
inputs = model.build_input_ids(
text=[query],
tokenizer=tokenizer,
image=[image]
)
with torch.no_grad():
outputs = model.generate(
input_ids=inputs["input_ids"],
attention_mask=inputs["attention_mask"],
image=inputs["image"].to(torch.float16), # should be torch.float16
max_new_tokens=64,
length_penalty=-1)
output_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)
Citation
If you find Emu2 useful for your research and applications, please consider starring this repository and citing:
@article{Emu2,
title={Generative Multimodal Models are In-Context Learners},
author={Quan Sun and Yufeng Cui and Xiaosong Zhang and Fan Zhang and Qiying Yu and Zhengxiong Luo and Yueze Wang and Yongming Rao and Jingjing Liu and Tiejun Huang and Xinlong Wang},
publisher={arXiv preprint arXiv:2312.13286},
year={2023},
}
Runs of BAAI Emu2-Chat on huggingface.co
192
Total runs
2
24-hour runs
-2
3-day runs
8
7-day runs
-3
30-day runs
More Information About Emu2-Chat huggingface.co Model
Emu2-Chat huggingface.co
Emu2-Chat huggingface.co is an AI model on huggingface.co that provides Emu2-Chat's model effect (), which can be used instantly with this BAAI Emu2-Chat model. huggingface.co supports a free trial of the Emu2-Chat model, and also provides paid use of the Emu2-Chat. Support call Emu2-Chat model through api, including Node.js, Python, http.
Emu2-Chat huggingface.co is an online trial and call api platform, which integrates Emu2-Chat's modeling effects, including api services, and provides a free online trial of Emu2-Chat, you can try Emu2-Chat online for free by clicking the link below.
Emu2-Chat is an open source model from GitHub that offers a free installation service, and any user can find Emu2-Chat on GitHub to install. At the same time, huggingface.co provides the effect of Emu2-Chat install, users can directly use Emu2-Chat installed effect in huggingface.co for debugging and trial. It also supports api for free installation.