使用 distilabel 进行图像生成
使用 distilabel 创建合成图像。
此示例展示了如何使用 distilabel 生成图像数据:借助 `ImageGeneration` 任务,可以使用 `InferenceEndpointsImageGeneration` 或 `OpenAIImageGeneration` 作为图像生成模型。
from distilabel.pipeline import Pipeline
from distilabel.steps import KeepColumns
from distilabel.models.image_generation import InferenceEndpointsImageGeneration
from distilabel.steps.tasks import ImageGeneration
from datasets import load_dataset
# Load the personas dataset and keep only its first three rows.
ds = load_dataset("dvilasuero/finepersonas-v0.1-tiny", split="train").select(range(3))

with Pipeline(name="image_generation_pipeline") as pipeline:
    # Image generation model used by the task below.
    ilm = InferenceEndpointsImageGeneration(
        model_id="black-forest-labs/FLUX.1-schnell"
    )

    # The model is passed as `image_generation_model`, matching the complete
    # script shown later in this page. The task's "prompt" input is mapped to
    # the dataset's "persona" column.
    img_generation = ImageGeneration(
        name="flux_schnell",
        image_generation_model=ilm,
        input_mappings={"prompt": "persona"},
    )

    # Restrict the output to the listed columns.
    keep_columns = KeepColumns(columns=["persona", "model_name", "image"])

    # Connect the generation task to the column filter.
    img_generation >> keep_columns
提示的示例图像
一位对 19 世纪美国艺术和辛辛那提当地文化遗产感兴趣的当地艺术史学家和博物馆专业人士。
from distilabel.pipeline import Pipeline
from distilabel.steps import KeepColumns
from distilabel.models.image_generation import OpenAIImageGeneration
from distilabel.steps.tasks import ImageGeneration
from datasets import load_dataset
# Load the personas dataset and keep only its first three rows.
ds = load_dataset("dvilasuero/finepersonas-v0.1-tiny", split="train").select(range(3))

with Pipeline(name="image_generation_pipeline") as pipeline:
    # Image generation model used by the task below, with its generation
    # options supplied through `generation_kwargs`.
    ilm = OpenAIImageGeneration(
        model="dall-e-3",
        generation_kwargs={
            "size": "1024x1024",
            "quality": "standard",
            "style": "natural",
        },
    )

    # The model is passed as `image_generation_model`, matching the complete
    # script shown later in this page. The task's "prompt" input is mapped to
    # the dataset's "persona" column.
    img_generation = ImageGeneration(
        name="dalle-3",
        image_generation_model=ilm,
        input_mappings={"prompt": "persona"},
    )

    # Restrict the output to the listed columns.
    keep_columns = KeepColumns(columns=["persona", "model_name", "image"])

    # Connect the generation task to the column filter.
    img_generation >> keep_columns
提示的示例图像
一位对 19 世纪美国艺术和辛辛那提当地文化遗产感兴趣的当地艺术史学家和博物馆专业人士。
将 Distiset 保存为图像数据集
注意对 `Distiset.transform_columns_to_image` 的调用,它使图像能够直接作为 `Image dataset` 上传。
完整的 pipeline 可以在以下示例中运行。请记住,您需要先安装 `pillow`:`pip install distilabel[vision]`。
image_generation.py
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from datasets import load_dataset
from distilabel.models.image_generation import InferenceEndpointsImageGeneration
from distilabel.pipeline import Pipeline
from distilabel.steps import KeepColumns
from distilabel.steps.tasks import ImageGeneration
# Load the personas dataset and keep only its first three rows.
ds = load_dataset("dvilasuero/finepersonas-v0.1-tiny", split="train").select(range(3))

with Pipeline(name="image_generation_pipeline") as pipeline:
    # Image generation model used by the task below.
    igm = InferenceEndpointsImageGeneration(model_id="black-forest-labs/FLUX.1-schnell")

    # The task's "prompt" input is mapped to the dataset's "persona" column.
    img_generation = ImageGeneration(
        name="flux_schnell",
        image_generation_model=igm,
        input_mappings={"prompt": "persona"},
    )

    # Restrict the output to the listed columns.
    keep_columns = KeepColumns(columns=["persona", "model_name", "image"])

    # Connect the generation task to the column filter.
    img_generation >> keep_columns


if __name__ == "__main__":
    # Run the pipeline on the three selected rows with caching disabled.
    distiset = pipeline.run(use_cache=False, dataset=ds)
    # Save the images as `PIL.Image.Image`
    distiset = distiset.transform_columns_to_image("image")
    distiset.push_to_hub("plaguss/test-finepersonas-v0.1-tiny-flux-schnell")