DiT-Knowledge-Localization/llava_eval.py at main · ArmanZarei/DiT-Knowledge-Localization · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import argparse
import os
import glob
import json
import torch
from transformers import AutoProcessor, LlavaForConditionalGeneration
from tqdm import tqdm
from PIL import Image
from typing import List
from dataset import get_knowledge_dataset_class_and_get_list_fn


def extract_images_from_directory(directory_path: str) -> List[str]:
    image_paths = []
    for ext in ['png', 'jpg', 'jpeg']:
        image_paths.extend(glob.glob(os.path.join(directory_path, f'*.{ext}')))

    return image_paths


class LLaVAEvaluator:
    def __init__(self, processor: AutoProcessor, model: LlavaForConditionalGeneration):
        self.processor = processor
        self.model = model

    def eval_image_text_pair(self, image: Image, knowledge: str, knowledge_type: str):
        assert knowledge_type in ["style", "place", "copyright", "animal", "celebrity", "safety"], f"Invalid knowledge type: {knowledge_type}"

        if knowledge_type == "style":
            print("[Info] Using artistic style knowledge type for questioning")
            question = f"Is the image in the artistic style of {knowledge} (artist)? just say yes or no"
        else:
            print(f"[Info] Using {knowledge_type} knowledge type for questioning")
            question = f"Is {knowledge} in the image? just say yes or no"

        conversation = [{
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {"type": "image"},
                ],
        },]
        prompt = self.processor.apply_chat_template(conversation, add_generation_prompt=True)
        inputs = self.processor(images=image, text=prompt, return_tensors='pt').to(0, torch.float16)

        output = self.model.generate(**inputs, max_new_tokens=200, do_sample=False)

        output = self.processor.decode(output[0], skip_special_tokens=True).split("ASSISTANT: ")[-1].strip().lower()
        if output == "yes":
            return True
        elif output == "no":
            return False
        else:
            return None


    def eval_knowledge_directory(self, directory_path: str, knowledge: str, knowledge_type: str):
        """Evaluate a directory of images with a specific knowledge."""

        output_file_path = os.path.join(directory_path, 'llava_results.json')
        if os.path.exists(output_file_path):
            print(f"[Warning] LLaVA evaluation results already exists at {output_file_path}. Skipping...")
            return

        image_paths = extract_images_from_directory(directory_path)

        if len(image_paths) == 0:
            print(f"[Warning] No images found in {directory_path}. Skipping")
            return

        print(f"Found {len(image_paths)} images. Running LLaVA evaluation for {directory_path} [evaluation knowledge={knowledge}]...")
        res = {}
        for image_path in tqdm(image_paths, desc="LLaVA evaluation for images"):
            res[os.path.basename(image_path)] = self.eval_image_text_pair(Image.open(image_path), knowledge, knowledge_type)

        with open(output_file_path, 'w') as f:
            json.dump({"knowledge": knowledge, "results": res}, f, indent=4)


    def eval_no_knowledge_directory(self, directory_path: str, knowledge_list: List[str], knowledge_type: str):
        """Evaluate a directory of images with no knowledge (evaluation across all knowledge list)."""

        output_file_path = os.path.join(directory_path, 'llava_results.json')
        if os.path.exists(output_file_path):
            print(f"[Warning] LLaVA evaluation results already exists at {output_file_path}. Skipping...")
            return

        image_paths = extract_images_from_directory(directory_path)

        if len(image_paths) == 0:
            print(f"[Warning] No images found in {directory_path}. Skipping")
            return

        print(f"Found {len(image_paths)} images. Running LLaVA evaluation for {directory_path} [evaluation knowledge list length = # {len(knowledge_list)}]...")
        res = {}
        for knowledge in knowledge_list:
            res[knowledge] = {}
            for image_path in tqdm(image_paths, desc=f"LLaVA evaluation for images with knowledge: {knowledge}"):
                res[knowledge][os.path.basename(image_path)] = self.eval_image_text_pair(Image.open(image_path), knowledge, knowledge_type)

        with open(output_file_path, 'w') as f:
            json.dump(res, f, indent=4)


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--results_path",
        type=str,
        required=True
    )
    parser.add_argument(
        "--eval_type",
        type=str,
        required=True,
        choices=["eval_all_knowledge_directories", "eval_a_single_no_knowledge_directory"],
    )
    parser.add_argument(
        "--model_name",
        type=str,
        required=True,
        choices=["pixart", "flux", "sana"],
    )
    parser.add_argument(
        "--knowledge_type",
        type=str,
        required=True,
        choices=["style", "place", "copyright", "animal", "celebrity", "safety"]
    )
    args = parser.parse_args()

    assert os.path.exists(args.results_path), f"Results path {args.results_path} does not exist"

    return args


if __name__ == "__main__":
    args = parse_args()

    llava_evaluator = LLaVAEvaluator(
        AutoProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf", use_fast=True),
        LlavaForConditionalGeneration.from_pretrained("llava-hf/llava-1.5-7b-hf", torch_dtype=torch.float16, device_map="auto")
    )

    _, get_knowledge_list_fn = get_knowledge_dataset_class_and_get_list_fn(args.knowledge_type, args.model_name)

    if args.eval_type == "eval_all_knowledge_directories":
        knowledge_list = get_knowledge_list_fn()
        for knowledge in knowledge_list:
            llava_evaluator.eval_knowledge_directory(os.path.join(args.results_path, knowledge), knowledge, args.knowledge_type)
    elif args.eval_type == "eval_a_single_no_knowledge_directory":
        llava_evaluator.eval_no_knowledge_directory(args.results_path, get_knowledge_list_fn(), args.knowledge_type)

    print("Done!")