사본 이미지 분석

Case 1. 과도한 확대

음식을 담은 그릇이 잘려있는 빙수(https://acceptmenu.cdn.baemin.com/acceptmenu-wmq4/i/2024/4/2/01htd8bmxrtbv1heccg2b23h2y.jpeg)
음식이 이미지 상단 프레임에 걸쳐서 잘려있는 돈가츠(https://acceptmenu.cdn.baemin.com/acceptmenu-wmq4/i/2024/4/2/01htd6p190ptx6jtj7nn96q2da.jpeg)
과도하게 확대되어 음식의 영역이 잘려있는 닭갈비(https://acceptmenu.cdn.baemin.com/acceptmenu-wmq4/i/2024/4/2/01htd6350w84ntrndyecs2jyf2.jpeg)
음식의 우측 영역이 잘려있는 볶음밥(https://acceptmenu.cdn.baemin.com/acceptmenu-wmq4/i/2024/4/2/01htd4jgg0zhh0k2tm6c1zwg7c.jpeg)

Case 2. 저화질

이미지의 전체적인 화질이 낮은 총각김치(https://acceptmenu.cdn.baemin.com/acceptmenu-wmq4/i/2024/4/2/01htdxzads84yc1a792w13bk0n.jpeg)
이미지의 전체적인 화질이 낮은 등갈비찜(https://acceptmenu.cdn.baemin.com/acceptmenu-wmq4/i/2024/4/2/01htdr2hmkk5dfy6t1qtkejq68.jpeg)
이미지의 전체적인 화질이 낮은 녹차(https://acceptmenu.cdn.baemin.com/acceptmenu-wmq4/i/2024/4/2/01htddq36cw99h706n7y6fd016.jpeg)
이미지의 전체적인 화질이 낮은 휘낭시에(https://acceptmenu.cdn.baemin.com/acceptmenu-wmq4/i/2024/4/2/01htdagfxr8k0kc1zbb62sx12j.jpeg)

{
  "apim-request-id": "3b01cb13-7576-4004-8185-8d416fadf3ed",
  "content-length": "288",
  "content-type": "application/json; charset=utf-8",
  "modelVersion": "2023-10-01",
  "metadata": {
    "width": 1280,
    "height": 960
  },
  "objectsResult": {
    "values": [
      {
        "boundingBox": {
          "x": 832,
          "y": 15,
          "w": 408,
          "h": 334
        },
        "tags": [
          {
            "name": "Vegetable",
            "confidence": 0.51
          }
        ]
      },
      {
        "boundingBox": {
          "x": 144,
          "y": 75,
          "w": 1000,
          "h": 880
        },
        "tags": [
          {
            "name": "Food",
            "confidence": 0.709
          }
        ]
      }
    ]
  }
}

{
  "apim-request-id": "73f48582-75f6-48ab-8b5c-a14a8ceaa476",
  "content-length": "291",
  "content-type": "application/json; charset=utf-8",
  "modelVersion": "2023-10-01",
  "metadata": {
    "width": 3464,
    "height": 3464
  },
  "objectsResult": {
    "values": [
      {
        "boundingBox": {
          "x": 710,
          "y": 133,
          "w": 2560,
          "h": 3237
        },
        "tags": [
          {
            "name": "Food",
            "confidence": 0.795
          }
        ]
      },
      {
        "boundingBox": {
          "x": 609,
          "y": 1865,
          "w": 1570,
          "h": 1473
        },
        "tags": [
          {
            "name": "Food",
            "confidence": 0.715
          }
        ]
      }
    ]
  }
}

해당 이미지는 음식이 프레임을 너무 꽉 채운 사진이어서, 이미지 주변에 여백을 추가한 후 다시 시도함.

{
  "apim-request-id": "fa47f938-70f8-453a-9f2a-75c33bb924a2",
  "content-length": "193",
  "content-type": "application/json; charset=utf-8",
  "modelVersion": "2023-10-01",
  "metadata": {
    "width": 1262,
    "height": 1004
  },
  "objectsResult": {
    "values": [
      {
        "boundingBox": {
          "x": 100,
          "y": 102,
          "w": 1052,
          "h": 767
        },
        "tags": [
          {
            "name": "Food",
            "confidence": 0.779
          }
        ]
      }
    ]
  }
}

{
  "apim-request-id": "2669dbd3-9f7b-469b-9605-43cdb87915e4",
  "content-length": "193",
  "content-type": "application/json; charset=utf-8",
  "modelVersion": "2023-10-01",
  "metadata": {
    "width": 1280,
    "height": 2276
  },
  "objectsResult": {
    "values": [
      {
        "boundingBox": {
          "x": 78,
          "y": 584,
          "w": 1195,
          "h": 1235
        },
        "tags": [
          {
            "name": "Food",
            "confidence": 0.745
          }
        ]
      }
    ]
  }
}

테스트 결과

import json
from promptflow import tool
from promptflow.client import PFClient
from azure.ai.vision.imageanalysis import ImageAnalysisClient
from azure.ai.vision.imageanalysis.models import VisualFeatures
from azure.core.credentials import AzureKeyCredential

# The inputs section will change based on the arguments of the tool function, after you save the code
# Adding type to arguments and return value will help the system show the types properly
# Please update the function name/signature per need
@tool
def image_processing(image_url: str) -> dict:
    """Detect food objects in an image and judge whether it is well framed.

    The image is sent to Azure AI Vision object detection. Every detected
    object is passed to ``position_image_food_overlap`` with a margin ratio
    of 0.05; objects reported as food are collected together with the image
    edges they come too close to.

    Args:
        image_url: Publicly reachable URL of the image to analyze.

    Returns:
        A dict with the keys ``image_url``, ``image_height``, ``image_width``,
        ``is_good_food_image``, ``food_total``, ``food_objects`` and
        ``message``. ``is_good_food_image`` is False when no food object was
        found or when any food object is too close to an image edge.

    Raises:
        ValueError: If the "object-detection" connection does not expose
            ``endpoint`` and ``key``.
    """
    # Endpoint and key are read from the promptflow connection.
    pf_client = PFClient()
    pf_connection = pf_client.connections.get(name="object-detection")
    try:
        endpoint = pf_connection.endpoint
        key = pf_connection.key
    except (AttributeError, KeyError) as err:
        # Raise instead of exit(): terminating the interpreter from inside a
        # tool hides the real cause from the caller.
        raise ValueError(
            "Connection 'object-detection' must define 'endpoint' and 'key'."
        ) from err

    # Create an Image Analysis client for synchronous operations
    client = ImageAnalysisClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key)
    )

    # Request object detection only. This is a synchronous (blocking) call.
    result = client.analyze_from_url(
        image_url=image_url,
        visual_features=[VisualFeatures.OBJECTS]
    ).as_dict()

    print(result)

    image_height = result["metadata"]["height"]
    image_width = result["metadata"]["width"]

    response_data = {
        "image_url": image_url,
        "image_height": image_height,
        "image_width": image_width,
        "is_good_food_image": True,
        "food_total": 0,
        "food_objects": [],
        "message": ""
    }

    # A dict is used as an ordered set: edge names are de-duplicated while
    # keeping first-seen order, so the message text is stable between runs.
    edges_hit = {}

    # Tolerate a missing or null "objectsResult" section.
    objects_result = result.get("objectsResult") or {}
    for detected in objects_result.get("values", []):
        is_food, detected_position = position_image_food_overlap(
            detected, image_width, image_height, 0.05
        )
        if not is_food:
            continue
        response_data["food_objects"].append(detected)
        edges_hit.update(dict.fromkeys(detected_position))

    food_total = len(response_data["food_objects"])
    response_data["food_total"] = food_total

    if food_total == 0:
        response_data["message"] = "음식 이미지가 존재하지 않거나 너무 확대되어진 이미지입니다."
        response_data["is_good_food_image"] = False
    elif edges_hit:
        detected_position_str = ", ".join(edges_hit)
        response_data["is_good_food_image"] = False
        response_data["message"] = f"음식 이미지가 {detected_position_str} 프레임에 너무 가깝게 위치하고 있습니다."
    else:
        response_data["message"] = "음식 이미지가 정상적으로 존재합니다."

    return response_data


# Checks whether a detected object is tagged "Food" and, if so, reports which image edges (left, right, top, bottom) its bounding box (x, y, w, h) comes within `ratio` of image_width / image_height (e.g. ratio=0.05 means 5%). Returns (is_food, detected_position).
def position_image_food_overlap(object: dict, image_width: int, image_height: int, ratio: float) -> tuple:
    """Report whether a detected object is food and which image edges it crowds.

    Only the first tag of the object is inspected. For an object whose first
    tag is named "Food", each side of its bounding box is compared against a
    margin of ``ratio`` times the image width (left/right) or image height
    (top/bottom).

    Args:
        object: One detected object with a "tags" list and a "boundingBox"
            dict holding "x", "y", "w" and "h".
        image_width: Width of the analyzed image.
        image_height: Height of the analyzed image.
        ratio: Margin as a fraction of the image size, e.g. 0.05.

    Returns:
        A tuple ``(is_food, detected_position)``. ``detected_position`` lists
        the crowded edges in the order "좌측", "우측", "상단", "하단"; it is
        empty when the object is not food or no edge is crowded.
    """
    # An object without tags cannot be classified, so it is treated as non-food.
    tags = object.get("tags") or []
    if not tags or tags[0]["name"] != "Food":
        return False, []

    box = object["boundingBox"]
    left, top = box["x"], box["y"]
    right, bottom = left + box["w"], top + box["h"]

    detected_position = []
    # Left edge of the box lies inside the left margin.
    if left < image_width * ratio:
        detected_position.append("좌측")
    # Right edge of the box lies inside the right margin.
    if right > image_width * (1-ratio):
        detected_position.append("우측")
    # Top edge of the box lies inside the top margin.
    if top < image_height * ratio:
        detected_position.append("상단")
    # Bottom edge of the box lies inside the bottom margin.
    if bottom > image_height * (1-ratio):
        detected_position.append("하단")

    return True, detected_position

import json
from promptflow import tool
from promptflow.client import PFClient
from azure.core.credentials import AzureKeyCredential
from openai import AzureOpenAI

# The inputs section will change based on the arguments of the tool function, after you save the code
# Adding type to arguments and return value will help the system show the types properly
# Please update the function name/signature per need
@tool
def gpt_4_vision_analysis(input_url: str, question: str) -> str:
    """Ask an Azure OpenAI GPT-4 Vision deployment about a food image.

    Args:
        input_url: URL of the image to send to the model.
        question: Text sent as the user prompt alongside the image. When it
            is empty, a fixed Korean "describe this image" prompt is sent.

    Returns:
        The content of the first choice in the chat completion response.

    Raises:
        ValueError: If the "aoai_australia" connection does not expose
            ``api_base`` and ``api_key``.
    """
    pf_client = PFClient()

    # The API key is read from the connection so that no secret is stored in
    # source code.
    # NOTE(review): `with_secrets=True` is assumed to make the local PFClient
    # return unscrubbed secrets - confirm against the installed promptflow
    # version.
    pf_connection = pf_client.connections.get(name="aoai_australia", with_secrets=True)
    try:
        api_base = pf_connection.api_base
        api_key = pf_connection.api_key
    except (AttributeError, KeyError) as err:
        # Raise instead of exit(): terminating the interpreter from inside a
        # tool hides the real cause from the caller.
        raise ValueError(
            "Connection 'aoai_australia' must define 'api_base' and 'api_key'."
        ) from err

    deployment_name = 'gpt-4-vision'
    api_version = '2023-12-01-preview' # this might change in the future

    client = AzureOpenAI(
        api_key=api_key,
        api_version=api_version,
        base_url=f"{api_base}/openai/deployments/{deployment_name}"
    )

    # Use the caller's question; fall back to the fixed prompt when none is given.
    user_text = question or "이 이미지애 대해서 설명해 주세요.:"

    response = client.chat.completions.create(
        model=deployment_name,
        messages=[
            { "role": "system", "content": "As an AI assistant, your task involves interpreting about the food image. Remember to provide accurate answers based on the information present in the image." },
            { "role": "user", "content": [
                {
                    "type": "text",
                    "text": user_text
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": input_url
                    }
                }
            ] }
        ],
        temperature=0.1,
        top_p=0.95,
        max_tokens=2000
    )

    return response.choices[0].message.content

# system:
As an AI assistant, your task involves interpreting images and responding to questions about the image.
Remember to provide accurate answers based on the information present in the image.

# user:
Can you tell me what the image depicts?
![image]({{image_input}})

페이지 트리

사본 이미지 분석