DIP 실습 - Mean-shift / GrabCut

728x90

풀이 코드

#include <stdio.h>

#include <algorithm>
#include <cmath>
#include <iostream>
#include <string>
#include <vector>

#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include <opencv2/opencv.hpp>

using namespace cv;
using namespace std;

void exCvMeanShift() {
    Mat img = imread("fruits.png");
    if (img.empty()) exit(-1);
    cout << "----- exCvMeanShift() -----" << endl;

    resize(img, img, Size(256, 256), 0, 0, INTER_AREA);
    imshow("Src", img);
    imwrite("exCvMeanShift_src.jpg", img);

    pyrMeanShiftFiltering(img, img, 8, 16);

    imshow("OpenCV_MeanShift", img);
    waitKey();
    destroyAllWindows();
    imwrite("exCvMeanShift_dst.jpg", img);
}

// A sample in the 5-D joint feature space used by the mean-shift code:
// a position (x, y) plus three colour components (l, u, v).
//
// All members are public and zero-initialised. The class owns no resources, so it
// relies on the compiler-generated special member functions (Rule of Zero).
class Point5D {
public:
    float x = 0;  // position, first coordinate
    float y = 0;  // position, second coordinate
    float l = 0;  // colour component 1
    float u = 0;  // colour component 2
    float v = 0;  // colour component 3

    void accumPt(const Point5D&);               // component-wise add
    void copyPt(const Point5D&);                // component-wise copy
    float getColorDist(const Point5D&) const;   // Euclidean distance over (l, u, v)
    float getSpatialDist(const Point5D&) const; // Euclidean distance over (x, y)
    void scalePt(float);                        // multiply every component (used for averaging)
    void setPt(float, float, float, float, float); // assign x, y, l, u, v in that order
    void printPt() const;                       // dump the point to stdout
};

// Adds every component of `Pt` to the matching component of this point.
void Point5D::accumPt(const Point5D& Pt) {
    x += Pt.x;
    y += Pt.y;
    l += Pt.l;
    u += Pt.u;
    v += Pt.v;
}

// Overwrites this point with the components of `Pt`.
void Point5D::copyPt(const Point5D& Pt) {
    x = Pt.x;
    y = Pt.y;
    l = Pt.l;
    u = Pt.u;
    v = Pt.v;
}

// Returns the Euclidean distance between the (l, u, v) components of the two points.
// The squares are accumulated in double (as the previous pow()-based code did) and the
// result is narrowed to float once at the end.
float Point5D::getColorDist(const Point5D& Pt) const {
    const double dl = l - Pt.l;
    const double du = u - Pt.u;
    const double dv = v - Pt.v;
    return static_cast<float>(std::sqrt(dl * dl + du * du + dv * dv));
}

// Returns the Euclidean distance between the (x, y) components of the two points.
float Point5D::getSpatialDist(const Point5D& Pt) const {
    const double dx = x - Pt.x;
    const double dy = y - Pt.y;
    return static_cast<float>(std::sqrt(dx * dx + dy * dy));
}

// Multiplies every component by `scale` (e.g. 1/n to turn an accumulated sum into a mean).
void Point5D::scalePt(float scale) {
    x *= scale;
    y *= scale;
    l *= scale;
    u *= scale;
    v *= scale;
}

// Assigns all five components: position (px, py) and colour (pl, pa, pb) -> (l, u, v).
void Point5D::setPt(float px, float py, float pl, float pa, float pb) {
    x = px;
    y = py;
    l = pl;
    u = pa;
    v = pb;
}

// Prints "x y | l u v" followed by a newline to stdout.
void Point5D::printPt() const {
    std::cout << x << " " << y << " | " << l << " " << u << " " << v << '\n';
}

// Holds the tuning parameters and scratch storage for the hand-written
// mean-shift filter; the filtering itself lives in doFiltering().
class MeanShift {
public:
    float bw_spatial = 8;           // spatial bandwidth
    float bw_color = 16;            // colour bandwidth
    float min_shift_color = 0.1;    // minimum colour change between iterations
    float min_shift_spatial = 0.1;  // minimum position change between iterations
    int max_steps = 10;             // maximum number of iterations
    vector<Mat> img_split;          // the image split into one Mat per channel

    // Arguments, in order: spatial bandwidth, colour bandwidth,
    // minimum colour shift, minimum spatial shift, maximum iteration count.
    MeanShift(float, float, float, float, int);

    // Mean-shift filtering of the given image.
    void doFiltering(Mat&);
};

// Stores every tuning parameter; img_split stays empty until doFiltering() fills it.
MeanShift::MeanShift(float bs, float bc, float msc, float mss, int ms)
    : bw_spatial(bs),
      bw_color(bc),
      min_shift_color(msc),
      min_shift_spatial(mss),
      max_steps(ms) {}


// Mean-shift filters `img` in place.
//
// For every pixel a square window of radius `bw_spatial` (clipped to the image and
// centred on that pixel) is scanned. Neighbours whose colour lies within `bw_color`
// of the current estimate are averaged in the joint (row, col, c0, c1, c2) space and
// the average becomes the new estimate. Iteration stops as soon as the colour shift
// or the spatial shift drops to its minimum, or after `max_steps` iterations.
//
// Neighbour values are read from `img_split` (filled by split() below) while results
// are written to `img`, so earlier writes do not feed into later pixels.
//
// img: 8-bit, 3-channel image (CV_8UC3). An empty image is left untouched; any other
//      type trips CV_Assert.
void MeanShift::doFiltering(Mat& img) {
    if (img.empty()) {
        return;
    }
    CV_Assert(img.type() == CV_8UC3);

    const int height = img.rows;
    const int width = img.cols;
    const int radius = static_cast<int>(bw_spatial);

    split(img, img_split);
    const Mat& ch0 = img_split[0];
    const Mat& ch1 = img_split[1];
    const Mat& ch2 = img_split[2];

    for (int row = 0; row < height; ++row) {
        // Inclusive, symmetric window bounds: [row - radius, row + radius] clipped to the image.
        const int top = std::max(row - radius, 0);
        const int bottom = std::min(row + radius, height - 1);

        for (int col = 0; col < width; ++col) {
            const int left = std::max(col - radius, 0);
            const int right = std::min(col + radius, width - 1);

            // Start from the pixel itself. Note: Point5D::x carries the row, ::y the column.
            Point5D pt_cur;
            pt_cur.setPt(
                row, col,
                ch0.at<uchar>(row, col),
                ch1.at<uchar>(row, col),
                ch2.at<uchar>(row, col)
            );

            Point5D pt_prev;
            int step = 0;
            do {
                pt_prev.copyPt(pt_cur);

                Point5D pt_sum;  // zero-initialised accumulator
                int n_pt = 0;

                for (int hx = top; hx <= bottom; ++hx) {
                    const uchar* r0 = ch0.ptr<uchar>(hx);
                    const uchar* r1 = ch1.ptr<uchar>(hx);
                    const uchar* r2 = ch2.ptr<uchar>(hx);

                    for (int hy = left; hy <= right; ++hy) {
                        Point5D pt;
                        pt.setPt(hx, hy, r0[hy], r1[hy], r2[hy]);

                        // Only neighbours inside the colour bandwidth contribute.
                        if (pt.getColorDist(pt_cur) < bw_color) {
                            pt_sum.accumPt(pt);
                            ++n_pt;
                        }
                    }
                }

                // No sample in range: keep the last estimate instead of dividing by zero.
                if (n_pt == 0) {
                    break;
                }

                // Turn the accumulated sum into a mean and move the estimate there.
                pt_sum.scalePt(static_cast<float>(1.0 / n_pt));
                pt_cur.copyPt(pt_sum);
                ++step;

            } while ((pt_cur.getColorDist(pt_prev) > min_shift_color) &&
                (pt_cur.getSpatialDist(pt_prev) > min_shift_spatial) &&
                (step < max_steps));

            // Write the converged colour back, rounding and clamping to the 8-bit range.
            img.at<Vec3b>(row, col) = Vec3b(
                saturate_cast<uchar>(pt_cur.l),
                saturate_cast<uchar>(pt_cur.u),
                saturate_cast<uchar>(pt_cur.v));
        }
    }
}

void exMyMeanShift() {
    Mat img = imread("fruits.png");
    if (img.empty()) {
        exit(-1);
    }
    cout << "----- exMyMeanShift -----" << endl;

    resize(img, img, Size(256, 256), 0, 0, INTER_AREA);
    imshow("Src", img);
    imwrite("exMyMeanShift_src.jpg", img);

    cvtColor(img, img, COLOR_RGB2Luv);

    // MeanShift Processing
    MeanShift MSProc(8, 16, 0.1, 0.1, 10);
    MSProc.doFiltering(img);
    cvtColor(img, img, COLOR_Luv2RGB);

    imshow("Low-level_MeanShift", img);
    waitKey();
    destroyAllWindows();
    imwrite("exMyMeanShift_dst.jpg", img);
}


/*
int main() {
    //1번 문제

    // OpenCV의 Mean Shift 알고리즘 적용
    cout << "Applying OpenCV Mean Shift" << endl;
    exCvMeanShift();

    // Low-level Mean Shift 알고리즘 적용
    cout << "Applying Low-level Mean Shift" << endl;
    exMyMeanShift();

    return 0;
}
*/

int main() {

    //2번 문제
    
    // grapCut이 잘 적용되지 않는 이미지에 대한 실험
    // Load the dinosaur image
    Mat img = imread("dinosaur.jpg");
    if (img.empty()) {
        cerr << "Error: Unable to load the dino image." << endl;
        return -1;
    }

    // Define the rectangular region for grabCut
    Rect rect(Point(95, 110), Point(466, 259));

    // Initialize models for background and foreground
    Mat bg_model, fg_model;
    Mat result;

    // Perform grabCut with the specified rectangle
    grabCut(img, result, rect, bg_model, fg_model, 5, GC_INIT_WITH_RECT);

    // Create a mask for the foreground pixels
    compare(result, GC_PR_FGD, result, CMP_EQ);
    Mat mask(img.size(), CV_8UC3, Scalar(255, 255, 255));
    img.copyTo(mask, result);

    // Display the original and masked images
    imshow("Original Image", img);
    imshow("Segmented Image", mask);

    // Save or display the results as desired
    imwrite("dino_segmented.jpg", mask);

    waitKey(0);
    destroyAllWindows();
    

    
    // Load the dog image
    Mat img1 = imread("dog.jpeg");
    if (img1.empty()) {
        cerr << "Error: Unable to load the dog image." << endl;
        return -1;
    }

    // Define the rectangular region for grabCut
    Rect rect1(Point(50, 96), Point(144, 222));

    // Initialize models for background and foreground
    Mat bg_model1, fg_model1;
    Mat result1;

    // Perform grabCut with the specified rectangle
    grabCut(img1, result1, rect1, bg_model1, fg_model1, 5, GC_INIT_WITH_RECT);

    // Create a mask for the foreground pixels
    compare(result1, GC_PR_FGD, result1, CMP_EQ);
    Mat mask1(img1.size(), CV_8UC3, Scalar(255, 255, 255));
    img1.copyTo(mask1, result1);

    // Display the original and masked images
    imshow("Original Image", img1);
    imshow("Segmented Image", mask1);

    // Save or display the results as desired
    imwrite("dog_segmented.jpg", mask1);

    waitKey(0);
    destroyAllWindows();
    
    return 0;
}

OpenCV를 사용한 Mean Shift (exCvMeanShift 함수)

이 함수는 OpenCV 라이브러리의 `pyrMeanShiftFiltering` 함수를 사용해서 Mean Shift 이미지 필터링을 적용한다. 우선 `resize` 함수를 사용하여 이미지 크기를 256x256으로 조정한다. 그 후 `pyrMeanShiftFiltering` 함수를 호출하여 Mean Shift 필터링을 적용한다. 이 함수는 색상 공간과 공간 좌표 공간에서 유사한 픽셀을 클러스터링한다. 필터링 과정에서는 공간 대역폭과 색상 대역폭을 파라미터로 사용하며, 위 코드에서는 각각 8과 16으로 설정했다.

Low-Level Mean Shift 구현 (MeanShift 클래스 및 exMyMeanShift 함수)

Point5D 클래스는 5차원 공간에서의 점을 나타낸다. 각 점은 (x, y) 위치와 (l, u, v) 색상 값으로 구성된다. MeanShift 클래스는 Mean Shift 알고리즘의 핵심 로직을 포함한다. 입력 이미지를 픽셀 단위로 처리하고, 각 픽셀 주위에서 설정된 대역폭 내의 픽셀들과 비교하여 색상과 공간 거리를 기반으로 평균화 과정을 수행한다. 필터링 과정에서 이미지를 개별 채널로 분할하고, 각 픽셀에 대해 주변 픽셀을 검색한다. 설정된 색상 대역폭 내에 있는 픽셀의 평균을 계산한다. 한 번의 갱신에서 평균이 이동한 거리가 설정된 최소 이동 거리보다 큰 동안(그리고 최대 반복 횟수에 도달하기 전까지) 이 과정을 반복한다. 결과적으로 각 픽셀 위치에 새로운 색상 값을 할당하게 된다.

1번 코드 결과

OpenCV

Low-Level

Low-level 코드로 좋은 성능을 낸 것을 볼 수 있다.

2번 코드 결과

Dinosaur.jpg 결과

Dog.jpeg 결과

위의 실험 결과를 보면 정글 속의 공룡 이미지인 Dinosaur.jpg는 GrabCut이 잘 작동하지 않고 푸른 배경의 강아지 이미지인 Dog.jpeg는 GrabCut이 잘 작동하는 것을 볼 수 있다. 그 이유를 아래의 분석을 통해 살펴보자.

GrabCut 알고리즘이 잘 작동하는 영상의 특징

1. 객체와 배경 간에 명확한 색상의 차이가 있는 경우에 알고리즘은 이 두 영역을 쉽게 구별할 수 있다. 푸른 배경에 하얀 강아지가 있는 Dog 이미지의 경우가 이에 해당한다.

2. 객체와 배경의 텍스처가 명확히 다를 때 알고리즘이 객체를 쉽게 구별할 수 있다. 위의 Dog 예시에서도 강아지는 털이 있는 반면 배경은 미끈한 벽이다.

3. 배경이 단순하고 변화가 적을수록 GrabCut 알고리즘은 더욱 효과적으로 작동한다. 예를 들어, 하늘이나 단색 벽과 같은 배경이 이상적이다.

4. 객체의 경계가 배경과 대비되어 명확하게 드러나는 경우, 알고리즘은 이 경계를 따라 객체를 더욱 정확하게 분리할 수 있다. 예를 들어, 경계가 확실한 농구공이 흰 바닥에 놓여 있는 이미지라면 경계가 배경과 확실히 대비될 것이다.

GrabCut 알고리즘이 잘 작동하지 않는 영상의 특징

1. 객체와 배경이 유사한 색상이나 패턴을 가지고 있을 경우, 알고리즘이 두 영역을 구분하기 어려워한다. 예를 들어, 위의 Dinosaur 이미지와 같이 잎사귀가 많은 숲에서 얼룩덜룩한 공룡을 분리하려고 할 때 이 문제가 발생할 수 있다.

2. 객체와 배경이 유사한 텍스처를 공유하거나 세부적인 패턴이 많이 겹칠 때, 알고리즘은 이들을 올바르게 구분하기 어렵다. 이는 특히 패턴이나 질감이 복잡한 의류나 자연 환경에서 흔히 발생한다.

3. 배경이 복잡하면 어떤 것이 핵심이 되는 경계인지 파악하기 힘들고 구분해내야 하는 객체의 경계인지 배경의 패턴인지 파악하기 힘들어 알고리즘이 잘 작동하지 않는다.

728x90

'Quality control (Univ. Study) > Digital Image Processing' 카테고리의 다른 글

Digital Image Processing - Local Feature (0)	2024.05.19
Digital Image Processing - Texture (0)	2024.05.16
Digital Image Processing - MRF / Graph Cuts (1)	2024.05.15
DIP 실습 - Segmentation / Clustering (0)	2024.05.15
DIP 실습 - Band pass filter / Frequency domain (0)	2024.05.14

상훈's CANVAS

DIP 실습 - Mean-shift / GrabCut

풀이 코드

'Quality control (Univ. Study) > Digital Image Processing' 카테고리의 다른 글

티스토리툴바

DIP 실습 - Mean-shift / GrabCut

풀이 코드

'Quality control (Univ. Study) > Digital Image Processing' 카테고리의 다른 글

관련글

티스토리툴바