//
//  SegmentationShader.metal
//  ttpic
//
//  Created by stonefeng on 2017/7/6.
//  Copyright © 2017 Tencent. All rights reserved.
//

#include <metal_stdlib>
using namespace metal;

// NOTE(review): in the text this file was reviewed from, the header name of the
// #include and all texture2d template arguments were missing. They have been
// reconstructed from usage: the access qualifier follows the calls made on each
// texture (sample / read / write) and the element type follows the type the
// result is assigned to (half4 vs float4). Textures that are only queried for
// their size, or not used at all, are declared as texture2d<float, access::write>
// — confirm against the host-side bindings.

/// First refinement pass: per-pixel linear coefficients from a 3x3 window.
///
/// For every output pixel the kernel gathers a 3x3 neighbourhood of
/// (inTexture.rgb, maskTexture.r), builds the 3x3 colour covariance matrix
/// (with `eps` added on the diagonal), inverts it through its cofactors and
/// determinant, and multiplies the inverse with the colour/mask covariance.
/// The result is a coefficient vector `a` (rgb) and an offset
/// `b = mean(mask) - a . mean(rgb)` stored halved in alpha.
/// Everything is written as `value * 0.5 + 0.5`; refineFilter3 undoes this
/// encoding with `(x - 0.5) * 2` and an extra `* 2` on alpha.
///
/// Sampling coordinates are `gid / outTexture size` (no half-texel offset) with
/// a linear, clamp-to-edge sampler.
kernel void refineFilter1(texture2d<float, access::sample> inTexture [[ texture(0) ]],
                          texture2d<float, access::write> outTexture [[ texture(1) ]],
                          texture2d<float, access::sample> maskTexture [[ texture(2) ]],
                          uint2 gid [[ thread_position_in_grid ]])
{
    float eps = 0.01f;
    float step_x = 1.0f / (float)outTexture.get_width();
    float step_y = 1.0f / (float)outTexture.get_height();

    constexpr sampler quadSampler(coord::normalized, filter::linear, address::clamp_to_edge);

    float fMult9 = 1.0f / 9.0f;
    float4 srcValue[9];

    // srcValue[k] = (image rgb, mask value); k is the row-major index in the 3x3 window.
    float2 fIdx0 = float2((float)gid.x * step_x, (float)gid.y * step_y);
    float2 fIdx = fIdx0;
    srcValue[4] = float4(inTexture.sample(quadSampler, fIdx).rgb, maskTexture.sample(quadSampler, fIdx).r);

    fIdx = float2(fIdx0.x - step_x, fIdx0.y - step_y);
    srcValue[0] = float4(inTexture.sample(quadSampler, fIdx).rgb, maskTexture.sample(quadSampler, fIdx).r);

    fIdx = float2(fIdx0.x, fIdx0.y - step_y);
    srcValue[1] = float4(inTexture.sample(quadSampler, fIdx).rgb, maskTexture.sample(quadSampler, fIdx).r);

    fIdx = float2(fIdx0.x + step_x, fIdx0.y - step_y);
    srcValue[2] = float4(inTexture.sample(quadSampler, fIdx).rgb, maskTexture.sample(quadSampler, fIdx).r);

    fIdx = float2(fIdx0.x - step_x, fIdx0.y);
    srcValue[3] = float4(inTexture.sample(quadSampler, fIdx).rgb, maskTexture.sample(quadSampler, fIdx).r);

    fIdx = float2(fIdx0.x + step_x, fIdx0.y);
    srcValue[5] = float4(inTexture.sample(quadSampler, fIdx).rgb, maskTexture.sample(quadSampler, fIdx).r);

    fIdx = float2(fIdx0.x - step_x, fIdx0.y + step_y);
    srcValue[6] = float4(inTexture.sample(quadSampler, fIdx).rgb, maskTexture.sample(quadSampler, fIdx).r);

    fIdx = float2(fIdx0.x, fIdx0.y + step_y);
    srcValue[7] = float4(inTexture.sample(quadSampler, fIdx).rgb, maskTexture.sample(quadSampler, fIdx).r);

    fIdx = float2(fIdx0.x + step_x, fIdx0.y + step_y);
    srcValue[8] = float4(inTexture.sample(quadSampler, fIdx).rgb, maskTexture.sample(quadSampler, fIdx).r);

    // Accumulate first and second moments over the window.
    float4 mean_I = float4(0.0);
    float3 mean_Ip = float3(0.0);
    float var_I_rr = 0.0;
    float var_I_rg = 0.0;
    float var_I_rb = 0.0;
    float var_I_gg = 0.0;
    float var_I_gb = 0.0;
    float var_I_bb = 0.0;
    for (int i = 0; i < 9; i++) {
        mean_I += srcValue[i];
        mean_Ip += srcValue[i].rgb * srcValue[i].a;
        var_I_rr += srcValue[i].r * srcValue[i].r;
        var_I_rg += srcValue[i].r * srcValue[i].g;
        var_I_rb += srcValue[i].r * srcValue[i].b;
        var_I_gg += srcValue[i].g * srcValue[i].g;
        var_I_gb += srcValue[i].g * srcValue[i].b;
        var_I_bb += srcValue[i].b * srcValue[i].b;
    }
    mean_I *= fMult9;
    mean_Ip *= fMult9;

    // Covariance = E[xy] - E[x]E[y]; eps is added on the diagonal only.
    var_I_rr = var_I_rr * fMult9 - mean_I.r * mean_I.r + eps;
    var_I_rg = var_I_rg * fMult9 - mean_I.r * mean_I.g;
    var_I_rb = var_I_rb * fMult9 - mean_I.r * mean_I.b;
    var_I_gg = var_I_gg * fMult9 - mean_I.g * mean_I.g + eps;
    var_I_gb = var_I_gb * fMult9 - mean_I.g * mean_I.b;
    var_I_bb = var_I_bb * fMult9 - mean_I.b * mean_I.b + eps;

    float3 cov_Ip = mean_Ip - mean_I.rgb * mean_I.a;

    // Cofactors of the symmetric covariance matrix and its determinant.
    float invrr = var_I_gg * var_I_bb - var_I_gb * var_I_gb;
    float invrg = var_I_gb * var_I_rb - var_I_rg * var_I_bb;
    float invrb = var_I_rg * var_I_gb - var_I_gg * var_I_rb;
    float invgg = var_I_rr * var_I_bb - var_I_rb * var_I_rb;
    float invgb = var_I_rb * var_I_rg - var_I_rr * var_I_gb;
    float invbb = var_I_rr * var_I_gg - var_I_rg * var_I_rg;
    float covDet = invrr * var_I_rr + invrg * var_I_rg + invrb * var_I_rb;

    float4 resultColor = float4(0.0);
    resultColor.r = (invrr * cov_Ip.r + invrg * cov_Ip.g + invrb * cov_Ip.b) / covDet;
    resultColor.g = (invrg * cov_Ip.r + invgg * cov_Ip.g + invgb * cov_Ip.b) / covDet;
    resultColor.b = (invrb * cov_Ip.r + invgb * cov_Ip.g + invbb * cov_Ip.b) / covDet;
    // The offset term is stored halved; refineFilter3 multiplies alpha by 2 again.
    resultColor.a = (mean_I.a - resultColor.r * mean_I.r - resultColor.g * mean_I.g - resultColor.b * mean_I.b) * 0.5;

    outTexture.write(resultColor * 0.5 + float4(0.5), gid);
}

/// Second refinement pass: 3x3 mean of `inTexture`.
///
/// Sums nine linear, clamp-to-edge samples around `gid / outTexture size`
/// (centre plus its eight neighbours one output texel apart) and writes the
/// sum divided by 9.
kernel void refineFilter2(texture2d<float, access::sample> inTexture [[ texture(0) ]],
                          texture2d<float, access::write> outTexture [[ texture(1) ]],
                          uint2 gid [[ thread_position_in_grid ]])
{
    float step_x = 1.0f / (float)outTexture.get_width();
    float step_y = 1.0f / (float)outTexture.get_height();

    constexpr sampler quadSampler(coord::normalized, filter::linear, address::clamp_to_edge);

    float4 srcValue = float4(0.0);

    float2 fIdx0 = float2((float)gid.x * step_x, (float)gid.y * step_y);
    float2 fIdx = fIdx0;
    srcValue += inTexture.sample(quadSampler, fIdx);

    fIdx = float2(fIdx0.x - step_x, fIdx0.y - step_y);
    srcValue += inTexture.sample(quadSampler, fIdx);

    fIdx = float2(fIdx0.x, fIdx0.y - step_y);
    srcValue += inTexture.sample(quadSampler, fIdx);

    fIdx = float2(fIdx0.x + step_x, fIdx0.y - step_y);
    srcValue += inTexture.sample(quadSampler, fIdx);

    fIdx = float2(fIdx0.x - step_x, fIdx0.y);
    srcValue += inTexture.sample(quadSampler, fIdx);

    fIdx = float2(fIdx0.x + step_x, fIdx0.y);
    srcValue += inTexture.sample(quadSampler, fIdx);

    fIdx = float2(fIdx0.x - step_x, fIdx0.y + step_y);
    srcValue += inTexture.sample(quadSampler, fIdx);

    fIdx = float2(fIdx0.x, fIdx0.y + step_y);
    srcValue += inTexture.sample(quadSampler, fIdx);

    fIdx = float2(fIdx0.x + step_x, fIdx0.y + step_y);
    srcValue += inTexture.sample(quadSampler, fIdx);

    outTexture.write(srcValue / 9.0, gid);
}

/// Third refinement pass: applies encoded linear coefficients to an image.
///
/// `maskTexture` is decoded with `(x - 0.5) * 2` (the inverse of the encoding
/// written by refineFilter1), then `s.rgb . r.rgb + s.a * 2` is evaluated with
/// `r` sampled from `inTexture`. The value is contrast-stretched around 0.5 by
/// a factor of 2, values below 0.05 snap to 0 and values above 0.95 snap to 1.
/// The result is written as an opaque grey pixel.
kernel void refineFilter3(texture2d<float, access::sample> inTexture [[ texture(0) ]],
                          texture2d<float, access::write> outTexture [[ texture(1) ]],
                          texture2d<float, access::sample> maskTexture [[ texture(2) ]],
                          uint2 gid [[ thread_position_in_grid ]])
{
    constexpr sampler quadSampler(coord::normalized, filter::linear, address::clamp_to_edge);

    float step_x = 1.0f / (float)outTexture.get_width();
    float step_y = 1.0f / (float)outTexture.get_height();
    float2 fIdx0 = float2((float)gid.x * step_x, (float)gid.y * step_y);

    float4 r = inTexture.sample(quadSampler, fIdx0);
    float4 s = (maskTexture.sample(quadSampler, fIdx0) - float4(0.5)) * 2.0;

    float src = s.r * r.r + s.g * r.g + s.b * r.b + s.a * 2.0;
    src = (src - 0.5) * 2.0 + 0.5;
    if (src < 0.05) src = 0.0;
    if (src > 0.95) src = 1.0;

    outTexture.write(float4(src, src, src, 1.0), gid);
}

/// Bilinearly upsamples a fixed 20x26 float grid into `outTexture`.
///
/// `uData` is indexed as `x + y * 20`. Each of the four taps is remapped with
/// `clamp((u - 0.3) * 2 + 0.5, 0, 1)` before interpolation. Taps that would
/// fall outside the grid are collapsed onto the nearest valid row/column.
kernel void buffer2Texture2(texture2d<float, access::write> outTexture [[ texture(0) ]],
                            constant float* uData [[buffer(0)]],
                            uint2 gid [[thread_position_in_grid ]])
{
    int width = outTexture.get_width();
    int height = outTexture.get_height();

    // Pixel-centre mapping from output space into the 20x26 grid.
    float posx = (gid.x + 0.5) * 20.0 / width - 0.5;
    float posy = (gid.y + 0.5) * 26.0 / height - 0.5;

    int dx = floor(posx);
    int dy = floor(posy);
    int dx2 = dx + 1;
    int dy2 = dy + 1;
    if (dx < 0) dx = 0;
    if (dy < 0) dy = 0;
    if (dx2 == 20) dx2 = dx;
    if (dy2 == 26) dy2 = dy;

    float ratioX = posx - dx;
    float ratioY = posy - dy;

    float u1 = uData[dx + dy * 20];
    float u2 = uData[dx2 + dy * 20];
    float u3 = uData[dx2 + dy2 * 20];
    float u4 = uData[dx + dy2 * 20];

    u1 = max(0.0, min(1.0, (u1 - 0.3) * 2.0 + 0.5));
    u2 = max(0.0, min(1.0, (u2 - 0.3) * 2.0 + 0.5));
    u3 = max(0.0, min(1.0, (u3 - 0.3) * 2.0 + 0.5));
    u4 = max(0.0, min(1.0, (u4 - 0.3) * 2.0 + 0.5));

    float value = u1 * (1.0 - ratioX) * (1.0 - ratioY)
                + u2 * ratioX * (1.0 - ratioY)
                + u3 * ratioX * ratioY
                + u4 * (1.0 - ratioX) * ratioY;

    outTexture.write(float4(value, value, value, 1.0), gid);
}

/// Converts a two-plane float buffer into a grey mask texture.
///
/// `dimensions[0]` / `dimensions[1]` give the plane width / height; the second
/// plane starts at `width * height`. Both planes are bilinearly interpolated
/// at the output pixel, then `e^(r1 - r0) / (e^(r1 - r0) + 1)` is computed,
/// stretched around 0.5 by a factor of 1.5, saturated, and snapped to 0 below
/// 0.05 and to 1 above 0.95.
kernel void kernel_Float32toBGRA2(texture2d<float, access::write> outTexture [[ texture(0) ]],
                                  constant float* uData [[buffer(0)]],
                                  constant int* dimensions [[buffer(1)]],
                                  uint2 gid [[thread_position_in_grid ]])
{
    float width = (float)outTexture.get_width();
    float height = (float)outTexture.get_height();
    int uniform_w = dimensions[0];
    int uniform_h = dimensions[1];

    float posx = (float)gid.x * (float)uniform_w / width;
    float posy = (float)gid.y * (float)uniform_h / height;

    int dx = floor(posx);
    int dy = floor(posy);
    int dx2 = dx + 1;
    int dy2 = dy + 1;
    if (dx < 0) dx = 0;
    if (dy < 0) dy = 0;
    if (dx2 == uniform_w) dx2 = dx;
    if (dy2 == uniform_h) dy2 = dy;

    float ratioX = posx - dx;
    float ratioY = posy - dy;

    // First plane.
    float u1 = uData[dx + dy * uniform_w];
    float u2 = uData[dx2 + dy * uniform_w];
    float u3 = uData[dx2 + dy2 * uniform_w];
    float u4 = uData[dx + dy2 * uniform_w];
    float r0 = (u1 * (1.0 - ratioX) * (1.0 - ratioY) + u2 * ratioX * (1.0 - ratioY) + u3 * ratioX * ratioY + u4 * (1.0 - ratioX) * ratioY);

    // Second plane.
    int offset = uniform_w * uniform_h;
    u1 = uData[offset + dx + dy * uniform_w];
    u2 = uData[offset + dx2 + dy * uniform_w];
    u3 = uData[offset + dx2 + dy2 * uniform_w];
    u4 = uData[offset + dx + dy2 * uniform_w];
    float r1 = (u1 * (1.0 - ratioX) * (1.0 - ratioY) + u2 * ratioX * (1.0 - ratioY) + u3 * ratioX * ratioY + u4 * (1.0 - ratioX) * ratioY);

    float diff = exp(r1 - r0);
    diff = diff / (diff + 1.0f);

    // float r = diff;
    // float r = diff > 0.5?1.0f:0.0f;
    float r = saturate((diff - 0.5f) * 1.5f + 0.5f);
    if (r < 0.05) r = 0;
    if (r > 0.95) r = 1.0f;

    outTexture.write(float4(r, r, r, 1.0), gid);
}

/// Variant of kernel_Float32toBGRA2 gated by a per-cell weight buffer.
///
/// If all four bilinear taps have `weight < 0.5` the pixel is written as
/// opaque black. Otherwise both planes of `uData` are interpolated and
/// `e^(r1 - r0) / (e^(r1 - r0) + 1)` is written without further remapping.
kernel void kernel_Float32toBGRA3(texture2d<float, access::write> outTexture [[ texture(0) ]],
                                  constant float* uData [[buffer(0)]],
                                  constant float* weight [[buffer(1)]],
                                  constant int* dimensions [[buffer(2)]],
                                  uint2 gid [[thread_position_in_grid ]])
{
    float width = (float)outTexture.get_width();
    float height = (float)outTexture.get_height();
    int uniform_w = dimensions[0];
    int uniform_h = dimensions[1];

    float posx = (float)gid.x * (float)uniform_w / width;
    float posy = (float)gid.y * (float)uniform_h / height;

    int dx = floor(posx);
    int dy = floor(posy);
    int dx2 = dx + 1;
    int dy2 = dy + 1;
    if (dx < 0) dx = 0;
    if (dy < 0) dy = 0;
    if (dx2 == uniform_w) dx2 = dx;
    if (dy2 == uniform_h) dy2 = dy;

    float ratioX = posx - dx;
    float ratioY = posy - dy;

    int idx1 = dx + dy * uniform_w;
    int idx2 = dx2 + dy * uniform_w;
    int idx3 = dx2 + dy2 * uniform_w;
    int idx4 = dx + dy2 * uniform_w;

    // All four taps must be below the threshold. The previous code tested idx3
    // twice and never looked at idx4.
    if (weight[idx1] < 0.5 && weight[idx2] < 0.5 && weight[idx3] < 0.5 && weight[idx4] < 0.5) {
        outTexture.write(float4(0, 0, 0, 1.0), gid);
    } else {
        // First plane.
        float u1 = uData[idx1];
        float u2 = uData[idx2];
        float u3 = uData[idx3];
        float u4 = uData[idx4];
        float r0 = (u1 * (1.0 - ratioX) * (1.0 - ratioY) + u2 * ratioX * (1.0 - ratioY) + u3 * ratioX * ratioY + u4 * (1.0 - ratioX) * ratioY);

        // Second plane.
        int offset = uniform_w * uniform_h;
        u1 = uData[idx1 + offset];
        u2 = uData[idx2 + offset];
        u3 = uData[idx3 + offset];
        u4 = uData[idx4 + offset];
        float r1 = (u1 * (1.0 - ratioX) * (1.0 - ratioY) + u2 * ratioX * (1.0 - ratioY) + u3 * ratioX * ratioY + u4 * (1.0 - ratioX) * ratioY);

        float diff = exp(r1 - r0);
        diff = diff / (diff + 1.0f);

        float r = diff;
        // float r = saturate((diff - 0.5f) * 1.5f + 0.5f);
        // if (r < 0.05) r = 0;
        // if (r > 0.95) r = 1.0f;

        outTexture.write(float4(r, r, r, 1.0), gid);
    }
}

/// Thresholds a two-plane float buffer into a binary float map.
///
/// For the cell at `gid` (row stride `dimensions[0]`) computes
/// `e^(u1 - u0) / (e^(u1 - u0) + 1)` from the two planes of `uData` and stores
/// 1.0 in `uData1` when that exceeds 0.05, otherwise 0.0.
/// `outTexture` is not used by this kernel.
kernel void kernel_smallmap(texture2d<float, access::write> outTexture [[ texture(0) ]],
                            constant float* uData [[buffer(0)]],
                            constant int* dimensions [[buffer(1)]],
                            device float* uData1 [[buffer(2)]],
                            uint2 gid [[thread_position_in_grid ]])
{
    int uniform_w = dimensions[0];
    int uniform_h = dimensions[1];
    int offset = uniform_w * uniform_h;
    int index = gid.y * uniform_w + gid.x;

    float u0 = uData[index];
    float u1 = uData[index + offset];
    float u = exp(u1 - u0);
    u = u / (u + 1.0f);
    // u = (u-0.5) * 2.0 + 0.5;

    uData1[index] = u > 0.05 ? 1.0f : 0.0f;
}

/// 3x3 median filter over both planes of a float buffer.
///
/// The plane size is taken from `outTexture` (only its width and height are
/// used). Neighbour coordinates are clamped to the plane. The median is found
/// with a min/max exchange network: after each `mnmxN` step the smallest and
/// largest of the remaining candidates are discarded, and `v[4]` ends up
/// holding the median of the nine values.
///
/// The result is stored at the same plane/position it was read from.
/// NOTE(review): this assumes `dstData` has the same two-plane layout as
/// `srcData` — confirm against the host code.
kernel void kernel_refineMask(texture2d<float, access::write> outTexture [[ texture(0) ]],
                              constant float* srcData [[buffer(0)]],
                              device float* dstData [[buffer(1)]],
                              uint2 gid [[thread_position_in_grid ]])
{
    int width = outTexture.get_width();
    int height = outTexture.get_height();

    // Start offset of each plane inside the buffers.
    int offset[2];
    offset[0] = 0;
    offset[1] = width * height;

    int gx = gid.x;
    int gy = gid.y;

    // Clamped column / row indices of the 3x3 window.
    int x[3];
    int y[3];
    x[0] = max(gx - 1, 0);
    x[1] = gid.x;
    x[2] = min(gx + 1, width - 1);
    y[0] = max(gy - 1, 0);
    y[1] = gid.y;
    y[2] = min(gy + 1, height - 1);

#define s2(a, b)                temp = a; a = min(a, b); b = max(temp, b);
#define mn3(a, b, c)            s2(a, b); s2(a, c);
#define mx3(a, b, c)            s2(b, c); s2(a, c);

#define mnmx3(a, b, c)          mx3(a, b, c); s2(a, b);                                   // 3 exchanges
#define mnmx4(a, b, c, d)       s2(a, b); s2(c, d); s2(a, c); s2(b, d);                   // 4 exchanges
#define mnmx5(a, b, c, d, e)    s2(a, b); s2(c, d); mn3(a, c, e); mx3(b, d, e);           // 6 exchanges
#define mnmx6(a, b, c, d, e, f) s2(a, d); s2(b, e); s2(c, f); mn3(a, b, c); mx3(d, e, f); // 7 exchanges

    for (int i = 0; i < 2; i++) {
        float temp, v[6];
        int offseti = offset[i];

        uint index = width * y[2] + x[0] + offseti;
        v[0] = srcData[index];
        index = width * y[0] + x[2] + offseti;
        v[1] = srcData[index];
        index = width * y[0] + x[0] + offseti;
        v[2] = srcData[index];
        index = width * y[2] + x[2] + offseti;
        v[3] = srcData[index];
        index = width * y[1] + x[0] + offseti;
        v[4] = srcData[index];
        index = width * y[1] + x[2] + offseti;
        v[5] = srcData[index];

        mnmx6(v[0], v[1], v[2], v[3], v[4], v[5]);

        index = width * y[2] + x[1] + offseti;
        v[5] = srcData[index];
        mnmx5(v[1], v[2], v[3], v[4], v[5]);

        index = width * y[0] + x[1] + offseti;
        v[5] = srcData[index];
        mnmx4(v[2], v[3], v[4], v[5]);

        // Centre element; `index` now addresses the output slot of this plane.
        index = width * y[1] + x[1] + offseti;
        v[5] = srcData[index];
        mnmx3(v[3], v[4], v[5]);

        // `index` already contains the plane offset. The previous code added
        // `offseti` a second time, which placed the second plane's result at
        // 2 * width * height + centre instead of width * height + centre.
        dstData[index] = v[4];
    }
}

/// 3x3 erosion: writes the component-wise minimum of the pixel and its eight
/// neighbours.
///
/// NOTE(review): `gid` is unsigned, so `gid.x - 1` / `gid.y - 1` wrap around on
/// the first column / row, and `+ 1` leaves the texture on the last ones; those
/// reads use out-of-range coordinates — confirm the edge behaviour is intended.
kernel void erodeFilter(texture2d<half, access::read> inTexture [[ texture(0) ]],
                        texture2d<half, access::write> outTexture [[ texture(1) ]],
                        uint2 gid [[ thread_position_in_grid ]])
{
    half4 inColor = inTexture.read(gid);

    uint2 position = uint2(gid.x - 1, gid.y - 1);
    inColor = min(inTexture.read(position), inColor);

    position = uint2(gid.x, gid.y - 1);
    inColor = min(inTexture.read(position), inColor);

    position = uint2(gid.x + 1, gid.y - 1);
    inColor = min(inTexture.read(position), inColor);

    position = uint2(gid.x - 1, gid.y);
    inColor = min(inTexture.read(position), inColor);

    position = uint2(gid.x + 1, gid.y);
    inColor = min(inTexture.read(position), inColor);

    position = uint2(gid.x - 1, gid.y + 1);
    inColor = min(inTexture.read(position), inColor);

    position = uint2(gid.x, gid.y + 1);
    inColor = min(inTexture.read(position), inColor);

    position = uint2(gid.x + 1, gid.y + 1);
    inColor = min(inTexture.read(position), inColor);

    outTexture.write(inColor, gid);
}

/// Diagonal dilation: writes the component-wise maximum of the pixel and the
/// four pixels at offsets (+1,+1), (+2,+2), (-1,-1) and (-2,-2).
///
/// The step is fixed to (1, 1); the uniform that once supplied it is commented
/// out. As in erodeFilter, coordinates are unsigned and are not clamped.
kernel void dilateFilter(texture2d<half, access::read> inTexture [[ texture(0) ]],
                         texture2d<half, access::write> outTexture [[ texture(1) ]],
                         // constant fixUniform &uData [[buffer(0)]],
                         uint2 gid [[ thread_position_in_grid ]])
{
    int deltaX = 1; // uData.deltaX;
    int deltaY = 1; // uData.deltaY;

    half4 inColor = inTexture.read(gid);

    uint2 xGid = uint2(gid.x + deltaX, gid.y + deltaY);
    inColor = max(inColor, inTexture.read(xGid));

    xGid = uint2(xGid.x + deltaX, xGid.y + deltaY);
    inColor = max(inColor, inTexture.read(xGid));

    xGid = uint2(gid.x - deltaX, gid.y - deltaY);
    inColor = max(inColor, inTexture.read(xGid));

    xGid = uint2(xGid.x - deltaX, xGid.y - deltaY);
    inColor = max(inColor, inTexture.read(xGid));

    outTexture.write(inColor, gid);
}

/// Per-pixel remap `clamp(2 * (c - 0.3) + 0.5, 0, 1)` on rgb; alpha is forced
/// to 1.
kernel void fixNormalFilter(texture2d<half, access::read> inTexture [[ texture(0) ]],
                            texture2d<half, access::write> outTexture [[ texture(1) ]],
                            uint2 gid [[ thread_position_in_grid ]])
{
    half4 inColor = inTexture.read(gid);
    inColor = 2.0 * (inColor - half4(0.3)) + half4(0.5);
    inColor = max(half4(0.0), min(half4(1.0), inColor));
    outTexture.write(half4(inColor.rgb, 1.0), gid);
}

/// Writes `e^(r1 - r0) / (e^(r1 - r0) + 1)` as an opaque grey pixel, where
/// `r0` / `r1` are the red channels of `inTexture0` / `inTexture1` at `gid`.
kernel void kernel_diff(texture2d<float, access::write> outTexture [[ texture(0) ]],
                        texture2d<float, access::read> inTexture0 [[ texture(1) ]],
                        texture2d<float, access::read> inTexture1 [[ texture(2) ]],
                        uint2 gid [[thread_position_in_grid ]])
{
    float r0 = inTexture0.read(gid).r;
    float r1 = inTexture1.read(gid).r;

    float diff = exp(r1 - r0);
    diff = diff / (diff + 1.0f);

    float r = diff;
    // float r = (diff > 0.5f)?1.0f:0.0f;
    // float r = (diff - 0.5f) * 1.5f + 0.5f;
    // if (r < 0.5) r = 0;
    // if (r > 0.5) r = 1.0f;

    outTexture.write(float4(r, r, r, 1.0), gid);
}

/// Blends previous and current single-channel maps, driven by frame change.
///
/// `diff` is the summed absolute rgb difference between `curTexture` and
/// `preTexture`, scaled by 1.7 and capped at 1. For each of the two map pairs
/// the previous value is mixed towards the current (`cnn`) value by `diff`,
/// and the result is then averaged with the current value.
kernel void kernel_diff2(texture2d<float, access::read> preTexture [[ texture(0) ]],
                         texture2d<float, access::read> curTexture [[ texture(1) ]],
                         texture2d<float, access::read> preTexture0 [[ texture(2) ]],
                         texture2d<float, access::read> preTexture1 [[ texture(3) ]],
                         texture2d<float, access::read> cnnTexture0 [[ texture(4) ]],
                         texture2d<float, access::read> cnnTexture1 [[ texture(5) ]],
                         texture2d<float, access::write> dstTexture0 [[ texture(6) ]],
                         texture2d<float, access::write> dstTexture1 [[ texture(7) ]],
                         uint2 gid [[thread_position_in_grid ]])
{
    float4 curColor4 = curTexture.read(gid);
    float4 preColor4 = preTexture.read(gid);

    float diff = fabs(curColor4.r - preColor4.r) + fabs(curColor4.g - preColor4.g) + fabs(curColor4.b - preColor4.b);
    diff = min(1.0f, diff * 1.7f);

    float pre0 = preTexture0.read(gid).r;
    float cnn0 = cnnTexture0.read(gid).r;
    float r0 = mix(pre0, cnn0, diff);
    r0 = mix(cnn0, r0, 0.5f);
    dstTexture0.write(float4(r0, r0, r0, 1.0f), gid);

    float pre1 = preTexture1.read(gid).r;
    float cnn1 = cnnTexture1.read(gid).r;
    float r1 = mix(pre1, cnn1, diff);
    r1 = mix(cnn1, r1, 0.5f);
    dstTexture1.write(float4(r1, r1, r1, 1.0f), gid);
}

/// Bilinear resize of the red channel of `inTexture` into `outTexture`.
///
/// NOTE(review): the `dx + 1` / `dy + 1` taps are not clamped, so they can
/// address one texel past the last column / row of `inTexture` — confirm the
/// edge behaviour is intended.
kernel void kernel_resize(texture2d<float, access::write> outTexture [[ texture(0) ]],
                          texture2d<float, access::read> inTexture [[ texture(1) ]],
                          uint2 gid [[thread_position_in_grid ]])
{
    uint out_w = outTexture.get_width();
    uint out_h = outTexture.get_height();
    uint in_w = inTexture.get_width();
    uint in_h = inTexture.get_height();

    float posx = (float)(gid.x * in_w) / (float)out_w;
    float posy = (float)(gid.y * in_h) / (float)out_h;

    int dx = floor(posx);
    int dy = floor(posy);

    float u1 = inTexture.read(uint2(dx, dy)).r;
    float u2 = inTexture.read(uint2(dx + 1, dy)).r;
    float u3 = inTexture.read(uint2(dx + 1, dy + 1)).r;
    float u4 = inTexture.read(uint2(dx, dy + 1)).r;

    float ratioX = posx - dx;
    float ratioY = posy - dy;

    float r = (u1 * (1.0 - ratioX) * (1.0 - ratioY) + u2 * ratioX * (1.0 - ratioY) + u3 * ratioX * ratioY + u4 * (1.0 - ratioX) * ratioY);

    outTexture.write(float4(r, r, r, 1.0), gid);
}

/// Interpolated vertex data consumed by the fragment function below.
struct VertexIO
{
    float4 m_Position [[position]];
    float2 m_TexCoord [[user(texturecoord)]];
};

/// Composites `tex2D` over a fixed background colour using `mask2D.r`.
///
/// The mask is remapped with `saturate(m * 1.4 - 0.15)`; values below 0.3
/// become 0 and values above 0.5 become 1, leaving a narrow soft band between.
fragment half4 texturedQuadFragmentMaskBgFg2(VertexIO inFrag [[ stage_in ]],
                                             texture2d<half> tex2D [[ texture(0) ]],
                                             texture2d<half> mask2D [[ texture(1) ]])
{
    constexpr sampler quadSampler(coord::normalized, filter::linear, address::clamp_to_edge);

    half4 src = tex2D.sample(quadSampler, inFrag.m_TexCoord);
    half4 mask = mask2D.sample(quadSampler, inFrag.m_TexCoord);
    half4 bgColor = half4(0.078431373, 0.15686275, 0.31372549, 1.0);

    half alpha = saturate(mask.r * 1.4f - 0.15f);
    if (alpha < 0.3) alpha = 0.0f;
    if (alpha > 0.5) alpha = 1.0f;

    return mix(bgColor, src, alpha);
}

/// 2D box filter with half-sizes `dimensions[0]` (x) and `dimensions[1]` (y).
///
/// Averages the `(2*kw+1) x (2*kh+1)` window centred on `gid`. Coordinates are
/// computed in unsigned arithmetic and are not clamped to the texture.
/// `dimensions` is bound at buffer index 1.
kernel void kernel_box(texture2d<float, access::read> inTexture [[ texture(0) ]],
                       texture2d<float, access::write> outTexture [[ texture(1) ]],
                       constant int* dimensions [[ buffer(1) ]],
                       uint2 gid [[ thread_position_in_grid ]])
{
    int kernel_w = dimensions[0];
    int kernel_h = dimensions[1];
    int iter_w = 2 * kernel_w + 1;
    int iter_h = 2 * kernel_h + 1;

    float4 inColor = float4(0.0f);
    for (int i = 0; i < iter_w; i++) {
        for (int j = 0; j < iter_h; j++) {
            uint2 position = uint2(gid.x - kernel_w + i, gid.y - kernel_h + j);
            inColor += inTexture.read(position);
        }
    }
    inColor = inColor / (float)(iter_w * iter_h);

    outTexture.write(inColor, gid);
}

/// Horizontal pass of a separable box filter; half-size is `dimensions[0]`.
kernel void kernel_box_horizon(texture2d<float, access::read> inTexture [[ texture(0) ]],
                               texture2d<float, access::write> outTexture [[ texture(1) ]],
                               constant int* dimensions [[ buffer(1) ]],
                               uint2 gid [[ thread_position_in_grid ]])
{
    int kernel_w = dimensions[0];
    int iter_w = 2 * kernel_w + 1;

    float4 inColor = float4(0.0f);
    for (int i = 0; i < iter_w; i++) {
        uint2 position = uint2(gid.x - kernel_w + i, gid.y);
        inColor += inTexture.read(position);
    }
    inColor = inColor / (float)iter_w;

    outTexture.write(inColor, gid);
}

/// Vertical pass of a separable box filter; half-size is `dimensions[1]`.
kernel void kernel_box_vertical(texture2d<float, access::read> inTexture [[ texture(0) ]],
                                texture2d<float, access::write> outTexture [[ texture(1) ]],
                                constant int* dimensions [[ buffer(1) ]],
                                uint2 gid [[ thread_position_in_grid ]])
{
    int kernel_h = dimensions[1];
    int iter_h = 2 * kernel_h + 1;

    float4 inColor = float4(0.0f);
    for (int j = 0; j < iter_h; j++) {
        uint2 position = uint2(gid.x, gid.y - kernel_h + j);
        inColor += inTexture.read(position);
    }
    inColor = inColor / (float)(iter_h);

    outTexture.write(inColor, gid);
}

/// Horizontal dilation: component-wise maximum over `2 * dimensions[0] + 1`
/// pixels centred on `gid`. The running maximum starts at 0.
kernel void kernel_dilate_horizon(texture2d<float, access::read> inTexture [[ texture(0) ]],
                                  texture2d<float, access::write> outTexture [[ texture(1) ]],
                                  constant int* dimensions [[ buffer(1) ]],
                                  uint2 gid [[ thread_position_in_grid ]])
{
    int kernel_w = dimensions[0];
    int iter_w = 2 * kernel_w + 1;

    float4 inColor = float4(0.0f);
    for (int i = 0; i < iter_w; i++) {
        uint2 position = uint2(gid.x - kernel_w + i, gid.y);
        float4 color = inTexture.read(position);
        inColor = max(color, inColor);
    }

    outTexture.write(inColor, gid);
}

/// Vertical dilation: component-wise maximum over `2 * dimensions[1] + 1`
/// pixels centred on `gid`. The running maximum starts at 0.
kernel void kernel_dilate_vertical(texture2d<float, access::read> inTexture [[ texture(0) ]],
                                   texture2d<float, access::write> outTexture [[ texture(1) ]],
                                   constant int* dimensions [[ buffer(1) ]],
                                   uint2 gid [[ thread_position_in_grid ]])
{
    int kernel_h = dimensions[1];
    int iter_h = 2 * kernel_h + 1;

    float4 inColor = float4(0.0f);
    for (int j = 0; j < iter_h; j++) {
        uint2 position = uint2(gid.x, gid.y - kernel_h + j);
        float4 color = inTexture.read(position);
        inColor = max(color, inColor);
    }

    outTexture.write(inColor, gid);
}