 # Branchless Line Drawing

When it comes to drawing non-antialised lines, undoubtedly the very first approach that comes to mind is of course Bresenham’s line drawing algorithm.

I always dreaded implementing it, simply because it’s full of special cases; and special cases generally mean branches, which are the last things you’d want anywhere near tight loops.

A couple of weeks ago, I had a shower thought. Why not just use linear interpolation and simply interpolate between the two points?

Naturally, I was almost certain that someone, somewhere, already thought of this and perhaps even patented it as folks do with these things.

After a cursory search on the information super highway I confirmed that my gut instinct was right, however the implementations I found still used branches and superfluous divisions, so I decided to roll and share my own flavor.

``````void image_render_line(
image_t *image,
const point_t p0,
const point_t p1,
const color_t color
)
{
color_t *pixels = image->pixels;
const int w = image->w;

const float x0 = p0.x;
const float y0 = p0.y;
const float dx = p1.x - x0;
const float dy = p1.y - y0;

const float step = 1.0f / fmaxf(fabsf(dx), fabsf(dy));

for(float t = 0.0f; t < 1.0f; t += step)
{
const int x = x0 + dx * t;
const int y = y0 + dy * t;

pixels[x + y * w] = color;
}
}
``````

Let’s also take a peek at the disassembly, just for funsies no less.

`````` ; cc -O2 -ffast-math -masm=intel -S line.c
image_render_line:
movq	r8, xmm0
movq	rax, xmm1
movdqa	xmm4, xmm1
mov	ecx, edi
shr	r8, 32
shr	rax, 32
subss	xmm4, xmm0
movdqa	xmm7, xmm0
movd	xmm3, r8d
movd	xmm1, eax
mov	edi, edx
movss	xmm5, DWORD PTR .LC2[rip]
subss	xmm1, xmm3
movss	xmm0, DWORD PTR .LC1[rip]
movaps	xmm6, xmm5
movaps	xmm2, xmm1
andps	xmm2, xmm0
andps	xmm0, xmm4
maxss	xmm0, xmm2
divss	xmm6, xmm0
pxor	xmm0, xmm0
.L2:
movaps	xmm2, xmm1
mulss	xmm2, xmm0
cvttss2si	eax, xmm2
movaps	xmm2, xmm4
mulss	xmm2, xmm0
imul	eax, ecx
cvttss2si	edx, xmm2
comiss	xmm5, xmm0
cdqe
mov	DWORD PTR [rsi+rax*4], edi
ja	.L2
ret
``````

What’s the catch? There must be some caveats, right?

First of all, there is no clipping, so you must ensure that both points are within the viewport. In other words, the following must be `true`:

``````p0.x >= 0 && p0.x <= w - 1
p0.y >= 0 && p0.y <= h - 1
p1.x >= 0 && p1.x <= w - 1
p1.y >= 0 && p1.y <= h - 1
``````

Second, it doesn’t ensure that `t = 1.0f` which means that line is not guaranteed reach the coordinates of the endpoint.

This of course could be addressed by drawing one additional point or simply changing `t < 1.0f` to `t <= 1.0f` and then making sure that `t` never goes beyond `1.0f`.

Third, it doesn’t handle the case when both points are in the same position, in other words `p0 == p1`.

This could be solved as well relatively easily, by clamping the step value before the division, like illustrated below.

``````const float step = 1.0f / fmax(fmax(fabs(dx), fabs(dy)), 1.0f);
``````

That’s it. And now, here’s an actual full fledged example, which outputs a lovely wireframe render of Suzanne.

``````/*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <math.h>

#ifndef IMAGE_WIDTH
#define IMAGE_WIDTH 1024
#endif

#ifndef IMAGE_HEIGHT
#define IMAGE_HEIGHT 1024
#endif

#define IMAGE_SIZE IMAGE_WIDTH * IMAGE_HEIGHT

typedef union
{
uint32_t bgra;
struct
{
uint8_t b : 8;
uint8_t g : 8;
uint8_t r : 8;
uint8_t a : 8;
};
} color_t;

#pragma pack(push, 1)
typedef struct {
uint8_t idlength;
uint8_t colourmaptype;
uint8_t datatypecode;
uint16_t colourmaporigin;
uint16_t colourmaplength;
uint8_t colourmapdepth;
uint16_t x_origin;
uint16_t y_origin;
uint16_t width;
uint16_t height;
uint8_t bitsperpixel;
uint8_t imagedescriptor;
#pragma pack(pop)

typedef struct
{
int w;
int h;
int size;
color_t *pixels;
} image_t;

typedef struct
{
float x, y;
} point_t;

typedef struct
{
float x, y, z;
} vertex_t;

typedef struct
{
int v0, v1, v2;
} triangle_t;

typedef struct
{
vertex_t *vertices;
triangle_t *triangles;
int num_triangles;
} mesh_t;

static inline point_t vertex_to_point(
const vertex_t vertex,
const float half_w,
const float half_h
);

static void image_clear(image_t *image, const color_t color);
static bool image_save(image_t *image, const char *filename);
static void image_render_line(
image_t *image,
const point_t p0,
const point_t p1,
const color_t color
);
static void image_render_mesh(
image_t *image,
const mesh_t *mesh,
const color_t color
);

static image_t *image = &(image_t) {
.w = IMAGE_WIDTH,
.h = IMAGE_HEIGHT,
.size = IMAGE_SIZE,
.pixels = (color_t[IMAGE_SIZE]) { 0, },
.datatypecode = 2,
.width = IMAGE_WIDTH,
.height = IMAGE_HEIGHT,
.y_origin = IMAGE_HEIGHT,
.bitsperpixel = sizeof(color_t) << 3,
.imagedescriptor = 8 | (1 << 5)
}
};

static const mesh_t *mesh = &(mesh_t) {
#include "mesh.h"
};

int main(int argc, char *argv[])
{
if(argc != 2)
{
fprintf(stderr, "usage: %s output.tga\n", argv);
return EXIT_FAILURE;
}

image_clear(image, (color_t) { .a = 0xFF });

image_render_mesh(
image,
mesh,
(color_t) {
.r = 0xFF,
.g = 0xFF,
.a = 0xFF
}
);

if(!image_save(image, argv))
{
fprintf(stderr, "error: could not save image `%s'\n", argv);
return EXIT_FAILURE;
}

return EXIT_SUCCESS;
}

static inline point_t vertex_to_point(
const vertex_t vertex,
const float half_w,
const float half_h
)
{
return (point_t) {
.x = vertex.x * half_w + half_w,
.y = vertex.y * half_h + half_h
};
}

static void image_clear(image_t *image, const color_t color)
{
for(color_t *pixel = image->pixels,
*end = image->pixels + image->size; pixel != end; pixel++)
*pixel = color;
}

static bool image_save(image_t *image, const char *filename)
{
FILE *fp = fopen(filename, "wb");
if(fp == NULL)
return false;

if(fwrite(
1,
{
fclose(fp);
return false;
}

if(fwrite(
image->pixels,
sizeof(color_t),
image->size, fp) != image->size)
{
fclose(fp);
return false;
}

fclose(fp);
return true;
}

static void image_render_line(
image_t *image,
const point_t p0,
const point_t p1,
const color_t color
)
{
color_t *pixels = image->pixels;
const int w = image->w;

const float x0 = p0.x;
const float y0 = p0.y;
const float dx = p1.x - x0;
const float dy = p1.y - y0;

const float step = 1.0f / fmaxf(fabsf(dx), fabsf(dy));

for(float t = 0.0f; t < 1.0f; t += step)
{
const int x = x0 + dx * t;
const int y = y0 + dy * t;

pixels[x + y * w] = color;
}
}

static void image_render_mesh(
image_t *image,
const mesh_t *mesh,
const color_t color
)
{
const vertex_t *vertices = mesh->vertices;
const float half_w = image->w * 0.5f;
const float half_h = image->h * 0.5f;

for(const triangle_t *tri = mesh->triangles,
*end = mesh->triangles + mesh->num_triangles; tri != end; tri++)
{
const vertex_t v0 = vertices[tri->v0];
const vertex_t v1 = vertices[tri->v1];
const vertex_t v2 = vertices[tri->v2];

const point_t p0 = vertex_to_point(v0, half_w, half_h);
const point_t p1 = vertex_to_point(v1, half_w, half_h);
const point_t p2 = vertex_to_point(v2, half_w, half_h);

image_render_line(image, p0, p1, color);
image_render_line(image, p1, p2, color);
image_render_line(image, p2, p0, color);
}
}

/* vim: set ts=4 sw=4 sts=4 noet: */
`````` What a thing of beauty, isn’t it? Those lovely jaggies, just be hitting different. I think so too.

If we really wanted to do away with the `fmax`, `fabs` and the division, we could borrow the fast inverse square root implementation from Q3 and optimize things even further.

``````float Q_rsqrt(float number)
{
long i;
float x2, y;
const float threehalfs = 1.5F;

x2 = number * 0.5F;
y  = number;
i  = * ( long * ) &y; // evil floating point bit level hacking
i  = 0x5f3759df - ( i >> 1 ); // what the fuck?
y  = * ( float * ) &i;
y  = y * ( threehalfs - ( x2 * y * y ) ); // 1st iteration

return y;
}

void image_render_line(
image_t *image,
const point_t p0,
const point_t p1,
const color_t color
)
{
color_t *pixels = image->pixels;
const int w = image->w;

const float x0 = p0.x;
const float y0 = p0.y;
const float dx = p1.x - x0;
const float dy = p1.y - y0;

const float step = Q_rsqrt(dx * dx + dy * dy);

for(float t = 0.0f; t < 1.0f; t += step)
{
const int x = x0 + dx * t;
const int y = y0 + dy * t;

pixels[x + y * w] = color;
}
}
``````

2023-06-25