C doesn't have a single clear way to handle errors.
The tutorials out there are pretty much garbage too.
So for this post, we are going to work with the toy example of a function that parses natural numbers from a string and go through the different approaches.
Code samples can be found in a compilable state here.
This might sound silly, but how often are you really going to run out of memory?
If an error condition is rare enough, you can always just bury your head in the sand and choose to ignore the possibility.
This can make code a lot prettier, but at the cost of robustness.
#include <stdio.h>
/*
 * Parses a string of base 10 digits into an int.
 *
 * This is the "ostrich" version: the input is deliberately not validated.
 * Non-digit characters are folded into the result as garbage, and an input
 * with enough digits overflows the int, so only use it on trusted input.
 *
 * Returns the parsed value; an empty string yields 0.
 */
int parse_natural_base_10_number(const char* s) {
    int parsed = 0;
    for (size_t i = 0; s[i] != '\0'; i++) {
        /* Shift the digits seen so far one place left, then add the new one. */
        parsed *= 10;
        parsed += s[i] - '0';
    }
    return parsed;
}
/*
 * Runs the unchecked parser over a few inputs, including invalid ones, and
 * prints whatever value comes back.
 */
int main() {
    printf("Expecting garbage or crash on bad values\n");
    const char* examples[] = { "10", "foo", "42", "" };
    /* Derive the count from the array so the two cannot drift apart. */
    size_t example_count = sizeof examples / sizeof examples[0];
    for (size_t i = 0; i < example_count; i++) {
        const char* example = examples[i];
        int parsed = parse_natural_base_10_number(example);
        printf("parsed: %d\n", parsed);
    }
    return 0;
}
Expecting garbage or crash on bad values
parsed: 10
parsed: 6093
parsed: 42
parsed: 0
A real world example of this can be seen with the firmware for flipper devices' use of malloc
.
Sometimes errors aren't practically recoverable. Most applications should probably just give up when malloc
returns NULL
.
If you are sure that there isn't a way to recover from an error condition and that a caller won't want to handle it in any other way, you can just print a message saying what went wrong and exit the program.
#include <stdio.h>
#include <stdlib.h>
/*
 * Parses a string of base 10 digits into an int.
 *
 * A non-digit character is treated as unrecoverable: a message naming the
 * character and the whole input is printed, and the process exits with
 * status 1. The function therefore only returns for all-digit input (an
 * empty string yields 0).
 */
int parse_natural_base_10_number(const char* s) {
    int parsed = 0;
    for (size_t i = 0; s[i] != '\0'; i++) {
        if (s[i] < '0' || s[i] > '9') {
            printf(
                "Got a bad character ('%c') in %s, crashing.",
                s[i],
                s
            );
            exit(1);
        }
        else {
            parsed *= 10;
            parsed += s[i] - '0';
        }
    }
    return parsed;
}
/*
 * Parses each example in turn. The last example is invalid, so the parser
 * terminates the process before the loop finishes.
 */
int main() {
    const char* examples[] = { "10", "42", "foo" };
    size_t example_count = sizeof examples / sizeof examples[0];
    for (size_t i = 0; i < example_count; i++) {
        const char* example = examples[i];
        int parsed = parse_natural_base_10_number(example);
        printf("parsed: %d\n", parsed);
    }
    return 0;
}
parsed: 10
parsed: 42
Got a bad character ('f') in foo, crashing.
You can see this approach in the code of OpenBLAS.
If the function normally would return a natural number, then you can use a negative number to indicate a failure. This is applicable both to our toy example and cases like returning the number of bytes read from a file.
If there are different kinds of errors for this sort of case you could also use specific negative numbers to indicate the different categories.
#include <stdio.h>
/*
 * Parses a string of base 10 digits into an int.
 *
 * Natural numbers are never negative, so a negative return value is free to
 * act as the failure signal.
 *
 * Returns the parsed value, or -1 if s contains a non-digit character.
 * An empty string yields 0.
 */
int parse_natural_base_10_number(const char* s) {
    int parsed = 0;
    for (size_t i = 0; s[i] != '\0'; i++) {
        if (s[i] < '0' || s[i] > '9') {
            return -1;
        }
        else {
            parsed *= 10;
            parsed += s[i] - '0';
        }
    }
    return parsed;
}
/*
 * Parses each example and reports success or failure based on the sign of
 * the returned value.
 */
int main() {
    const char* examples[] = { "10", "foo", "42" };
    size_t example_count = sizeof examples / sizeof examples[0];
    for (size_t i = 0; i < example_count; i++) {
        const char* example = examples[i];
        int parsed = parse_natural_base_10_number(example);
        if (parsed < 0) {
            printf("failed: %s\n", example);
        }
        else {
            printf("worked: %d\n", parsed);
        }
    }
    return 0;
}
worked: 10
failed: foo
worked: 42
You can see examples of this in the Linux kernel.
If the function would normally return a pointer, then you can use NULL
to indicate that something went wrong.
Most functions that would be returning pointers will be doing heap allocation in order for that to be sound, so this scheme is likely not applicable when you want to avoid allocations.
Also, let's be real, it feels silly to heap allocate an int.
#include <stdio.h>
#include <stdlib.h>
/*
 * Parses a string of base 10 digits into a heap allocated int.
 *
 * Returns a pointer to the parsed value, which the caller must release with
 * free(), or NULL if s contains a non-digit character or the allocation
 * fails. An empty string yields a pointer to 0.
 */
int* parse_natural_base_10_number(const char* s) {
    int parsed = 0;
    for (size_t i = 0; s[i] != '\0'; i++) {
        if (s[i] < '0' || s[i] > '9') {
            return NULL;
        }
        else {
            parsed *= 10;
            parsed += s[i] - '0';
        }
    }
    int* result = malloc(sizeof *result);
    if (result == NULL) {
        /* NULL already means "failed" here, so out-of-memory fits the contract. */
        return NULL;
    }
    *result = parsed;
    return result;
}
/*
 * Parses each example, reports the outcome and releases the returned
 * allocation.
 */
int main() {
    const char* examples[] = { "10", "foo", "42" };
    size_t example_count = sizeof examples / sizeof examples[0];
    for (size_t i = 0; i < example_count; i++) {
        const char* example = examples[i];
        int* parsed = parse_natural_base_10_number(example);
        if (parsed == NULL) {
            printf("failed: %s\n", example);
        }
        else {
            printf("worked: %d\n", *parsed);
        }
        /* free(NULL) is a no-op, so the failure case needs no special handling. */
        free(parsed);
    }
    return 0;
}
worked: 10
failed: foo
worked: 42
A real world example of this scheme is malloc
. If malloc fails to allocate memory, then instead of returning a pointer to newly allocated memory it will return a null pointer.
One of the less obvious things you can do in C is to make one or more of a function's arguments "out params". This means that it is part of the contract of the function that it will write into the memory behind a pointer.
If a function can fail, a natural translation of this can be to return a boolean indicating whether it succeeded and pass an out param that you only inspect when true is returned.
#include <stdio.h>
#include <stdbool.h>
/*
 * Parses a string of base 10 digits into an int.
 *
 * s:   NUL terminated input.
 * out: receives the parsed value; written only on success.
 *
 * Returns true on success, or false if s contains a non-digit character, in
 * which case *out is left untouched. An empty string parses as 0.
 */
bool parse_natural_base_10_number(const char* s, int* out) {
    int parsed = 0;
    for (size_t i = 0; s[i] != '\0'; i++) {
        if (s[i] < '0' || s[i] > '9') {
            return false;
        }
        else {
            parsed *= 10;
            parsed += s[i] - '0';
        }
    }
    *out = parsed;
    return true;
}
/*
 * Parses each example, reading the out param only when the parser reports
 * success.
 */
int main() {
    const char* examples[] = { "10", "foo", "42" };
    size_t example_count = sizeof examples / sizeof examples[0];
    for (size_t i = 0; i < example_count; i++) {
        const char* example = examples[i];
        int parsed;
        bool success = parse_natural_base_10_number(
            example,
            &parsed
        );
        if (!success) {
            printf("failed: %s\n", example);
        }
        else {
            printf("worked: %d\n", parsed);
        }
    }
    return 0;
}
worked: 10
failed: foo
worked: 42
This is done pretty regularly in Windows.
A boolean can only indicate that something succeeded or failed. If you want to know why something failed then substituting an enum for the boolean is a pretty natural mechanism.
#include <stdio.h>
/* Outcome of parsing a natural number: success, or the category of failure. */
enum ParseNaturalNumberResult {
    PARSE_NATURAL_SUCCESS,
    PARSE_NATURAL_EMPTY_STRING,
    PARSE_NATURAL_BAD_CHARACTER
};
/*
 * Parses a string of base 10 digits into an int.
 *
 * s:   NUL terminated input.
 * out: receives the parsed value; written only on success.
 *
 * Returns PARSE_NATURAL_SUCCESS, PARSE_NATURAL_EMPTY_STRING when s has no
 * characters, or PARSE_NATURAL_BAD_CHARACTER when s contains a non-digit.
 */
enum ParseNaturalNumberResult parse_natural_base_10_number(
    const char* s,
    int* out
) {
    if (s[0] == '\0') {
        return PARSE_NATURAL_EMPTY_STRING;
    }
    int parsed = 0;
    for (size_t i = 0; s[i] != '\0'; i++) {
        if (s[i] < '0' || s[i] > '9') {
            return PARSE_NATURAL_BAD_CHARACTER;
        }
        else {
            parsed *= 10;
            parsed += s[i] - '0';
        }
    }
    *out = parsed;
    return PARSE_NATURAL_SUCCESS;
}
/*
 * Parses each example and prints a message specific to the result category.
 */
int main() {
    const char* examples[] = { "10", "foo", "42", "" };
    size_t example_count = sizeof examples / sizeof examples[0];
    for (size_t i = 0; i < example_count; i++) {
        const char* example = examples[i];
        int parsed;
        switch (parse_natural_base_10_number(example, &parsed)) {
            case PARSE_NATURAL_SUCCESS:
                printf("worked: %d\n", parsed);
                break;
            case PARSE_NATURAL_EMPTY_STRING:
                printf("failed because empty string\n");
                break;
            case PARSE_NATURAL_BAD_CHARACTER:
                printf("failed because bad char: %s\n", example);
                break;
        }
    }
    return 0;
}
worked: 10
failed because bad char: foo
worked: 42
failed because empty string
While an enum can give you the "category" of an error, it doesn't have a place for recording any more specific information than that.
For example, a pretty reasonable thing to want to know if you run into an unexpected character is where in the string that character was found.
By adding a second out param you can have a place to put this information.
#include <stdio.h>
#include <stdbool.h>
/*
 * Parses a string of base 10 digits into an int.
 *
 * s:             NUL terminated input.
 * out_value:     receives the parsed value; written only on success.
 * out_bad_index: receives the index of the first non-digit character;
 *                written only on failure.
 *
 * Returns true on success and false if s contains a non-digit character.
 * An empty string parses as 0.
 */
bool parse_natural_base_10_number(
    const char* s,
    int* out_value,
    size_t* out_bad_index
) {
    int parsed = 0;
    for (size_t i = 0; s[i] != '\0'; i++) {
        if (s[i] < '0' || s[i] > '9') {
            *out_bad_index = i;
            return false;
        }
        else {
            parsed *= 10;
            parsed += s[i] - '0';
        }
    }
    *out_value = parsed;
    return true;
}
/*
 * Parses each example. On failure the input is echoed, with a caret on the
 * next line pointing at the offending character.
 */
int main() {
    const char* examples[] = { "10", "foo", "42", "12a34" };
    size_t example_count = sizeof examples / sizeof examples[0];
    for (size_t i = 0; i < example_count; i++) {
        const char* example = examples[i];
        int parsed;
        size_t bad_index;
        bool success = parse_natural_base_10_number(
            example,
            &parsed,
            &bad_index
        );
        if (!success) {
            /*
             * The second line is padded by the width of the "failed: "
             * prefix so that the caret lines up under the echoed input.
             */
            printf("failed: %s\n        ", example);
            for (size_t j = 0; j < bad_index; j++) {
                printf(" ");
            }
            printf("^☹️\n");
        }
        else {
            printf("worked: %d\n", parsed);
        }
    }
    return 0;
}
worked: 10
failed: foo
^☹️
worked: 42
failed: 12a34
^☹️
A natural extension of the previous two patterns is that if you have multiple ways in which a computation can fail, you can return an enum with each way and take an out param for each way that would require data.
#include <stdio.h>
#include <string.h>
/* Outcome of parsing a natural number: success, or the category of failure. */
enum ParseNaturalNumberResult {
    PARSE_NATURAL_SUCCESS,
    PARSE_NATURAL_EMPTY_STRING,
    PARSE_NATURAL_BAD_CHARACTER,
    PARSE_NUMBER_TOO_BIG
};
/* Detail filled in when parsing stops with PARSE_NATURAL_BAD_CHARACTER. */
struct BadCharacterInfo {
/* Index in the input of the first non-digit character. */
size_t index;
};
/* Detail filled in when parsing stops with PARSE_NUMBER_TOO_BIG. */
struct TooBigInfo {
/* Characters left in the input, counting from the digit that overflowed. */
size_t remaining_characters;
};
enum ParseNaturalNumberResult parse_natural_base_10_number(
const char* s,
int* out_value,
struct BadCharacterInfo* bad_character_info,
struct TooBigInfo* too_big_info
) {
if (s[0] == '\0') {
return PARSE_NATURAL_EMPTY_STRING;
}
int parsed = 0;
for (size_t i = 0; s[i] != '\0'; i++) {
if (s[i] < '0' || s[i] > '9') {
->index = i;
bad_character_inforeturn PARSE_NATURAL_BAD_CHARACTER;
}
else {
int digit = s[i] - '0';
if (__builtin_smul_overflow(parsed, 10, &parsed) ||
(parsed, digit, &parsed)) {
__builtin_sadd_overflow->remaining_characters = strlen(s) - i;
too_big_inforeturn PARSE_NUMBER_TOO_BIG;
}
}
}
*out_value = parsed;
return PARSE_NATURAL_SUCCESS;
}
/*
 * Parses each example and prints a message for the result category, using
 * the matching out param for the extra detail.
 */
int main() {
    const char* examples[] = { "10",
                               "foo",
                               "42",
                               "",
                               "99999999999999" };
    size_t example_count = sizeof examples / sizeof examples[0];
    for (size_t i = 0; i < example_count; i++) {
        const char* example = examples[i];
        int parsed;
        struct BadCharacterInfo bad_character_info;
        struct TooBigInfo too_big_info;
        switch (parse_natural_base_10_number(
            example,
            &parsed,
            &bad_character_info,
            &too_big_info
        )) {
            case PARSE_NATURAL_SUCCESS:
                printf("worked: %d\n", parsed);
                break;
            case PARSE_NATURAL_EMPTY_STRING:
                printf("failed because empty string\n");
                break;
            case PARSE_NATURAL_BAD_CHARACTER:
                printf(
                    "failed because bad char at index %zu: %s\n",
                    bad_character_info.index,
                    example
                );
                break;
            case PARSE_NUMBER_TOO_BIG:
                printf(
                    "number was too big. had %zu digits left: %s\n",
                    too_big_info.remaining_characters,
                    example
                );
                break;
        }
    }
    return 0;
}
worked: 10
failed because bad char at index 0: foo
worked: 42
failed because empty string
number was too big. had 5 digits left: 99999999999999
Another option is to, on an error, set a thread local static variable. This avoids needing to propagate an error explicitly all the way up the stack from where it occurs and makes the "normal" api of the function look as neat and clean as the ostrich or crash approaches.
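Once you set the thread local static value, either you return whatever value you have and rely on the caller to check the thread local static value, or you return a predictable sentinel (
NULL
, a negative number, etc) which hints to the programmer to check the thread local static value.
#include <stdio.h>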
#include <stdbool.h>
/*
 * Per-thread error flag for the parser. It starts out false, the parser sets
 * it to true when it meets a non-digit character, and the caller is expected
 * to test it after each call and reset it.
 */
_Thread_local static bool parse_number_error = false;
/*
 * Parses a string of base 10 digits into an int.
 *
 * Failure is reported out of band: a non-digit character sets the thread
 * local parse_number_error flag. Parsing then carries on, skipping that
 * character, so the returned value is meaningless whenever the flag is set.
 * The flag is never cleared here; the caller must reset it.
 */
int parse_natural_base_10_number(const char* s) {
    int parsed = 0;
    for (size_t i = 0; s[i] != '\0'; i++) {
        if (s[i] < '0' || s[i] > '9') {
            parse_number_error = true;
        }
        else {
            parsed *= 10;
            parsed += s[i] - '0';
        }
    }
    return parsed;
}
/*
 * Parses each example, checking the thread local error flag after every call
 * and resetting it before the next one.
 */
int main() {
    const char* examples[] = { "10", "42", "foo" };
    size_t example_count = sizeof examples / sizeof examples[0];
    for (size_t i = 0; i < example_count; i++) {
        const char* example = examples[i];
        int parsed = parse_natural_base_10_number(example);
        if (parse_number_error) {
            /* Clear the flag so it does not leak into the next call. */
            parse_number_error = false;
            printf("error: %s\n", example);
        }
        else {
            printf("parsed: %d\n", parsed);
        }
    }
    return 0;
}
parsed: 10
parsed: 42
error: foo
A good deal of built-in APIs use a shared, thread-local, modifiable int called errno
and if they fail they will set it to a non-zero value. There are then functions like perror
which can extract messages from the specific error code.
You are technically allowed to use errno
too, as long as your error conditions can fit into its int encoding.
This is my least favorite of the patterns.
The next approach is what languages like Rust
emulate under the hood of their enums.
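You make a struct containing two things: a tag (typically an enum) that records which case you have, and a union with one field for the data of each case.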
Then you return this struct directly. The tag tells the caller which field of the union is safe to access and consequently what the "result" of the computation was.
Compared to the out param solutions, where normally you would allocate each possible out param on the stack, this will compact the required space by way of the union.
It also uses regular return values and checking the tag before checking the union is a relatively standard process.
Unfortunately it will also make code more verbose than most of the other options.
#include <stdio.h>
/* Tag saying which case of a ParseNaturalNumberResult is populated. */
enum ParseNaturalNumberResultKind {
    PARSE_NATURAL_SUCCESS,
    PARSE_NATURAL_EMPTY_STRING,
    PARSE_NATURAL_BAD_CHARACTER
};
/* Payload of the PARSE_NATURAL_BAD_CHARACTER case. */
struct BadCharacter {
/* Index in the input of the first non-digit character. */
size_t index;
/* The offending character itself. */
char c;
};
/*
 * Tagged union returned by the parser. Check kind first; it determines which
 * member of data, if any, is meaningful.
 */
struct ParseNaturalNumberResult {
enum ParseNaturalNumberResultKind kind;
union {
/* Parsed value; set when kind is PARSE_NATURAL_SUCCESS. */
int success;
/* Failure detail; set when kind is PARSE_NATURAL_BAD_CHARACTER. */
struct BadCharacter bad_character;
} data;
};
/*
 * Parses a string of base 10 digits and returns the outcome as a tagged
 * union.
 *
 * The result's kind is PARSE_NATURAL_EMPTY_STRING for an empty input (no
 * payload), PARSE_NATURAL_BAD_CHARACTER with data.bad_character describing
 * the first non-digit, or PARSE_NATURAL_SUCCESS with the value in
 * data.success.
 */
struct ParseNaturalNumberResult parse_natural_base_10_number(
    const char* s
) {
    if (s[0] == '\0') {
        struct ParseNaturalNumberResult result = {
            .kind = PARSE_NATURAL_EMPTY_STRING
        };
        return result;
    }
    int parsed = 0;
    for (size_t i = 0; s[i] != '\0'; i++) {
        if (s[i] < '0' || s[i] > '9') {
            struct ParseNaturalNumberResult result = {
                .kind = PARSE_NATURAL_BAD_CHARACTER,
                .data = {
                    .bad_character = {
                        .index = i,
                        .c = s[i]
                    }
                }
            };
            return result;
        }
        else {
            parsed *= 10;
            parsed += s[i] - '0';
        }
    }
    struct ParseNaturalNumberResult result = {
        .kind = PARSE_NATURAL_SUCCESS,
        .data = {
            .success = parsed
        }
    };
    return result;
}
int main() {
const char* examples[] = { "10", "foo", "42", "12a34" };
for (size_t i = 0; i < 4; i++) {
const char* example = examples[i];
struct ParseNaturalNumberResult result =
(example);
parse_natural_base_10_numberswitch (result.kind) {
case PARSE_NATURAL_SUCCESS:
("worked: %d\n", result.data.success);
printfbreak;
case PARSE_NATURAL_EMPTY_STRING:
("got empty string");
printfbreak;
case PARSE_NATURAL_BAD_CHARACTER:
("failed: %s\n ", example);
printffor (size_t j = 0;
< result.data.bad_character.index;
j ++) {
j(" ");
printf}
(
printf"^☹️ '%c' is not good\n",
.data.bad_character.c
result);
break;
}
}
return 0;
}
worked: 10
failed: foo
^☹️ 'f' is not good
worked: 42
failed: 12a34
^☹️ 'a' is not good
This is a very common pattern, especially when writing programs like language parsers where it is hard to avoid functions which can return one of many differently shaped possibilities. There are some examples here in the curl codebase of using the general mechanism for the result of parsing.
The last one here is probably the toughest sell. It is more verbose than the other approaches, requires heap allocation, and requires a non-trivial degree of comfortableness in C. It does have its perks though.
First, make a "vtable". This will be a struct containing pointers to functions which take as their first argument a void pointer.
For errors, let's say the things we will want to do are produce an error message and dispose of any allocated resources afterward.
/*
 * "vtable" of operations an error supports. Each function receives the
 * error's own data as a void pointer in its first argument.
 */
struct ErrorOps {
/* Produces a message describing the error. */
char* (*describe)(const void*);
/* Disposes of the error's resources; may be NULL when there are none. */
void (*free)(void*);
};
Then make a struct which contains this vtable as well as a pointer to the memory that is meant to be passed as the first argument to each function within.
/* An error value: the operations table plus the data those operations act on. */
struct Error {
struct ErrorOps ops;
/* Passed as the first argument to every function in ops. */
void* self;
};
You can then make some helpers for doing the calling.
/*
 * Produces the description of an error by dispatching through its vtable.
 * Returns whatever the error's describe operation returns.
 */
char* error_describe(struct Error error) {
    char* (*describe)(const void*) = error.ops.describe;
    const void* target = error.self;
    return describe(target);
}
/*
 * Disposes of an error's resources through its vtable. Errors whose free
 * operation is NULL own nothing, so the call is a no-op for them.
 */
void error_free(struct Error error) {
    if (error.ops.free != NULL) {
        error.ops.free(error.self);
    }
}
Then for each error condition, define how each operation should work as well as any helper functions and structs that you need.
/*
 * Describe operation for the empty-string error.
 *
 * self is unused because this error carries no data. Returns a newly
 * allocated message that the caller must free(), or NULL if it could not be
 * allocated.
 */
char* empty_string_describe(const void* self) {
    (void) self;
    char* result;
    /* asprintf leaves result unspecified on failure, so never return it then. */
    if (asprintf(&result, "Empty string is not good") < 0) {
        return NULL;
    }
    return result;
}
/* Operations for the empty-string error; it has no free operation. */
const struct ErrorOps empty_string_error_ops = {
.describe = empty_string_describe,
.free = NULL
};
struct Error empty_string_error() {
struct Error result = {
.ops = empty_string_error_ops,
.self = NULL
};
return result;
}
/* Data carried by a bad-character error. */
struct BadCharacterError {
/* The error's own heap copy of the input that failed to parse. */
char* source;
/* Index within source of the offending character. */
size_t index;
};
/*
 * Describe operation for bad-character errors.
 *
 * self points at a struct BadCharacterError. Returns a newly allocated
 * message naming the input, the index and the offending character, which the
 * caller must free(), or NULL if the message could not be allocated.
 */
char* bad_character_describe(const void* self) {
    const struct BadCharacterError* info = self;
    char* result;
    /* asprintf leaves result unspecified on failure, so never return it then. */
    if (asprintf(
            &result,
            "Bad character in %s at index %zu: '%c'",
            info->source,
            info->index,
            info->source[info->index]
        ) < 0) {
        return NULL;
    }
    return result;
}
/*
 * Free operation for bad-character errors: releases the copied source string
 * and then the error data itself. A NULL self is ignored.
 */
void bad_character_free(void* self) {
    struct BadCharacterError* info = self;
    if (info == NULL) {
        return;
    }
    free(info->source);
    free(info);
}
/* Operations for bad-character errors. */
const struct ErrorOps bad_character_error_ops = {
.describe = bad_character_describe,
.free = bad_character_free
};
struct Error bad_character_error(const char* source, size_t index) {
struct BadCharacterError* error =
(sizeof (struct BadCharacterError));
malloc
char* source_clone = calloc(strlen(source) + 1, sizeof (char));
(source_clone, source);
strcpy->source = source_clone;
error
->index = index;
error
struct Error result = {
.ops = bad_character_error_ops,
.self = error
};
return result;
}
Then, by any of the previous schemes, return one of these error structs if something goes wrong.
/*
 * Result of parsing. The outer success flag selects which member of data is
 * meaningful.
 */
struct ParseNaturalNumberResult {
bool success;
union {
/* Parsed value; set when the outer success flag is true. */
int success;
/* Error value; set when the outer success flag is false. */
struct Error error;
} data;
};
/*
 * Parses a string of base 10 digits.
 *
 * On success the result's success flag is true and data.success holds the
 * value. Otherwise the flag is false and data.error holds an empty-string or
 * bad-character error.
 */
struct ParseNaturalNumberResult parse_natural_base_10_number(
    const char* s
) {
    if (s[0] == '\0') {
        struct ParseNaturalNumberResult result = {
            .success = false,
            .data = {
                .error = empty_string_error()
            }
        };
        return result;
    }
    int parsed = 0;
    for (size_t i = 0; s[i] != '\0'; i++) {
        if (s[i] < '0' || s[i] > '9') {
            struct ParseNaturalNumberResult result = {
                .success = false,
                .data = {
                    .error = bad_character_error(s, i)
                }
            };
            return result;
        }
        else {
            parsed *= 10;
            parsed += s[i] - '0';
        }
    }
    struct ParseNaturalNumberResult result = {
        .success = true,
        .data = {
            .success = parsed
        }
    };
    return result;
}
int main() {
const char* examples[] = { "10", "foo", "42", "12a34" };
for (size_t i = 0; i < 4; i++) {
const char* example = examples[i];
struct ParseNaturalNumberResult result =
(example);
parse_natural_base_10_numberif (!result.success) {
char* description = error_describe(result.data.error);
("error: %s\n", description);
printf(description);
free(result.data.error);
error_free}
else {
("success: %d\n", result.data.success);
printf}
}
return 0;
}
success: 10
error: Bad character in foo at index 0: 'f'
success: 42
error: Bad character in 12a34 at index 2: 'a'
So... why do this?
Because it is easy to compose this kind of error.
Say we extended our problem such that we were reading a number from a file. Now the set of things that can go wrong includes all sorts of file reading related errors.
It is a lot easier to include those errors if there is a way to treat them the "same" as the ones encountered during parsing. This accomplishes that.
/* Data carried by a file operation error. */
struct FileOperationError {
/* Error code for the failure; the describe operation passes it to strerror. */
int error_number;
};
/*
 * Describe operation for file operation errors.
 *
 * self points at a struct FileOperationError. Returns a newly allocated copy
 * of the strerror text for the stored error number, which the caller must
 * free(), or NULL if the copy could not be allocated.
 */
char* file_operation_error_describe(const void* self) {
    const struct FileOperationError* info = self;
    char* result;
    /* asprintf leaves result unspecified on failure, so never return it then. */
    if (asprintf(&result, "%s", strerror(info->error_number)) < 0) {
        return NULL;
    }
    return result;
}
/*
 * Free operation for file operation errors. The error data is a single
 * allocation, so one free() releases it.
 */
void file_operation_error_free(void* self) {
    free(self);
}
/* Operations for file operation errors. */
const struct ErrorOps file_operation_error_ops = {
.describe = file_operation_error_describe,
.free = file_operation_error_free
};
struct Error file_operation_error(int error_number) {
struct FileOperationError* file_operation_error =
(sizeof (struct FileOperationError));
malloc->error_number = error_number;
file_operation_error
struct Error result = {
.ops = file_operation_error_ops,
.self = file_operation_error
};
return result;
}
/*
 * Result of reading a number from a file. The outer success flag selects
 * which member of data is meaningful.
 */
struct ReadNumberFromFileResult {
bool success;
union {
/* Parsed value; set when the outer success flag is true. */
int success;
/* File or parse error; set when the outer success flag is false. */
struct Error error;
} data;
};
struct ReadNumberFromFileResult read_number_from_file(
const char* path
) {
FILE* fp = fopen(path, "r");
if (fp == NULL) {
struct ReadNumberFromFileResult result = {
.success = false,
.data = {
.error = file_operation_error(errno)
}
};
= 0;
errno (fp);
fclosereturn result;
}
// Max positive int is only 10 characters big in base 10
char first_line[12];
(first_line, sizeof (first_line), fp);
fgets
if (ferror(fp)) {
struct ReadNumberFromFileResult result = {
.success = false,
.data = {
.error = file_operation_error(errno)
}
};
= 0;
errno (fp);
fclosereturn result;
}
struct ParseNaturalNumberResult parse_result =
(first_line);
parse_natural_base_10_numberif (!parse_result.success) {
struct ReadNumberFromFileResult result = {
.success = false,
.data = {
.error = parse_result.data.error
}
};
(fp);
fclosereturn result;
}
struct ReadNumberFromFileResult result = {
.success = true,
.data = {
.success = parse_result.data.success
}
};
(fp);
fclosereturn result;
}
int main() {
const char* examples[] = { "../ex1", "../ex2", "../ex3" };
for (size_t i = 0; i < 3; i++) {
const char* example_file = examples[i];
struct ReadNumberFromFileResult result =
(example_file);
read_number_from_fileif (!result.success) {
char* description = error_describe(result.data.error);
("error: %s\n", description);
printf(description);
free(result.data.error);
error_free}
else {
("success: %d\n", result.data.success);
printf}
}
return 0;
}
success: 8
error: Bad character in abc at index 0: 'a'
error: No such file or directory
This can all be done with tagged unions as well, so it is a judgement call. This sort of pattern definitely has more appeal when the language being used makes it convenient.
Important to note that I am not a professional C programmer. I fully expect to be shown the error of my ways in the comments below.