Skip to content

Instantly share code, notes, and snippets.

@shakna-israel
Created January 30, 2020 03:50
Show Gist options
  • Save shakna-israel/4fd31ee469274aa49f8f9793c3e71163 to your computer and use it in GitHub Desktop.
Save shakna-israel/4fd31ee469274aa49f8f9793c3e71163 to your computer and use it in GitHub Desktop.
Let's Destroy C
#ifdef EVIL_COROUTINE
// Opt-in coroutine macros: only available when EVIL_COROUTINE is defined.
// This lovely hack makes use of switch statements and the __LINE__ C macro.
// The line number of the last co_return is stored in a static `state`
// variable, and the next call switches straight to the matching case label.
// But... I imagine awful things may happen with an extra semi-colon.
// Which would be hard to debug.
#if defined(EVIL_LAMBDA) && !defined(EVIL_NO_WARN)
// And bad things happen with expression statements.
#warning "Lambda's don't play well with Coroutines. Avoid using them in the body of a coroutine."
#endif
#ifndef EVIL_NO_WARN
#warning "Coroutine's don't support nesting. It may work sometimes, other times it may explode."
#endif
// Original macro hack by Robert Elder (c) 2016. Used against their advice, but with their permission.
// coroutine():  declares the static resume point and opens the dispatch
//               switch. `state` is static, so it is shared by every call of
//               the enclosing function.
// co_return(x): records the current line as the resume point, returns x, and
//               plants the case label the next call jumps to. The label value
//               is __LINE__, so two co_return calls on one source line would
//               produce duplicate case labels.
// co_end():     closes the switch opened by coroutine().
#define coroutine() static int state=0; switch(state) { case 0:
#define co_return(x) { state=__LINE__; return x; case __LINE__:; }
#define co_end() }
#endif
#ifndef EVIL_NO_FLOW
// Flow-control keywords.
// Included by default; define EVIL_NO_FLOW to leave them out.
// Every macro is a plain token substitution, so the pieces only balance
// when used together: If/For/While open a parenthesis, `then` closes it and
// opens a brace, and `end` closes the brace.
//   e.g. If x > 0 then ... Else ... end
#define then ){
#define end }
#define If if(
#define Else } else {
#define For for(
#define While while(
// Do opens the body of a do-loop; this block defines no dedicated macro
// for the closing `} while(...);`.
#define Do do{
// Switch(x) opens a switch body (close it with `end`);
// Case(x) emits the case label for x.
#define Switch(x) switch(x){
#define Case(x) case x:
#endif
#ifndef EVIL_NO_IO
// The IO Module.
// Included by default. To pretend C is high-level.
// User wants IO, give them all the IO.
#include <stdio.h>
// display_format(x): pick the printf conversion specifier that suits x.
// Built on _Generic (a compile-time type switch), so this is C11 only,
// but who cares. The controlling expression is only inspected for its
// type, never evaluated, so a volatile object is not read here.
// stdint identifiers (inttypes.h) should be catered for by the entries below.
// Original display_format macro by Robert Gamble, (c) 2012
// used with permission.
// Expanded upon with const, volatile and const volatile spellings of every
// type, as those don't always get selected by the unqualified entry.
// (static does for obvious reasons.)
// Entries are grouped by base type; anything unlisted falls back to "%d".
#define display_format(x) _Generic((x), \
/* character types */ \
char: "%c", \
const char: "%c", \
volatile char: "%c", \
const volatile char: "%c", \
signed char: "%hhd", \
const signed char: "%hhd", \
volatile signed char: "%hhd", \
const volatile signed char: "%hhd", \
unsigned char: "%hhu", \
const unsigned char: "%hhu", \
volatile unsigned char: "%hhu", \
const volatile unsigned char: "%hhu", \
/* short */ \
signed short: "%hd", \
const signed short: "%hd", \
volatile signed short: "%hd", \
const volatile signed short: "%hd", \
unsigned short: "%hu", \
const unsigned short: "%hu", \
volatile unsigned short: "%hu", \
const volatile unsigned short: "%hu", \
/* int */ \
signed int: "%d", \
const signed int: "%d", \
volatile signed int: "%d", \
const volatile signed int: "%d", \
unsigned int: "%u", \
const unsigned int: "%u", \
volatile unsigned int: "%u", \
const volatile unsigned int: "%u", \
/* long */ \
long int: "%ld", \
const long int: "%ld", \
volatile long int: "%ld", \
const volatile long int: "%ld", \
unsigned long int: "%lu", \
const unsigned long int: "%lu", \
volatile unsigned long int: "%lu", \
const volatile unsigned long int: "%lu", \
/* long long */ \
long long int: "%lld", \
const long long int: "%lld", \
volatile long long int: "%lld", \
const volatile long long int: "%lld", \
unsigned long long int: "%llu", \
const unsigned long long int: "%llu", \
volatile unsigned long long int: "%llu", \
const volatile unsigned long long int: "%llu", \
/* floating point */ \
float: "%f", \
const float: "%f", \
volatile float: "%f", \
const volatile float: "%f", \
double: "%f", \
const double: "%f", \
volatile double: "%f", \
const volatile double: "%f", \
long double: "%Lf", \
const long double: "%Lf", \
volatile long double: "%Lf", \
const volatile long double: "%Lf", \
/* pointers: char pointers print as strings, void pointers as addresses */ \
char *: "%s", \
const char *: "%s", \
volatile char *: "%s", \
const volatile char *: "%s", \
void *: "%p", \
const void *: "%p", \
volatile void *: "%p", \
const volatile void *: "%p", \
default: "%d")
// The main printing functions.
//   display(x)       print x to stdout, formatted per display_format(x).
//   displayf(f, x)   same, but to the FILE* f.
//   displayln(x)     display(x) followed by the platform line ending.
//   displayfln(f, x) displayf(f, x) followed by the platform line ending,
//                    written to f as well (not to stdout).
// x is evaluated once: display_format only looks at its type.
#define display(x) printf(display_format(x), x)
#define displayf(f, x) fprintf(f, display_format(x), x)
// The *ln variants are wrapped in do { } while (0) so that each behaves as a
// single statement, e.g. as the un-braced body of an if. displayfln keeps the
// stream in a local so that f is evaluated only once.
// Windows has a different line ending.
#if defined(_WIN32) || defined(__WIN32) || defined(WIN32) || defined(__WIN32__) || defined(_WIN64) || defined(__WIN64) || defined(WIN64) || defined(__WIN64__) || defined(__WINNT) || defined(__WINNT__) || defined(WINNT)
#define displayln(x) do { printf(display_format(x), x); printf("%s", "\r\n"); } while (0)
#define displayfln(f, x) do { FILE *evil_stream_ = (f); fprintf(evil_stream_, display_format(x), x); fprintf(evil_stream_, "%s", "\r\n"); } while (0)
#else
#define displayln(x) do { printf(display_format(x), x); printf("%c", '\n'); } while (0)
#define displayfln(f, x) do { FILE *evil_stream_ = (f); fprintf(evil_stream_, display_format(x), x); fprintf(evil_stream_, "%c", '\n'); } while (0)
#endif
// repr_type(x): a human-readable name for the type of x, as a string literal.
// Basically a _Generic. The controlling expression is written as (0,x) rather
// than plain x. Entries are grouped by base type; anything unlisted yields
// "Unknown".
#define repr_type(x) _Generic((0,x), \
/* character types */ \
char: "char", \
const char: "const char", \
volatile char: "volatile char", \
const volatile char: "const volatile char", \
signed char: "signed char", \
const signed char: "const signed char", \
volatile signed char: "volatile signed char", \
const volatile signed char: "const volatile signed char", \
unsigned char: "unsigned char", \
const unsigned char: "const unsigned char", \
volatile unsigned char: "volatile unsigned char", \
const volatile unsigned char: "const volatile unsigned char", \
/* short */ \
signed short: "signed short", \
const signed short: "const signed short", \
volatile signed short: "volatile signed short", \
const volatile signed short: "const volatile signed short", \
unsigned short: "unsigned short", \
const unsigned short: "const unsigned short", \
volatile unsigned short: "volatile unsigned short", \
const volatile unsigned short: "const volatile unsigned short", \
/* int */ \
signed int: "signed int", \
const signed int: "const signed int", \
volatile signed int: "volatile signed int", \
const volatile signed int: "const volatile signed int", \
unsigned int: "unsigned int", \
const unsigned int: "const unsigned int", \
volatile unsigned int: "volatile unsigned int", \
const volatile unsigned int: "const volatile unsigned int", \
/* long */ \
long int: "long int", \
const long int: "const long int", \
volatile long int: "volatile long int", \
const volatile long int: "const volatile long int", \
unsigned long int: "unsigned long int", \
const unsigned long int: "const unsigned long int", \
volatile unsigned long int: "volatile unsigned long int", \
const volatile unsigned long int: "const volatile unsigned long int", \
/* long long */ \
long long int: "long long int", \
const long long int: "const long long int", \
volatile long long int: "volatile long long int", \
const volatile long long int: "const volatile long long int", \
unsigned long long int: "unsigned long long int", \
const unsigned long long int: "const unsigned long long int", \
volatile unsigned long long int: "volatile unsigned long long int", \
const volatile unsigned long long int: "const volatile unsigned long long int", \
/* floating point */ \
float: "float", \
const float: "const float", \
volatile float: "volatile float", \
const volatile float: "const volatile float", \
double: "double", \
const double: "const double", \
volatile double: "volatile double", \
const volatile double: "const volatile double", \
long double: "long double", \
const long double: "const long double", \
volatile long double: "volatile long double", \
const volatile long double: "const volatile long double", \
/* pointers */ \
char *: "char pointer", \
const char *: "const char pointer", \
volatile char *: "volatile char pointer", \
const volatile char *: "const volatile char pointer", \
void *: "void pointer", \
const void *: "const void pointer", \
volatile void *: "volatile void pointer", \
const volatile void *: "const volatile void pointer", \
default: "Unknown")
// endl / endlf(f): emit nothing but the normal line ending,
// to stdout or to the FILE* f respectively.
// e.g. ```display(x); display(y); endl;```
// ```endlf(FILE* x);```
// Anything not detected as Windows gets a bare newline;
// Windows has a different line ending.
#if !(defined(_WIN32) || defined(__WIN32) || defined(WIN32) || defined(__WIN32__) || defined(_WIN64) || defined(__WIN64) || defined(WIN64) || defined(__WIN64__) || defined(__WINNT) || defined(__WINNT__) || defined(WINNT))
#define endl printf("%c", '\n')
#define endlf(f) fprintf(f, "%c", '\n')
#else
#define endl printf("%s", "\r\n")
#define endlf(f) fprintf(f, "%s", "\r\n")
#endif
#endif
#ifdef EVIL_LAMBDA
// Opt-in: only available when EVIL_LAMBDA is defined.
// This requires nested functions to be allowed.
// The check below rejects Clang, and any compiler that does not define
// __GNUC__, with a hard error.
#if defined(__clang__) || !defined(__GNUC__)
#error "Lambda requires a GNU compiler."
#endif
// A cleaner, but slightly more cumbersome lambda:
// expands to a statement expression that defines a nested function named `_`
// (return type ret_type, parameter list and body taken from _body) and then
// yields `_` as the value of the whole expression.
#define lambda(ret_type, _body) ({ ret_type _ _body _; })
// e.g. int (*max)(int, int) = lambda (int, (int x, int y) { return x > y ? x : y; });
// Pros:
// * Woot, easier to pass, as the user has to know the signature anyway.
// * Name not part of lambda definition. More lambda-y.
// * Body of function inside macro, feels more like a lambda.
// * Uses a statement expression (a GNU extension), which yields a properly constructed function pointer.
// Cons:
// * The signature isn't constructed for the user, they have to both know and understand it.
// * Clang is refused by the compiler check above.
#endif
#ifndef EVIL_NO_MAIN
// Included by default; define EVIL_NO_MAIN to leave it out.
// Main expands to the start of main's definition, with argc and argv
// implicitly declared and marked unused so that ignoring them does not warn.
// The expansion deliberately stops before the closing parenthesis and the
// opening brace: follow it with `then` (or a literal `){`).
//   e.g. Main then ... end
// __attribute__((unused)) is a compiler extension rather than standard C.
#define Main int main(int __attribute__((unused)) argc, char __attribute__((unused)) **argv
#endif
#ifndef EVIL_NO_PROC
// Procedure helpers.
// Included by default; define EVIL_NO_PROC to leave them out.
// Argument order for both is (name, return type, parameters...).
//   declare(...) expands to a prototype, without the trailing semicolon.
//   proc(...)    expands to the head of a definition including the opening
//                brace; the body must be closed with `end` or `}`.
#define declare(name_, ret_, ...) ret_ name_(__VA_ARGS__)
#define proc(name_, ret_, ...) ret_ name_(__VA_ARGS__) {
#endif

Let's Destroy C

I have a pet project I work on, every now and then. CNoEvil.

The concept is simple enough.

What if, for a moment, we forgot all the rules we know. That we ignore every good idea, and accept all the terrible ones. That nothing is off limits. Can we turn C into a new language? Can we do what Lisp and Forth let the over-eager programmer do, but in C?


Some concepts

We're going to point out some definitions in other files - they're too big to inline into a blog post.

You can assume that all of these header definitions get collapsed into a single file, called evil.h.

We won't dwell on many C features. If they're not obvious to you, there's a lot of information at your fingertips to explain them. The idea here isn't to explain how C has moved on. It's to abuse it.


First of all, let's fix up a simple program:

#include <stdio.h>

int main(int argc, char* argv[]) {

  printf("%s\n", "Hello, World!");

}

That's an awful lot of symbolic syntax.

Let's try and get rid of a little of that.

Format

Format specifiers are incredibly useful in C, allowing you to specify how many decimal places to put after a float, where to use commas when outputting numbers, whether to use the locale specifier to get the right , or . syntax, etc.

But, for the general case, we don't need it. So we can make it disappear.

We can do this, thanks to a C11 feature, called _Generic, which is sort of like a type-based switch. It'll match against the first compatible type.

If we define display_format as a _Generic switch, like you can see in evil_io.h, then we can replace our printf with a very simple set of defines:

#define display(x) printf(display_format(x), x)
#define displayln(x) printf(display_format(x), x); printf("%s", "\r\n")

Now we can rewrite our program like this:

#include "evil.h"

int main(int argc, char* argv[]) {
	displayln("Hello, World!");
}

There. That's a lot more high level. And it works correctly for a whole bunch of things other than strings, too.

Main

We've got a fairly typical main definition here. But we can do better. We can hide argc and argv, and just assume the programmer knows they're implicitly available. Because there is nothing worse than implicit values.

In fact, we'll also silence the compiler that might complain if we don't end up using them to inspect commandline flags.

#define Main int main(int __attribute__((unused)) argc, char __attribute__((unused)) **argv

Unfortunately, just defining our Main isn't enough. We need a couple more defines, which will come in extremely handy in the future. Just a couple symbol replacements.

#define then ){
#define end }

Now. That's better. We can now rewrite our program:

#include "evil.h"

Main then
  displayln("Hello, World!");
end

Brilliant. Now it doesn't look like C. It still compiles like C. In fact, it should compile without warnings.

(Have a glance at evil_flow.h for a few more useful defines that mean we can escape the brace syntax and pretend that C works like Lua's syntax.)

High Level Constructs

We've got a Hello, World that looks simple. It wasn't a hard path to get here. But we can do even better than that.

We can add in things people don't expect to exist in C at all.

Then we can start pretending our poor, abused little program is actually a higher level language than it is. And we haven't even broken any C syntax, which means we can safely and easily link against any other C library, even if it is a header-only library.

Lambda

With a GNU-extension (it may or may not work under other compilers), we can easily write a lambda, and give C the ability to have anonymous functions. We still need to use C's function-pointer syntax, but that doesn't turn out too bad in practice.

#define lambda(ret_type, _body) ({ ret_type _ _body _; })

There! Simple, isn't it? Well, maybe not entirely obvious how it works. (See evil_lambda.h for our full implementation.)

#define EVIL_LAMBDA
#include "evil.h"

Main then
  int (*max)(int, int) = lambda(int,
  	                         (int x, int y) {
  	                         	return x > y ? x : y;
  	                         });

  displayln(max(1, 2));
end

We create a function pointer called max, which returns an int, and takes two int arguments. The lambda assigned to it matches. It returns the bigger of the two values with a simple one-liner.

You use it like you might expect, but max only exists inside main, and is ready to be passed to another function so you can start building up your functional tools.

Coroutines

You can write proper coroutine systems for C. They tend to be big, and complicated and extremely helpful.

But we're doing the wrong thing.

So, apart from emitting some compile-time warnings, the crux of evil_coroutine.h is this magnificent madness:

// Original macro hack by Robert Elder (c) 2016. Used against their advice, but with their permission.
#define coroutine() static int state=0; switch(state) { case 0:
#define co_return(x) { state=__LINE__; return x; case __LINE__:; }
#define co_end() }

By storing state and using a switch as a computed GOTO, you can now write functions that appear to be resumable.

Like so:

#define EVIL_COROUTINE
#include "evil.h"

int example() {
  static int i = 0;
  coroutine();
  While true then
    co_return(++i);
  end
  co_end();
  return i;
}

Main then
  displayln(example());
  displayln(example());
  displayln(example());
  displayln(example());
  displayln(example());
  displayln(example());
  displayln(example());
  displayln(example());
end

Despite being dangerous, and poorly thought through if you're insane enough to put this anywhere near production code, we are looking like we have coroutines.

Unfortunately, those damn braces are back again.

Procs

Technically speaking, C doesn't have functions. Because functions are pure and have no side-effects, and C is one giant stinking pile of a side-effect.

What C has is properly known as procedures. So let's reflect that when we redefine how we make them, to get rid of the braces:

#define declare(_name, _ret, ...) _ret _name(__VA_ARGS__)
#define proc(_name, _ret, ...) _ret _name(__VA_ARGS__){

This fits in nicely with our existing then and end macros.

We put the return type right before any listing of arguments, and after the name, which can make it easier when reading over the definition or declaration.

It lets us change the above example into this marvelous little beauty:

#define EVIL_COROUTINE
#include "evil.h"

declare(example, int);

proc(example, int)
  static int i = 0;
  coroutine();
  While true then
    co_return(++i);
  end
  co_end();
  return i;
end

Main then
  displayln(example());
  displayln(example());
  displayln(example());
  displayln(example());
  displayln(example());
  displayln(example());
  displayln(example());
  displayln(example());
end

That's better. It looks more consistent with the rest of our syntax, whilst still not breaking how C works at all.

We've practically abolished symbols in the final syntax. They're still there, but minimal. We haven't introduced any whitespace sensitivity, but we have simplified how it looks. Made it feel like a scripting language.


CNoEvil goes a lot further than this. It adds introspection, a new assert library with its own stacktrace format, hash routines and so on.

But this is a taste of how well you can screw up the C language with just a handful of overpowered macros.

@shakna-israel
Copy link
Author

I think that credit for the co-routine macros properly belongs to Simon Tatham (author of PuTTY), described by him in 2000: https://www.chiark.greenend.org.uk/~sgtatham/coroutines.html

@philpennock

Though it's similar, it isn't quite the same. In the article where Elder describes it, he does in fact reference that hack you're suggesting.

However, Tatham goes much further and defines coroutines with proper state that can be mixed.

@shakna-israel
Copy link
Author

Now if you could just make CNoEvil code capable of self-modifying itself at runtime...

@imaami

Well, you can end up with an executable stack with EVIL_LAMBDA, so I guess it already is.

@shakna-israel
Copy link
Author

@shakna-israel: huh!

$ cat l.c
#include <alloca.h>
#include <stdio.h>
#include <string.h>

int
main(void) {
  int (*max)(int, int) = ({ int _ (int x, int y) { return x > y ? x : y; } _; });
  printf("%d, %p\n", max(1, 2), (void*)max);
  char *x = alloca(128);
  memset(x, 0, 128);
  printf("%d, %p\n", max(1, 2), x);
  return 0;
}
$ ./a.out
2, 0x7fffd2535ae0
2, 0x7fffd2535a40
$ 

Well, then, ... the only reason not to use this is that GCC/clang end up causing calls to mlock(2) to happen to make the stack executable.

I'm very happy to be wrong about this!! Thanks!

@nicowilliams

Basically the scope for the lambda is the containing function, rather than the expression statement.

But generally speaking, nested functions like we do here, absolutely exemplify one of my first statements, and you should run away screaming when you see them:

What if, for a moment, we forgot all the rules we know. That we ignore every good idea, and accept all the terrible ones.

@shakna-israel
Copy link
Author

Who hurt you, OP

@donn

I did. When I made this.

@jespa007
Copy link

jespa007 commented Jan 31, 2020

Set C as a new language based on macros is not recommended if you have to use the debugger. It's becomes weird and easily more error-prone than what C is.

@janhec
Copy link

janhec commented Jan 31, 2020

't is already taken, but I thought oraCle for one sec. You want to be so good that a debugger is, well... not a priority.
But I really don't want to open a can of worms triggered by the word "oracle". Let's think matrix, much happier.

@sickfile
Copy link

C

@shakna-israel
Copy link
Author

Set C as a new language based on macros is not recommended if you have to use the debugger. It's becomes weird and easily more error-prone than what C is.

@jespa007

Pretty sure I already pointed out what a terrible idea this is:

What if, for a moment, we forgot all the rules we know. That we ignore every good idea, and accept all the terrible ones.

@nicowilliams
Copy link

Speaking of Simon Tatham's C co-routines, there's also async.h.

(I wrote and contributed most of the SSHv2 GSS Key Exchange support in PuTTY, and I must say that working with Simon's co-routines is really quite a pleasure.)

@nicowilliams
Copy link

BTW, clang definitely does not support lambdas:

$ cat l.c
#include <alloca.h>
#include <stdio.h>
#include <string.h>

int
main(void) {
  int (*max)(int, int) = ({ int _ (int x, int y) { return x > y ? x : y; } _; });
  printf("%d, %p\n", max(1, 2), (void*)max);
  char *x = alloca(128);
  memset(x, 0, 128);
  printf("%d, %p\n", max(1, 2), x);
  return 0;
}
$ cc l.c
$ clang l.c
l.c:7:50: error: function definition is not allowed here
  int (*max)(int, int) = ({ int _ (int x, int y) { return x > y ? x : y; } _; });
                                                 ^
l.c:7:76: error: use of undeclared identifier '_'
  int (*max)(int, int) = ({ int _ (int x, int y) { return x > y ? x : y; } _; });
                                                                           ^
l.c:7:9: error: initializing 'int (*)(int, int)' with an expression of incompatible type 'void'
  int (*max)(int, int) = ({ int _ (int x, int y) { return x > y ? x : y; } _; });
        ^                ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 errors generated.
$ 

@nicowilliams
Copy link

@shakna-israel: I'm surprised that stack allocations in statement-expressions survive the end of the statement expression, that they get promoted to the parent lexical scope. But, too, I'm happy about that.

There are a lot of GCC/clang extensions I would make use of in production code, and that I've seen production code use (e.g., Lustre, which makes extensive use of statement-expressions). But not local functions. I won't abide an executable stack. I'd rather have something more Blocks-like, where there's a new intrinsic and callable type that under the covers is essentially a struct (to be passed by value) of frame pointer and raw function pointer.

@snaidamast
Copy link

All of this has already been accomplished in other languages such as C#, VB.NET, and Java for starters...

@webatxcent
Copy link

While cute, it is hardly novel. Decades ago there was someone who remapped C into Pascal using defines.

@mu578
Copy link

mu578 commented Jan 31, 2020

@ssokolow
Copy link

@webatxcent Huh. What I remembered was FORTRAN.

@shakna-israel
Copy link
Author

While cute, it is hardly novel. Decades ago there was someone who remapped C into Pascal using defines.

@webatxcent

That would probably be Bournegol, and unlike this, it was actually used in production in creating the Bourne shell. Which was one of the reasons no one wanted to help with that particular code base. Which had a few more footguns in it, like TRUE being -1.

@shakna-israel
Copy link
Author

@nicowilliams

Yeah, Clang does it the sensible way. Blocks are the right way to do things. Unfortunately, they return a new pointer type, so mixing them into this horribleness isn't possible.

But, if you really need lambdas... C++ is right there. With more sensible scoping, etc.

@galyathee
Copy link

@galyathee
Copy link

Maybe we should follow the advice of Scott Meyers ... do not use #define ;) However using well defined macros and structs we can implement C++ like code ... but when I see C++ 17 features, I do not see any good reason to return to C unless I am coding a simple algorithm for MiControllers...

@jsfag
Copy link

jsfag commented Feb 5, 2020

Не ну а чо.

@mu578
Copy link

mu578 commented Feb 5, 2020

@jspure there is a more literal and synthetic way to express that: nice prank; two words in a language that everybody can grasp and experience the meaning i.e what we name self-respect.

@saidm00
Copy link

saidm00 commented Feb 22, 2020

Amazing, just brilliant

@nomi-san
Copy link

nomi-san commented Mar 15, 2020

Ω

Why not "proc_main"?

#define proc_main() int main(int __attribute__((unused)) argc, char __attribute__((unused)) **argv) {

// going well 😄
proc_main()
    // do some stuff
end

@shakna-israel
Copy link
Author

@nomi-san Why proc_main? We already have a proc keyword, which means it'll be nice and difficult to tell them apart.

There's no reason if you want the similar syntax you can't do:

proc(main, int, int argc, char* argv[])
end

@nomi-san
Copy link

@shakna-israel
😪 But Main then, it's so weird on a proc declaration.

@shakna-israel
Copy link
Author

@nomi-san

But the declaration would be:

declare(main, int, int argc, char* argv[]);

Right?

@nomi-san
Copy link

Main entry point no need to declare, I don't mean the prototype.

-#define Main ...
+#define proc_main() int main(int argc, char **argv) {

-Main then
+proc_main()
// proc(main, int, int argc, char* argv[]) /* it's OK, too long */
    // body
end

Just in my opinion, assuming.. I don't read all of the README above 🤨

@shakna-israel
Copy link
Author

@nomi-san

Sorry, I got confused by you using non-C terms.

This is a declaration, specifically a forward-declaration:

void foo(int x);

This is a definition:

void foo(int x) {
}

This is a macro:

#define x 1

But mostly Main is as it is, because of the way it fits with the rest of the flow control operators:

Main then
  If argc <= 1 then
      return 1;
  end
end

@nomi-san
Copy link

@shakna-israel
[Like] 👍 👍 👍

@shakna-israel
Copy link
Author

And now, it's even worse than before! Oh, God, WHY!?.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment