commit 6b47ed9110cc9a5a15e3b597742623bea81d92b1 Author: natjms Date: Mon Jan 16 17:13:12 2023 -0800 init diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cfd3752 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +build +out +pages +template.html diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..502df6b --- /dev/null +++ b/Makefile @@ -0,0 +1,9 @@ +BUILD_DIR=./build +calathea: calathea.c + if [ ! -d "$(BUILD_DIR)" ]; then\ + mkdir $(BUILD_DIR);\ + fi + gcc -Wall -Wextra -pedantic -lm -lcmark calathea.c -o build/calathea + +clean: + rm build/* diff --git a/README.md b/README.md new file mode 100644 index 0000000..e9c7c2a --- /dev/null +++ b/README.md @@ -0,0 +1,35 @@ +# calathea v0.1 + +calathea is a small C program used to generate static wikis. It takes a +directory of CommonMark pages with [[wikilinks]] and renders them to +HTML. The output is a flat directory of HTML pages where the wikilinks are +converted to relative links. + +This is also my first full, complete project written entirely in C, which has +proved to be a serious trial by fire in learning how to effectively work with +zero-terminated strings. There will likely be memory leaks and comments that +over-explain every line of code. + +## Features +- [x] Rendering wikilinks +- [ ] Ability to keep track of incoming links +- [ ] Named wikilinks (i.e. [[link title|actual page]]) + +## Installation +This tool requires [cmark](https://github.com/commonmark/cmark) to be +installed. Once that's set up, clone the repository and run: + +``` +$ make +``` + +If GCC complains it can't find cmark, then try running `ldconfig`. If it +still doesn't work, then run `echo $LD_LIBRARY_PATH`. If it doesn't show +anything, you've got to add the right directory to the path, e.g., + +``` +$ export LD_LIBRARY_PATH=/usr/local/lib64 +``` + +Or wherever it got installed as per the output of running `make install` for +cmark. 
diff --git a/calathea.c b/calathea.c new file mode 100644 index 0000000..4eb1c52 --- /dev/null +++ b/calathea.c @@ -0,0 +1,386 @@ +#include +#include +#include +#include +#include +#include +#include + +// Structure defining the content and metadata of a single page +struct Page { + char title[80]; + char *permalink; + struct Page *next; + char *content; +}; + +/*** Helper functions ***/ +int min(int x, int y) { + return x > y ? y : x; +} + +char * read_file(char *filename) { + FILE *file = fopen(filename, "rb"); + + if (file == NULL) { + printf("Warning: Failed to open %s\n", filename); + return NULL; + } + + // First, we calculate the length of the file + fseek(file, 0, SEEK_END); // Traverse to the end of the file + int fileLength = ftell(file); // Get the current position in the file + rewind(file); // Go back to the beginning of the file + + // Allocate enough space in our buffer to hold the entire file. + char *buffer = malloc(fileLength); + + if (buffer == NULL) { + printf("Warning: Failed to allocate enough memory for %s\n", filename); + fclose(file); + return NULL; + } + + fread(buffer, 1, fileLength, file); + fclose(file); + + return buffer; +} + +/*** Hash map implementation ***/ +int helper_hash_polynomial( + char string[], + int i, + int length, + int tableSize, + unsigned long long acc +) { + if (i == length) { + return acc % tableSize; + } else { + return helper_hash_polynomial(string, i + 1, length, tableSize, + acc + string[i] * pow(33, length-i-1)); + } +} + +int hash_polynomial(int mapSize, char key[]) { + return helper_hash_polynomial(key, 0, strlen(key), mapSize, 0); +} + +void to_lower_case(char dest[], char str[]) { + int i = 0; + for (; str[i] != '\0'; i++) { + dest[i] = tolower(str[i]); + } + dest[i] = 0; +} + +void map_put(char map[][80], int mapSize, char title[], char permalink[]) { + char lowercased[80]; + to_lower_case(lowercased, title); + + int index = hash_polynomial(mapSize, lowercased); + strncpy(map[index], permalink, 79); + 
map[index][79] = 0; +} + +char * map_get(char map[][80], int mapSize, char title[]) { + char lowercased[80]; + to_lower_case(lowercased, title); + + int index = hash_polynomial(mapSize, lowercased); + return map[index]; +} + +/*** Templating ***/ +char * substitute_string(char dest[], char sub[], char *start, char *end) { + int startIndex = start - dest; + + int newLength = strlen(dest) - (end - start) + strlen(sub) + 1; + + char * compiled = malloc(newLength * sizeof(char)); + compiled[0] = 0; + strncpy(compiled, dest, startIndex); + strcat(compiled, sub); + strcat(compiled, end); + + return compiled; +} + +int main(int argc, char *argv[]) { + char pagesLocation[256] = "./pages"; + int mapSize = 1000; + char templateFileName[256] = "./template.html"; + char outputDirectoryName[256] = "./build"; + + /*** Argument Parsing ***/ + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "--pages") == 0) { + // We only want to do this if a directory was actually supplied + if (i + 1 < argc) { + i++; + strcpy(pagesLocation, argv[i]); + } + } else if (strcmp(argv[i], "--table-size") == 0) { + if (i + 1 < argc) { + i++; + mapSize = atoi(argv[i]); + } + } else if (strcmp(argv[i], "--template") == 0) { + if (i + 1 < argc) { + i++; + strcpy(templateFileName, argv[i]); + } + } else if (strcmp(argv[i], "--output-dir") == 0) { + if (i + 1 < argc) { + i++; + strcpy(outputDirectoryName, argv[i]); + } + } else { + printf("Unknown argument: %s\n", argv[i]); + } + } + + char *templateContent = read_file(templateFileName); + + if (templateContent == NULL) { + // If no template is given, we'll just dump the rendered markdown into a + // plaintext file + templateContent = malloc(sizeof(char) * 12); + strcpy(templateContent, "{{content}}\0"); + } + + DIR *pagesDir = opendir(pagesLocation); + + if (pagesDir == NULL) { + fprintf(stderr, "Unable to open directory: %s\n", pagesLocation); + return 1; + } + + char titleMap[mapSize][80]; + for (int i = 0; i < mapSize; i++) { + titleMap[i][0] = 
0; + } + + // Contains some information about the current file picked from pagesDir + struct dirent *fileEntry = readdir(pagesDir); + struct Page *currentPage = malloc(sizeof(struct Page)); + struct Page *firstPage = currentPage; + + while (fileEntry != NULL) { + // Ignore hidden files, ".", and ".." on Unix + if (fileEntry->d_name[0] == '.') { + fileEntry = readdir(pagesDir); + continue; + } + + // Determine the base name of the file + int filenameLength = strlen(fileEntry->d_name); + + char fileBasename[filenameLength]; + memset(fileBasename, 0, filenameLength); + + unsigned char foundPoint = 0; + + for (int i = filenameLength - 1; i >= 0; i--) { + if (foundPoint) { + // Start writing the base name into the string + fileBasename[i] = fileEntry->d_name[i]; + } else if (fileEntry->d_name[i] == '.') { + // Start writing on the next iteration + foundPoint = 1; + } else { + // Write zeros where the extension would have been + fileBasename[i] = 0; + } + } + + // Build the page's permalink + currentPage->permalink = malloc(strlen(fileBasename) + 6); + + strcpy(currentPage->permalink, fileBasename); + strcat(currentPage->permalink, ".html"); + + // Construct the relative path + // The two accounts for the slash and the terminal zero + char relativePath[strlen(pagesLocation) + filenameLength + 2]; + memset(relativePath, 0, strlen(pagesLocation) + filenameLength + 2); + + strcpy(relativePath, pagesLocation); + strcat(relativePath, "/"); + strcat(relativePath,fileEntry->d_name); + relativePath[strlen(pagesLocation) + filenameLength + 1] = 0; + + char *buffer = read_file(relativePath); + + if (buffer == NULL) { + fileEntry = readdir(pagesDir); + continue; + } + + // Get a pointer to the start of the content part of the page + char *endOfFirstLine = strchr(buffer, '\n'); + + if (endOfFirstLine == NULL) { + printf("Warning: First line in %s/%s must be the title\n", + pagesLocation, fileEntry->d_name); + free(buffer); + fileEntry = readdir(pagesDir); + continue; + } + + // We 
subtract the buffer pointer from the pointer to the end of the + // first line to get the length of the title + int titleLength = (endOfFirstLine - buffer)/sizeof(char); + + // Save the content string for later by mallocing it + char *contentBuffer = endOfFirstLine; + currentPage->content = malloc( + sizeof(char) * (strlen(buffer) - titleLength + 1) + ); + strcpy(currentPage->content, contentBuffer); + + // Copy the first line (title) into its respective field + strncpy(currentPage->title, buffer, min(titleLength, 80)); + currentPage->title[min(titleLength, 80)] = 0; + + // Insert it into the hash map for lookup later + map_put(titleMap, mapSize, currentPage->title, currentPage->permalink); + + // Get ready to process the next page + fileEntry = readdir(pagesDir); + + if (fileEntry != NULL) { + struct Page *nextPage = malloc(sizeof(struct Page)); + + // Swap the pages + currentPage->next = nextPage; + currentPage = nextPage; + } else { + currentPage->next = NULL; + } + + free(buffer); + } + + // Create the directory if it doesn't exist + char *createOutputDir = malloc( + (7 + strlen(outputDirectoryName)) * sizeof(char) + ); + + strcpy(createOutputDir, "mkdir \0"); + strcat(createOutputDir, outputDirectoryName); + + system(createOutputDir); + + free(createOutputDir); + + /*** Page Processing ***/ + currentPage = firstPage; + while (currentPage != NULL) { + // Scan the file for links + // This pointer is updated upon each iteration + char *nextLinkStart = strstr(currentPage->content, "[["); + while (nextLinkStart != NULL) { + char *nextLinkEnd = strstr(nextLinkStart, "]]"); + + if (nextLinkEnd == NULL) { + // This link is broken + printf("Warning: \"%s\" contains a broken link", currentPage->title); + break; + } + + int linkLength = nextLinkEnd - nextLinkStart; + + // Determine the exact title of the link + char title[linkLength - 3]; + + strncpy(title, nextLinkStart + 2, (linkLength - 2)*sizeof(char)); + title[linkLength - 2] = 0; + + char *permalink = 
map_get(titleMap, mapSize, title); + char *compiledLink; + + if (permalink[0] == 0) { + // i.e. the page does not exist + compiledLink = malloc(strlen(title) + 38); + strcpy(compiledLink, ""); + strcat(compiledLink, title); + strcat(compiledLink, "\0"); + } else { + compiledLink = malloc(strlen(title) + strlen(permalink) + 5); + strcpy(compiledLink, "["); + strcat(compiledLink, title); + strcat(compiledLink, "]("); + strcat(compiledLink, permalink); + strcat(compiledLink, ")\0"); + } + + char *newContent = substitute_string( + currentPage->content, compiledLink, nextLinkStart, nextLinkEnd + 2); + free(currentPage->content); + currentPage->content = newContent; + + free(compiledLink); + + // Move to the next chunk of the file + // NOTE: This is suboptimal, because we search for "[[" from the + // beginning of the content at each iteration. We could start from + // the nextLinkEnd pointer, but since the memory is reallocated, we'd + // need to take into account that this pointer now points to a chunk + // of deallocated memory + nextLinkStart = strstr(currentPage->content, "[["); + } + + // Compile the markdown + currentPage->content = cmark_markdown_to_html( + currentPage->content, + strlen(currentPage->content), + (1 << 17) // Disables safe mode, allowing raw HTML + ); + + char *templateTagStart = strstr(templateContent, "{{content}}"); + char *renderedPageContent = substitute_string( + templateContent, + currentPage->content, + templateTagStart, + templateTagStart + 11 + ); + + // Output the page to the file + char *outputFileName = malloc( + (strlen(outputDirectoryName) + + strlen(currentPage->permalink) + + 2)); + strcpy(outputFileName, outputDirectoryName); + strcat(outputFileName, "/\0"); + strcat(outputFileName, currentPage->permalink); + + FILE *outputFile = fopen(outputFileName, "w"); + + if (outputFile != NULL) { + fputs(renderedPageContent, outputFile); + fclose(outputFile); + } else { + printf("Warning: failed to create %s\n", outputFileName); + } + + 
free(outputFileName); + free(renderedPageContent); + + // We no longer need this page's data so we deallocate it + struct Page *nextPage = currentPage->next; + free(currentPage->content); + free(currentPage->permalink); + free(currentPage); + currentPage = nextPage; + } + + /*** Deallocation and whatnot ***/ + closedir(pagesDir); + free(templateContent); + + // Deallocate all the pages + + return 0; +}