"""Build a SQLite database of CLRS exercises from the walkccc.me solutions."""
import argparse
import sqlite3
import sys
import os

parser = argparse.ArgumentParser(
    prog='CLRS problem database generator',
    description='Takes the walkccc.me CLRS solutions and parses them into a SQLite database')
parser.add_argument('source', help='the directory to walk in search of problems and solutions')


def _parse_section(contents):
    """Split one section file's text into exercise records.

    The text is cut on every ``##`` marker.  Each resulting part is expected
    to look like ``<header>\\n\\n><question>\\n\\n<answer>``, where the header
    contains ``-<number>`` optionally followed by a space and a star marker.

    Args:
        contents: full text of a section Markdown file.

    Returns:
        A list of ``(problem_number, question, answer, starred)`` tuples in
        the order the exercises appear in ``contents``.

    Raises:
        IndexError: if a part's header contains no ``-``.
        ValueError: if the text after the dash does not start with an
            integer, or a part has a question but no answer paragraph.
    """
    records = []
    parts = [x.strip() for x in contents.split("##") if x != '']
    for part in parts:
        try:
            header, remainder = part.split("\n\n", 1)
        except ValueError:
            # No blank line after the header: the section has no exercises.
            continue

        number_token = header.split('-')[1]  # text after the first dash
        starred = False
        try:
            number = int(number_token)
        except ValueError:
            # Something (the star marker) follows the number after a space.
            starred = True
            number = int(number_token.split(' ')[0])

        question, remainder = remainder.split('\n\n', 1)
        question = question[1:]  # trim the leading '>' of the blockquote
        records.append((number, question, remainder.strip(), starred))
    return records


def main(args):
    """Parse every chapter under ``args.source`` and insert rows into clrs.db.

    Directories directly under ``args.source`` whose names start with
    ``Chap`` are treated as chapters; the chapter number is the last two
    characters of the directory name.  Every regular file directly inside a
    chapter directory is treated as a section file, and its section number
    is the text between the first and second ``.`` of the file name.

    The ``problem`` table is created in ``clrs.db`` (in the current working
    directory) if it does not exist, and all parsed rows are inserted in a
    single batch.

    NOTE(review): rows are appended on every run, so running the script
    twice against the same database duplicates them -- confirm whether that
    is intended.

    Args:
        args: parsed command-line namespace with a ``source`` attribute.

    Returns:
        ``0`` on success, suitable for passing to ``sys.exit``.
    """
    con = sqlite3.connect("clrs.db")
    try:
        cur = con.cursor()
        cur.execute("""
            create table if not exists problem (
                problem_number number not null,
                question text not null,
                answer text not null,
                chapter number not null,
                section number,
                starred number not null
            )
        """)

        new_rows = []
        # Sort directory listings so rows are inserted in a reproducible
        # order; os.listdir() returns entries in arbitrary order.
        chapter_dirs = [os.path.join(args.source, x)
                        for x in sorted(os.listdir(args.source))
                        if x.startswith("Chap")]
        for chapter_dir in chapter_dirs:
            chapter_number = int(os.path.basename(os.path.normpath(chapter_dir))[-2:])
            section_files = [os.path.join(chapter_dir, f)
                             for f in sorted(os.listdir(chapter_dir))
                             if os.path.isfile(os.path.join(chapter_dir, f))]
            for section_path in section_files:
                section_number = os.path.basename(os.path.normpath(section_path)).split('.')[1]
                # Read as UTF-8 explicitly instead of relying on the
                # platform's default text encoding.
                with open(section_path, 'r', encoding='utf-8') as section_file:
                    section_file_contents = section_file.read()
                for number, question, answer, starred in _parse_section(section_file_contents):
                    new_rows.append((number, question, answer, chapter_number,
                                     section_number, 1 if starred else 0))

        cur.executemany("insert into problem values (?, ?, ?, ?, ?, ?)", new_rows)
        con.commit()
    finally:
        # Release the database handle even when walking or parsing fails.
        con.close()
    return 0


if __name__ == '__main__':
    args = parser.parse_args()
    sys.exit(main(args))