clrs-practice-exam/clean-data.py

74 lines
2.5 KiB
Python

import argparse
import os
import re
import sqlite3
import sys
# Command-line interface: a single positional argument naming the directory
# that main() scans for chapter folders.
parser = argparse.ArgumentParser(
    prog='CLRS problem database generator',
    description='Takes the walkccc.me CLRS solutions and parses them into a SQLite database')
parser.add_argument('source', help='the directory to walk in search of problems and solutions')
# Matches the "##" that opens a second-level Markdown heading at the start of
# a line. Deeper headings ("###") and "##" inside running text do not match,
# so they stay part of the surrounding answer instead of splitting it.
_EXERCISE_HEADING = re.compile(r"^##(?!#)", re.MULTILINE)


def _parse_heading(heading):
    """Return ``(problem_number, starred)`` for an exercise heading.

    The problem number is the integer that follows the first dash of the
    heading (``1.2-3`` gives ``3``). If extra text follows the number,
    separated by a space, the exercise is treated as starred.

    Returns ``None`` when the heading has no dash or no integer after it.
    """
    fields = heading.split('-')
    if len(fields) < 2:
        return None
    label = fields[1]
    try:
        return int(label), False
    except ValueError:
        pass
    # Something trails the number (the star marker): keep only the number.
    try:
        return int(label.split(' ')[0]), True
    except ValueError:
        return None


def _parse_section(text):
    """Yield ``(problem_number, question, answer, starred)`` per exercise.

    Each exercise is expected to be a second-level heading, a blank line, the
    question, a blank line, and then the answer. Parts that do not have this
    shape (a preamble, a heading without a body, a question without an
    answer, a heading without a problem number) are skipped.
    """
    for part in _EXERCISE_HEADING.split(text):
        heading, found, body = part.strip().partition("\n\n")
        if not found:
            # Empty part, or a heading with nothing under it.
            continue
        parsed = _parse_heading(heading)
        if parsed is None:
            continue
        question, found, answer = body.partition("\n\n")
        if not found:
            # Question without an answer: nothing useful to store.
            continue
        problem_number, starred = parsed
        # Drop the first character (the leading ">" blockquote marker).
        yield problem_number, question[1:], answer.strip(), starred


def main(args):
    """Parse the exercise files under ``args.source`` into ``clrs.db``.

    Every entry of ``args.source`` whose name starts with ``Chap`` is treated
    as a chapter directory; the last two characters of its name are the
    chapter number. Each regular file directly inside a chapter directory is
    read as UTF-8 and split into exercises. The section number is the second
    dot-separated field of the file name, or NULL when the name has no dot.

    Rows are appended to the ``problem`` table of ``clrs.db`` in the current
    working directory. The table is created if missing and existing rows are
    kept, so running the script twice stores the exercises twice.

    Args:
        args: parsed command-line namespace providing ``source``.

    Returns:
        0 on success (used as the process exit status).

    Raises:
        ValueError: if a chapter directory name does not end in two digits.
    """
    con = sqlite3.connect("clrs.db")
    try:
        cur = con.cursor()
        cur.execute("""
            create table if not exists problem (
                problem_number number not null,
                question text not null,
                answer text not null,
                chapter number not null,
                section number,
                starred number not null
            )
        """)
        new_rows = []
        # Sorted so rows are inserted in a reproducible order; os.listdir
        # returns entries in arbitrary order.
        chapter_names = sorted(x for x in os.listdir(args.source) if x.startswith("Chap"))
        for chapter_name in chapter_names:
            chapter_dir = os.path.join(args.source, chapter_name)
            chapter_number = int(os.path.basename(os.path.normpath(chapter_dir))[-2:])
            for file_name in sorted(os.listdir(chapter_dir)):
                section_path = os.path.join(chapter_dir, file_name)
                if not os.path.isfile(section_path):
                    continue
                name_fields = file_name.split('.')
                section_number = name_fields[1] if len(name_fields) > 1 else None
                # Explicit encoding: the platform default is not always UTF-8.
                with open(section_path, 'r', encoding='utf-8') as section_file:
                    section_file_contents = section_file.read()
                for problem_number, question, answer, starred in _parse_section(section_file_contents):
                    new_rows.append((
                        problem_number,
                        question,
                        answer,
                        chapter_number,
                        section_number,
                        1 if starred else 0,
                    ))
        cur.executemany("insert into problem values (?, ?, ?, ?, ?, ?)", new_rows)
        con.commit()
    finally:
        # Release the database handle even when parsing fails part-way.
        con.close()
    return 0
if __name__ == "__main__":
    # Script entry point: parse the command line, run the import, and use
    # main()'s return value as the process exit status.
    sys.exit(main(parser.parse_args()))