'''
    DS2000
    Spring 2020
    Source code - processing a messy file
'''

PUNCTUATION = '\'\",.?!-_;'

# Maps every character in PUNCTUATION to a single space, so the whole
# substitution happens in one pass over each line.
_PUNCT_TO_SPACE = str.maketrans(PUNCTUATION, ' ' * len(PUNCTUATION))


def process_file(filename):
    ''' Function: process_file
        Parameter: filename (string), path of the text file to read
        Returns: list of strings, one per line of the file, with the
                 trailing newline characters still attached
    '''
    with open(filename, 'r') as infile:
        return infile.readlines()


def clean_file(contents):
    ''' Function: clean_file
        Parameters: contents (list of strings), the raw lines
        Returns: a new list of strings where each line has been
                 stripped of surrounding whitespace, lowercased, and
                 had every character in PUNCTUATION replaced by a space.
                 The input list is not modified.
    '''
    # Order matters: strip first, so punctuation at the edges of a line
    # turns into a leading/trailing space rather than being stripped away.
    return [line.strip().lower().translate(_PUNCT_TO_SPACE)
            for line in contents]


def main(filename='khoury.txt'):
    ''' Function: main
        Parameter: filename (string), file to process;
                   defaults to 'khoury.txt'
        Returns: nothing; prints the list of cleaned words
    '''
    # Step One: Read the file as-is
    file_contents = process_file(filename)

    # Step Two: Clean the file by removing whitespace and punctuation
    cleaner = clean_file(file_contents)

    # Step Three: Turn the list of sentences into a list of words.
    # split() with no argument collapses the runs of spaces that the
    # punctuation replacement leaves behind.
    words = ' '.join(cleaner).split()
    print(words)


# Only run when executed as a script, so importing this module
# (e.g. to reuse clean_file) does not try to open the data file.
if __name__ == '__main__':
    main()