19 lines
796 B
Python
19 lines
796 B
Python
|
import pandas as pd
|
||
|
import csv
|
||
|
import os
|
||
|
|
||
|
# Directory holding the per-repository "*-clone-error-output.txt" logs.
OUTPUT_DIR = "/data/users/mgaughan/kkex/012825_cam_revision_main/12825_output_files"

# File-name suffix that identifies a clone-error log.
ERROR_FILE_SUFFIX = "-clone-error-output.txt"

# Rows whose second CSV field starts with this text are charset errors.
# The leading space is significant: fields are not stripped when parsed.
CHARSET_ERROR_PREFIX = " 'utf-8'"


def collect_charset_errors(output_dir: str = OUTPUT_DIR,
                           prefix: str = CHARSET_ERROR_PREFIX) -> pd.DataFrame:
    """Gather the charset-error rows from every clone-error log in a directory.

    Each file in ``output_dir`` whose name ends with ``ERROR_FILE_SUFFIX`` is
    parsed as a header-less CSV.  Rows whose second column (label ``1``)
    starts with ``prefix`` are kept, and the kept rows from all files are
    concatenated into a single frame with a fresh 0..n-1 index.

    Args:
        output_dir: Directory to scan for error logs.
        prefix: Text the second column must start with for a row to be kept.

    Returns:
        A DataFrame of the matching rows; an empty DataFrame when there are
        no error logs or none of them contains a matching row.

    Raises:
        FileNotFoundError: If ``output_dir`` does not exist.
    """
    # Sort so the row order of the result does not depend on the arbitrary
    # order in which os.listdir() happens to return directory entries.
    error_files = sorted(
        name for name in os.listdir(output_dir)
        if name.endswith(ERROR_FILE_SUFFIX)
    )

    subsets = []
    for name in error_files:
        try:
            # on_bad_lines='skip' drops lines with too many fields; this
            # assumes that the nonconforming lines are cloning errors rather
            # than charset errors.
            error_file_df = pd.read_csv(
                os.path.join(output_dir, name),
                header=None,
                on_bad_lines='skip',
            )
        except pd.errors.EmptyDataError:
            # An empty log has no rows at all, hence no charset errors.
            continue

        if 1 not in error_file_df.columns:
            # Single-column file: there is no second field to inspect.
            continue

        # Missing values become '' and everything is coerced to str so that
        # the .str accessor works even if the column was parsed as numeric.
        second_field = error_file_df[1].fillna('').astype(str)
        char_error_vcs = error_file_df[second_field.str.startswith(prefix)]

        if not char_error_vcs.empty:
            subsets.append(char_error_vcs)

    if not subsets:
        # pd.concat() raises ValueError on an empty list.
        return pd.DataFrame()
    return pd.concat(subsets, ignore_index=True)


def main() -> None:
    """Print the first rows of the collected charset errors."""
    result_df = collect_charset_errors()
    # head must be *called*; printing the bare attribute shows the bound
    # method object instead of the first rows.
    print(result_df.head())
    # Uncomment to persist the full list:
    # result_df.to_csv("charset_error_list.csv", index=False)


if __name__ == "__main__":
    main()