# merge_dataframes.py

  1. import argparse
  2. import pandas as pd
  3. import numpy as np
  4. from pathlib import Path
  5. from utils import load_df
  6. def use_file(file, include=[], exclude=[]):
  7. is_df = Path(file).suffix == '.csv'
  8. excluded = np.array([excl in file.name for excl in exclude]).any()
  9. included = np.array([incl in file.name for incl in include]).all()
  10. return is_df and included and not excluded
  11. if __name__ == '__main__':
  12. CLI = argparse.ArgumentParser()
  13. CLI.add_argument("--input", nargs='?', type=Path, default=None)
  14. CLI.add_argument("--output", nargs='?', type=Path)
  15. CLI.add_argument("--exclude", nargs='?', type=lambda s: s.split(' '),
  16. default=['rewiring_results'])
  17. args, unknown = CLI.parse_known_args()
  18. dfs = []
  19. if args.input is None:
  20. args.input = args.output.parent
  21. if args.input.is_file():
  22. with open(args.input, "r") as file:
  23. for path in file.readlines():
  24. dfs.append(load_df(path.strip('\n')))
  25. elif args.input.is_dir():
  26. for file in args.input.rglob("*"):
  27. if use_file(file, exclude=args.exclude):
  28. dfs.append(load_df(file))
  29. full_df = pd.concat(dfs, ignore_index=True)
  30. full_df.to_csv(args.output)