Corey Morris committed on
Commit
7a88af3
1 Parent(s): 3ec98e7

script to save dataframe to a file only if there are no uncommitted files

Browse files
Files changed (1) hide show
  1. save_for_regression.py +46 -0
save_for_regression.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # when run
2
+ # checks if there is uncommitted code
3
+ # if there is uncommitted code, it returns an error
4
+ # if there is no uncommitted code, it saves the dataframe as a parquet file with the commit hash in the name
5
+
6
+ import pytest
7
+ import pandas as pd
8
+ from result_data_processor import ResultDataProcessor
9
+
10
+ import os
11
+
12
+ import subprocess
13
+
14
def has_uncommitted_changes(repo_path):
    """Report whether ``git status --porcelain`` lists anything in ``repo_path``.

    Args:
        repo_path: Directory in which the git command is run.

    Returns:
        True when ``git status --porcelain`` produces any non-whitespace
        output, False when the output is empty. If git exits with a non-zero
        status (for example, ``repo_path`` is not inside a repository) the
        error text is printed and False is returned.

    Raises:
        OSError: If ``repo_path`` cannot be used as a working directory or
            the ``git`` executable cannot be started.
    """
    # Run git with ``cwd`` instead of os.chdir()-ing there and back: the
    # process-wide working directory is never touched, so there is nothing to
    # restore and no cleanup step that can fail and hide the real error.
    result = subprocess.run(
        ['git', 'status', '--porcelain'],
        cwd=repo_path,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )

    # NOTE: a failed git invocation is reported as False, which callers cannot
    # tell apart from a clean tree; the message below is the only signal.
    if result.returncode != 0:
        print(f"Error checking git status: {result.stderr}")
        return False

    # Porcelain output is empty exactly when git has nothing to report.
    return bool(result.stdout.strip())
34
+
35
if __name__ == '__main__':
    # Entry point: snapshot the current ResultDataProcessor output to a
    # parquet file named after the HEAD commit, but only from a clean tree.
    if has_uncommitted_changes('.'):
        print("There are uncommitted changes")
        # Exit non-zero so shells and CI can see that nothing was saved.
        raise SystemExit(1)

    print("There are no uncommitted changes")
    df_current = ResultDataProcessor().data

    # check=True makes a failed rev-parse raise CalledProcessError instead of
    # silently yielding an empty hash (which would write 'output_.parquet').
    last_commit = subprocess.run(
        ['git', 'rev-parse', 'HEAD'],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=True,
    ).stdout.strip()
    print(last_commit)

    # save the current output to a file
    df_current.to_parquet(f'output_{last_commit}.parquet', index=True)
    print("Saved output to file")
46
+