Corey Morris committed on
Commit
83a34f0
1 Parent(s): 513e813

added mostly hardcoded generate url method and test

Browse files
details_data_processor.py CHANGED
@@ -8,7 +8,7 @@ import requests
8
 
9
  class DetailsDataProcessor:
10
  # Download
11
- #url example
12
 
13
  def __init__(self, directory='results', pattern='results*.json'):
14
  self.directory = directory
@@ -18,10 +18,38 @@ class DetailsDataProcessor:
18
 
19
  # download a file from a single url and save it to a local directory
20
  @staticmethod
21
- def _download_file(url, filename):
22
  r = requests.get(url, allow_redirects=True)
23
  open(filename, 'wb').write(r.content)
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  # @staticmethod
26
  # def _find_files(directory, pattern):
27
  # for root, dirs, files in os.walk(directory):
 
8
 
9
  class DetailsDataProcessor:
10
  # Download
11
+ #url example https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/64bits/LexPodLM-13B/details_harness%7ChendrycksTest-moral_scenarios%7C5_2023-07-25T13%3A41%3A51.227672.json
12
 
13
  def __init__(self, directory='results', pattern='results*.json'):
14
  self.directory = directory
 
18
 
19
  # download a file from a single url and save it to a local directory
20
  @staticmethod
21
+ def download_file(url, filename):
22
  r = requests.get(url, allow_redirects=True)
23
  open(filename, 'wb').write(r.content)
24
 
25
+ @staticmethod
26
+ def single_file_pipeline(url, filename):
27
+ DetailsDataProcessor.download_file(url, filename)
28
+ # read file
29
+ with open(filename) as f:
30
+ data = json.load(f)
31
+ # convert to dataframe
32
+ df = pd.DataFrame(data)
33
+ return df
34
+
35
+ @staticmethod
36
+ def generate_url(file_path):
37
+ base_url = 'https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/'
38
+
39
+
40
+ organization = '64bits'
41
+ model = 'LexPodLM-13B'
42
+ filename = '_2023-07-25T13%3A41%3A51.227672.json'
43
+ # extract organization, model, and filename from file_path instead of hardcoding
44
+ # filename = file_path.split('/')[-1]
45
+
46
+
47
+
48
+ other_chunk = 'details_harness%7ChendrycksTest-moral_scenarios%7C5'
49
+ constructed_url = base_url + organization + '/' + model + '/' + other_chunk + filename
50
+ return constructed_url
51
+
52
+
53
  # @staticmethod
54
  # def _find_files(directory, pattern):
55
  # for root, dirs, files in os.walk(directory):
test_details_data_processing.py CHANGED
@@ -16,10 +16,18 @@ class TestDetailsDataProcessor(unittest.TestCase):
16
  # self.assertIsInstance(data, pd.DataFrame)
17
 
18
  def test_download_file(self):
19
- DetailsDataProcessor._download_file('https://www.google.com', 'test.html')
20
  self.assertTrue(os.path.exists('test.html'))
21
  os.remove('test.html')
22
 
 
 
 
 
 
 
 
 
23
 
24
  if __name__ == '__main__':
25
  unittest.main()
 
16
  # self.assertIsInstance(data, pd.DataFrame)
17
 
18
  def test_download_file(self):
19
+ DetailsDataProcessor.download_file('https://www.google.com', 'test.html')
20
  self.assertTrue(os.path.exists('test.html'))
21
  os.remove('test.html')
22
 
23
+ def test_generate_url(self):
24
+ results_file_path = "64bits/LexPodLM-13B/results_2023-07-25T13:41:51.227672.json"
25
+ expected_url = 'https://huggingface.co/datasets/open-llm-leaderboard/details/resolve/main/64bits/LexPodLM-13B/details_harness%7ChendrycksTest-moral_scenarios%7C5_2023-07-25T13%3A41%3A51.227672.json'
26
+
27
+
28
+ constructed_url = self.processor.generate_url(results_file_path)
29
+ self.assertEqual(expected_url, constructed_url)
30
+
31
 
32
  if __name__ == '__main__':
33
  unittest.main()