Spaces:
Build error
Build error
yourusername
committed on
Commit
•
78cc3f9
1
Parent(s):
e1cd6af
:bug: filter_vocab -> filter_words
Browse files
data_measurements/dataset_statistics.py
CHANGED
@@ -341,7 +341,7 @@ class DatasetStatisticsCacheClass:
|
|
341 |
):
|
342 |
logs.info("Reading vocab from cache")
|
343 |
self.load_vocab()
|
344 |
-
self.vocab_counts_filtered_df = filter_vocab(self.vocab_counts_df)
|
345 |
else:
|
346 |
logs.info("Calculating vocab afresh")
|
347 |
if len(self.tokenized_df) == 0:
|
@@ -352,7 +352,7 @@ class DatasetStatisticsCacheClass:
|
|
352 |
word_count_df = count_vocab_frequencies(self.tokenized_df)
|
353 |
logs.info("Making dfs with proportion.")
|
354 |
self.vocab_counts_df = calc_p_word(word_count_df)
|
355 |
-
self.vocab_counts_filtered_df = filter_vocab(self.vocab_counts_df)
|
356 |
if save:
|
357 |
logs.info("Writing out.")
|
358 |
write_df(self.vocab_counts_df, self.vocab_counts_df_fid)
|
|
|
341 |
):
|
342 |
logs.info("Reading vocab from cache")
|
343 |
self.load_vocab()
|
344 |
+
self.vocab_counts_filtered_df = filter_words(self.vocab_counts_df)
|
345 |
else:
|
346 |
logs.info("Calculating vocab afresh")
|
347 |
if len(self.tokenized_df) == 0:
|
|
|
352 |
word_count_df = count_vocab_frequencies(self.tokenized_df)
|
353 |
logs.info("Making dfs with proportion.")
|
354 |
self.vocab_counts_df = calc_p_word(word_count_df)
|
355 |
+
self.vocab_counts_filtered_df = filter_words(self.vocab_counts_df)
|
356 |
if save:
|
357 |
logs.info("Writing out.")
|
358 |
write_df(self.vocab_counts_df, self.vocab_counts_df_fid)
|