Refactor: Adding the Legend

Files changed:
- __pycache__/app.cpython-310.pyc +0 -0
- __pycache__/helper_functions.cpython-310.pyc +0 -0
- app.py +13 -15
- helper_functions.py +93 -16
- neptune_270_papers/neptune_270_papers/added_tokens.json +3 -0
- neptune_270_papers/neptune_270_papers/model.pt +3 -0
- neptune_270_papers/neptune_270_papers/special_tokens_map.json +16 -0
- neptune_270_papers/neptune_270_papers/tokenizer.json +0 -0
- neptune_270_papers/neptune_270_papers/tokenizer_config.json +66 -0
- neptune_270_papers/neptune_270_papers/unique_labels.json +1 -0
- neptune_270_papers/neptune_270_papers/vocab.txt +0 -0
- static/css/style2.css +2 -2
- static/js/pdf.js +4 -0
- static/js/pdf_fr.js +4 -2
- templates/pdf.html +95 -4
- templates/pdf_fr.html +102 -13
__pycache__/app.cpython-310.pyc CHANGED
Binary files a/__pycache__/app.cpython-310.pyc and b/__pycache__/app.cpython-310.pyc differ

__pycache__/helper_functions.cpython-310.pyc CHANGED
Binary files a/__pycache__/helper_functions.cpython-310.pyc and b/__pycache__/helper_functions.cpython-310.pyc differ
app.py CHANGED
@@ -1,5 +1,5 @@
 from flask import Flask, render_template,request, redirect,url_for, jsonify , session
-from helper_functions import predict_class , inference , predict , align_predictions_with_sentences , load_models , load_fr_models
+from helper_functions import predict_class ,predict_sentences_class, inference , predict , align_predictions_with_sentences , load_models , load_fr_models
 from helper_functions import predict_fr_class, fr_inference , align_fr_predictions_with_sentences , transcribe_speech
 import fitz  # PyMuPDF
 import os, shutil
@@ -85,10 +85,9 @@ def treatment():
     pdf_document.close()
     # Prepare data for the chart
     predicted_class , class_probabilities = predict_class([extracted_text] , global_model)
+    print(class_probabilities)
     # Process the transcribed text
-
-    predictions = predict(inference_batch, global_neptune)
-    sentences_prediction = align_predictions_with_sentences(sentences, predictions)
+    sentences_prediction = predict_sentences_class(extracted_text , global_model)
     chart_data = {
         'datasets': [{
             'data': list(class_probabilities.values()),
@@ -204,9 +203,9 @@ def slu():
     logging.debug(f"Transcribed text: {extracted_text}")

     # Process the transcribed text
-    inference_batch, sentences = inference(extracted_text)
-    predictions = predict(inference_batch, global_neptune)
-    sentences_prediction = align_predictions_with_sentences(sentences, predictions)
+    ####inference_batch, sentences = inference(extracted_text)
+    ####predictions = predict(inference_batch, global_neptune)
+    sentences_prediction = predict_sentences_class(extracted_text , global_model)
     predicted_class, class_probabilities = predict_class([extracted_text], global_model)

     chart_data = {
@@ -263,7 +262,7 @@ def pdf_fr():

 @app.route('/pdf_fr/upload' , methods = ['POST'])
 def treatment_fr():
-    global global_fr_neptune
+    global global_fr_neptune , global_fr_model
     if request.method == 'POST' :
         # Récupérer le fichier PDF de la requête
         file = request.files['file']
@@ -293,9 +292,9 @@ def treatment_fr():
     # Fermer le fichier PDF
     pdf_document.close()
     # Process the text
-    inference_batch, sentences = fr_inference(extracted_text)
-    predictions = predict(inference_batch, global_fr_neptune)
-    sentences_prediction = align_fr_predictions_with_sentences(sentences, predictions)
+    ####inference_batch, sentences = fr_inference(extracted_text)
+    ####predictions = predict(inference_batch, global_fr_neptune)
+    sentences_prediction = predict_sentences_class(extracted_text , global_fr_model)
     # Prepare data for the chart
     predicted_class , class_probabilities = predict_fr_class([extracted_text] , global_fr_model)

@@ -309,7 +308,6 @@ def treatment_fr():
     }
     print(predict_class)
     print(chart_data)
-    print(sentences)
     # clear the uploads folder
     for filename in os.listdir(app.config['UPLOAD_FOLDER']):
         file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
@@ -384,9 +382,9 @@ def slu_fr():
     logging.info(f"Transcribed text: {extracted_text}")

     # Traiter le texte transcrit
-    inference_batch, sentences = fr_inference(extracted_text)
-    predictions = predict(inference_batch, global_fr_neptune)
-    sentences_prediction = align_fr_predictions_with_sentences(sentences, predictions)
+    ####inference_batch, sentences = fr_inference(extracted_text)
+    ####predictions = predict(inference_batch, global_fr_neptune)
+    sentences_prediction = predict_sentences_class(extracted_text , global_fr_model)
     predicted_class, class_probabilities = predict_fr_class([extracted_text], global_fr_model)

     chart_data = {
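The net effect of the app.py changes is that every route (English and French, PDF and SLU) drops the old token-level pipeline (inference → predict → align_predictions_with_sentences) in favour of one predict_sentences_class call on the extracted text. A minimal sketch of the resulting flow, with the prediction functions passed in explicitly so it stands alone; build_chart_payload itself is hypothetical and not a function in the repo:

    # Hypothetical helper mirroring the refactored treatment()/slu() flow in app.py.
    def build_chart_payload(extracted_text, model, predict_class, predict_sentences_class):
        # Document-level result: majority class plus a {label: percentage} mapping.
        predicted_class, class_probabilities = predict_class([extracted_text], model)
        # Sentence-level result: {sentence: class_label}, used to highlight the transcript.
        sentences_prediction = predict_sentences_class(extracted_text, model)
        chart_data = {
            'datasets': [{
                'data': list(class_probabilities.values()),
            }]
        }
        return predicted_class, chart_data, sentences_prediction

The French routes reuse the same shape by swapping in global_fr_model and predict_fr_class.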
helper_functions.py CHANGED
@@ -25,7 +25,7 @@ def load_models():
     print("Loading BERT model...")
     neptune = BERT()
     device = "cpu"
-    model_save_path = "
+    model_save_path = "neptune_270_papers/neptune_270_papers/model.pt"
     neptune.load_state_dict(torch.load(model_save_path, map_location=torch.device('cpu')))
     neptune.to(device)

@@ -55,11 +55,11 @@ def load_fr_models():
     print("Loading Wav2Vec2 model for French...")
     wav2vec2_processor = Wav2Vec2Processor.from_pretrained("bhuang/asr-wav2vec2-french")
     wav2vec2_model = AutoModelForCTC.from_pretrained("bhuang/asr-wav2vec2-french").to(device)
-    return fr_model, fr_neptune, wav2vec2_processor, wav2vec2_model
+    return fr_model, fr_neptune, wav2vec2_processor, wav2vec2_model

-fr_class_labels = {0: ('Physics', 'primary', '#
+fr_class_labels = {0: ('Physics', 'primary', '#5e7cc8'), 1: ('AI','cyan', '#0dcaf0'),
                    2: ('economies', 'warning' , '#f7c32e'), 3: ('environments','success' , '#0cbc87'),
-                   4: ('sports', 'orange', '#fd7e14')}
+                   4: ('sports', 'orange', '#fd7e14')}
 class_labels = {
     16: ('vehicles','info' , '#4f9ef8'),
     10: ('environments','success' , '#0cbc87'),
@@ -79,29 +79,106 @@ class_labels = {
     2: ('administration','pink', '#d63384'),
     7: ('biology' ,'cambridge' , '#88aa99')}

-def predict_class(text
+def predict_class(text,model):
     # Tokenisation du texte
-    inputs = transform_list_of_texts(text, tokenizer, 510, 510, 1, 2550)
-
-
-
+    inputs = transform_list_of_texts([text], tokenizer, 510, 510, 1, 2550)
+
+
+    # Initialiser une liste pour stocker les probabilités de chaque échantillon
+    all_probabilities = []
+
     # Passage du texte à travers le modèle
+    model.eval()
     with torch.no_grad():
-
-
-
-
-
+        for i, sample in enumerate(inputs['input_ids']):
+            for j in range(len(sample)):
+                input_ids_tensor = torch.tensor(sample[j], device=device).unsqueeze(0)
+                attention_mask_tensor = torch.tensor(inputs['attention_mask'][i][j], device=device).unsqueeze(0)
+                outputs = model(input_ids=input_ids_tensor, attention_mask=attention_mask_tensor)
+
+                # Application de la fonction softmax
+                probabilities = torch.softmax(outputs.logits, dim=1)[0]
+                all_probabilities.append(probabilities)
+
+    # Calculer la moyenne des probabilités si nous avons plusieurs échantillons
+    if len(all_probabilities) > 1:
+        mean_probabilities = torch.stack(all_probabilities).mean(dim=0)
+    else:
+        mean_probabilities = all_probabilities[0]
+
     # Identification de la classe majoritaire
-    predicted_class_index = torch.argmax(
+    predicted_class_index = torch.argmax(mean_probabilities).item()
     predicted_class = class_labels[predicted_class_index]

     # Créer un dictionnaire de pourcentages trié par probabilité
-    sorted_percentages = {class_labels[idx]:
+    sorted_percentages = {class_labels[idx]: mean_probabilities[idx].item() * 100 for idx in range(len(class_labels))}
+    print(sorted_percentages)
     sorted_percentages = dict(sorted(sorted_percentages.items(), key=lambda item: item[1], reverse=True))

     return predicted_class, sorted_percentages

+def predict_class_for_Neptune(text,model):
+    # Tokenize the text
+    encoded_text = transform_for_inference_text(text, tokenizer, 125, 125, 1, 2550)
+    batch, sentences = prepare_text(encoded_text)
+
+    # Process the text through the model
+    model.eval()
+    all_probabilities = []
+    with torch.no_grad():
+        for sample in batch:
+            input_ids = torch.tensor(sample[0], device=device, dtype=torch.long).unsqueeze(0)
+            segment_ids = torch.tensor(sample[1], device=device, dtype=torch.long).unsqueeze(0)
+            masked_pos = torch.tensor(sample[2], device=device, dtype=torch.long).unsqueeze(0)
+
+            _, _, logits_mclsf1, logits_mclsf2 = model(input_ids, segment_ids, masked_pos)
+            probabilities1 = torch.softmax(logits_mclsf1, dim=1)[0]
+            probabilities2 = torch.softmax(logits_mclsf2, dim=1)[0]
+            all_probabilities.extend([probabilities1, probabilities2])
+
+    # Aggregate probabilities
+    aggregated_probabilities = torch.stack(all_probabilities).mean(dim=0)
+
+    # Identify the majority class
+    predicted_class_index = torch.argmax(aggregated_probabilities).item()
+    predicted_class = class_labels[predicted_class_index]
+
+    # Create a sorted dictionary of percentages
+    sorted_percentages = {class_labels[idx]: aggregated_probabilities[idx].item() * 100 for idx in range(len(class_labels))}
+    sorted_percentages = dict(sorted(sorted_percentages.items(), key=lambda item: item[1], reverse=True))
+
+    return predicted_class, sorted_percentages
+
+def predict_sentences_class(text,model):
+    # Tokenisation du texte
+    inputs = transform_list_of_texts([text], tokenizer, 510, 510, 1, 2550)
+    aligned_predictions = {}
+
+    # Passage du texte à travers le modèle
+    model.eval()
+    with torch.no_grad():
+        for i, sample in enumerate(inputs['input_ids']):
+            for j in range(len(sample)):
+                input_ids_tensor = sample[j].clone().detach().to(device).unsqueeze(0)
+                attention_mask_tensor = inputs['attention_mask'][i][j].clone().detach().to(device).unsqueeze(0)
+
+                # Decode the sentence
+                sentence = tokenizer.decode(input_ids_tensor[0], skip_special_tokens=True)
+
+                # Passage du texte à travers le modèle
+                outputs = model(input_ids=input_ids_tensor, attention_mask=attention_mask_tensor)
+
+                # Identification de la classe prédite
+                predicted_class_index = torch.argmax(outputs.logits, dim=1).item()
+                predicted_class = class_labels[predicted_class_index]  # Get only the class name
+
+                # Ajouter la prédiction au dictionnaire
+                if sentence not in aligned_predictions:
+                    aligned_predictions[sentence] = predicted_class
+
+    return aligned_predictions
+
+
 def transform_list_of_texts(
     texts: list[str],
     tokenizer: PreTrainedTokenizerBase,
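The new predict_class (and predict_class_for_Neptune) share one pattern: run each tokenized chunk through the classifier, softmax the logits, then average the per-chunk distributions before taking the argmax. A stand-alone sketch of just that averaging step; the 5-class shape and the random logits are made up for illustration:

    import torch

    def average_chunk_probabilities(chunk_logits):
        # Softmax each chunk's logits, then average the distributions across chunks,
        # as the refactored predict_class does for 510-token windows of a long document.
        probabilities = [torch.softmax(logits, dim=-1) for logits in chunk_logits]
        return torch.stack(probabilities).mean(dim=0)

    # Toy usage: three chunks of fake logits for a 5-class head (e.g. fr_class_labels).
    chunks = [torch.randn(5) for _ in range(3)]
    mean_probs = average_chunk_probabilities(chunks)
    predicted_index = torch.argmax(mean_probs).item()
    print(predicted_index, mean_probs[predicted_index].item() * 100)

predict_sentences_class skips the averaging and instead keeps one argmax per decoded chunk, which is what produces the {sentence: label} dictionary rendered in the templates.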
neptune_270_papers/neptune_270_papers/added_tokens.json ADDED
@@ -0,0 +1,3 @@
+{
+  "<MULT>": 30522
+}
neptune_270_papers/neptune_270_papers/model.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9bd3d3f2c5c88b820b8ed73cfe1bd889c4cdf399cd0a37514919186a6002571c
+size 269574874
neptune_270_papers/neptune_270_papers/special_tokens_map.json ADDED
@@ -0,0 +1,16 @@
+{
+  "additional_special_tokens": [
+    {
+      "content": "<MULT>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
neptune_270_papers/neptune_270_papers/tokenizer.json ADDED
The diff for this file is too large to render.
neptune_270_papers/neptune_270_papers/tokenizer_config.json ADDED
@@ -0,0 +1,66 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "30522": {
+      "content": "<MULT>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<MULT>"
+  ],
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}
neptune_270_papers/neptune_270_papers/unique_labels.json ADDED
@@ -0,0 +1 @@
+["Physics", "Societies", "admsci", "agriculture", "ai", "applsci", "asi", "biology", "economies", "energies", "environments", "make", "mathematics", "robotics", "sports", "technologies", "vehicles"]
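unique_labels.json lists the 17 labels the checkpoint was trained on. Assuming (this is not confirmed by the diff) that the integer keys in class_labels follow this file's ordering, an index-to-label map can be rebuilt directly from it:

    import json

    # Load the label list shipped alongside model.pt and map indices to names.
    with open("neptune_270_papers/neptune_270_papers/unique_labels.json") as f:
        unique_labels = json.load(f)

    index_to_label = dict(enumerate(unique_labels))
    print(index_to_label[16])  # 'vehicles', matching class_labels[16] in helper_functions.py
    print(index_to_label[10])  # 'environments', matching class_labels[10]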
neptune_270_papers/neptune_270_papers/vocab.txt ADDED
The diff for this file is too large to render.
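Together, added_tokens.json, special_tokens_map.json, tokenizer.json, tokenizer_config.json and vocab.txt make neptune_270_papers/neptune_270_papers a complete local DistilBERT tokenizer directory. A hedged sketch of loading it; whether helper_functions.py actually obtains its tokenizer from this path is an assumption:

    from transformers import AutoTokenizer

    # Load the tokenizer from the directory added in this commit.
    tokenizer = AutoTokenizer.from_pretrained("neptune_270_papers/neptune_270_papers")
    print(tokenizer.convert_tokens_to_ids("<MULT>"))  # 30522, per added_tokens.json
    print(tokenizer.model_max_length)                 # 512, per tokenizer_config.json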
static/css/style2.css CHANGED
@@ -35,7 +35,7 @@
   --bs-gray-700: #495057;
   --bs-gray-800: #343a40;
   --bs-gray-900: #212529;
-  --bs-primary: #
+  --bs-primary: #5e7cc8;
   --bs-secondary: #14191e;
   --bs-success: #0cbc87;
   --bs-info: #4f9ef8;
@@ -8849,7 +8849,7 @@ textarea.form-control-lg {

 .bg-primary {
   --bs-bg-opacity: 1;
-  background-color:
+  background-color: var(--bs-primary) !important;
 }

static/js/pdf.js CHANGED
@@ -7,7 +7,9 @@ const categoryResult = document.getElementById("category-result")
 const reset = document.getElementById("reset");
 const imagePreview = dropArea.querySelector("#image-preview");
 const currentClassProbabilitiesList = document.getElementById("class-probabilities");
+const currentClassProbabilitiesList_resultSection = document.getElementById("class-probabilities-result-section");
 const currentPredictedClass = document.getElementById('predicted-class')
+const currentPredictedClass_resultSection = document.getElementById('predicted-class-result-section')
 const sentencePredictions = document.getElementById('classifiedText')
 const staticDiv = document.getElementById("static");
 const dynamicDiv = document.getElementById("dynamic");
@@ -115,8 +117,10 @@ form.addEventListener("submit", (event) => {
     ocrResult.value = resultTextArea.value;
     const classProbabilitiesList = responseDOM.getElementById("class-probabilities");
     currentClassProbabilitiesList.innerHTML = classProbabilitiesList.innerHTML;
+    currentClassProbabilitiesList_resultSection.innerHTML = classProbabilitiesList.innerHTML;
     const PredictedClass = responseDOM.getElementById("predicted-class")
     currentPredictedClass.innerHTML = PredictedClass.innerHTML;
+    currentPredictedClass_resultSection.innerHTML = PredictedClass.innerHTML;
     document.getElementById('transcribedText').innerHTML = responseDOM.getElementById('transcribedText').innerHTML;
     const sentencePredictionsResponse= responseDOM.getElementById('classifiedText').innerHTML;
     sentencePredictions.innerHTML = sentencePredictionsResponse;
static/js/pdf_fr.js CHANGED
@@ -7,8 +7,9 @@ const categoryResult = document.getElementById("category-result")
 const reset = document.getElementById("reset");
 const imagePreview = dropArea.querySelector("#image-preview");
 const currentClassProbabilitiesList = document.getElementById("class-probabilities");
-const
-const
+const currentClassProbabilitiesList_resultSection = document.getElementById("class-probabilities-result-section");
+const currentPredictedClass = document.getElementById('predicted-class');
+const sentencePredictions = document.getElementById('classifiedText');
 const staticDiv = document.getElementById("static");
 const dynamicDiv = document.getElementById("dynamic");
 const sentenceResultDiv = document.getElementById("dynamicResult");
@@ -115,6 +116,7 @@ form.addEventListener("submit", (event) => {
     ocrResult.value = resultTextArea.value;
     const classProbabilitiesList = responseDOM.getElementById("class-probabilities");
     currentClassProbabilitiesList.innerHTML = classProbabilitiesList.innerHTML;
+    currentClassProbabilitiesList_resultSection.innerHTML = classProbabilitiesList.innerHTML;
     const PredictedClass = responseDOM.getElementById("predicted-class")
     currentPredictedClass.innerHTML = PredictedClass.innerHTML;
     document.getElementById('transcribedText').innerHTML = responseDOM.getElementById('transcribedText').innerHTML;
templates/pdf.html CHANGED
@@ -139,11 +139,101 @@

 </div>
 <div class="row d-sm-flex justify-content-center mb-5">
-    <div class="col-11">
-        <div class="row
+    <div class="col-11 d-none" id="dynamicResult">
+        <div class="row " >
     <h1 id="resultsHeading" class="text-center text-white-50">Results</h1>
-    <div class="col-
-        <div class="
+            <div class="col-4 " >
+                <div class="nav flex-column position-sticky top-10 shadow-lg"
+                    style="background-color: #222424;">
+                    <div class="card" style="background-color: #303131; border: 2px dashed rgb(82, 82, 82);">
+                        <div class="card-body ">
+                            <h4 class="card-title text-white">Text's Category</h4>
+                            <div class="row d-flex">
+                                <div class="col-sm-7 col-6">
+                                    <ul class="graphl-legend-rectangle" id="class-probabilities-result-section">
+                                        {% for class_label, probability in class_probabilities.items() %}
+                                        {% if loop.index <= 5 %} <li class="text-white-50">
+                                            <span class="bg-{{ class_label[1] }}"></span>
+                                            <div class="d-flex justify-content-center">
+                                                {{ class_label[0] }}:
+                                                <span class="text-white w-100"> {{ "%.2f" % probability
+                                                }}%</span>
+                                            </div>
+                                            </li>
+                                        {% endif %}
+                                        {% endfor %}
+                                    </ul>
+                                </div>
+                                <div class="col-sm-5 grid-margin col-6">
+                                    <canvas class="bestSellers" data-chart='{{ chart_data | tojson | safe }}'
+                                        id="bestSellers#"></canvas>
+                                </div>
+                            </div>
+                            <div class="mb-lg-0 text-white-50">
+                                la classe la plus dominante est <span class="fw-bolder text-white"
+                                    id="predicted-class-result-section">{{
+                                    predicted_class[0]
+                                    }}</span>
+                            </div>
+                        </div>
+                    </div>
+
+                    <div class="card my-auto mt-3"
+                        style="background-color: #303131; border: 2px dashed rgb(82, 82, 82);">
+                        <div class="card-body">
+                            <h4 class="card-title text-white">Legend</h4>
+                            <div class="row d-flex">
+                                <div class="col-sm-6 col-6">
+                                    <ul class="graphl-legend-rectangle">
+                                        <li class="text-white-50"><span class="bg-info "></span>vehicles
+                                        </li>
+                                        <li class="text-white-50"><span class="bg-success"></span>environments
+                                        </li>
+                                        <li class="text-white-50"><span class="bg-danger"></span>energies
+                                        </li>
+                                        <li class="text-white-50"><span class="bg-primary"></span>Physics
+                                        </li>
+                                        <li class="text-white-50"><span class="bg-moss"></span>robotics
+                                        </li>
+                                        <li class="text-white-50"><span class="bg-agri"></span>agriculture
+                                        </li>
+                                        <li class="text-white-50"><span class="bg-yellow"></span>ML
+                                        </li>
+                                        <li class="text-white-50"><span class="bg-warning"></span>economies
+                                        </li>
+                                        <li class="text-white-50"><span class="bg-vanila"></span>technologies
+                                        </li>
+                                    </ul>
+                                </div>
+                                <div class="col-sm-6 col-6">
+                                    <ul class="graphl-legend-rectangle">
+
+                                        <li class="text-white-50"><span class="bg-coffe"></span>mathematics
+                                        </li>
+                                        <li class="text-white-50"><span class="bg-orange "></span>sports
+                                        </li>
+                                        <li class="text-white-50"><span class="bg-cyan"></span>AI
+                                        </li>
+                                        <li class="text-white-50"><span class="bg-rosy"></span>Innovation
+                                        </li>
+                                        <li class="text-white-50"><span class="bg-picton"></span>Science
+                                        </li>
+                                        <li class="text-white-50"><span class="bg-purple"></span>Societies
+                                        </li>
+                                        <li class="text-white-50"><span class="bg-pink"></span>administration
+                                        </li>
+                                        <li class="text-white-50"><span class="bg-cambridge"></span>biology
+                                        </li>
+                                    </ul>
+                                </div>
+                            </div>
+                        </div>
+                    </div>
+                </div>
+
+            </div>
+            <div class="col-8">
+                <div class="card " style="background-color: #303131; border: 2px dashed rgb(82, 82, 82);">
                     <div class="card-body">
                         <h5 class="card-title text-white mb-3">Transcribed and Classified Text</h5>
                         <div id="transcribedText" class="text-white-50 mb-4"></div>
@@ -163,6 +253,7 @@
                     </div>
                 </div>
             </div>
+
         </div>
     </div>

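The legend and the top-5 probability list added to pdf.html (and mirrored in pdf_fr.html below) rely on class_probabilities being an ordered {(name, css_class, color): percentage} dict. A small stand-alone check of what the new loop prints, using the jinja2 package and made-up probabilities:

    from jinja2 import Template

    # Reduced version of the loop added to templates/pdf.html: keep the first five
    # entries and print "name: xx.xx%" for each.
    snippet = Template(
        "{% for class_label, probability in class_probabilities.items() %}"
        "{% if loop.index <= 5 %}{{ class_label[0] }}: {{ '%.2f' % probability }}%\n{% endif %}"
        "{% endfor %}"
    )
    class_probabilities = {  # illustrative values only
        ('AI', 'cyan', '#0dcaf0'): 42.10,
        ('economies', 'warning', '#f7c32e'): 21.70,
        ('sports', 'orange', '#fd7e14'): 12.04,
    }
    print(snippet.render(class_probabilities=class_probabilities))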
templates/pdf_fr.html CHANGED
@@ -46,11 +46,13 @@
             <a class="nav-item nav-link " href="voice_fr">SLU</a>
         </li>
         <li class="nav-item dropdown">
-
-
-
-
-
+            <a class="nav-link" href="#" id="accounntMenu" data-bs-toggle="dropdown" aria-haspopup="true"
+                aria-expanded="false">langue</a>
+            <ul class="dropdown-menu" aria-labelledby="accounntMenu">
+                <li> <a class="dropdown-item " href="pdf"><img src="../static/icons/English.svg"
+                            class="avatar avatar" style="height: 20px;" alt="French flag"> English</a> </li>
+            </ul>
+        </li>
     </ul>
 </div>
 <!-- Main navbar END -->
@@ -85,12 +87,13 @@
 <div class="col-12 d-flex justify-content-center">
     <div class="col-sm-12 d-inline align-items-center" style="height: 175px; width: 260px;">
         <div class="flex-shrink-0 avatar avatar-lg me-2 mb-3 mt-4">
-            <img class="avatar-img rounded-circle"
-
+            <img class="avatar-img rounded-circle" src="../static/icons/logo_header_128x128.png"
+                alt="">
         </div>
-        <h5 class="card-title text-white-50">Obtenez plus d'informations sur votre pdf
+        <h5 class="card-title text-white-50">Obtenez plus d'informations sur votre pdf 📑📑.
+        </h5>
     </div>
-
+
 </div>
 </div>
 <div class="card-body d-none" id="dynamic">
@@ -133,13 +136,99 @@
     </div>

 <!-- Right sidebar END -->
-    </div> <!-- Row END -->
+        </div> <!-- Row END -->
 </div>
 <div class="row d-sm-flex justify-content-center mb-5">
-    <div class="col-11">
-        <div class="row
+    <div class="col-11 d-none" id='dynamicResult'>
+        <div class="row" >
     <h1 id="resultsHeading" class="text-center text-white-50">Results</h1>
-    <div class="col-
+            <div class="col-4">
+                <div class="card" style="background-color: #303131; border: 2px dashed rgb(82, 82, 82);">
+                    <div class="card-body">
+                        <h4 class="card-title text-white">Catégorie du texte</h4>
+                        <div class="row d-flex">
+                            <div class="col-sm-7 col-6">
+                                <ul class="graphl-legend-rectangle" id="class-probabilities-result-section">
+                                    {% for class_label, probability in class_probabilities.items() %}
+                                    {% if loop.index <= 5 %} <li class="text-white-50">
+                                        <span class="bg-{{ class_label[1] }}"></span>
+                                        <div class="d-flex justify-content-center">
+                                            {{ class_label[0] }}:
+                                            <span class="text-white w-100"> {{ "%.2f" % probability
+                                            }}%</span>
+                                        </div>
+                                        </li>
+                                    {% endif %}
+                                    {% endfor %}
+                                </ul>
+                            </div>
+                            <div class="col-sm-5 grid-margin col-6">
+                                <canvas class="bestSellers" data-chart='{{ chart_data | tojson | safe }}'
+                                    id="bestSellers#"></canvas>
+                            </div>
+                        </div>
+                        <div class="mb-lg-0 text-white-50">
+                            la classe la plus dominante est <span class="fw-bolder text-white"
+                                id="predicted-class">{{
+                                predicted_class[0]
+                                }}</span>
+                        </div>
+                    </div>
+                </div>
+
+                <div class="card my-auto mt-3"
+                    style="background-color: #303131; border: 2px dashed rgb(82, 82, 82);">
+                    <div class="card-body">
+                        <h4 class="card-title text-white">Légende</h4>
+                        <div class="row d-flex">
+                            <div class="col-sm-6 col-6">
+                                <ul class="graphl-legend-rectangle">
+                                    <li class="text-white-50"><span class="bg-info "></span>vehicles
+                                    </li>
+                                    <li class="text-white-50"><span class="bg-success"></span>environments
+                                    </li>
+                                    <li class="text-white-50"><span class="bg-danger"></span>energies
+                                    </li>
+                                    <li class="text-white-50"><span class="bg-primary"></span>Physics
+                                    </li>
+                                    <li class="text-white-50"><span class="bg-moss"></span>robotics
+                                    </li>
+                                    <li class="text-white-50"><span class="bg-agri"></span>agriculture
+                                    </li>
+                                    <li class="text-white-50"><span class="bg-yellow"></span>ML
+                                    </li>
+                                    <li class="text-white-50"><span class="bg-warning"></span>economies
+                                    </li>
+                                    <li class="text-white-50"><span class="bg-vanila"></span>technologies
+                                    </li>
+                                </ul>
+                            </div>
+                            <div class="col-sm-6 col-6">
+                                <ul class="graphl-legend-rectangle">
+
+                                    <li class="text-white-50"><span class="bg-coffe"></span>mathematics
+                                    </li>
+                                    <li class="text-white-50"><span class="bg-orange "></span>sports
+                                    </li>
+                                    <li class="text-white-50"><span class="bg-cyan"></span>AI
+                                    </li>
+                                    <li class="text-white-50"><span class="bg-rosy"></span>Innovation
+                                    </li>
+                                    <li class="text-white-50"><span class="bg-picton"></span>Science
+                                    </li>
+                                    <li class="text-white-50"><span class="bg-purple"></span>Societies
+                                    </li>
+                                    <li class="text-white-50"><span class="bg-pink"></span>administration
+                                    </li>
+                                    <li class="text-white-50"><span class="bg-cambridge"></span>biology
+                                    </li>
+                                </ul>
+                            </div>
+                        </div>
+                    </div>
+                </div>
+            </div>
+            <div class="col-8">
                 <div class="card" style="background-color: #303131; border: 2px dashed rgb(82, 82, 82);">
                     <div class="card-body">
                         <h5 class="card-title text-white mb-3">Transcribed and Classified Text</h5>