Spaces:

VampeeHuntee
/

xlm-R-base_COVID19_NER

Sleeping

App Files Files Community

VampeeHuntee committed on Jul 2

Commit

cb1f9b3

•

1 Parent(s): ba8ed63

first commit

Browse files

Files changed (9) hide show

.env +3 -0
.gitignore +3 -0
Dockerfile +22 -0
app.py +26 -0
modules/model.py +56 -0
requirements.txt +7 -0
static/style.css +81 -0
templates/base.html +25 -0
templates/demo.html +31 -0

.env ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ MODEL_NAME = "VampeeHuntee/xlm-roberta-base_baseline_syllables"
2	+
3	+ SAMPLE_TEXT = "Từ 24 - 7 đến 31 - 7 , bệnh nhân được mẹ là bà H.T.P ( 47 tuổi ) đón về nhà ở phường Phước Hoà ( bằng xe máy ) , không đi đâu chỉ ra Tạp hoá Phượng , chợ Vườn Lài , phường An Sơn cùng mẹ bán tạp hoá ở đây ."

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+__pycache__
+.vscode
+test.ipynb

Dockerfile ADDED Viewed

	@@ -0,0 +1,22 @@

+# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+FROM python:3.9.12
+# The two following lines are requirements for the Dev Mode to be functional
+# Learn more about the Dev Mode at https://huggingface.co/dev-mode-explorers
+RUN useradd -m -u 1000 user
+WORKDIR /app
+# Copy and install the requirements first so this layer is reused when only
+# the application code changes.
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+COPY --chown=user . /app
+USER user
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH
+# gunicorn has no --host/--port options (those are uvicorn flags); the listen
+# address is given as a single --bind HOST:PORT argument.
+CMD ["gunicorn", "app:app", "--bind", "0.0.0.0:7860"]

app.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import os
+from flask import Flask
+from flask import jsonify
+from flask import render_template
+from flask import request
+from modules.model import inference
+# WSGI application object; the Dockerfile's gunicorn command refers to it as "app:app".
+app = Flask(__name__)
+# app_dir = os.path.dirname(os.path.abspath(__file__))
+@app.route("/", defaults={"js": "demo"})
+def index(js):
+    return render_template(f"{js}.html", js=js)
+@app.route("/predict",  methods=["POST"])
+def predict():
+    sample = os.getenv('SAMPLE_TEXT')
+    text = request.form.get('text', sample, type=str)
+    text = sample if text == "" else text
+    return jsonify(result=inference(text))
+# Local development entry point: this branch runs only when the file is
+# executed directly, not when a WSGI server imports the module.
+# NOTE(review): debug=True turns on Flask's debug mode; keep it off any
+# publicly reachable deployment.
+if __name__ == "__main__":
+    app.run(debug = True)

modules/model.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import torch
+from transformers import AutoTokenizer, AutoModelForTokenClassification
+from seqeval.metrics.sequence_labeling import get_entities
+tokenizer = AutoTokenizer.from_pretrained("VampeeHuntee/xlm-roberta-base_baseline_syllables", use_fast = False)
+model = AutoModelForTokenClassification.from_pretrained("VampeeHuntee/xlm-roberta-base_baseline_syllables")
+def get_words(tokens:list[str]):
+    return tokenizer.convert_tokens_to_string(tokens)
+def format_result(tokens:list[str], labels:list[str]):
+    """Định dạng kết quả NER thành HTML với các thực thể được đánh dấu."""
+    formatted_output = ""
+    current_position = 0
+    start = 0
+    end = 0
+    EntitySpan = '<span class="entity" data-entity="{label}">{word} <span class="entity" label-entity="{label}">{label}</span></span>'
+    for label, start, end in get_entities(labels):
+        end += 1
+        entity = {
+            'word':get_words(tokens[start:end]),
+            'label':label
+        }
+        # Thêm phần văn bản trước thực thể (nếu có)
+        if start > current_position:
+            formatted_output += get_words(tokens[current_position: start])
+        # Thêm thực thể với thẻ span và nhãn
+        formatted_output += EntitySpan.format(**entity)
+        # Cập nhật vị trí hiện tại
+        current_position = end
+    # Thêm phần văn bản còn lại sau thực thể cuối cùng (nếu có)
+    if current_position < len(tokens):
+        formatted_output += get_words(tokens[current_position:])
+    return formatted_output
+def inference(text:str):
+    inputs = tokenizer(text, return_tensors="pt",add_special_tokens=False)
+    with torch.no_grad():
+        logits = model(**inputs).logits
+        predictions = torch.argmax(logits, dim=2)
+        labels = [model.config.id2label[t.item()] for t in predictions[0]]
+    tokens = [t.detach().numpy() for t in inputs['input_ids']]
+    tokens = tokenizer.convert_ids_to_tokens(tokens[0])
+    return format_result(tokens, labels)

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+-f https://download.pytorch.org/whl/torch_stable.html
+torch==2.3.0+cpu
+transformers==4.38.0
+# Imported by modules/model.py (seqeval.metrics.sequence_labeling.get_entities)
+seqeval
+# Backend needed by the slow (use_fast=False) XLM-RoBERTa tokenizer
+sentencepiece
+gunicorn
+Flask
+# dynoscale

static/style.css ADDED Viewed

	@@ -0,0 +1,81 @@

+/* Other styles */
+body {
+    font-family: sans-serif;
+    text-align: center; /* Center the content */
+}
+textarea {
+    width: 80%;
+    height: 150px;
+    margin: 20px auto; /* Center the textarea */
+    padding: 10px;
+    resize: vertical; /* Allow vertical resizing only */
+}
+button {
+    padding: 20px 70px;
+    background-color: #007bff; /* Blue */
+    color: white;
+    border: none;
+    cursor: pointer;
+    border-radius: 5px; /* Rounded corners */
+}
+#output {
+    margin-top: 20px;
+    padding: 10px;
+    border: 1px solid #ccc;
+    width: 80%;
+    margin: 20px auto; /* Center the result div */
+    text-align: left; /* Left-align the result text */
+    white-space: pre-line; /* Preserve line breaks */
+}
+/* Styles for recognized entities */
+[data-entity] {
+    padding: 5px 10px;
+    border-radius: 5px;
+    display: inline-block;
+}
+[label-entity] {
+    font-size: x-small;
+    padding: 2px 4px;
+    border-radius: 5px;
+    display: inline-block;
+    color: white;
+}
+/* Colors for each entity type: data-entity is the highlight, label-entity the tag badge */
+[data-entity="PATIENT_ID"] { background-color: #85cef0; }
+[label-entity="PATIENT_ID"] { background-color: #01579B; }
+[data-entity="NAME"] { background-color: #ee7fa4 ; }
+[label-entity="NAME"] { background-color: #C2185B ; }
+[data-entity="GENDER"] { background-color: #ec9f78 ; }
+[label-entity="GENDER"] { background-color: #4E342E ; }
+[data-entity="AGE"] { background-color: #f7d978 ; }
+[label-entity="AGE"] { background-color: #F57F17 ; }
+[data-entity="JOB"] { background-color: #e06ff1 ; }
+[label-entity="JOB"] { background-color: #6A1B9A ; }
+[data-entity="LOCATION"] { background-color: #58fc66 ; }
+[label-entity="LOCATION"] { background-color: #1B5E20 ; }
+[data-entity="ORGANIZATION"] { background-color: #f17070 ; }
+[label-entity="ORGANIZATION"] { background-color: #424242 ; }
+[data-entity="DATE"] { background-color: #f3b552 ; }
+[label-entity="DATE"] { background-color: #E65100 ; }
+[data-entity="SYMPTOM_AND_DISEASE"] { background-color: #66e9e2 ; }
+[label-entity="SYMPTOM_AND_DISEASE"] { background-color: #006064 ; }
+[data-entity="TRANSPORTATION"] { background-color: #f06758 ; }
+[label-entity="TRANSPORTATION"] { background-color: #B71C1C ; }
+/* Entity types covered above:
+   type_entities = ['PATIENT_ID', 'NAME', 'GENDER', 'AGE', 'JOB', 'LOCATION',
+                 'ORGANIZATION', 'DATE', 'SYMPTOM_AND_DISEASE', 'TRANSPORTATION']*/

templates/base.html ADDED Viewed

	@@ -0,0 +1,25 @@

+{# Base layout: child templates fill the intro, title and script blocks. #}
+<!DOCTYPE html>
+<html lang="vi">
+<head>
+    <meta charset="utf-8">
+    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
+    <link rel="icon" href="data:,">
+    <title>Phân loại thực thể</title>
+</head>
+<body>
+    <p>{% block intro %}{% endblock %}</p>
+    <h1>{% block title %}{% endblock %}</h1>
+    {# Attributes are separated by whitespace only; a comma between them is invalid markup. #}
+    <form method="POST" id="input">
+        <textarea name="text" placeholder="Nhập văn bản:"></textarea>
+        <button type="submit">Trích xuất thực thể</button>
+    </form>
+    {# Filled in by the page script with the markup returned from the server. #}
+    <div id="output"></div>
+    {% block script %}{% endblock %}
+</body>
+</html>

templates/demo.html ADDED Viewed

	@@ -0,0 +1,31 @@

+{% extends 'base.html' %}
+{% block intro %}
+<a href="https://huggingface.co/VampeeHuntee"><code>VampireHuntee</code></a>
+  Đây là nơi để các mô hình trong khóa luận tốt nghiệp, bạn có thể sử dụng các mô hình public để thử.
+{% endblock %}
+{% block title %}
+  Phân loại thực thể
+{% endblock %}
+{% block script %}
+  <script>
+    function predictSubmit(ev) {
+      ev.preventDefault();
+      var request = new XMLHttpRequest();
+      request.addEventListener('load', resultShow);
+      request.open('POST',"{{ url_for('predict') }}");
+      request.send(new FormData(this));
+    }
+    function resultShow() {
+      var data = JSON.parse(this.responseText);
+      var output = document.getElementById('output');
+      output.innerHTML = data.result;
+    }
+    // Route submissions of the #input form through predictSubmit.
+    var form = document.getElementById('input');
+    form.addEventListener('submit', predictSubmit);
+  </script>
+{% endblock %}