MichalMlodawski committed on
Commit
fc4f693
1 Parent(s): 0f9e06f

Upload 10 files

Browse files
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,121 +1,121 @@
1
- ---
2
- license: cc-by-nc-nd-4.0
3
- language:
4
- - en
5
-
6
- model-index:
7
- - name: roberta-large Image Prompt Classifier
8
- results:
9
- - task:
10
- type: text-classification
11
- dataset:
12
- name: nsfw-text-detection
13
- type: custom
14
- metrics:
15
- - name: Accuracy
16
- type: self-reported
17
- value: 93%
18
- - name: Precision
19
- type: self-reported
20
- value: 88%
21
- - name: Recall
22
- type: self-reported
23
- value: 90%
24
- ---
25
-
26
- # roberta-large Image Prompt Classifier
27
-
28
- ## Model Overview
29
-
30
- This model is a fine-tuned version of `roberta-large` designed specifically for classifying image generation prompts into three distinct categories: SAFE, QUESTIONABLE, and UNSAFE. Leveraging the robust capabilities of the `roberta-large` architecture, this model ensures high accuracy and reliability in identifying the nature of prompts used for generating images.
31
-
32
- ## Model Details
33
-
34
- - **Model Name:** roberta-large Image Prompt Classifier
35
- - **Base Model:** [roberta-large](https://huggingface.co/roberta-large)
36
- - **Fine-tuned By:** Michał Młodawski
37
- - **Categories:**
38
- - `0`: SAFE
39
- - `1`: QUESTIONABLE
40
- - `2`: UNSAFE
41
-
42
- ## Use Cases
43
-
44
- This model is particularly useful for platforms and applications involving AI-generated content, where it is crucial to filter and classify prompts to maintain content safety and appropriateness. Some potential applications include:
45
-
46
- - **Content Moderation:** Automatically classify and filter prompts to prevent the generation of inappropriate or harmful images.
47
- - **User Safety:** Enhance user experience by ensuring that generated content adheres to safety guidelines.
48
- - **Compliance:** Help platforms comply with regulatory requirements by identifying and flagging potentially unsafe prompts.
49
-
50
- ## How It Works
51
-
52
- The model takes an input prompt and classifies it into one of three categories:
53
-
54
- 1. **SAFE:** Prompts that are deemed appropriate and free from harmful content.
55
- 2. **QUESTIONABLE:** Prompts that may require further review due to potential ambiguity or slight risk.
56
- 3. **UNSAFE:** Prompts that are likely to generate inappropriate or harmful content.
57
-
58
- The classification is based on the semantic understanding and contextual analysis provided by the `roberta-large` architecture, fine-tuned on a curated dataset tailored for this specific task.
59
-
60
- ## Performance
61
-
62
- Extensive testing and validation have been conducted to ensure the model's performance meets high standards of accuracy and reliability. Key metrics include:
63
-
64
- - **Accuracy:** 93%
65
- - **Precision:** 88%
66
- - **Recall:** 90%
67
-
68
- These metrics highlight the model's capability to effectively distinguish between different categories of prompts with high confidence.
69
-
70
- ## Getting Started
71
-
72
- To start using the roberta-large Image Prompt Classifier, you can easily integrate it into your existing workflows with the following steps:
73
-
74
- ### Installation
75
-
76
- pip install transformers==4.37.2
77
- pip install torch==2.3.1
78
-
79
- ### Usage
80
-
81
- import os
82
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
83
- import torch
84
-
85
-
86
- model_repo = "MichalMlodawski/nsfw-text-detection-large"
87
-
88
- tokenizer = AutoTokenizer.from_pretrained(model_repo)
89
- model = AutoModelForSequenceClassification.from_pretrained(model_repo)
90
-
91
- def classify_text(text):
92
- inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
93
-
94
- with torch.no_grad():
95
- outputs = model(**inputs)
96
-
97
- logits = outputs.logits
98
- predicted_class = torch.argmax(logits, dim=1).item()
99
-
100
- return predicted_class
101
-
102
- if __name__ == "__main__":
103
- examples = [
104
- {"text": "wild west cowboy caravan through Arizona Desert , Men Woman and children, horse drawn carriages", "expected_class": 0},
105
- {"text": "A person with questionable intentions in a dimly lit alley.", "expected_class": 1},
106
- {"text": "An inappropriate scene involving explicit content.", "expected_class": 2}
107
- ]
108
-
109
- for example in examples:
110
- text = example["text"]
111
- expected_class = example["expected_class"]
112
- predicted_class = classify_text(text)
113
- print(f"Prompt: {text}")
114
- print(f"Predicted class: {predicted_class}")
115
- print(f"Expected class: {expected_class}")
116
- print("---")
117
-
118
- ### Disclaimer
119
-
120
- The dataset includes actual prompts for image generation that could be perceived as abusive, offensive, or obscene. Furthermore, the examples and data might contain unfavorable information about certain businesses. We merely collect this data and do not assume any legal responsibility.
121
- Please note: A portion of the data was created using Large Language Models (LLM).
 
1
+ ---
2
+ license: cc-by-nc-nd-4.0
3
+ language:
4
+ - en
5
+
6
+ model-index:
7
+ - name: roberta-large Image Prompt Classifier
8
+ results:
9
+ - task:
10
+ type: text-classification
11
+ dataset:
12
+ name: nsfw-text-detection
13
+ type: custom
14
+ metrics:
15
+ - name: Accuracy
16
+ type: self-reported
17
+ value: 93%
18
+ - name: Precision
19
+ type: self-reported
20
+ value: 88%
21
+ - name: Recall
22
+ type: self-reported
23
+ value: 90%
24
+ ---
25
+
26
+ # roberta-large Image Prompt Classifier
27
+
28
+ ## Model Overview
29
+
30
+ This model is a fine-tuned version of `roberta-large` designed specifically for classifying image generation prompts into three distinct categories: SAFE, QUESTIONABLE, and UNSAFE. Leveraging the robust capabilities of the `roberta-large` architecture, this model ensures high accuracy and reliability in identifying the nature of prompts used for generating images.
31
+
32
+ ## Model Details
33
+
34
+ - **Model Name:** roberta-large Image Prompt Classifier
35
+ - **Base Model:** [roberta-large](https://huggingface.co/roberta-large)
36
+ - **Fine-tuned By:** Michał Młodawski
37
+ - **Categories:**
38
+ - `0`: SAFE
39
+ - `1`: QUESTIONABLE
40
+ - `2`: UNSAFE
41
+
42
+ ## Use Cases
43
+
44
+ This model is particularly useful for platforms and applications involving AI-generated content, where it is crucial to filter and classify prompts to maintain content safety and appropriateness. Some potential applications include:
45
+
46
+ - **Content Moderation:** Automatically classify and filter prompts to prevent the generation of inappropriate or harmful images.
47
+ - **User Safety:** Enhance user experience by ensuring that generated content adheres to safety guidelines.
48
+ - **Compliance:** Help platforms comply with regulatory requirements by identifying and flagging potentially unsafe prompts.
49
+
50
+ ## How It Works
51
+
52
+ The model takes an input prompt and classifies it into one of three categories:
53
+
54
+ 1. **SAFE:** Prompts that are deemed appropriate and free from harmful content.
55
+ 2. **QUESTIONABLE:** Prompts that may require further review due to potential ambiguity or slight risk.
56
+ 3. **UNSAFE:** Prompts that are likely to generate inappropriate or harmful content.
57
+
58
+ The classification is based on the semantic understanding and contextual analysis provided by the `roberta-large` architecture, fine-tuned on a curated dataset tailored for this specific task.
59
+
60
+ ## Performance
61
+
62
+ Extensive testing and validation have been conducted to ensure the model's performance meets high standards of accuracy and reliability. Key metrics include:
63
+
64
+ - **Accuracy:** 93%
65
+ - **Precision:** 88%
66
+ - **Recall:** 90%
67
+
68
+ These metrics highlight the model's capability to effectively distinguish between different categories of prompts with high confidence.
69
+
70
+ ## Getting Started
71
+
72
+ To start using the roberta-large Image Prompt Classifier, you can easily integrate it into your existing workflows with the following steps:
73
+
74
+ ### Installation
75
+
76
+ pip install transformers==4.37.2
77
+ pip install torch==2.3.1
78
+
79
+ ### Usage
80
+
81
+ import os
82
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
83
+ import torch
84
+
85
+
86
+ model_repo = "MichalMlodawski/nsfw-text-detection-large"
87
+
88
+ tokenizer = AutoTokenizer.from_pretrained(model_repo)
89
+ model = AutoModelForSequenceClassification.from_pretrained(model_repo)
90
+
91
+ def classify_text(text):
92
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
93
+
94
+ with torch.no_grad():
95
+ outputs = model(**inputs)
96
+
97
+ logits = outputs.logits
98
+ predicted_class = torch.argmax(logits, dim=1).item()
99
+
100
+ return predicted_class
101
+
102
+ if __name__ == "__main__":
103
+ examples = [
104
+ {"text": "wild west cowboy caravan through Arizona Desert , Men Woman and children, horse drawn carriages", "expected_class": 0},
105
+ {"text": "A person with questionable intentions in a dimly lit alley.", "expected_class": 1},
106
+ {"text": "An inappropriate scene involving explicit content.", "expected_class": 2}
107
+ ]
108
+
109
+ for example in examples:
110
+ text = example["text"]
111
+ expected_class = example["expected_class"]
112
+ predicted_class = classify_text(text)
113
+ print(f"Prompt: {text}")
114
+ print(f"Predicted class: {predicted_class}")
115
+ print(f"Expected class: {expected_class}")
116
+ print("---")
117
+
118
+ ### Disclaimer
119
+
120
+ The dataset includes actual prompts for image generation that could be perceived as abusive, offensive, or obscene. Furthermore, the examples and data might contain unfavorable information about certain businesses. We merely collect this data and do not assume any legal responsibility.
121
+ Please note: A portion of the data was created using Large Language Models (LLMs).
config.json CHANGED
@@ -11,16 +11,16 @@
11
  "hidden_dropout_prob": 0.1,
12
  "hidden_size": 1024,
13
  "id2label": {
14
- "0": "SAFE",
15
- "1": "QUESTIONABLE",
16
- "2": "UNSAFE"
17
  },
18
  "initializer_range": 0.02,
19
  "intermediate_size": 4096,
20
  "label2id": {
21
- "SAFE": 0,
22
- "QUESTIONABLE": 1,
23
- "UNSAFE": 2
24
  },
25
  "layer_norm_eps": 1e-05,
26
  "max_position_embeddings": 514,
 
11
  "hidden_dropout_prob": 0.1,
12
  "hidden_size": 1024,
13
  "id2label": {
14
+ "0": "LABEL_0",
15
+ "1": "LABEL_1",
16
+ "2": "LABEL_2"
17
  },
18
  "initializer_range": 0.02,
19
  "intermediate_size": 4096,
20
  "label2id": {
21
+ "LABEL_0": 0,
22
+ "LABEL_1": 1,
23
+ "LABEL_2": 2
24
  },
25
  "layer_norm_eps": 1e-05,
26
  "max_position_embeddings": 514,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ea8f0e9da471e19fa58ef0310a680708a0f63089e17886604ba0c20528df6c0
3
  size 1421499516
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6412737d149c0b28e5d1fb106b20dae7efc371a952bced0ef573095d54653a4
3
  size 1421499516
optimizer_scheduler_state.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28311a8ef05854ed704749bf68a9f6279875692a2b488725afab26fb70a886a6
3
  size 2843250589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4353e043721a02ce4857a40bbfbfd05b98d3b756ff1b6f3ba873bbcd6b31bc8b
3
  size 2843250589
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77769e09e9c4ebbc63535129bf21c72d6b5c6920c5791de5052edc9787f9fc48
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6910f30f1620141f6f53c1d666baddaf2d4f9deded6048915534d87935996e3
3
  size 4664