3v324v23 committed on
Commit
c9019cd
1 Parent(s): 80b6ee2
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. LICENSE +395 -0
  3. LICENSE_DEPENDENCIES +0 -0
  4. app.py +40 -0
  5. cli/core/__init__.py +9 -0
  6. cli/core/inference.py +530 -0
  7. cli/core/utils.py +195 -0
  8. cli/procs/__init__.py +12 -0
  9. cli/procs/base_proc.py +275 -0
  10. cli/procs/layout_extraction.py +79 -0
  11. cli/procs/line_ocr.py +86 -0
  12. cli/procs/page_deskew.py +85 -0
  13. cli/procs/page_separation.py +112 -0
  14. config.yml +45 -0
  15. docker/Dockerfile +36 -0
  16. docker/dockerbuild.sh +12 -0
  17. docker/dockerignore +4 -0
  18. docker/run_docker.sh +1 -0
  19. main.py +92 -0
  20. requirements.txt +13 -0
  21. src/deskew_HT/.gitignore +16 -0
  22. src/deskew_HT/LICENSE +432 -0
  23. src/deskew_HT/LICENSE_DEPENDENCIES +677 -0
  24. src/deskew_HT/README.md +75 -0
  25. src/deskew_HT/alyn3/deskew.py +163 -0
  26. src/deskew_HT/alyn3/skew_detect.py +388 -0
  27. src/deskew_HT/run_deskew.py +272 -0
  28. src/deskew_HT/setup.cfg +3 -0
  29. src/ndl_layout/.gitmodules +3 -0
  30. src/ndl_layout/LICENSE +395 -0
  31. src/ndl_layout/LICENSE_DEPENDENCIES +0 -0
  32. src/ndl_layout/README.md +92 -0
  33. src/ndl_layout/mmdetection/.dev_scripts/batch_test.py +212 -0
  34. src/ndl_layout/mmdetection/.dev_scripts/batch_test.sh +19 -0
  35. src/ndl_layout/mmdetection/.dev_scripts/benchmark_filter.py +158 -0
  36. src/ndl_layout/mmdetection/.dev_scripts/convert_benchmark_script.py +86 -0
  37. src/ndl_layout/mmdetection/.dev_scripts/gather_benchmark_metric.py +142 -0
  38. src/ndl_layout/mmdetection/.dev_scripts/gather_models.py +162 -0
  39. src/ndl_layout/mmdetection/.dev_scripts/linter.sh +3 -0
  40. src/ndl_layout/mmdetection/.gitignore +121 -0
  41. src/ndl_layout/mmdetection/.pre-commit-config.yaml +40 -0
  42. src/ndl_layout/mmdetection/.readthedocs.yml +7 -0
  43. src/ndl_layout/mmdetection/LICENSE +643 -0
  44. src/ndl_layout/mmdetection/LICENSE_DEPENDENCIES +0 -0
  45. src/ndl_layout/mmdetection/README.md +191 -0
  46. src/ndl_layout/mmdetection/README_zh-CN.md +190 -0
  47. src/ndl_layout/mmdetection/configs/_base_/datasets/cityscapes_detection.py +56 -0
  48. src/ndl_layout/mmdetection/configs/_base_/datasets/cityscapes_instance.py +56 -0
  49. src/ndl_layout/mmdetection/configs/_base_/datasets/coco_detection.py +49 -0
  50. src/ndl_layout/mmdetection/configs/_base_/datasets/coco_instance.py +49 -0
.gitattributes CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ *.png filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) 2022, National Diet Library, Japan.
2
+
3
+ Attribution 4.0 International
4
+
5
+ =======================================================================
6
+
7
+ Creative Commons Corporation ("Creative Commons") is not a law firm and
8
+ does not provide legal services or legal advice. Distribution of
9
+ Creative Commons public licenses does not create a lawyer-client or
10
+ other relationship. Creative Commons makes its licenses and related
11
+ information available on an "as-is" basis. Creative Commons gives no
12
+ warranties regarding its licenses, any material licensed under their
13
+ terms and conditions, or any related information. Creative Commons
14
+ disclaims all liability for damages resulting from their use to the
15
+ fullest extent possible.
16
+
17
+ Using Creative Commons Public Licenses
18
+
19
+ Creative Commons public licenses provide a standard set of terms and
20
+ conditions that creators and other rights holders may use to share
21
+ original works of authorship and other material subject to copyright
22
+ and certain other rights specified in the public license below. The
23
+ following considerations are for informational purposes only, are not
24
+ exhaustive, and do not form part of our licenses.
25
+
26
+ Considerations for licensors: Our public licenses are
27
+ intended for use by those authorized to give the public
28
+ permission to use material in ways otherwise restricted by
29
+ copyright and certain other rights. Our licenses are
30
+ irrevocable. Licensors should read and understand the terms
31
+ and conditions of the license they choose before applying it.
32
+ Licensors should also secure all rights necessary before
33
+ applying our licenses so that the public can reuse the
34
+ material as expected. Licensors should clearly mark any
35
+ material not subject to the license. This includes other CC-
36
+ licensed material, or material used under an exception or
37
+ limitation to copyright. More considerations for licensors:
38
+ wiki.creativecommons.org/Considerations_for_licensors
39
+
40
+ Considerations for the public: By using one of our public
41
+ licenses, a licensor grants the public permission to use the
42
+ licensed material under specified terms and conditions. If
43
+ the licensor's permission is not necessary for any reason--for
44
+ example, because of any applicable exception or limitation to
45
+ copyright--then that use is not regulated by the license. Our
46
+ licenses grant only permissions under copyright and certain
47
+ other rights that a licensor has authority to grant. Use of
48
+ the licensed material may still be restricted for other
49
+ reasons, including because others have copyright or other
50
+ rights in the material. A licensor may make special requests,
51
+ such as asking that all changes be marked or described.
52
+ Although not required by our licenses, you are encouraged to
53
+ respect those requests where reasonable. More_considerations
54
+ for the public:
55
+ wiki.creativecommons.org/Considerations_for_licensees
56
+
57
+ =======================================================================
58
+
59
+ Creative Commons Attribution 4.0 International Public License
60
+
61
+ By exercising the Licensed Rights (defined below), You accept and agree
62
+ to be bound by the terms and conditions of this Creative Commons
63
+ Attribution 4.0 International Public License ("Public License"). To the
64
+ extent this Public License may be interpreted as a contract, You are
65
+ granted the Licensed Rights in consideration of Your acceptance of
66
+ these terms and conditions, and the Licensor grants You such rights in
67
+ consideration of benefits the Licensor receives from making the
68
+ Licensed Material available under these terms and conditions.
69
+
70
+
71
+ Section 1 -- Definitions.
72
+
73
+ a. Adapted Material means material subject to Copyright and Similar
74
+ Rights that is derived from or based upon the Licensed Material
75
+ and in which the Licensed Material is translated, altered,
76
+ arranged, transformed, or otherwise modified in a manner requiring
77
+ permission under the Copyright and Similar Rights held by the
78
+ Licensor. For purposes of this Public License, where the Licensed
79
+ Material is a musical work, performance, or sound recording,
80
+ Adapted Material is always produced where the Licensed Material is
81
+ synched in timed relation with a moving image.
82
+
83
+ b. Adapter's License means the license You apply to Your Copyright
84
+ and Similar Rights in Your contributions to Adapted Material in
85
+ accordance with the terms and conditions of this Public License.
86
+
87
+ c. Copyright and Similar Rights means copyright and/or similar rights
88
+ closely related to copyright including, without limitation,
89
+ performance, broadcast, sound recording, and Sui Generis Database
90
+ Rights, without regard to how the rights are labeled or
91
+ categorized. For purposes of this Public License, the rights
92
+ specified in Section 2(b)(1)-(2) are not Copyright and Similar
93
+ Rights.
94
+
95
+ d. Effective Technological Measures means those measures that, in the
96
+ absence of proper authority, may not be circumvented under laws
97
+ fulfilling obligations under Article 11 of the WIPO Copyright
98
+ Treaty adopted on December 20, 1996, and/or similar international
99
+ agreements.
100
+
101
+ e. Exceptions and Limitations means fair use, fair dealing, and/or
102
+ any other exception or limitation to Copyright and Similar Rights
103
+ that applies to Your use of the Licensed Material.
104
+
105
+ f. Licensed Material means the artistic or literary work, database,
106
+ or other material to which the Licensor applied this Public
107
+ License.
108
+
109
+ g. Licensed Rights means the rights granted to You subject to the
110
+ terms and conditions of this Public License, which are limited to
111
+ all Copyright and Similar Rights that apply to Your use of the
112
+ Licensed Material and that the Licensor has authority to license.
113
+
114
+ h. Licensor means the individual(s) or entity(ies) granting rights
115
+ under this Public License.
116
+
117
+ i. Share means to provide material to the public by any means or
118
+ process that requires permission under the Licensed Rights, such
119
+ as reproduction, public display, public performance, distribution,
120
+ dissemination, communication, or importation, and to make material
121
+ available to the public including in ways that members of the
122
+ public may access the material from a place and at a time
123
+ individually chosen by them.
124
+
125
+ j. Sui Generis Database Rights means rights other than copyright
126
+ resulting from Directive 96/9/EC of the European Parliament and of
127
+ the Council of 11 March 1996 on the legal protection of databases,
128
+ as amended and/or succeeded, as well as other essentially
129
+ equivalent rights anywhere in the world.
130
+
131
+ k. You means the individual or entity exercising the Licensed Rights
132
+ under this Public License. Your has a corresponding meaning.
133
+
134
+
135
+ Section 2 -- Scope.
136
+
137
+ a. License grant.
138
+
139
+ 1. Subject to the terms and conditions of this Public License,
140
+ the Licensor hereby grants You a worldwide, royalty-free,
141
+ non-sublicensable, non-exclusive, irrevocable license to
142
+ exercise the Licensed Rights in the Licensed Material to:
143
+
144
+ a. reproduce and Share the Licensed Material, in whole or
145
+ in part; and
146
+
147
+ b. produce, reproduce, and Share Adapted Material.
148
+
149
+ 2. Exceptions and Limitations. For the avoidance of doubt, where
150
+ Exceptions and Limitations apply to Your use, this Public
151
+ License does not apply, and You do not need to comply with
152
+ its terms and conditions.
153
+
154
+ 3. Term. The term of this Public License is specified in Section
155
+ 6(a).
156
+
157
+ 4. Media and formats; technical modifications allowed. The
158
+ Licensor authorizes You to exercise the Licensed Rights in
159
+ all media and formats whether now known or hereafter created,
160
+ and to make technical modifications necessary to do so. The
161
+ Licensor waives and/or agrees not to assert any right or
162
+ authority to forbid You from making technical modifications
163
+ necessary to exercise the Licensed Rights, including
164
+ technical modifications necessary to circumvent Effective
165
+ Technological Measures. For purposes of this Public License,
166
+ simply making modifications authorized by this Section 2(a)
167
+ (4) never produces Adapted Material.
168
+
169
+ 5. Downstream recipients.
170
+
171
+ a. Offer from the Licensor -- Licensed Material. Every
172
+ recipient of the Licensed Material automatically
173
+ receives an offer from the Licensor to exercise the
174
+ Licensed Rights under the terms and conditions of this
175
+ Public License.
176
+
177
+ b. No downstream restrictions. You may not offer or impose
178
+ any additional or different terms or conditions on, or
179
+ apply any Effective Technological Measures to, the
180
+ Licensed Material if doing so restricts exercise of the
181
+ Licensed Rights by any recipient of the Licensed
182
+ Material.
183
+
184
+ 6. No endorsement. Nothing in this Public License constitutes or
185
+ may be construed as permission to assert or imply that You
186
+ are, or that Your use of the Licensed Material is, connected
187
+ with, or sponsored, endorsed, or granted official status by,
188
+ the Licensor or others designated to receive attribution as
189
+ provided in Section 3(a)(1)(A)(i).
190
+
191
+ b. Other rights.
192
+
193
+ 1. Moral rights, such as the right of integrity, are not
194
+ licensed under this Public License, nor are publicity,
195
+ privacy, and/or other similar personality rights; however, to
196
+ the extent possible, the Licensor waives and/or agrees not to
197
+ assert any such rights held by the Licensor to the limited
198
+ extent necessary to allow You to exercise the Licensed
199
+ Rights, but not otherwise.
200
+
201
+ 2. Patent and trademark rights are not licensed under this
202
+ Public License.
203
+
204
+ 3. To the extent possible, the Licensor waives any right to
205
+ collect royalties from You for the exercise of the Licensed
206
+ Rights, whether directly or through a collecting society
207
+ under any voluntary or waivable statutory or compulsory
208
+ licensing scheme. In all other cases the Licensor expressly
209
+ reserves any right to collect such royalties.
210
+
211
+
212
+ Section 3 -- License Conditions.
213
+
214
+ Your exercise of the Licensed Rights is expressly made subject to the
215
+ following conditions.
216
+
217
+ a. Attribution.
218
+
219
+ 1. If You Share the Licensed Material (including in modified
220
+ form), You must:
221
+
222
+ a. retain the following if it is supplied by the Licensor
223
+ with the Licensed Material:
224
+
225
+ i. identification of the creator(s) of the Licensed
226
+ Material and any others designated to receive
227
+ attribution, in any reasonable manner requested by
228
+ the Licensor (including by pseudonym if
229
+ designated);
230
+
231
+ ii. a copyright notice;
232
+
233
+ iii. a notice that refers to this Public License;
234
+
235
+ iv. a notice that refers to the disclaimer of
236
+ warranties;
237
+
238
+ v. a URI or hyperlink to the Licensed Material to the
239
+ extent reasonably practicable;
240
+
241
+ b. indicate if You modified the Licensed Material and
242
+ retain an indication of any previous modifications; and
243
+
244
+ c. indicate the Licensed Material is licensed under this
245
+ Public License, and include the text of, or the URI or
246
+ hyperlink to, this Public License.
247
+
248
+ 2. You may satisfy the conditions in Section 3(a)(1) in any
249
+ reasonable manner based on the medium, means, and context in
250
+ which You Share the Licensed Material. For example, it may be
251
+ reasonable to satisfy the conditions by providing a URI or
252
+ hyperlink to a resource that includes the required
253
+ information.
254
+
255
+ 3. If requested by the Licensor, You must remove any of the
256
+ information required by Section 3(a)(1)(A) to the extent
257
+ reasonably practicable.
258
+
259
+ 4. If You Share Adapted Material You produce, the Adapter's
260
+ License You apply must not prevent recipients of the Adapted
261
+ Material from complying with this Public License.
262
+
263
+
264
+ Section 4 -- Sui Generis Database Rights.
265
+
266
+ Where the Licensed Rights include Sui Generis Database Rights that
267
+ apply to Your use of the Licensed Material:
268
+
269
+ a. for the avoidance of doubt, Section 2(a)(1) grants You the right
270
+ to extract, reuse, reproduce, and Share all or a substantial
271
+ portion of the contents of the database;
272
+
273
+ b. if You include all or a substantial portion of the database
274
+ contents in a database in which You have Sui Generis Database
275
+ Rights, then the database in which You have Sui Generis Database
276
+ Rights (but not its individual contents) is Adapted Material; and
277
+
278
+ c. You must comply with the conditions in Section 3(a) if You Share
279
+ all or a substantial portion of the contents of the database.
280
+
281
+ For the avoidance of doubt, this Section 4 supplements and does not
282
+ replace Your obligations under this Public License where the Licensed
283
+ Rights include other Copyright and Similar Rights.
284
+
285
+
286
+ Section 5 -- Disclaimer of Warranties and Limitation of Liability.
287
+
288
+ a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
289
+ EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
290
+ AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
291
+ ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
292
+ IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
293
+ WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
294
+ PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
295
+ ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
296
+ KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
297
+ ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
298
+
299
+ b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
300
+ TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
301
+ NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
302
+ INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
303
+ COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
304
+ USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
305
+ ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
306
+ DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
307
+ IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
308
+
309
+ c. The disclaimer of warranties and limitation of liability provided
310
+ above shall be interpreted in a manner that, to the extent
311
+ possible, most closely approximates an absolute disclaimer and
312
+ waiver of all liability.
313
+
314
+
315
+ Section 6 -- Term and Termination.
316
+
317
+ a. This Public License applies for the term of the Copyright and
318
+ Similar Rights licensed here. However, if You fail to comply with
319
+ this Public License, then Your rights under this Public License
320
+ terminate automatically.
321
+
322
+ b. Where Your right to use the Licensed Material has terminated under
323
+ Section 6(a), it reinstates:
324
+
325
+ 1. automatically as of the date the violation is cured, provided
326
+ it is cured within 30 days of Your discovery of the
327
+ violation; or
328
+
329
+ 2. upon express reinstatement by the Licensor.
330
+
331
+ For the avoidance of doubt, this Section 6(b) does not affect any
332
+ right the Licensor may have to seek remedies for Your violations
333
+ of this Public License.
334
+
335
+ c. For the avoidance of doubt, the Licensor may also offer the
336
+ Licensed Material under separate terms or conditions or stop
337
+ distributing the Licensed Material at any time; however, doing so
338
+ will not terminate this Public License.
339
+
340
+ d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
341
+ License.
342
+
343
+
344
+ Section 7 -- Other Terms and Conditions.
345
+
346
+ a. The Licensor shall not be bound by any additional or different
347
+ terms or conditions communicated by You unless expressly agreed.
348
+
349
+ b. Any arrangements, understandings, or agreements regarding the
350
+ Licensed Material not stated herein are separate from and
351
+ independent of the terms and conditions of this Public License.
352
+
353
+
354
+ Section 8 -- Interpretation.
355
+
356
+ a. For the avoidance of doubt, this Public License does not, and
357
+ shall not be interpreted to, reduce, limit, restrict, or impose
358
+ conditions on any use of the Licensed Material that could lawfully
359
+ be made without permission under this Public License.
360
+
361
+ b. To the extent possible, if any provision of this Public License is
362
+ deemed unenforceable, it shall be automatically reformed to the
363
+ minimum extent necessary to make it enforceable. If the provision
364
+ cannot be reformed, it shall be severed from this Public License
365
+ without affecting the enforceability of the remaining terms and
366
+ conditions.
367
+
368
+ c. No term or condition of this Public License will be waived and no
369
+ failure to comply consented to unless expressly agreed to by the
370
+ Licensor.
371
+
372
+ d. Nothing in this Public License constitutes or may be interpreted
373
+ as a limitation upon, or waiver of, any privileges and immunities
374
+ that apply to the Licensor or You, including from the legal
375
+ processes of any jurisdiction or authority.
376
+
377
+
378
+ =======================================================================
379
+
380
+ Creative Commons is not a party to its public licenses.
381
+ Notwithstanding, Creative Commons may elect to apply one of its public
382
+ licenses to material it publishes and in those instances will be
383
+ considered the "Licensor." Except for the limited purpose of indicating
384
+ that material is shared under a Creative Commons public license or as
385
+ otherwise permitted by the Creative Commons policies published at
386
+ creativecommons.org/policies, Creative Commons does not authorize the
387
+ use of the trademark "Creative Commons" or any other trademark or logo
388
+ of Creative Commons without its prior written consent including,
389
+ without limitation, in connection with any unauthorized modifications
390
+ to any of its public licenses or any other arrangements,
391
+ understandings, or agreements concerning use of licensed material. For
392
+ the avoidance of doubt, this paragraph does not form part of the public
393
+ licenses.
394
+
395
+ Creative Commons may be contacted at creativecommons.org.
LICENSE_DEPENDENCIES ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Spaces entry point setup: install runtime deps, fetch model files, prepare env."""
import os
import torch

print(torch.__version__)
# mmcv-full wheels are indexed by the exact torch/CUDA combination, so derive
# both parts from the installed torch version string (e.g. "1.10.0+cu113").
torch_ver, cuda_ver = torch.__version__.split('+')
os.system(f'pip install mmcv-full==1.4.0 -f https://download.openmmlab.com/mmcv/dist/{cuda_ver}/torch{torch_ver}/index.html --no-cache-dir')
os.system('cd src/ndl_layout/mmdetection && python setup.py bdist_wheel && pip install dist/*.whl')
# Download pretrained model weights and configs into the expected src/ locations.
os.system('wget https://lab.ndl.go.jp/dataset/ndlocr/text_recognition/mojilist_NDL.txt -P ./src/text_recognition/models')
os.system('wget https://lab.ndl.go.jp/dataset/ndlocr/text_recognition/ndlenfixed64-mj0-synth1.pth -P ./src/text_recognition/models')
os.system('wget https://lab.ndl.go.jp/dataset/ndlocr/ndl_layout/ndl_layout_config.py -P ./src/ndl_layout/models')
os.system('wget https://lab.ndl.go.jp/dataset/ndlocr/ndl_layout/epoch_140_all_eql_bt.pth -P ./src/ndl_layout/models')
os.system('wget https://lab.ndl.go.jp/dataset/ndlocr/separate_pages_ssd/weights.hdf5 -P ./src/separate_pages_ssd/ssd_tools')
os.system("wget https://i.imgur.com/fSL1CGG.jpg")
# BUG FIX: os.environ["PYTHONPATH"] raised KeyError when PYTHONPATH was not
# already set in the environment; read it with a default instead.
_pythonpath = os.environ.get("PYTHONPATH", "")
_extra_path = f"{os.getcwd()}/src/text_recognition/deep-text-recognition-benchmark"
os.environ["PYTHONPATH"] = (_pythonpath + ":" + _extra_path) if _pythonpath else _extra_path

import gradio as gr
from uuid import uuid4
from pathlib import Path
21
def inference(im):
    """Run the NDLOCR pipeline on one PIL image; return (result image path, text)."""
    # Each request gets its own working directory so concurrent runs don't collide.
    work_id = uuid4()
    img_dir = Path(f'{work_id}/img')
    img_dir.mkdir(parents=True)
    im.save(img_dir / 'image.jpg')
    os.system(f'python main.py infer {work_id}/img/image.jpg {work_id}_output -s f -i')
    with open(f'{work_id}_output/image/txt/image_main.txt') as result:
        text = result.read()
    return f'{work_id}_output/image/pred_img/image_L.jpg', text
28
+
29
# Demo metadata shown on the Gradio page.
title = "NDLOCR"
description = "Gradio demo for NDLOCR. NDLOCR is a text recognition (OCR) Program."
article = "<p style='text-align: center'><a href='https://github.com/ndl-lab' target='_blank'>NDL Lab</a> | <a href='https://github.com/ndl-lab/ndlocr_cli' target='_blank'>NDLOCR Repo</a></p>"

# Input is a single PIL image; outputs are the annotated image and the OCR text.
demo = gr.Interface(
    fn=inference,
    inputs=gr.inputs.Image(label='image', type='pil'),
    outputs=['image', 'text'],
    title=title,
    description=description,
    article=article,
    examples=['fSL1CGG.jpg'],
)
demo.launch(enable_queue=True, cache_examples=True)
cli/core/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) 2022, National Diet Library, Japan
#
# This software is released under the CC BY 4.0.
# https://creativecommons.org/licenses/by/4.0/

"""cli.core package: exposes the OCR inference runner."""

from .inference import OcrInferencer

__all__ = ['OcrInferencer']
cli/core/inference.py ADDED
@@ -0,0 +1,530 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) 2022, National Diet Library, Japan
#
# This software is released under the CC BY 4.0.
# https://creativecommons.org/licenses/by/4.0/


import copy
import cv2
import glob
import os
import pathlib
import sys
import time
import xml
import xml.etree.ElementTree as ET

from . import utils
from .. import procs

# Make the bundled model packages under src/ importable from this module.
currentdir = pathlib.Path(__file__).resolve().parent
for _subdir in ('separate_pages_ssd', 'ndl_layout', 'deskew_HT', 'text_recognition'):
    sys.path.append(str(currentdir) + '/../../src/' + _subdir)

# Image file extensions accepted as input.
supported_img_ext = ['.jpg', '.jpeg', '.jp2']
29
+
30
+
31
+ class OcrInferencer:
32
+ """
33
+ 推論実行時の関数や推論の設定値を保持します。
34
+
35
+ Attributes
36
+ ----------
37
+ full_proc_list : list
38
+ 全推論処理のリストです。
39
+ proc_list : list
40
+ 本実行処理における推論処理のリストです。
41
+ cfg : dict
42
+ 本実行処理における設定情報です。
43
+ """
44
+
45
def __init__(self, cfg):
    """
    Build the inferencer from a configuration dictionary.

    Parameters
    ----------
    cfg : dict
        Configuration for this inference run.
    """
    # Full pipeline, in execution order:
    #   0: page separation   (outputs image; no XML, no TXT)
    #   1: deskew            (outputs image; no XML, no TXT)
    #   2: layout extraction (outputs image and XML; no TXT)
    #   3: line OCR          (outputs image, XML and TXT)
    self.full_proc_list = [
        procs.PageSeparation,
        procs.PageDeskewProcess,
        procs.LayoutExtractionProcess,
        procs.LineOcrProcess,
    ]
    # Subset of the full pipeline actually used for this run.
    self.proc_list = self._create_proc_list(cfg)
    self.cfg = cfg
    # Per-image wall-clock times; summarized when run() finishes.
    self.time_statistics = []
    # Empty OCRDATASET XML document template.
    self.xml_template = '<?xml version="1.0" encoding="utf-8" standalone="yes"?>\n<OCRDATASET></OCRDATASET>'
63
+
64
def run(self):
    """Execute inference for every input directory listed in self.cfg."""
    if not self.cfg['input_dirs']:
        print('[ERROR] Input directory list is empty', file=sys.stderr)
        return

    for input_dir in self.cfg['input_dirs']:
        # The 'tosho' input structure ('t') has its own directory discovery.
        if self.cfg['input_structure'] in ['t']:
            dir_data_list = self._get_single_dir_data_from_tosho_data(input_dir)
        else:
            dir_data_list = self._get_single_dir_data(input_dir)

        if dir_data_list is None:
            print('[ERROR] Input data list is empty', file=sys.stderr)
            continue
        print(dir_data_list)

        # Run inference for each single-output-directory data set.
        for dir_data in dir_data_list:
            print(dir_data)
            if dir_data is None:
                continue
            pred_list = self._infer(dir_data)

            # Persist per-page XML when requested; only meaningful once the
            # layout-extraction step has run (proc range end > 1).
            if (self.cfg['save_xml'] or self.cfg['partial_infer']) and (self.cfg['proc_range']['end'] > 1):
                self._save_pred_xml(dir_data['output_dir'],
                                    [single_data['xml'] for single_data in pred_list])

    if not self.time_statistics:
        print('================== NO VALID INFERENCE ==================')
    else:
        average = sum(self.time_statistics) / len(self.time_statistics)
        print('================== PROCESSING TIME ==================')
        print('Average processing time : {0} sec / image file '.format(average))
    return
100
+
101
def _infer(self, single_outputdir_data):
    """
    Run the configured inference pipeline over one output-directory's data.

    Parameters
    ----------
    single_outputdir_data : dict
        Input data for one XML unit (typically one book): an image path
        list ('img_list'), the output directory ('output_dir') and,
        depending on the stage, associated XML data.

    Returns
    -------
    pred_list : list
        One result dict per page; each dict may hold keys such as
        'img' (numpy.ndarray) and 'xml' (element tree).
    """
    pred_list = []
    pred_xml_dict_for_dump = {}
    if self.cfg['dump']:
        # Prepare per-process dump directories for intermediate results.
        dump_dir = os.path.join(single_outputdir_data['output_dir'], 'dump')
        os.makedirs(dump_dir, exist_ok=True)
        for proc in self.proc_list:
            pred_xml_dict_for_dump[proc.proc_name] = []
            proc_dump_dir = os.path.join(dump_dir, proc.proc_name)
            os.makedirs(proc_dump_dir, exist_ok=True)

    for img_path in single_outputdir_data['img_list']:
        single_image_file_data = self._get_single_image_file_data(img_path, single_outputdir_data)
        output_dir = single_outputdir_data['output_dir']
        if single_image_file_data is None:
            print('[ERROR] Failed to get single page input data for image:{0}'.format(img_path), file=sys.stderr)
            continue

        print('######## START PAGE INFERENCE PROCESS ########')
        start_page = time.time()

        # Feed the page through each step; a step may split one input into
        # several outputs (e.g. page separation), so the working list is
        # rebuilt after every step.
        for proc in self.proc_list:
            single_page_output = []
            for idx, single_data_input in enumerate(single_image_file_data):
                single_data_output = proc.do(idx, single_data_input)
                single_page_output.extend(single_data_output)
            # Save this step's XML for the dump, when present.
            if self.cfg['dump'] and 'xml' in single_image_file_data[0].keys():
                pred_xml_dict_for_dump[proc.proc_name].append(single_image_file_data[0]['xml'])
            single_image_file_data = single_page_output

        single_image_file_output = single_image_file_data
        self.time_statistics.append(time.time() - start_page)

        if self.cfg['save_image'] or self.cfg['partial_infer']:
            # Save result-visualization images in the pred_img directory.
            for single_data_output in single_image_file_output:
                # Also keep the input image when doing partial inference.
                if self.cfg['partial_infer']:
                    img_output_dir = os.path.join(output_dir, 'img')
                    self._save_image(single_data_output['img'], single_data_output['img_file_name'], img_output_dir)

                pred_img = self._create_result_image(single_data_output, self.proc_list[-1].proc_name)
                img_output_dir = os.path.join(output_dir, 'pred_img')
                self._save_image(pred_img, single_data_output['img_file_name'], img_output_dir)

        # Save recognized text for this page (only after the OCR step,
        # i.e. proc range end > 2).
        if self.cfg['proc_range']['end'] > 2:
            sum_main_txt = ''
            sum_cap_txt = ''
            for single_data_output in single_image_file_output:
                main_txt, cap_txt = self._create_result_txt(single_data_output['xml'])
                sum_main_txt += main_txt + '\n'
                # BUG FIX: was `sum_cap_txt += sum_cap_txt + '\n'`, which
                # repeatedly doubled the accumulator and never added cap_txt.
                sum_cap_txt += cap_txt + '\n'
            self._save_pred_txt(sum_main_txt, sum_cap_txt, os.path.basename(img_path), single_outputdir_data['output_dir'])

        # Collect this page's results (including XML data) for the caller.
        pred_list.extend(single_image_file_output)
        print('######## END PAGE INFERENCE PROCESS ########')

    return pred_list
182
+
183
def _get_single_dir_data(self, input_dir):
    """
    Collect and organize the input data for one XML's worth of
    processing (image list, optional XML tree, output directory).

    Parameters
    ----------
    input_dir : str
        Directory (or, in 'f' mode, a single image file path) holding
        the input data.

    Returns
    -------
    list or None
        One-element list with the input-data dict (basically one PID),
        or None on error.
    """
    single_dir_data = {'input_dir': os.path.abspath(input_dir)}
    single_dir_data['img_list'] = []

    # get img list of input directory
    if self.cfg['input_structure'] in ['w']:
        for ext in supported_img_ext:
            single_dir_data['img_list'].extend(sorted(glob.glob(os.path.join(input_dir, '*{0}'.format(ext)))))
    elif self.cfg['input_structure'] in ['f']:
        stem, ext = os.path.splitext(os.path.basename(input_dir))
        if ext in supported_img_ext:
            single_dir_data['img_list'] = [input_dir]
        else:
            print('[ERROR] This file is not supported type : {0}'.format(input_dir), file=sys.stderr)
    elif not os.path.isdir(os.path.join(input_dir, 'img')):
        print('[ERROR] Input img diretctory not found in {}'.format(input_dir), file=sys.stderr)
        return None
    else:
        for ext in supported_img_ext:
            single_dir_data['img_list'].extend(sorted(glob.glob(os.path.join(input_dir, 'img/*{0}'.format(ext)))))

    # check xml file number and load xml data if needed
    if self.cfg['proc_range']['start'] > 2:
        if self.cfg['input_structure'] in ['f']:
            print('[ERROR] Single image file input mode does not support partial inference wich need xml file input.', file=sys.stderr)
            return None
        xml_file_list = glob.glob(os.path.join(input_dir, 'xml/*.xml'))
        if len(xml_file_list) > 1:
            print('[ERROR] Input xml file must be only one, but there is {0} xml files in {1}.'.format(
                len(xml_file_list), os.path.join(self.cfg['input_root'], 'xml')), file=sys.stderr)
            return None
        elif len(xml_file_list) == 0:
            print('[ERROR] There is no input xml files in {0}.'.format(os.path.join(input_dir, 'xml')), file=sys.stderr)
            return None
        else:
            input_xml = xml_file_list[0]
            try:
                single_dir_data['xml'] = ET.parse(input_xml)
            # BUGFIX: was 'except xml.etree.ElementTree.ParseError', which
            # needs the bare 'xml' package name to be bound; 'import
            # xml.etree.ElementTree as ET' binds only ET, so the handler
            # itself raised NameError. ET.ParseError is the same class.
            except ET.ParseError:
                print("[ERROR] XML parse error : {0}".format(input_xml), file=sys.stderr)
                return None

    # prepare output dir for inference result with this input dir
    if self.cfg['input_structure'] in ['f']:
        stem, ext = os.path.splitext(os.path.basename(input_dir))
        output_dir = os.path.join(self.cfg['output_root'], stem)
    elif self.cfg['input_structure'] in ['i', 's']:
        dir_name = os.path.basename(input_dir)
        output_dir = os.path.join(self.cfg['output_root'], dir_name)
    elif self.cfg['input_structure'] in ['w']:
        # NOTE(review): assumes '/'-separated paths; the last three path
        # components encode the PID in workstation mode — confirm on
        # non-POSIX platforms.
        input_dir_names = input_dir.split('/')
        dir_name = input_dir_names[-3][0] + input_dir_names[-2] + input_dir_names[-1]
        output_dir = os.path.join(self.cfg['output_root'], dir_name)
    else:
        print('[ERROR] Unexpected input directory structure type: {}.'.format(self.cfg['input_structure']), file=sys.stderr)
        return None

    # output directory existence check (renames on collision)
    output_dir = utils.mkdir_with_duplication_check(output_dir)
    single_dir_data['output_dir'] = output_dir

    return [single_dir_data]
261
+
262
+ def _get_single_dir_data_from_tosho_data(self, input_dir):
263
+ """
264
+ XML一つ分の入力データに関する情報を整理して取得します。
265
+
266
+ Parameters
267
+ ----------
268
+ input_dir : str
269
+ tosho data形式のセクションごとのディレクトリパスです。
270
+
271
+ Returns
272
+ -------
273
+ single_dir_data_list : list
274
+ XML一つ分のデータ(基本的に1PID分を想定)の入力データ情報のリストです。
275
+ 1つの要素に画像ファイルパスのリスト、それらに対応するXMLデータを含みます。
276
+ """
277
+ single_dir_data_list = []
278
+
279
+ # get img list of input directory
280
+ tmp_img_list = sorted(glob.glob(os.path.join(input_dir, '*.jp2')))
281
+ tmp_img_list.extend(sorted(glob.glob(os.path.join(input_dir, '*.jpg'))))
282
+
283
+ pid_list = []
284
+ for img in tmp_img_list:
285
+ pid = os.path.basename(img).split('_')[0]
286
+ if pid not in pid_list:
287
+ pid_list.append(pid)
288
+
289
+ for pid in pid_list:
290
+ single_dir_data = {'input_dir': os.path.abspath(input_dir),
291
+ 'img_list': [img for img in tmp_img_list if os.path.basename(img).startswith(pid)]}
292
+
293
+ # prepare output dir for inferensce result with this input dir
294
+ output_dir = os.path.join(self.cfg['output_root'], pid)
295
+
296
+ # output directory existance check
297
+ os.makedirs(output_dir, exist_ok=True)
298
+ single_dir_data['output_dir'] = output_dir
299
+ single_dir_data_list.append(single_dir_data)
300
+
301
+ return single_dir_data_list
302
+
303
def _get_single_image_file_data(self, img_path, single_dir_data):
    """
    Assemble the per-page input record for one image file.

    Parameters
    ----------
    img_path : str
        Path to the page image.
    single_dir_data : dict
        Book-level input info: output directory and, optionally, the
        book-level XML tree under the 'xml' key.

    Returns
    -------
    single_image_file_data : list or None
        One-element list holding the page record (image path and name,
        output dir, decoded numpy.ndarray image and, when available,
        the page's XML tree). None when the image cannot be read.
    """
    page_record = {
        'img_path': img_path,
        'img_file_name': os.path.basename(img_path),
        'output_dir': single_dir_data['output_dir'],
    }

    # Decode the page image; abort this page on failure.
    page_img = cv2.imread(img_path)
    if page_img is None:
        print('[ERROR] Image read error : {0}'.format(img_path), file=sys.stderr)
        return None
    page_record['img'] = page_img

    single_image_file_data = [page_record]

    # Without book-level XML the image alone is a complete input.
    full_xml = single_dir_data.get('xml')
    if full_xml is None:
        return single_image_file_data

    # Extract the PAGE element matching this image into a fresh tree
    # rooted at the template element.
    image_name = os.path.basename(img_path)
    for page in full_xml.getroot().iter('PAGE'):
        if page.attrib['IMAGENAME'] == image_name:
            root = ET.fromstring(self.xml_template)
            root.append(page)
            page_record['xml'] = ET.ElementTree(root)
            break

    # [TODO] handle the case where no XML entry matches this image
    if 'xml' not in page_record:
        print('[ERROR] Input XML data for page {} not found.'.format(img_path), file=sys.stderr)

    return single_image_file_data
357
+
358
+ def _create_proc_list(self, cfg):
359
+ """
360
+ 推論の設定情報に基づき、実行する推論処理のリストを作成します。
361
+
362
+ Parameters
363
+ ----------
364
+ cfg : dict
365
+ 推論実行時の設定情報を保存した辞書型データ。
366
+ """
367
+ proc_list = []
368
+ for i in range(cfg['proc_range']['start'], cfg['proc_range']['end'] + 1):
369
+ proc_list.append(self.full_proc_list[i](cfg, i))
370
+ return proc_list
371
+
372
def _save_pred_xml(self, output_dir, pred_list):
    """
    Merge all page-level result XML trees and write them as one file.

    Parameters
    ----------
    output_dir : str
        Directory for inference results; its basename (normally the
        PID) becomes the XML file name.
    pred_list : list
        Per-page inference results (dicts) whose XML trees are merged.
    """
    xml_dir = os.path.join(output_dir, 'xml')
    os.makedirs(xml_dir, exist_ok=True)

    # basically, output_dir is supposed to be the PID, so its basename
    # is used as the xml file name
    xml_name = '{}.xml'.format(os.path.basename(output_dir))
    merged_tree = self._parse_pred_list_to_save(pred_list)
    utils.save_xml(merged_tree, os.path.join(xml_dir, xml_name))
    return
392
+
393
def _save_image(self, pred_img, orig_img_name, img_output_dir, id=''):
    """
    Save an image (input copy or result visualization) to the given
    directory as JPEG.

    Parameters
    ----------
    pred_img : numpy.ndarray
        Image data to save.
    orig_img_name : str
        Original input image file name; the saved file keeps this stem
        but always gets a .jpg extension.
    img_output_dir : str
        Destination directory (created if missing).
    id : str
        Optional per-result suffix appended to the stem, needed when a
        process emits several images for one input image.

    Raises
    ------
    OSError
        If the image cannot be written.
    """
    os.makedirs(img_output_dir, exist_ok=True)

    # Force .jpg output regardless of the source extension.
    stem, ext = os.path.splitext(orig_img_name)
    orig_img_name = stem + '.jpg'

    if id != '':
        stem, ext = os.path.splitext(orig_img_name)
        orig_img_name = stem + '_' + id + ext

    img_path = os.path.join(img_output_dir, orig_img_name)
    try:
        cv2.imwrite(img_path, pred_img)
    except OSError as err:
        print("[ERROR] Image save error: {0}".format(err), file=sys.stderr)
        # BUGFIX: was 'raise OSError', which replaced the caught exception
        # with a new, message-less one; bare 'raise' preserves it.
        raise

    return
427
+
428
+ def _save_pred_txt(self, main_txt, cap_txt, orig_img_name, output_dir):
429
+ """
430
+ 指定されたディレクトリに推論結果のテキストデータを保存します。
431
+
432
+ Parameters
433
+ ----------
434
+ main_txt : str
435
+ 本文+キャプションの推論結果のテキストデータです
436
+ cap_txt : str
437
+ キャプションのみの推論結果のテキストデータです
438
+ orig_img_name : str
439
+ もともとの入力画像ファイル名。
440
+ 基本的にはこのファイル名と同名で保存します。
441
+ img_output_dir : str
442
+ 画像ファイルの保存先のディレクトリパス。
443
+ """
444
+ txt_dir = os.path.join(output_dir, 'txt')
445
+ os.makedirs(txt_dir, exist_ok=True)
446
+
447
+ stem, _ = os.path.splitext(orig_img_name)
448
+ txt_path = os.path.join(txt_dir, stem + '_cap.txt')
449
+ try:
450
+ with open(txt_path, 'w') as f:
451
+ f.write(cap_txt)
452
+ except OSError as err:
453
+ print("[ERROR] Caption text save error: {0}".format(err), file=sys.stderr)
454
+ raise OSError
455
+
456
+ stem, _ = os.path.splitext(orig_img_name)
457
+ txt_path = os.path.join(txt_dir, stem + '_main.txt')
458
+ try:
459
+ with open(txt_path, 'w') as f:
460
+ f.write(main_txt)
461
+ except OSError as err:
462
+ print("[ERROR] Main text save error: {0}".format(err), file=sys.stderr)
463
+ raise OSError
464
+
465
+ return
466
+
467
+ def _parse_pred_list_to_save(self, pred_list):
468
+ """
469
+ 推論結果のXMLを要素に持つリストから、ファイルに保存するための一つのXMLデータを生成します。
470
+
471
+ Parameters
472
+ ----------
473
+ pred_list : list
474
+ 推論結果のXMLを要素に持つリスト。
475
+ """
476
+ ET.register_namespace('', 'NDLOCRDATASET')
477
+ node = ET.fromstring(self.xml_template)
478
+ for single_xml_tree in pred_list:
479
+ root = single_xml_tree.getroot()
480
+ for element in root:
481
+ node.append(element)
482
+
483
+ tree = ET.ElementTree(node)
484
+ return tree
485
+
486
def _create_result_image(self, result, proc_name):
    """
    Create a copy of the page image labeled with the producing process
    name (the input data itself is not modified).

    Parameters
    ----------
    result : dict
        Per-page inference result; uses 'dump_img' when a process
        already drew its own visualization, otherwise the raw 'img'.
    proc_name : str
        Name of the process whose result is being visualized.

    Returns
    -------
    numpy.ndarray
        Labeled deep copy of the image.
    """
    # Prefer a visualization a process may have attached.
    if 'dump_img' in result.keys():
        dump_img = copy.deepcopy(result['dump_img'])
    else:
        dump_img = copy.deepcopy(result['img'])

    # The original branched on the presence of 'xml' but drew exactly the
    # same label in both branches; the duplicate branch is collapsed here
    # (behavior unchanged). Per-process overlays could be added later.
    cv2.putText(dump_img, proc_name, (0, 50),
                cv2.FONT_HERSHEY_PLAIN, 4, (0, 0, 0), 5, cv2.LINE_AA)
    return dump_img
510
+
511
+ def _create_result_txt(self, xml_data):
512
+ """
513
+ 推論結果のxmlデータからテキストデータを生成します。
514
+
515
+ Parameters
516
+ ----------
517
+ xml_data :
518
+ 1ページ分の推論結果を持つxmlデータ。
519
+ """
520
+ main_txt = ''
521
+ cap_txt = ''
522
+ for page_xml in xml_data.iter('PAGE'):
523
+ for line_xml in page_xml.iter('LINE'):
524
+ main_txt += line_xml.attrib['STRING']
525
+ main_txt += '\n'
526
+ if line_xml.attrib['TYPE'] == 'キャプション':
527
+ cap_txt += line_xml.attrib['STRING']
528
+ cap_txt += '\n'
529
+
530
+ return main_txt, cap_txt
cli/core/utils.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2022, National Diet Library, Japan
2
+ #
3
+ # This software is released under the CC BY 4.0.
4
+ # https://creativecommons.org/licenses/by/4.0/
5
+
6
+
7
+ import copy
8
+ import datetime
9
+ import glob
10
+ import os
11
+ import sys
12
+ import yaml
13
+
14
+
15
def parse_cfg(cfg_dict):
    """
    Parse the command-line arguments and options into the configuration
    dict used by the internal inference functions.

    Parameters
    ----------
    cfg_dict : dict
        Dictionary holding the command-line arguments and options.

    Returns
    -------
    infer_cfg : dict
        Dictionary holding the settings needed to run inference, or
        None when validation fails.
    """
    infer_cfg = copy.deepcopy(cfg_dict)

    # add inference config parameters from yml config file
    yml_config = None
    if not os.path.isfile(cfg_dict['config_file']):
        print('[ERROR] Config yml file not found.', file=sys.stderr)
        return None

    with open(cfg_dict['config_file'], 'r') as yml:
        yml_config = yaml.safe_load(yml)

    if type(yml_config) is not dict:
        print('[ERROR] Config yml file read error.', file=sys.stderr)
        return None

    infer_cfg.update(yml_config)

    # save_xml will be ignored when last proc does not output xml data
    if (infer_cfg['proc_range'] != '0..3') and (infer_cfg['save_xml'] or infer_cfg['save_image']):
        print('[WARNING] save_xml and save_image flags are ignored because this is partial execution.')
        print(' All output of last proc will be saved in output directory.')

    # parse start/end indices of inference process
    # NOTE: only the first and last characters of the 'x..y' string are
    # read, so both indices must be single digits
    start = int(infer_cfg['proc_range'][0])
    end = int(infer_cfg['proc_range'][-1])
    if start > end:
        print('[ERROR] Value of proc_range must be [x..y : x <= y] .', file=sys.stderr)
        return None
    infer_cfg['proc_range'] = {
        'start': start,
        'end': end
    }
    if (start != 0) or (end != 3):
        infer_cfg['partial_infer'] = True
    else:
        infer_cfg['partial_infer'] = False

    # create input_dirs from input_root
    # input_dirs is list of dirs that contain img (and xml) dir
    infer_cfg['input_root'] = os.path.abspath(infer_cfg['input_root'])
    infer_cfg['output_root'] = os.path.abspath(infer_cfg['output_root'])
    if infer_cfg['input_structure'] in ['s']:
        # - Single input dir mode
        # input_root
        #   +-- xml
        #   |     +-- R[7-digit serial].xml   (XML data)
        #   +-- img
        #         +-- R[7-digit serial]_pp.jp2   (image data)

        # validation check for input dir structure
        if not os.path.isdir(os.path.join(infer_cfg['input_root'], 'img')):
            print('[ERROR] Input img diretctory not found in {}'.format(infer_cfg['input_root']), file=sys.stderr)
            return None
        if (start > 2) and (not os.path.isdir(os.path.join(infer_cfg['input_root'], 'xml'))):
            print('[ERROR] Input xml diretctory not found in {}'.format(infer_cfg['input_root']), file=sys.stderr)
            return None
        infer_cfg['input_dirs'] = [infer_cfg['input_root']]
    elif infer_cfg['input_structure'] in ['i']:
        # - Partial inference mode
        # input_root
        #   +-- PID
        #         +-- xml
        #         |     +-- R[7-digit serial].xml   (XML data)
        #         +-- img
        #               +-- R[7-digit serial]_pp.jp2   (image data)
        infer_cfg['input_dirs'] = []
        for input_dir in glob.glob(os.path.join(infer_cfg['input_root'], '*')):
            if os.path.isdir(input_dir):
                if not os.path.isdir(os.path.join(input_dir, 'img')):
                    print('[WARNING] Input directory {0} is skipped(no img diretctory)'.format(input_dir))
                    continue
                # NOTE(review): this mode requires xml when start > 1,
                # while 's' mode checks start > 2 — confirm which
                # threshold is intended.
                if (start > 1) and (not os.path.isdir(os.path.join(input_dir, 'xml'))):
                    print('[WARNING] Input directory {0} is skipped(no xml diretctory)'.format(input_dir))
                    continue
                infer_cfg['input_dirs'].append(input_dir)
    elif infer_cfg['input_structure'] in ['t']:
        # - ToshoData mode
        # input_root
        #   +-- tosho_19XX_bunkei
        #         +-- R[7-digit serial]_pp.jp2   (image data)
        infer_cfg['input_dirs'] = []
        for input_dir in glob.glob(os.path.join(infer_cfg['input_root'], '*')):
            if os.path.isdir(input_dir):
                infer_cfg['input_dirs'].append(input_dir)
        if 'img' in [os.path.basename(d) for d in infer_cfg['input_dirs']]:
            print('[WARNING] This input structure might be single input(img diretctory found)')
    elif infer_cfg['input_structure'] in ['w']:
        # - Work station input mode
        # input_root
        #   +-- workstation
        #         +-- [collect(3 digits) or digital(3 digits)] directory
        #               +-- [15-digit serial] directory   (PID digit 1)
        #                     +-- [3-digit serial] directory   (PID digits 2-4)
        #                           +-- [3-digit serial] directory   (PID digits 5-7)
        #                                 +-- R[7-digit serial]_contents.jp2   (image data)

        # recursive function to get input_dirs in workstation mode
        def get_input_dirs(path, depth):
            depth += 1
            ret_list = []
            current_list = []
            for input_dir in glob.glob(os.path.join(path, '*')):
                if os.path.isdir(input_dir):
                    current_list.append(input_dir)
            # directories three levels below 'workstation' are the leaves
            # that hold the image files
            if depth > 3:
                return current_list
            if (depth < 2) and (len(current_list) == 0):
                print('[ERROR] Input directory structure dose not match workstation mode', file=sys.stderr)
                return []
            for dir in current_list:
                tmp_list = get_input_dirs(dir, depth)
                ret_list.extend(tmp_list)
            return ret_list

        # check if workstation directory exist
        work_dir = os.path.join(infer_cfg['input_root'], 'workstation')
        if not os.path.isdir(work_dir):
            print('[ERROR] \'workstation\' directory not found', file=sys.stderr)
            return None

        # get input dir list
        infer_cfg['input_dirs'] = get_input_dirs(work_dir, 0)
    elif infer_cfg['input_structure'] in ['f']:
        # - Image file input mode
        # input_root is equal to input image file path
        infer_cfg['input_dirs'] = [infer_cfg['input_root']]
    else:
        print('[ERROR] Unexpected input directory structure type: {0}.'.format(infer_cfg['input_structure']), file=sys.stderr)
        return None

    return infer_cfg
160
+
161
+
162
def save_xml(xml_to_save, path):
    """
    Write an XML tree to the given file path.

    Parameters
    ----------
    xml_to_save : xml.etree.ElementTree.ElementTree
        Tree to serialize (with UTF-8 encoding and an XML declaration).
    path : str
        Destination file path.

    Raises
    ------
    OSError
        If the file cannot be written.
    """
    print('### save xml : {}###'.format(path))
    try:
        xml_to_save.write(path, encoding='utf-8', xml_declaration=True)
    except OSError as err:
        print("[ERROR] XML save error : {0}".format(err), file=sys.stderr)
        # BUGFIX: was 'raise OSError', which discarded the caught
        # exception's message and traceback; bare 'raise' preserves it.
        raise
    return
179
+
180
+
181
def mkdir_with_duplication_check(dir_path):
    """
    Create dir_path, appending a timestamp suffix while the candidate
    name already exists, and return the directory actually created.

    Parameters
    ----------
    dir_path : str
        Desired directory path.

    Returns
    -------
    str
        Path of the created directory: dir_path itself, or dir_path
        plus one or more '_%Y%m%d%H%M%S' suffixes on collision.
    """
    candidate = dir_path

    # Keep appending timestamps until the candidate does not exist yet.
    while os.path.isdir(candidate):
        print('[WARNING] Directory {0} already exist.'.format(dir_path))
        stamp = datetime.datetime.now().strftime('_%Y%m%d%H%M%S')
        candidate = candidate + stamp

    if candidate != dir_path:
        print('[WARNING] Directory is changed to {0}.'.format(candidate))
    os.mkdir(candidate)

    return candidate
cli/procs/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2022, National Diet Library, Japan
2
+ #
3
+ # This software is released under the CC BY 4.0.
4
+ # https://creativecommons.org/licenses/by/4.0/
5
+
6
+
7
+ from .page_separation import PageSeparation
8
+ from .page_deskew import PageDeskewProcess
9
+ from .layout_extraction import LayoutExtractionProcess
10
+ from .line_ocr import LineOcrProcess
11
+
12
+ __all__ = ['PageSeparation', 'PageDeskewProcess', 'LayoutExtractionProcess', 'LineOcrProcess']
cli/procs/base_proc.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2022, National Diet Library, Japan
2
+ #
3
+ # This software is released under the CC BY 4.0.
4
+ # https://creativecommons.org/licenses/by/4.0/
5
+
6
+
7
+ import copy
8
+ import cv2
9
+ import os
10
+
11
+
12
class BaseInferenceProcess:
    """
    Base class from which every inference-process class is derived.

    Attributes
    ----------
    proc_name : str
        Name of this process instance, composed of the execution-order
        number plus the per-class process-type name.
    cfg : dict
        Configuration of this inference run.
    """
    def __init__(self, cfg, pid, proc_type='_base_prep'):
        """
        Parameters
        ----------
        cfg : dict
            Configuration of this inference run.
        pid : int
            Number giving this process's position in the execution order.
        proc_type : str
            Process-type name defined by each subclass.

        Raises
        ------
        ValueError
            If the given configuration fails validation.
        """
        self.proc_name = str(pid) + proc_type

        if not self._is_valid_cfg(cfg):
            raise ValueError('Configuration validation error.')
        else:
            self.cfg = cfg

        self.process_dump_dir = None
        # BUGFIX: the original ended with 'return True'; __init__ must
        # return None, so returning True raised TypeError on every
        # construction of this class or any subclass.

    def do(self, data_idx, input_data):
        """
        Entry point called by OcrInferencer to run this process.
        Validates the input, runs the inference, and dumps the result
        when enabled. Subclasses are not expected to override this.

        Parameters
        ----------
        data_idx : int
            Index of the input data within the per-image input list.
        input_data : dict
            Input data to run the inference on.

        Returns
        -------
        result : dict
            Inference result; basically the same structure as input_data.

        Raises
        ------
        ValueError
            If input validation fails or the inference returns None.
        """
        # input data validation check
        if not self._is_valid_input(input_data):
            raise ValueError('Input data validation error.')

        # run main inference process
        result = self._run_process(input_data)
        if result is None:
            raise ValueError('Inference output error in {0}.'.format(self.proc_name))

        # dump inference result
        if self.cfg['dump']:
            self._dump_result(input_data, result, data_idx)

        return result

    def _run_process(self, input_data):
        """
        Main body of the inference; the base implementation is a no-op
        copy, and subclasses implement the real processing.

        Parameters
        ----------
        input_data : dict
            Input data to run the inference on.

        Returns
        -------
        result : dict
            Inference result; basically the same structure as input_data.
        """
        print('### Base Inference Process ###')
        result = copy.deepcopy(input_data)
        return result

    def _is_valid_cfg(self, cfg):
        """
        Validate this class's portion of the configuration (not the
        whole run configuration); subclasses implement the real checks.

        Parameters
        ----------
        cfg : dict
            Configuration of this inference run.

        Returns
        -------
        bool
            True when the configuration is valid, False otherwise.
        """
        if cfg is None:
            print('Given configuration data is None.')
            return False
        return True

    def _is_valid_input(self, input_data):
        """
        Validate the input data of this class's inference; subclasses
        implement the real checks.

        Parameters
        ----------
        input_data : dict
            Input data to run the inference on.

        Returns
        -------
        bool
            True when the input data is valid, False otherwise.
        """
        return True

    def _dump_result(self, input_data, result, data_idx):
        """
        Save this process's inference results to files.
        Runs only when the dump flag is enabled.

        Parameters
        ----------
        input_data : dict
            Input data used for the inference.
        result : list
            Inference results; each element is a dict with basically the
            same structure as input_data.
        data_idx : int
            Index of the input data within the per-image input list.
        """

        self.process_dump_dir = os.path.join(os.path.join(input_data['output_dir'], 'dump'), self.proc_name)

        for i, single_result in enumerate(result):
            if 'img' in single_result.keys() and single_result['img'] is not None:
                dump_img_name = os.path.basename(input_data['img_path']).split('.')[0] + '_' + str(data_idx) + '_' + str(i) + '.jpg'
                self._dump_img_result(single_result, input_data['output_dir'], dump_img_name)
            if 'xml' in single_result.keys() and single_result['xml'] is not None:
                dump_xml_name = os.path.basename(input_data['img_path']).split('.')[0] + '_' + str(data_idx) + '_' + str(i) + '.xml'
                self._dump_xml_result(single_result, input_data['output_dir'], dump_xml_name)
            if 'txt' in single_result.keys() and single_result['txt'] is not None:
                dump_txt_name = os.path.basename(input_data['img_path']).split('.')[0] + '_' + str(data_idx) + '_' + str(i) + '.txt'
                self._dump_txt_result(single_result, input_data['output_dir'], dump_txt_name)
        return

    def _dump_img_result(self, single_result, output_dir, img_name):
        """
        Save one inference-result image (dump flag only). The dumped
        file keeps the input file name, plus indices when there are
        several results.

        Parameters
        ----------
        single_result : dict
            Inference result data.
        output_dir : str
            Directory where inference results are saved.
        img_name : str
            File name to save under (derived from the input image).
        """
        pred_img_dir = os.path.join(self.process_dump_dir, 'pred_img')
        os.makedirs(pred_img_dir, exist_ok=True)
        image_file_path = os.path.join(pred_img_dir, img_name)
        dump_image = self._create_result_image(single_result)
        try:
            cv2.imwrite(image_file_path, dump_image)
        except OSError as err:
            print("Dump image save error: {0}".format(err))
            raise OSError

        return

    def _dump_xml_result(self, single_result, output_dir, img_name):
        """
        Save one inference-result XML file (dump flag only). The dumped
        file name mirrors the input image name with an .xml extension.

        Parameters
        ----------
        single_result : dict
            Inference result data.
        output_dir : str
            Directory where inference results are saved.
        img_name : str
            Input image file name used to derive the XML file name.
        """
        xml_dir = os.path.join(self.process_dump_dir, 'xml')
        os.makedirs(xml_dir, exist_ok=True)
        trum, _ = os.path.splitext(img_name)
        xml_path = os.path.join(xml_dir, trum + '.xml')
        try:
            single_result['xml'].write(xml_path, encoding='utf-8', xml_declaration=True)
        except OSError as err:
            print("Dump xml save error: {0}".format(err))
            raise OSError

        return

    def _dump_txt_result(self, single_result, output_dir, img_name):
        """
        Save one inference-result text file (dump flag only). The dumped
        file name mirrors the input image name with a '_main.txt' suffix.

        Parameters
        ----------
        single_result : dict
            Inference result data.
        output_dir : str
            Directory where inference results are saved.
        img_name : str
            Input image file name used to derive the text file name.
        """
        txt_dir = os.path.join(self.process_dump_dir, 'txt')
        os.makedirs(txt_dir, exist_ok=True)

        trum, _ = os.path.splitext(img_name)
        txt_path = os.path.join(txt_dir, trum + '_main.txt')
        try:
            with open(txt_path, 'w') as f:
                f.write(single_result['txt'])
        except OSError as err:
            print("Dump text save error: {0}".format(err))
            raise OSError

        return

    def _create_result_image(self, single_result):
        """
        Create a copy of the input image with the inference result drawn
        on it.

        Parameters
        ----------
        single_result : dict
            Inference result data; uses 'dump_img' when present,
            otherwise the raw 'img'.
        """
        dump_img = None
        if 'dump_img' in single_result.keys():
            dump_img = copy.deepcopy(single_result['dump_img'])
        else:
            dump_img = copy.deepcopy(single_result['img'])
        if 'xml' in single_result.keys() and single_result['xml'] is not None:
            # draw single inference result on input image
            # this should be implemented in each child class
            cv2.putText(dump_img, 'dump' + self.proc_name, (0, 50),
                        cv2.FONT_HERSHEY_PLAIN, 4, (255, 0, 0), 5, cv2.LINE_AA)
            pass
        else:
            cv2.putText(dump_img, 'dump' + self.proc_name, (0, 50),
                        cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 0), 5, cv2.LINE_AA)
        return dump_img
cli/procs/layout_extraction.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2022, National Diet Library, Japan
2
+ #
3
+ # This software is released under the CC BY 4.0.
4
+ # https://creativecommons.org/licenses/by/4.0/
5
+
6
+
7
+ import copy
8
+ import numpy
9
+
10
+ from .base_proc import BaseInferenceProcess
11
+
12
+
13
class LayoutExtractionProcess(BaseInferenceProcess):
    """
    Inference process that runs layout extraction.
    Inherits BaseInferenceProcess.
    """
    def __init__(self, cfg, pid):
        """
        Parameters
        ----------
        cfg : dict
            Configuration of this inference run.
        pid : int
            Number giving this process's position in the execution order.
        """
        super().__init__(cfg, pid, '_layer_ext')
        from src.ndl_layout.tools.process import InferencerWithCLI
        self._inferencer = InferencerWithCLI(self.cfg['layout_extraction'])
        self._run_src_inference = self._inferencer.inference_wich_cli

    def _is_valid_input(self, input_data):
        """
        Validate the input data for this process.

        BUGFIX: this override was named 'is_valid_input' (no leading
        underscore), so BaseInferenceProcess.do() called the base no-op
        hook '_is_valid_input' instead and this check never ran. Renamed
        to match the hook (and the sibling LineOcrProcess).

        Parameters
        ----------
        input_data : dict
            Input data to run the inference on.

        Returns
        -------
        bool
            True when the input data is valid, False otherwise.
        """
        if type(input_data['img']) is not numpy.ndarray:
            print('LayoutExtractionProcess: input img is not numpy.ndarray')
            return False
        return True

    def _run_process(self, input_data):
        """
        Main body of the inference.

        Parameters
        ----------
        input_data : dict
            Input data to run the inference on.

        Returns
        -------
        result : list
            One-element list with the inference result dict; same
            structure as input_data plus 'xml' (and, when produced,
            'dump_img').
        """
        print('### Layout Extraction Process ###')
        output_data = copy.deepcopy(input_data)
        inference_output = self._run_src_inference(img=input_data['img'],
                                                   img_path=input_data['img_file_name'],
                                                   score_thr=self.cfg['layout_extraction']['score_thr'],
                                                   dump=(self.cfg['dump'] or self.cfg['save_image']))

        # Create result to pass xml and img data
        result = []
        output_data['xml'] = inference_output['xml']
        if inference_output['dump_img'] is not None:
            output_data['dump_img'] = inference_output['dump_img']
        result.append(output_data)
        return result
cli/procs/line_ocr.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2022, National Diet Library, Japan
2
+ #
3
+ # This software is released under the CC BY 4.0.
4
+ # https://creativecommons.org/licenses/by/4.0/
5
+
6
+
7
+ import copy
8
+ import numpy
9
+ import subprocess
10
+ import xml.etree.ElementTree as ET
11
+
12
+ from .base_proc import BaseInferenceProcess
13
+
14
+
15
class LineOcrProcess(BaseInferenceProcess):
    """
    Process class that runs text-line OCR (character recognition) inference.

    Inherits from BaseInferenceProcess.
    """
    def __init__(self, cfg, pid):
        """
        Parameters
        ----------
        cfg : dict
            Configuration for this inference process.
        pid : int
            Number indicating the execution order of this process.
        """
        super().__init__(cfg, pid, '_line_ocr')
        # Character set for the recognizer: '〓' (placeholder for unknown
        # glyphs) followed by every character of the char-list file with
        # newlines removed.  Reading the file directly replaces the original
        # `cat <file> | tr -d '\n'` subprocess pipeline, which depended on
        # external binaries and never closed its pipe handles.
        # newline='' disables newline translation so only '\n' is stripped,
        # exactly matching `tr -d '\n'`.
        with open(self.cfg['line_ocr']['char_list'],
                  encoding='utf-8', newline='') as char_file:
            self.character = '〓' + char_file.read().replace('\n', '')

        from src.text_recognition.text_recognition import InferencerWithCLI
        self._inferencer = InferencerWithCLI(self.cfg['line_ocr'], self.character)
        # NOTE: 'inference_wich_cli' is the (misspelled) public name exposed
        # by InferencerWithCLI; keep it as-is.
        self._run_src_inference = self._inferencer.inference_wich_cli

    def _is_valid_input(self, input_data):
        """
        Validate the input data for this inference process.

        Parameters
        ----------
        input_data : dict
            Input data for the inference.

        Returns
        -------
        bool
            True if the input data is valid, otherwise False.
        """
        if type(input_data['img']) is not numpy.ndarray:
            print('LineOcrProcess: input img is not numpy.ndarray')
            return False
        if type(input_data['xml']) is not ET.ElementTree:
            print('LineOcrProcess: input xml is not ElementTree')
            return False
        return True

    def _run_process(self, input_data):
        """
        Main body of the inference process.

        Parameters
        ----------
        input_data : dict
            Input data for the inference.

        Returns
        -------
        result : list of dict
            Single-element list holding a copy of input_data whose 'xml'
            entry is replaced by the OCR result.
        """
        result = []
        print('### Line OCR Process ###')
        result_xml = self._run_src_inference(input_data['img'], input_data['xml'],
                                             accept_empty=self.cfg['line_ocr']['accept_empty'],
                                             yield_block_page_num=self.cfg['line_ocr']['yield_block_page_num'],
                                             yield_block_pillar=self.cfg['line_ocr']['yield_block_pillar'])

        output_data = copy.deepcopy(input_data)
        output_data['xml'] = result_xml
        result.append(output_data)

        return result
cli/procs/page_deskew.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2022, National Diet Library, Japan
2
+ #
3
+ # This software is released under the CC BY 4.0.
4
+ # https://creativecommons.org/licenses/by/4.0/
5
+
6
+
7
+ import copy
8
+ import numpy
9
+
10
+ from .base_proc import BaseInferenceProcess
11
+
12
+
13
class PageDeskewProcess(BaseInferenceProcess):
    """
    Process class that runs page deskewing (skew correction).

    Inherits from BaseInferenceProcess.
    """

    # Configuration keys forwarded verbatim from cfg['page_deskew'] to the
    # Deskew engine constructor.
    _DESKEW_KEYS = ('r_angle', 'skew_max', 'acc_deg', 'method', 'gray',
                    'quality', 'short', 'roi_w', 'roi_h')

    def __init__(self, cfg, pid):
        """
        Parameters
        ----------
        cfg : dict
            Configuration for this inference process.
        pid : int
            Number indicating the execution order of this process.
        """
        super().__init__(cfg, pid, '_page_deskew')
        from src.deskew_HT.alyn3.deskew import Deskew
        options = {key: cfg['page_deskew'][key] for key in self._DESKEW_KEYS}
        # Input/output paths are unused ('') because deskewing is done
        # on in-memory images.
        self.deskewer = Deskew('', '', **options)
        self._run_src_inference = self.deskewer.deskew_on_memory

    def _is_valid_input(self, input_data):
        """
        Validate the input data for this inference process.

        Parameters
        ----------
        input_data : dict
            Input data for the inference.

        Returns
        -------
        bool
            True if the input data is valid, otherwise False.
        """
        if type(input_data['img']) is numpy.ndarray:
            return True
        print('PageDeskewProcess: input img is not numpy.ndarray')
        return False

    def _run_process(self, input_data):
        """
        Main body of the inference process.

        Parameters
        ----------
        input_data : dict
            Input data for the inference.

        Returns
        -------
        result : list of dict
            Single-element list holding a copy of input_data whose 'img'
            entry is replaced by the deskewed image.
        """
        print('### Page Deskew Process ###')
        deskewed_img = self._run_src_inference(input_data['img'])

        output_data = copy.deepcopy(input_data)
        output_data['img'] = deskewed_img
        return [output_data]
cli/procs/page_separation.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2022, National Diet Library, Japan
2
+ #
3
+ # This software is released under the CC BY 4.0.
4
+ # https://creativecommons.org/licenses/by/4.0/
5
+
6
+
7
+ import copy
8
+ import numpy
9
+ import os
10
+
11
+ from .base_proc import BaseInferenceProcess
12
+
13
+
14
class PageSeparation(BaseInferenceProcess):
    """
    Process class that splits a scanned double-page spread at the gutter.

    Inherits from BaseInferenceProcess.
    """
    def __init__(self, cfg, pid):
        """
        Parameters
        ----------
        cfg : dict
            Configuration for this inference process.
        pid : int
            Number indicating the execution order of this process.
        """
        super().__init__(cfg, pid, '_page_sep')

        if self.cfg['page_separation']['silence_tf_log']:
            # Suppress TensorFlow's verbose startup / deprecation output
            # before the SSD model module imports it.
            import logging
            import warnings
            os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
            warnings.simplefilter(action='ignore', category=FutureWarning)

            import tensorflow as tf
            tf.get_logger().setLevel(logging.ERROR)

        from src.separate_pages_ssd.inference_divided import divide_facing_page_with_cli, load_weightfile
        load_weightfile(os.path.abspath(self.cfg['page_separation']['weight_path']))
        self._run_src_inference = divide_facing_page_with_cli

    def _is_valid_input(self, input_data):
        """
        Validate the input data for this inference process.

        Parameters
        ----------
        input_data : dict
            Input data for the inference.

        Returns
        -------
        bool
            True if the input data is valid, otherwise False.
        """
        if type(input_data['img']) is not numpy.ndarray:
            print('PageSeparation: input img is not numpy.ndarray')
            return False
        return True

    def _run_process(self, input_data):
        """
        Main body of the inference process.

        Parameters
        ----------
        input_data : dict
            Input data for the inference.

        Returns
        -------
        result : list of dict or None
            One output dict per separated page (copy of input_data with the
            page image and a derived file name), or None when the number of
            output pages is invalid and not allowed by configuration.
        """
        print('### Page Separation ###')
        log_file_path = None
        if self.process_dump_dir is not None:
            log_file_path = os.path.join(self.process_dump_dir, self.cfg['page_separation']['log'])
        inference_output = self._run_src_inference(input=input_data['img'],
                                                   input_path=input_data['img_path'],
                                                   left=self.cfg['page_separation']['left'],
                                                   right=self.cfg['page_separation']['right'],
                                                   single=self.cfg['page_separation']['single'],
                                                   ext=self.cfg['page_separation']['ext'],
                                                   quality=self.cfg['page_separation']['quality'],
                                                   short=self.cfg['page_separation']['short'],
                                                   log=log_file_path)
        # A spread must separate into 1 (single page) or 2 (left/right) images.
        if (not self.cfg['page_separation']['allow_invalid_num_output']) and len(inference_output) not in (1, 2):
            print('ERROR: Output from page separation must be 1 or 2 pages.')
            return None

        # Create result to pass img_path and img data.
        result = []
        # The stem is invariant across pages; compute it once outside the loop.
        orig_img_name = os.path.basename(input_data['img_path'])
        stem, _ = os.path.splitext(orig_img_name)
        for idx, single_output_img in enumerate(inference_output):
            output_data = copy.deepcopy(input_data)
            output_data['img'] = single_output_img
            output_data['orig_img_path'] = input_data['img_path']

            # First output page is labeled 'L', the second 'R'.
            side = 'L' if idx == 0 else 'R'
            # NOTE(review): the output extension is hard-coded to '.jpg' even
            # though cfg['page_separation']['ext'] exists — confirm intended.
            output_data['img_file_name'] = stem + '_' + side + '.jpg'

            result.append(output_data)

        return result
config.yml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Pipeline configuration for the OCR CLI — one section per inference stage.
# Stage 1: split facing-page spreads at the gutter.
page_separation:
  input: 'inference_input'
  out: 'NO_DUMP'
  # Filename suffixes for left / right / single-page outputs.
  left: '_01'
  right: '_02'
  single: '_00'
  ext: '.jpg'
  quality: 100
  short: null
  log: 'trim_pos.tsv'
  weight_path: 'src/separate_pages_ssd/ssd_tools/weights.hdf5'
  allow_invalid_num_output: True
  silence_tf_log: True
# Stage 2: page skew correction.
page_deskew:
  r_angle: 0
  skew_max: 4.0
  acc_deg: 0.1
  method: 1
  gray: 1.0
  quality: 100
  short: null
  roi_w: 1.0
  roi_h: 1.0
# Stage 3: layout (region) extraction.
layout_extraction:
  config_path: 'src/ndl_layout/models/ndl_layout_config.py'
  checkpoint_path: 'src/ndl_layout/models/epoch_140_all_eql_bt.pth'
  device: 'cpu'
  # NOTE(review): this key is quoted unlike its siblings; YAML parses it
  # identically as score_thr, but the quoting is inconsistent.
  'score_thr': 0.3
# Stage 4: text-line character recognition.
line_ocr:
  char_list: 'src/text_recognition/models/mojilist_NDL.txt'
  saved_model: 'src/text_recognition/models/ndlenfixed64-mj0-synth1.pth'
  accept_empty: True
  batch_max_length: 100
  batch_size: 100
  # NOTE(review): unquoted None parses as the STRING "None" in YAML, not as
  # null — confirm downstream code expects the string (as the quoted 'None'
  # values for SequenceModeling/Transformation below suggest).
  character: None
  imgW: 1200
  workers: 1
  xml: None
  yield_block_pillar: True
  yield_block_page_num: True
  FeatureExtraction: 'ResNet'
  Prediction: 'CTC'
  PAD: True
  SequenceModeling: 'None'
  Transformation: 'None'
docker/Dockerfile ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# CUDA 11.1 + cuDNN 8 development image on Ubuntu 18.04.
FROM nvcr.io/nvidia/cuda:11.1.1-cudnn8-devel-ubuntu18.04

ENV PROJECT_DIR=/root/ocr_cli
# Build CUDA extensions even when no GPU is visible at image-build time.
ENV FORCE_CUDA="1"
ENV TORCH_CUDA_ARCH_LIST="7.5+PTX"
ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"

RUN set -x \
    && apt update \
    && apt upgrade -y

# Japanese locale, needed for the NDL character lists and filenames.
RUN set -x \
    && apt update \
    && apt -y install locales \
    && locale-gen ja_JP.UTF-8
ENV LANG ja_JP.UTF-8
ENV LANGUAGE ja_JP:ja
ENV LC_ALL=ja_JP.UTF-8
RUN localedef -f UTF-8 -i ja_JP ja_JP.utf8

# OpenCV runtime dependencies and git.
RUN set -x && apt -y install libgl1-mesa-dev libglib2.0-0 git
# Python 3.7 made the default `python`, plus pip via get-pip.
RUN set -x \
    && apt -y install python3.7 python3.7-dev \
    && ln -s /usr/bin/python3.7 /usr/bin/python \
    && apt -y install wget python3-distutils && wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py

COPY . ${PROJECT_DIR}

RUN set -x \
    && pip install -r ${PROJECT_DIR}/requirements.txt
# Torch pinned to the CUDA 11.1 LTS wheel matching the base image.
RUN set -x && pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
# Build and install the vendored mmdetection from source.
RUN set -x && cd ${PROJECT_DIR}/src/ndl_layout/mmdetection && python setup.py bdist_wheel && pip install dist/*.whl
ENV PYTHONPATH $PYTHONPATH:${PROJECT_DIR}/src/text_recognition/deep-text-recognition-benchmark
RUN set -x && pip install mmcv-full==1.4.0 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html

WORKDIR ${PROJECT_DIR}
docker/dockerbuild.sh ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TAG=ocr-cli-py37
2
+ DOCKERIGNORE=docker/dockerignore
3
+ DOCKERFILE=docker/Dockerfile
4
+ wget https://lab.ndl.go.jp/dataset/ndlocr/text_recognition/mojilist_NDL.txt -P ./src/text_recognition/models
5
+ wget https://lab.ndl.go.jp/dataset/ndlocr/text_recognition/ndlenfixed64-mj0-synth1.pth -P ./src/text_recognition/models
6
+ wget https://lab.ndl.go.jp/dataset/ndlocr/ndl_layout/ndl_layout_config.py -P ./src/ndl_layout/models
7
+ wget https://lab.ndl.go.jp/dataset/ndlocr/ndl_layout/epoch_140_all_eql_bt.pth -P ./src/ndl_layout/models
8
+ wget https://lab.ndl.go.jp/dataset/ndlocr/separate_pages_ssd/weights.hdf5 -P ./src/separate_pages_ssd/ssd_tools
9
+
10
+ cp ${DOCKERIGNORE} .dockerignore
11
+ docker build -t ${TAG} -f ${DOCKERFILE} .
12
+ rm .dockerignore
docker/dockerignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ **/*.pyc
2
+ **/*.pyd
3
+ **/*~
4
+ **/.*
docker/run_docker.sh ADDED
@@ -0,0 +1 @@
 
 
1
+ docker run --gpus all -d --rm --name ocr_cli_runner -i ocr-cli-py37:latest
main.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2022, National Diet Library, Japan
2
+ #
3
+ # This software is released under the CC BY 4.0.
4
+ # https://creativecommons.org/licenses/by/4.0/
5
+
6
+
7
+ import click
8
+ import json
9
+ import os
10
+ import sys
11
+
12
+ from cli.core import OcrInferencer
13
+ from cli.core import utils
14
+
15
+
16
@click.group()
@click.option('--debug', is_flag=True)
@click.pass_context
def cmd(ctx, debug):
    # Root command group: store the --debug flag in the shared click context
    # object so subcommands can read ctx.obj['DEBUG'].
    ctx.obj['DEBUG'] = debug
21
+
22
+
23
@cmd.command()
@click.pass_context
def help(ctx):
    # Placeholder 'help' subcommand; echoes a notice when --debug is set.
    if ctx.obj['DEBUG']:
        click.echo('DEBUG MODE!')
    click.echo('help!')
29
+
30
+
31
@cmd.command()
@click.pass_context
@click.argument('input_root')
@click.argument('output_root')
@click.option('-s', '--input_structure', type=click.Choice(['s', 'i', 't', 'w', 'f'], case_sensitive=True), default='s', help='Input directory structure type. s(single), i(intermediate_output), t(tosho_data), w(workstation), and f(image_file).')
@click.option('-p', '--proc_range', type=str, default='0..3', help='Inference process range to run. Default is "0..3".')
@click.option('-c', '--config_file', type=str, default='config.yml', help='Configuration yml file for inference. Default is "config.yml".')
@click.option('-i', '--save_image', type=bool, default=False, is_flag=True, help='Output result image file with text file.')
@click.option('-x', '--save_xml', type=bool, default=False, is_flag=True, help='Output result XML file with text file.')
@click.option('-d', '--dump', type=bool, default=False, is_flag=True, help='Dump all intermediate process output.')
def infer(ctx, input_root, output_root, config_file, proc_range, save_image, save_xml, input_structure, dump):
    """
    \b
    INPUT_ROOT \t: Input data directory for inference.
    OUTPUT_ROOT \t: Output directory for inference result.
    """
    click.echo('start inference !')
    click.echo('input_root : {0}'.format(input_root))
    click.echo('output_root : {0}'.format(output_root))
    click.echo('config_file : {0}'.format(config_file))

    cfg = {
        'input_root': input_root,
        'output_root': output_root,
        'config_file': config_file,
        'proc_range': proc_range,
        'save_image': save_image,
        'save_xml': save_xml,
        'dump': dump,
        'input_structure': input_structure
    }

    # check if input_root exists
    if not os.path.exists(input_root):
        print('INPUT_ROOT not found :{0}'.format(input_root), file=sys.stderr)
        # Exit non-zero: a missing input directory is an error.  (The
        # original exited with status 0, signalling success to callers.)
        sys.exit(1)

    # parse command line option
    infer_cfg = utils.parse_cfg(cfg)
    if infer_cfg is None:
        print('[ERROR] Config parse error :{0}'.format(input_root), file=sys.stderr)
        sys.exit(1)

    # prepare output root directory
    infer_cfg['output_root'] = utils.mkdir_with_duplication_check(infer_cfg['output_root'])

    # save inference option
    with open(os.path.join(infer_cfg['output_root'], 'opt.json'), 'w') as fp:
        json.dump(infer_cfg, fp, ensure_ascii=False, indent=4,
                  sort_keys=True, separators=(',', ': '))

    # do inference
    inferencer = OcrInferencer(infer_cfg)
    inferencer.run()
85
+
86
+
87
def main():
    # Entry point: invoke the click command group with an empty shared
    # context object (populated by the group callback).
    cmd(obj={})


if __name__ == '__main__':
    main()
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ click
2
+ keras==2.2.4
3
+ lmdb==1.2.1
4
+ natsort==7.1.1
5
+ nltk==3.6.2
6
+ opencv-python==4.5.1.48
7
+ pyyaml
8
+ scikit-image==0.16.2
9
+ scipy==1.1.0
10
+ tensorboard==1.14.0
11
+ tensorflow==1.14.0
12
+ tensorflow-estimator==1.14.0
13
+ wheel
src/deskew_HT/.gitignore ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # image files
2
+ *.jp2
3
+ *.jpg
4
+ *.JPG
5
+ *.png
6
+
7
+ # log
8
+ *.txt
9
+
10
+ # annotation
11
+ *.xml
12
+
13
+ # Byte-compiled / optimized / DLL files
14
+ __pycache__/
15
+ *.py[cod]
16
+ *$py.class
src/deskew_HT/LICENSE ADDED
@@ -0,0 +1,432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ alyn3 is based on the following repository:
2
+
3
+ https://github.com/kakul/Alyn/tree/master/alyn
4
+
5
+ The original software license is as follows.
6
+
7
+ -----------------------------------------------------------------------
8
+
9
+ The MIT License (MIT)
10
+
11
+ Copyright (c) 2016 Kakul
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+
31
+ -----------------------------------------------------------------------
32
+
33
+ The following license applies for other modifications in this repository:
34
+
35
+ -----------------------------------------------------------------------
36
+
37
+
38
+ Copyright (c) 2022, National Diet Library, Japan.
39
+
40
+ Attribution 4.0 International
41
+
42
+ =======================================================================
43
+
44
+ Creative Commons Corporation ("Creative Commons") is not a law firm and
45
+ does not provide legal services or legal advice. Distribution of
46
+ Creative Commons public licenses does not create a lawyer-client or
47
+ other relationship. Creative Commons makes its licenses and related
48
+ information available on an "as-is" basis. Creative Commons gives no
49
+ warranties regarding its licenses, any material licensed under their
50
+ terms and conditions, or any related information. Creative Commons
51
+ disclaims all liability for damages resulting from their use to the
52
+ fullest extent possible.
53
+
54
+ Using Creative Commons Public Licenses
55
+
56
+ Creative Commons public licenses provide a standard set of terms and
57
+ conditions that creators and other rights holders may use to share
58
+ original works of authorship and other material subject to copyright
59
+ and certain other rights specified in the public license below. The
60
+ following considerations are for informational purposes only, are not
61
+ exhaustive, and do not form part of our licenses.
62
+
63
+ Considerations for licensors: Our public licenses are
64
+ intended for use by those authorized to give the public
65
+ permission to use material in ways otherwise restricted by
66
+ copyright and certain other rights. Our licenses are
67
+ irrevocable. Licensors should read and understand the terms
68
+ and conditions of the license they choose before applying it.
69
+ Licensors should also secure all rights necessary before
70
+ applying our licenses so that the public can reuse the
71
+ material as expected. Licensors should clearly mark any
72
+ material not subject to the license. This includes other CC-
73
+ licensed material, or material used under an exception or
74
+ limitation to copyright. More considerations for licensors:
75
+ wiki.creativecommons.org/Considerations_for_licensors
76
+
77
+ Considerations for the public: By using one of our public
78
+ licenses, a licensor grants the public permission to use the
79
+ licensed material under specified terms and conditions. If
80
+ the licensor's permission is not necessary for any reason--for
81
+ example, because of any applicable exception or limitation to
82
+ copyright--then that use is not regulated by the license. Our
83
+ licenses grant only permissions under copyright and certain
84
+ other rights that a licensor has authority to grant. Use of
85
+ the licensed material may still be restricted for other
86
+ reasons, including because others have copyright or other
87
+ rights in the material. A licensor may make special requests,
88
+ such as asking that all changes be marked or described.
89
+ Although not required by our licenses, you are encouraged to
90
+ respect those requests where reasonable. More_considerations
91
+ for the public:
92
+ wiki.creativecommons.org/Considerations_for_licensees
93
+
94
+ =======================================================================
95
+
96
+ Creative Commons Attribution 4.0 International Public License
97
+
98
+ By exercising the Licensed Rights (defined below), You accept and agree
99
+ to be bound by the terms and conditions of this Creative Commons
100
+ Attribution 4.0 International Public License ("Public License"). To the
101
+ extent this Public License may be interpreted as a contract, You are
102
+ granted the Licensed Rights in consideration of Your acceptance of
103
+ these terms and conditions, and the Licensor grants You such rights in
104
+ consideration of benefits the Licensor receives from making the
105
+ Licensed Material available under these terms and conditions.
106
+
107
+
108
+ Section 1 -- Definitions.
109
+
110
+ a. Adapted Material means material subject to Copyright and Similar
111
+ Rights that is derived from or based upon the Licensed Material
112
+ and in which the Licensed Material is translated, altered,
113
+ arranged, transformed, or otherwise modified in a manner requiring
114
+ permission under the Copyright and Similar Rights held by the
115
+ Licensor. For purposes of this Public License, where the Licensed
116
+ Material is a musical work, performance, or sound recording,
117
+ Adapted Material is always produced where the Licensed Material is
118
+ synched in timed relation with a moving image.
119
+
120
+ b. Adapter's License means the license You apply to Your Copyright
121
+ and Similar Rights in Your contributions to Adapted Material in
122
+ accordance with the terms and conditions of this Public License.
123
+
124
+ c. Copyright and Similar Rights means copyright and/or similar rights
125
+ closely related to copyright including, without limitation,
126
+ performance, broadcast, sound recording, and Sui Generis Database
127
+ Rights, without regard to how the rights are labeled or
128
+ categorized. For purposes of this Public License, the rights
129
+ specified in Section 2(b)(1)-(2) are not Copyright and Similar
130
+ Rights.
131
+
132
+ d. Effective Technological Measures means those measures that, in the
133
+ absence of proper authority, may not be circumvented under laws
134
+ fulfilling obligations under Article 11 of the WIPO Copyright
135
+ Treaty adopted on December 20, 1996, and/or similar international
136
+ agreements.
137
+
138
+ e. Exceptions and Limitations means fair use, fair dealing, and/or
139
+ any other exception or limitation to Copyright and Similar Rights
140
+ that applies to Your use of the Licensed Material.
141
+
142
+ f. Licensed Material means the artistic or literary work, database,
143
+ or other material to which the Licensor applied this Public
144
+ License.
145
+
146
+ g. Licensed Rights means the rights granted to You subject to the
147
+ terms and conditions of this Public License, which are limited to
148
+ all Copyright and Similar Rights that apply to Your use of the
149
+ Licensed Material and that the Licensor has authority to license.
150
+
151
+ h. Licensor means the individual(s) or entity(ies) granting rights
152
+ under this Public License.
153
+
154
+ i. Share means to provide material to the public by any means or
155
+ process that requires permission under the Licensed Rights, such
156
+ as reproduction, public display, public performance, distribution,
157
+ dissemination, communication, or importation, and to make material
158
+ available to the public including in ways that members of the
159
+ public may access the material from a place and at a time
160
+ individually chosen by them.
161
+
162
+ j. Sui Generis Database Rights means rights other than copyright
163
+ resulting from Directive 96/9/EC of the European Parliament and of
164
+ the Council of 11 March 1996 on the legal protection of databases,
165
+ as amended and/or succeeded, as well as other essentially
166
+ equivalent rights anywhere in the world.
167
+
168
+ k. You means the individual or entity exercising the Licensed Rights
169
+ under this Public License. Your has a corresponding meaning.
170
+
171
+
172
+ Section 2 -- Scope.
173
+
174
+ a. License grant.
175
+
176
+ 1. Subject to the terms and conditions of this Public License,
177
+ the Licensor hereby grants You a worldwide, royalty-free,
178
+ non-sublicensable, non-exclusive, irrevocable license to
179
+ exercise the Licensed Rights in the Licensed Material to:
180
+
181
+ a. reproduce and Share the Licensed Material, in whole or
182
+ in part; and
183
+
184
+ b. produce, reproduce, and Share Adapted Material.
185
+
186
+ 2. Exceptions and Limitations. For the avoidance of doubt, where
187
+ Exceptions and Limitations apply to Your use, this Public
188
+ License does not apply, and You do not need to comply with
189
+ its terms and conditions.
190
+
191
+ 3. Term. The term of this Public License is specified in Section
192
+ 6(a).
193
+
194
+ 4. Media and formats; technical modifications allowed. The
195
+ Licensor authorizes You to exercise the Licensed Rights in
196
+ all media and formats whether now known or hereafter created,
197
+ and to make technical modifications necessary to do so. The
198
+ Licensor waives and/or agrees not to assert any right or
199
+ authority to forbid You from making technical modifications
200
+ necessary to exercise the Licensed Rights, including
201
+ technical modifications necessary to circumvent Effective
202
+ Technological Measures. For purposes of this Public License,
203
+ simply making modifications authorized by this Section 2(a)
204
+ (4) never produces Adapted Material.
205
+
206
+ 5. Downstream recipients.
207
+
208
+ a. Offer from the Licensor -- Licensed Material. Every
209
+ recipient of the Licensed Material automatically
210
+ receives an offer from the Licensor to exercise the
211
+ Licensed Rights under the terms and conditions of this
212
+ Public License.
213
+
214
+ b. No downstream restrictions. You may not offer or impose
215
+ any additional or different terms or conditions on, or
216
+ apply any Effective Technological Measures to, the
217
+ Licensed Material if doing so restricts exercise of the
218
+ Licensed Rights by any recipient of the Licensed
219
+ Material.
220
+
221
+ 6. No endorsement. Nothing in this Public License constitutes or
222
+ may be construed as permission to assert or imply that You
223
+ are, or that Your use of the Licensed Material is, connected
224
+ with, or sponsored, endorsed, or granted official status by,
225
+ the Licensor or others designated to receive attribution as
226
+ provided in Section 3(a)(1)(A)(i).
227
+
228
+ b. Other rights.
229
+
230
+ 1. Moral rights, such as the right of integrity, are not
231
+ licensed under this Public License, nor are publicity,
232
+ privacy, and/or other similar personality rights; however, to
233
+ the extent possible, the Licensor waives and/or agrees not to
234
+ assert any such rights held by the Licensor to the limited
235
+ extent necessary to allow You to exercise the Licensed
236
+ Rights, but not otherwise.
237
+
238
+ 2. Patent and trademark rights are not licensed under this
239
+ Public License.
240
+
241
+ 3. To the extent possible, the Licensor waives any right to
242
+ collect royalties from You for the exercise of the Licensed
243
+ Rights, whether directly or through a collecting society
244
+ under any voluntary or waivable statutory or compulsory
245
+ licensing scheme. In all other cases the Licensor expressly
246
+ reserves any right to collect such royalties.
247
+
248
+
249
+ Section 3 -- License Conditions.
250
+
251
+ Your exercise of the Licensed Rights is expressly made subject to the
252
+ following conditions.
253
+
254
+ a. Attribution.
255
+
256
+ 1. If You Share the Licensed Material (including in modified
257
+ form), You must:
258
+
259
+ a. retain the following if it is supplied by the Licensor
260
+ with the Licensed Material:
261
+
262
+ i. identification of the creator(s) of the Licensed
263
+ Material and any others designated to receive
264
+ attribution, in any reasonable manner requested by
265
+ the Licensor (including by pseudonym if
266
+ designated);
267
+
268
+ ii. a copyright notice;
269
+
270
+ iii. a notice that refers to this Public License;
271
+
272
+ iv. a notice that refers to the disclaimer of
273
+ warranties;
274
+
275
+ v. a URI or hyperlink to the Licensed Material to the
276
+ extent reasonably practicable;
277
+
278
+ b. indicate if You modified the Licensed Material and
279
+ retain an indication of any previous modifications; and
280
+
281
+ c. indicate the Licensed Material is licensed under this
282
+ Public License, and include the text of, or the URI or
283
+ hyperlink to, this Public License.
284
+
285
+ 2. You may satisfy the conditions in Section 3(a)(1) in any
286
+ reasonable manner based on the medium, means, and context in
287
+ which You Share the Licensed Material. For example, it may be
288
+ reasonable to satisfy the conditions by providing a URI or
289
+ hyperlink to a resource that includes the required
290
+ information.
291
+
292
+ 3. If requested by the Licensor, You must remove any of the
293
+ information required by Section 3(a)(1)(A) to the extent
294
+ reasonably practicable.
295
+
296
+ 4. If You Share Adapted Material You produce, the Adapter's
297
+ License You apply must not prevent recipients of the Adapted
298
+ Material from complying with this Public License.
299
+
300
+
301
+ Section 4 -- Sui Generis Database Rights.
302
+
303
+ Where the Licensed Rights include Sui Generis Database Rights that
304
+ apply to Your use of the Licensed Material:
305
+
306
+ a. for the avoidance of doubt, Section 2(a)(1) grants You the right
307
+ to extract, reuse, reproduce, and Share all or a substantial
308
+ portion of the contents of the database;
309
+
310
+ b. if You include all or a substantial portion of the database
311
+ contents in a database in which You have Sui Generis Database
312
+ Rights, then the database in which You have Sui Generis Database
313
+ Rights (but not its individual contents) is Adapted Material; and
314
+
315
+ c. You must comply with the conditions in Section 3(a) if You Share
316
+ all or a substantial portion of the contents of the database.
317
+
318
+ For the avoidance of doubt, this Section 4 supplements and does not
319
+ replace Your obligations under this Public License where the Licensed
320
+ Rights include other Copyright and Similar Rights.
321
+
322
+
323
+ Section 5 -- Disclaimer of Warranties and Limitation of Liability.
324
+
325
+ a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
326
+ EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
327
+ AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
328
+ ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
329
+ IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
330
+ WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
331
+ PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
332
+ ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
333
+ KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
334
+ ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
335
+
336
+ b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
337
+ TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
338
+ NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
339
+ INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
340
+ COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
341
+ USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
342
+ ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
343
+ DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
344
+ IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
345
+
346
+ c. The disclaimer of warranties and limitation of liability provided
347
+ above shall be interpreted in a manner that, to the extent
348
+ possible, most closely approximates an absolute disclaimer and
349
+ waiver of all liability.
350
+
351
+
352
+ Section 6 -- Term and Termination.
353
+
354
+ a. This Public License applies for the term of the Copyright and
355
+ Similar Rights licensed here. However, if You fail to comply with
356
+ this Public License, then Your rights under this Public License
357
+ terminate automatically.
358
+
359
+ b. Where Your right to use the Licensed Material has terminated under
360
+ Section 6(a), it reinstates:
361
+
362
+ 1. automatically as of the date the violation is cured, provided
363
+ it is cured within 30 days of Your discovery of the
364
+ violation; or
365
+
366
+ 2. upon express reinstatement by the Licensor.
367
+
368
+ For the avoidance of doubt, this Section 6(b) does not affect any
369
+ right the Licensor may have to seek remedies for Your violations
370
+ of this Public License.
371
+
372
+ c. For the avoidance of doubt, the Licensor may also offer the
373
+ Licensed Material under separate terms or conditions or stop
374
+ distributing the Licensed Material at any time; however, doing so
375
+ will not terminate this Public License.
376
+
377
+ d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
378
+ License.
379
+
380
+
381
+ Section 7 -- Other Terms and Conditions.
382
+
383
+ a. The Licensor shall not be bound by any additional or different
384
+ terms or conditions communicated by You unless expressly agreed.
385
+
386
+ b. Any arrangements, understandings, or agreements regarding the
387
+ Licensed Material not stated herein are separate from and
388
+ independent of the terms and conditions of this Public License.
389
+
390
+
391
+ Section 8 -- Interpretation.
392
+
393
+ a. For the avoidance of doubt, this Public License does not, and
394
+ shall not be interpreted to, reduce, limit, restrict, or impose
395
+ conditions on any use of the Licensed Material that could lawfully
396
+ be made without permission under this Public License.
397
+
398
+ b. To the extent possible, if any provision of this Public License is
399
+ deemed unenforceable, it shall be automatically reformed to the
400
+ minimum extent necessary to make it enforceable. If the provision
401
+ cannot be reformed, it shall be severed from this Public License
402
+ without affecting the enforceability of the remaining terms and
403
+ conditions.
404
+
405
+ c. No term or condition of this Public License will be waived and no
406
+ failure to comply consented to unless expressly agreed to by the
407
+ Licensor.
408
+
409
+ d. Nothing in this Public License constitutes or may be interpreted
410
+ as a limitation upon, or waiver of, any privileges and immunities
411
+ that apply to the Licensor or You, including from the legal
412
+ processes of any jurisdiction or authority.
413
+
414
+
415
+ =======================================================================
416
+
417
+ Creative Commons is not a party to its public licenses.
418
+ Notwithstanding, Creative Commons may elect to apply one of its public
419
+ licenses to material it publishes and in those instances will be
420
+ considered the "Licensor." Except for the limited purpose of indicating
421
+ that material is shared under a Creative Commons public license or as
422
+ otherwise permitted by the Creative Commons policies published at
423
+ creativecommons.org/policies, Creative Commons does not authorize the
424
+ use of the trademark "Creative Commons" or any other trademark or logo
425
+ of Creative Commons without its prior written consent including,
426
+ without limitation, in connection with any unauthorized modifications
427
+ to any of its public licenses or any other arrangements,
428
+ understandings, or agreements concerning use of licensed material. For
429
+ the avoidance of doubt, this paragraph does not form part of the public
430
+ licenses.
431
+
432
+ Creative Commons may be contacted at creativecommons.org.
src/deskew_HT/LICENSE_DEPENDENCIES ADDED
@@ -0,0 +1,677 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Pillow
2
+
3
+ The Python Imaging Library (PIL) is
4
+
5
+ Copyright © 1997-2011 by Secret Labs AB
6
+ Copyright © 1995-2011 by Fredrik Lundh
7
+
8
+ Pillow is the friendly PIL fork. It is
9
+
10
+ Copyright © 2010-2022 by Alex Clark and contributors
11
+
12
+ Like PIL, Pillow is licensed under the open source HPND License:
13
+
14
+ By obtaining, using, and/or copying this software and/or its associated
15
+ documentation, you agree that you have read, understood, and will comply
16
+ with the following terms and conditions:
17
+
18
+ Permission to use, copy, modify, and distribute this software and its
19
+ associated documentation for any purpose and without fee is hereby granted,
20
+ provided that the above copyright notice appears in all copies, and that
21
+ both that copyright notice and this permission notice appear in supporting
22
+ documentation, and that the name of Secret Labs AB or the author not be
23
+ used in advertising or publicity pertaining to distribution of the software
24
+ without specific, written prior permission.
25
+
26
+ SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
27
+ SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
28
+ IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR ANY SPECIAL,
29
+ INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
30
+ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
31
+ OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
32
+ PERFORMANCE OF THIS SOFTWARE.
33
+
34
+
35
+ # PyWavelets
36
+
37
+ Copyright (c) 2006-2012 Filip Wasilewski <http://en.ig.ma/>
38
+ Copyright (c) 2012-2020 The PyWavelets Developers <https://github.com/PyWavelets/pywt>
39
+
40
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
41
+ this software and associated documentation files (the "Software"), to deal in
42
+ the Software without restriction, including without limitation the rights to
43
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
44
+ of the Software, and to permit persons to whom the Software is furnished to do
45
+ so, subject to the following conditions:
46
+
47
+ The above copyright notice and this permission notice shall be included in all
48
+ copies or substantial portions of the Software.
49
+
50
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
51
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
52
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
53
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
54
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
55
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
56
+ SOFTWARE.
57
+
58
+
59
+ # cycler
60
+
61
+ Copyright (c) 2015, matplotlib project
62
+ All rights reserved.
63
+
64
+ Redistribution and use in source and binary forms, with or without
65
+ modification, are permitted provided that the following conditions are met:
66
+
67
+ * Redistributions of source code must retain the above copyright notice, this
68
+ list of conditions and the following disclaimer.
69
+
70
+ * Redistributions in binary form must reproduce the above copyright notice,
71
+ this list of conditions and the following disclaimer in the documentation
72
+ and/or other materials provided with the distribution.
73
+
74
+ * Neither the name of the matplotlib project nor the names of its
75
+ contributors may be used to endorse or promote products derived from
76
+ this software without specific prior written permission.
77
+
78
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
79
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
82
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
85
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
86
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
87
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
88
+
89
+
90
+ # decorator
91
+
92
+ Copyright (c) 2005-2018, Michele Simionato
93
+ All rights reserved.
94
+
95
+ Redistribution and use in source and binary forms, with or without
96
+ modification, are permitted provided that the following conditions are
97
+ met:
98
+
99
+ Redistributions of source code must retain the above copyright
100
+ notice, this list of conditions and the following disclaimer.
101
+ Redistributions in bytecode form must reproduce the above copyright
102
+ notice, this list of conditions and the following disclaimer in
103
+ the documentation and/or other materials provided with the
104
+ distribution.
105
+
106
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
107
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
108
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
109
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
110
+ HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
111
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
112
+ BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
113
+ OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
114
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
115
+ TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
116
+ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
117
+ DAMAGE.
118
+
119
+
120
+ # imageio
121
+
122
+ Copyright (c) 2014-2020, imageio developers
123
+ All rights reserved.
124
+
125
+ Redistribution and use in source and binary forms, with or without
126
+ modification, are permitted provided that the following conditions are met:
127
+
128
+ * Redistributions of source code must retain the above copyright notice, this
129
+ list of conditions and the following disclaimer.
130
+
131
+ * Redistributions in binary form must reproduce the above copyright notice,
132
+ this list of conditions and the following disclaimer in the documentation
133
+ and/or other materials provided with the distribution.
134
+
135
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
136
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
137
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
138
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
139
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
140
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
141
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
142
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
143
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
144
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
145
+
146
+
147
+ # kiwisolver
148
+
149
+ =========================
150
+ The Kiwi licensing terms
151
+ =========================
152
+ Kiwi is licensed under the terms of the Modified BSD License (also known as
153
+ New or Revised BSD), as follows:
154
+
155
+ Copyright (c) 2013, Nucleic Development Team
156
+
157
+ All rights reserved.
158
+
159
+ Redistribution and use in source and binary forms, with or without
160
+ modification, are permitted provided that the following conditions are met:
161
+
162
+ Redistributions of source code must retain the above copyright notice, this
163
+ list of conditions and the following disclaimer.
164
+
165
+ Redistributions in binary form must reproduce the above copyright notice, this
166
+ list of conditions and the following disclaimer in the documentation and/or
167
+ other materials provided with the distribution.
168
+
169
+ Neither the name of the Nucleic Development Team nor the names of its
170
+ contributors may be used to endorse or promote products derived from this
171
+ software without specific prior written permission.
172
+
173
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
174
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
175
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
176
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
177
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
178
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
179
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
180
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
181
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
182
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
183
+
184
+ About Kiwi
185
+ ----------
186
+ Chris Colbert began the Kiwi project in December 2013 in an effort to
187
+ create a blisteringly fast UI constraint solver. Chris is still the
188
+ project lead.
189
+
190
+ The Nucleic Development Team is the set of all contributors to the Nucleic
191
+ project and its subprojects.
192
+
193
+ The core team that coordinates development on GitHub can be found here:
194
+ http://github.com/nucleic. The current team consists of:
195
+
196
+ * Chris Colbert
197
+
198
+ Our Copyright Policy
199
+ --------------------
200
+ Nucleic uses a shared copyright model. Each contributor maintains copyright
201
+ over their contributions to Nucleic. But, it is important to note that these
202
+ contributions are typically only changes to the repositories. Thus, the Nucleic
203
+ source code, in its entirety is not the copyright of any single person or
204
+ institution. Instead, it is the collective copyright of the entire Nucleic
205
+ Development Team. If individual contributors want to maintain a record of what
206
+ changes/contributions they have specific copyright on, they should indicate
207
+ their copyright in the commit message of the change, when they commit the
208
+ change to one of the Nucleic repositories.
209
+
210
+ With this in mind, the following banner should be used in any source code file
211
+ to indicate the copyright and license terms:
212
+
213
+ #------------------------------------------------------------------------------
214
+ # Copyright (c) 2013, Nucleic Development Team.
215
+ #
216
+ # Distributed under the terms of the Modified BSD License.
217
+ #
218
+ # The full license is in the file LICENSE, distributed with this software.
219
+ #------------------------------------------------------------------------------
220
+
221
+
222
+ # matplotlib
223
+
224
+ License agreement for matplotlib versions 1.3.0 and later
225
+ =========================================================
226
+
227
+ 1. This LICENSE AGREEMENT is between the Matplotlib Development Team
228
+ ("MDT"), and the Individual or Organization ("Licensee") accessing and
229
+ otherwise using matplotlib software in source or binary form and its
230
+ associated documentation.
231
+
232
+ 2. Subject to the terms and conditions of this License Agreement, MDT
233
+ hereby grants Licensee a nonexclusive, royalty-free, world-wide license
234
+ to reproduce, analyze, test, perform and/or display publicly, prepare
235
+ derivative works, distribute, and otherwise use matplotlib
236
+ alone or in any derivative version, provided, however, that MDT's
237
+ License Agreement and MDT's notice of copyright, i.e., "Copyright (c)
238
+ 2012- Matplotlib Development Team; All Rights Reserved" are retained in
239
+ matplotlib alone or in any derivative version prepared by
240
+ Licensee.
241
+
242
+ 3. In the event Licensee prepares a derivative work that is based on or
243
+ incorporates matplotlib or any part thereof, and wants to
244
+ make the derivative work available to others as provided herein, then
245
+ Licensee hereby agrees to include in any such work a brief summary of
246
+ the changes made to matplotlib .
247
+
248
+ 4. MDT is making matplotlib available to Licensee on an "AS
249
+ IS" basis. MDT MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
250
+ IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, MDT MAKES NO AND
251
+ DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
252
+ FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB
253
+ WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
254
+
255
+ 5. MDT SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB
256
+ FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR
257
+ LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING
258
+ MATPLOTLIB , OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF
259
+ THE POSSIBILITY THEREOF.
260
+
261
+ 6. This License Agreement will automatically terminate upon a material
262
+ breach of its terms and conditions.
263
+
264
+ 7. Nothing in this License Agreement shall be deemed to create any
265
+ relationship of agency, partnership, or joint venture between MDT and
266
+ Licensee. This License Agreement does not grant permission to use MDT
267
+ trademarks or trade name in a trademark sense to endorse or promote
268
+ products or services of Licensee, or any third party.
269
+
270
+ 8. By copying, installing or otherwise using matplotlib ,
271
+ Licensee agrees to be bound by the terms and conditions of this License
272
+ Agreement.
273
+
274
+ License agreement for matplotlib versions prior to 1.3.0
275
+ ========================================================
276
+
277
+ 1. This LICENSE AGREEMENT is between John D. Hunter ("JDH"), and the
278
+ Individual or Organization ("Licensee") accessing and otherwise using
279
+ matplotlib software in source or binary form and its associated
280
+ documentation.
281
+
282
+ 2. Subject to the terms and conditions of this License Agreement, JDH
283
+ hereby grants Licensee a nonexclusive, royalty-free, world-wide license
284
+ to reproduce, analyze, test, perform and/or display publicly, prepare
285
+ derivative works, distribute, and otherwise use matplotlib
286
+ alone or in any derivative version, provided, however, that JDH's
287
+ License Agreement and JDH's notice of copyright, i.e., "Copyright (c)
288
+ 2002-2011 John D. Hunter; All Rights Reserved" are retained in
289
+ matplotlib alone or in any derivative version prepared by
290
+ Licensee.
291
+
292
+ 3. In the event Licensee prepares a derivative work that is based on or
293
+ incorporates matplotlib or any part thereof, and wants to
294
+ make the derivative work available to others as provided herein, then
295
+ Licensee hereby agrees to include in any such work a brief summary of
296
+ the changes made to matplotlib.
297
+
298
+ 4. JDH is making matplotlib available to Licensee on an "AS
299
+ IS" basis. JDH MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
300
+ IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, JDH MAKES NO AND
301
+ DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
302
+ FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB
303
+ WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
304
+
305
+ 5. JDH SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB
306
+ FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR
307
+ LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING
308
+ MATPLOTLIB , OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF
309
+ THE POSSIBILITY THEREOF.
310
+
311
+ 6. This License Agreement will automatically terminate upon a material
312
+ breach of its terms and conditions.
313
+
314
+ 7. Nothing in this License Agreement shall be deemed to create any
315
+ relationship of agency, partnership, or joint venture between JDH and
316
+ Licensee. This License Agreement does not grant permission to use JDH
317
+ trademarks or trade name in a trademark sense to endorse or promote
318
+ products or services of Licensee, or any third party.
319
+
320
+ 8. By copying, installing or otherwise using matplotlib,
321
+ Licensee agrees to be bound by the terms and conditions of this License
322
+ Agreement.
323
+
324
+
325
+ # networkx
326
+
327
+ NetworkX is distributed with the 3-clause BSD license.
328
+
329
+ ::
330
+
331
+ Copyright (C) 2004-2022, NetworkX Developers
332
+ Aric Hagberg <[email protected]>
333
+ Dan Schult <[email protected]>
334
+ Pieter Swart <[email protected]>
335
+ All rights reserved.
336
+
337
+ Redistribution and use in source and binary forms, with or without
338
+ modification, are permitted provided that the following conditions are
339
+ met:
340
+
341
+ * Redistributions of source code must retain the above copyright
342
+ notice, this list of conditions and the following disclaimer.
343
+
344
+ * Redistributions in binary form must reproduce the above
345
+ copyright notice, this list of conditions and the following
346
+ disclaimer in the documentation and/or other materials provided
347
+ with the distribution.
348
+
349
+ * Neither the name of the NetworkX Developers nor the names of its
350
+ contributors may be used to endorse or promote products derived
351
+ from this software without specific prior written permission.
352
+
353
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
354
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
355
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
356
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
357
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
358
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
359
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
360
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
361
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
362
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
363
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
364
+
365
+
366
+ # numpy
367
+
368
+ Copyright (c) 2005-2022, NumPy Developers.
369
+ All rights reserved.
370
+
371
+ Redistribution and use in source and binary forms, with or without
372
+ modification, are permitted provided that the following conditions are
373
+ met:
374
+
375
+ * Redistributions of source code must retain the above copyright
376
+ notice, this list of conditions and the following disclaimer.
377
+
378
+ * Redistributions in binary form must reproduce the above
379
+ copyright notice, this list of conditions and the following
380
+ disclaimer in the documentation and/or other materials provided
381
+ with the distribution.
382
+
383
+ * Neither the name of the NumPy Developers nor the names of any
384
+ contributors may be used to endorse or promote products derived
385
+ from this software without specific prior written permission.
386
+
387
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
388
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
389
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
390
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
391
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
392
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
393
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
394
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
395
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
396
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
397
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
398
+
399
+
400
+ # opencv-python
401
+
402
+ MIT License
403
+
404
+ Copyright (c) Olli-Pekka Heinisuo
405
+
406
+ Permission is hereby granted, free of charge, to any person obtaining a copy
407
+ of this software and associated documentation files (the "Software"), to deal
408
+ in the Software without restriction, including without limitation the rights
409
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
410
+ copies of the Software, and to permit persons to whom the Software is
411
+ furnished to do so, subject to the following conditions:
412
+
413
+ The above copyright notice and this permission notice shall be included in all
414
+ copies or substantial portions of the Software.
415
+
416
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
417
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
418
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
419
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
420
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
421
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
422
+ SOFTWARE.
423
+
424
+
425
+ # pyparsing
426
+
427
+ Permission is hereby granted, free of charge, to any person obtaining
428
+ a copy of this software and associated documentation files (the
429
+ "Software"), to deal in the Software without restriction, including
430
+ without limitation the rights to use, copy, modify, merge, publish,
431
+ distribute, sublicense, and/or sell copies of the Software, and to
432
+ permit persons to whom the Software is furnished to do so, subject to
433
+ the following conditions:
434
+
435
+ The above copyright notice and this permission notice shall be
436
+ included in all copies or substantial portions of the Software.
437
+
438
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
439
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
440
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
441
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
442
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
443
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
444
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
445
+
446
+
447
+ # python-dateutil
448
+
449
+ Copyright 2017- Paul Ganssle <[email protected]>
450
+ Copyright 2017- dateutil contributors (see AUTHORS file)
451
+
452
+ Licensed under the Apache License, Version 2.0 (the "License");
453
+ you may not use this file except in compliance with the License.
454
+ You may obtain a copy of the License at
455
+
456
+ http://www.apache.org/licenses/LICENSE-2.0
457
+
458
+ Unless required by applicable law or agreed to in writing, software
459
+ distributed under the License is distributed on an "AS IS" BASIS,
460
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
461
+ See the License for the specific language governing permissions and
462
+ limitations under the License.
463
+
464
+ The above license applies to all contributions after 2017-12-01, as well as
465
+ all contributions that have been re-licensed (see AUTHORS file for the list of
466
+ contributors who have re-licensed their code).
467
+ --------------------------------------------------------------------------------
468
+ dateutil - Extensions to the standard Python datetime module.
469
+
470
+ Copyright (c) 2003-2011 - Gustavo Niemeyer <[email protected]>
471
+ Copyright (c) 2012-2014 - Tomi Pieviläinen <[email protected]>
472
+ Copyright (c) 2014-2016 - Yaron de Leeuw <[email protected]>
473
+ Copyright (c) 2015- - Paul Ganssle <[email protected]>
474
+ Copyright (c) 2015- - dateutil contributors (see AUTHORS file)
475
+
476
+ All rights reserved.
477
+
478
+ Redistribution and use in source and binary forms, with or without
479
+ modification, are permitted provided that the following conditions are met:
480
+
481
+ * Redistributions of source code must retain the above copyright notice,
482
+ this list of conditions and the following disclaimer.
483
+ * Redistributions in binary form must reproduce the above copyright notice,
484
+ this list of conditions and the following disclaimer in the documentation
485
+ and/or other materials provided with the distribution.
486
+ * Neither the name of the copyright holder nor the names of its
487
+ contributors may be used to endorse or promote products derived from
488
+ this software without specific prior written permission.
489
+
490
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
491
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
492
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
493
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
494
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
495
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
496
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
497
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
498
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
499
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
500
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
501
+
502
+ The above BSD License Applies to all code, even that also covered by Apache 2.0.
503
+
504
+
505
+ # scikit-image
506
+
507
+ Copyright (C) 2019, the scikit-image team
508
+ All rights reserved.
509
+
510
+ Redistribution and use in source and binary forms, with or without
511
+ modification, are permitted provided that the following conditions are
512
+ met:
513
+
514
+ 1. Redistributions of source code must retain the above copyright
515
+ notice, this list of conditions and the following disclaimer.
516
+ 2. Redistributions in binary form must reproduce the above copyright
517
+ notice, this list of conditions and the following disclaimer in
518
+ the documentation and/or other materials provided with the
519
+ distribution.
520
+ 3. Neither the name of skimage nor the names of its contributors may be
521
+ used to endorse or promote products derived from this software without
522
+ specific prior written permission.
523
+
524
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
525
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
526
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
527
+ DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
528
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
529
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
530
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
531
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
532
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
533
+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
534
+ POSSIBILITY OF SUCH DAMAGE.
535
+
536
+ skimage/_shared/version_requirements.py:_check_version
537
+
538
+ Copyright (c) 2013 The IPython Development Team
539
+ All rights reserved.
540
+
541
+ Redistribution and use in source and binary forms, with or without
542
+ modification, are permitted provided that the following conditions are met:
543
+
544
+ * Redistributions of source code must retain the above copyright notice, this
545
+ list of conditions and the following disclaimer.
546
+
547
+ * Redistributions in binary form must reproduce the above copyright notice,
548
+ this list of conditions and the following disclaimer in the documentation
549
+ and/or other materials provided with the distribution.
550
+
551
+ * Neither the name of the copyright holder nor the names of its
552
+ contributors may be used to endorse or promote products derived from
553
+ this software without specific prior written permission.
554
+
555
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
556
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
557
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
558
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
559
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
560
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
561
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
562
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
563
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
564
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
565
+
566
+ skimage/_shared/version_requirements.py:is_installed:
567
+
568
+ Original Copyright (C) 2009-2011 Pierre Raybaut
569
+
570
+ Permission is hereby granted, free of charge, to any person obtaining
571
+ a copy of this software and associated documentation files (the
572
+ "Software"), to deal in the Software without restriction, including
573
+ without limitation the rights to use, copy, modify, merge, publish,
574
+ distribute, sublicense, and/or sell copies of the Software, and to
575
+ permit persons to whom the Software is furnished to do so, subject to
576
+ the following conditions:
577
+
578
+ The above copyright notice and this permission notice shall be
579
+ included in all copies or substantial portions of the Software.
580
+
581
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
582
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
583
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
584
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
585
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
586
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
587
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
588
+
589
+
590
+ # scipy
591
+
592
+ Copyright (c) 2001-2002 Enthought, Inc. 2003-2022, SciPy Developers.
593
+ All rights reserved.
594
+
595
+ Redistribution and use in source and binary forms, with or without
596
+ modification, are permitted provided that the following conditions
597
+ are met:
598
+
599
+ 1. Redistributions of source code must retain the above copyright
600
+ notice, this list of conditions and the following disclaimer.
601
+
602
+ 2. Redistributions in binary form must reproduce the above
603
+ copyright notice, this list of conditions and the following
604
+ disclaimer in the documentation and/or other materials provided
605
+ with the distribution.
606
+
607
+ 3. Neither the name of the copyright holder nor the names of its
608
+ contributors may be used to endorse or promote products derived
609
+ from this software without specific prior written permission.
610
+
611
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
612
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
613
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
614
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
615
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
616
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
617
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
618
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
619
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
620
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
621
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
622
+
623
+
624
+ # six
625
+
626
+ Copyright (c) 2010-2020 Benjamin Peterson
627
+
628
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
629
+ this software and associated documentation files (the "Software"), to deal in
630
+ the Software without restriction, including without limitation the rights to
631
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
632
+ the Software, and to permit persons to whom the Software is furnished to do so,
633
+ subject to the following conditions:
634
+
635
+ The above copyright notice and this permission notice shall be included in all
636
+ copies or substantial portions of the Software.
637
+
638
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
639
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
640
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
641
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
642
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
643
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
644
+
645
+
646
+ # tifffile
647
+
648
+ BSD 3-Clause License
649
+
650
+ Copyright (c) 2008-2022, Christoph Gohlke
651
+ All rights reserved.
652
+
653
+ Redistribution and use in source and binary forms, with or without
654
+ modification, are permitted provided that the following conditions are met:
655
+
656
+ 1. Redistributions of source code must retain the above copyright notice,
657
+ this list of conditions and the following disclaimer.
658
+
659
+ 2. Redistributions in binary form must reproduce the above copyright notice,
660
+ this list of conditions and the following disclaimer in the documentation
661
+ and/or other materials provided with the distribution.
662
+
663
+ 3. Neither the name of the copyright holder nor the names of its
664
+ contributors may be used to endorse or promote products derived from
665
+ this software without specific prior written permission.
666
+
667
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
668
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
669
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
670
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
671
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
672
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
673
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
674
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
675
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
676
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
677
+ POSSIBILITY OF SUCH DAMAGE.
src/deskew_HT/README.md ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # NDLOCR用資料画像の傾き補正モジュール
2
+
3
+ 画像の傾きを補正するモジュールのリポジトリです。
4
+
5
+ 本プログラムは、国立国会図書館が株式会社モルフォAIソリューションズに委託して作成したものです。
6
+
7
+ 本プログラム内の[alyn3](alyn3)は以下のリポジトリのコードを参照し、python3化・高速化等を行い作成しました。
8
+
9
+ [kakul/Alyn/alyn](https://github.com/kakul/Alyn)
10
+
11
+ 本プログラムの新規開発部分は、国立国会図書館がCC BY 4.0ライセンスで公開するものです。詳細については
12
+ [LICENSE](./LICENSE)をご覧ください。
14
+
15
+ # 概要
16
+
17
+ 入力画像の直線要素を検出することで画像の傾きを推定し、傾きを補正した画像を出力する。
18
+
19
+ 出力画像は元画像の領域が欠損しないように拡大して保存する仕様となっており、
20
+ この際に元画像領域外の部分は指定した濃さのグレー(デフォルトは白)で塗りつぶされる。
21
+
22
+ 推定した傾きの数値情報は、オプション(後述)を指定することでテキストファイルとして出力できる。
23
+
24
+
25
+ # 使い方
26
+
27
+ 指定パスの入力画像または指定ディレクトリ内の画像の傾きを推定し補正する。
28
+
29
+ ※補正角度は±45度以内に限る。
30
+
31
+ ```
32
+ python3 run_deskew.py INPUT [-o OUTPUT] [-s SKEW_MAX] [-a ANGLE_ACC] [-m METHOD]
33
+ ```
34
+
35
+ positional arguments:
36
+ ```
37
+ input 入力画像のパス、または入力画像を格納したディレクトリのパス
38
+ ```
39
+
40
+ optional arguments:
41
+ ```
42
+ -h, --help ヘルプメッセージを表示して終了
43
+ -o OUT, --out OUT 出力ファイルのパス(INPUTが画像ファイルの時、default: out.jpg)または
44
+ 出力ディレクトリのパス(INPUTがディレクトリの時、default: out)
45
+ -l LOG, --log LOG 推定した傾きを保存するテキストファイルのパス。指定なしの場合出力されない
46
+ 処理画像一枚ごとに次の形式で指定ファイルの最終行に追加する。
47
+ output format:
48
+ Image_file_path <tab> Estimated_skew_angle[deg]
49
+ -s SKEW_MAX, --skew_max SKEW_MAX
50
+ 推定する傾きの最大角度[deg] default: 4.0[deg]
51
+ 0より大きい45以下の値を指定する。大きくするほど処理時間は増加
52
+ -a ANGLE_ACC, --angle_acc ANGLE_ACC
53
+ 傾きの探索を何度単位で行うか。default: 0.5[deg]
54
+ 0より大きいSKEW_MAX以下の値を指定する。小さくするほど処理時間は増加。
55
+ -rw ROI_WIDTH, --roi_width ROI_WIDTH
56
+ 直線検出の対象とする関心領域の画像全体に対する水平方向の割合
57
+ 0.0より大きい1.0以下の数 default: 1.0(水平方向全体)
58
+ -rh ROI_HEIGHT, --roi_height ROI_HEIGHT
59
+ 直線検出の対象とする関心領域の画像全体に対する鉛直方向の割合
60
+ 0.0より大きい1.0以下の数 default: 1.0(鉛直方向全体)
61
+ -m METHOD, --method METHOD
62
+ 画像回転時の補完手法。以下の整数値で指定する。
63
+ 0: Nearest-neighbor 1: Bi-linear(default)
64
+ 2: Bi-quadratic 3: Bi-cubic
65
+ 4: Bi-quartic 5: Bi-quintic
66
+ -g GRAY, --gray GRAY 出力画像において、元画像領域の外側を補完するグレーの濃さ
67
+ 0(黒) 以上 1.0(白)以下で指定する。default: 1.0(白)
68
+ -q QUALITY, --quality QUALITY
69
+ Jpeg画像出力時の画質。
70
+ 1が最低画質で最小ファイルサイズ、100が最高画質で最大ファイルサイズ。
71
+ [1, 100], default: 100
72
+ --short SHORT 出力画像の短辺の長さ。アスペクト比は維持したままリサイズする。
73
+ 指定しなかった場合オリジナルサイズで出力される。
74
+ -v, --version プログラムのバージョンを表示して終了
75
+ ```
src/deskew_HT/alyn3/deskew.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Deskews file after getting skew angle """
2
+ """
3
+ This code is based on the following file:
4
+ https://github.com/kakul/Alyn/blob/master/alyn/deskew.py
5
+ """
6
+ import optparse
7
+ import numpy as np
8
+ import os
9
+
10
+ from alyn3.skew_detect import SkewDetect
11
+ import cv2
12
+
13
+
14
class Deskew:
    """Rotate an image so that its dominant line structure becomes level.

    The skew angle is estimated with SkewDetect (Hough-transform based);
    the image is then rotated by that angle (plus an optional extra
    ``r_angle``) on an expanded canvas so that no pixels are cropped.

    Based on https://github.com/kakul/Alyn/blob/master/alyn/deskew.py
    """

    def __init__(self, input_file, output_file, r_angle=0,
                 skew_max=4.0, acc_deg=0.1, method=1,
                 roi_w=1.0, roi_h=1.0,
                 gray=1.0, quality=100, short=None):
        # input_file: path of the image to deskew (may be None when only
        #   deskew_on_memory() is used).
        # output_file: destination path; when falsy, deskew() does not save.
        # r_angle: extra rotation [deg] added to the estimated skew angle.
        # method: interpolation-method id from the CLI (0..5).
        #   NOTE(review): currently unused -- rotation always uses
        #   cv2.warpAffine's default interpolation.
        # gray: fill level for the canvas outside the original image,
        #   0.0 (black) .. 1.0 (white).
        # quality: JPEG quality [1, 100] used when saving .jpg/.jpeg output.
        # short: if set, resize the result so its short side equals this
        #   many pixels (aspect ratio preserved).
        self.input_file = input_file
        self.output_file = output_file
        self.r_angle = r_angle
        self.method = method
        self.gray = gray
        self.quality = quality
        self.short = short
        self.skew_obj = SkewDetect(self.input_file,
                                   skew_max=skew_max, acc_deg=acc_deg,
                                   roi_w=roi_w, roi_h=roi_h)

    def _resize_short(self, img):
        """Resize ``img`` so its short side equals ``self.short`` pixels,
        keeping the aspect ratio.

        Returns ``img`` unchanged when ``self.short`` is falsy.  Shared by
        deskew() and deskew_on_memory(), which previously duplicated this
        logic.
        """
        if not self.short:
            return img
        h = img.shape[0]
        w = img.shape[1]
        print('origin w,h: {}, {}'.format(w, h))
        if w < h:
            h = int(h*self.short/w+0.5)
            w = self.short
        else:
            w = int(w*self.short/h+0.5)
            h = self.short
        print('resized w,h: {}, {}'.format(w, h))
        return cv2.resize(img, (w, h))

    def deskew(self):
        """Deskew ``self.input_file``; save the result when an output path
        is configured.  Returns the SkewDetect result dict."""
        print('input: '+self.input_file)

        res = self.skew_obj.process_single_file()
        angle = res['Estimated Angle']
        rot_angle = angle + self.r_angle

        img = cv2.imread(self.input_file, cv2.IMREAD_COLOR)
        g = self.gray * 255
        rotated = self.rotate_expand(img, rot_angle, g)
        rotated = self._resize_short(rotated)

        if self.output_file:
            self.save_image(rotated)

        return res

    def deskew_on_memory(self, input_data):
        """Deskew an in-memory BGR image (numpy array) and return the
        rotated (and optionally resized) image without saving it."""
        res = self.skew_obj.determine_skew_on_memory(input_data)
        angle = res['Estimated Angle']
        rot_angle = angle + self.r_angle

        g = self.gray * 255
        rotated = self.rotate_expand(input_data, rot_angle, g)
        return self._resize_short(rotated)

    def save_image(self, img):
        """Write ``img`` to ``self.output_file``; JPEG output honors
        ``self.quality``."""
        path = self.skew_obj.check_path(self.output_file)
        if os.path.splitext(path)[1] in ['.jpg', '.JPG', '.jpeg', '.JPEG']:
            # BUG FIX: the quality option was accepted and stored but the
            # write call hard-coded 100; use the configured value instead.
            cv2.imwrite(path, img, [cv2.IMWRITE_JPEG_QUALITY, self.quality])
        else:
            cv2.imwrite(path, img)

    def rotate_expand(self, img, angle=0, g=255):
        """Rotate ``img`` by ``angle`` degrees about its center on a canvas
        expanded to the rotated bounding box; uncovered areas are filled
        with gray level ``g`` (0-255)."""
        h = img.shape[0]
        w = img.shape[1]
        angle_rad = angle/180.0*np.pi
        # Bounding-box size of the rotated image.
        w_rot = int(np.round(h*np.absolute(np.sin(angle_rad)) +
                             w*np.absolute(np.cos(angle_rad))))
        h_rot = int(np.round(h*np.absolute(np.cos(angle_rad)) +
                             w*np.absolute(np.sin(angle_rad))))
        size_rot = (w_rot, h_rot)
        mat = cv2.getRotationMatrix2D((w/2, h/2), angle, 1.0)
        # Shift so the rotated content is centered on the enlarged canvas.
        mat[0][2] = mat[0][2] - w/2 + w_rot/2
        mat[1][2] = mat[1][2] - h/2 + h_rot/2
        rotated = cv2.warpAffine(img, mat, size_rot, borderValue=(g, g, g))

        return rotated

    def run(self):
        """Run deskew() when an input file is configured; otherwise no-op."""
        if self.input_file:
            return self.deskew()
110
+
111
+
112
def optparse_args():
    """Build and evaluate the stand-alone deskew command line.

    Returns:
        (options, args): the tuple produced by optparse's ``parse_args()``.
    """
    parser = optparse.OptionParser()

    parser.add_option(
        '-i', '--input',
        dest='input_file', default=None,
        help='Input file name')
    parser.add_option(
        '-o', '--output',
        dest='output_file', default=None,
        help='Output file name')
    parser.add_option(
        '-r', '--rotate',
        dest='r_angle', default=0, type=int,
        help='Rotate the image to desired axis')
    parser.add_option(
        '-g', '--gray',
        dest='gray', default=1.0, type=float,
        help='Gray level outside the input image boundaries.\n'
             'between 0.0(black) and 1.0(white)\n'
             '[0.0, 1.0], default: 1.0')
    parser.add_option(
        '-q', '--quality',
        dest='quality', default=100, type=int,
        help='output jpeg image quality. i\n'
             '1 is worst quality and smallest file size,\n'
             'and 100 is best quality and largest file size.\n'
             '[1, 100], default: 100')

    return parser.parse_args()
+
152
+
153
+ if __name__ == '__main__':
154
+ options, args = optparse_args()
155
+ deskew_obj = Deskew(
156
+ options.input_file,
157
+ options.display_image,
158
+ options.output_file,
159
+ options.r_angle,
160
+ options.gray,
161
+ options.quality)
162
+
163
+ deskew_obj.run()
src/deskew_HT/alyn3/skew_detect.py ADDED
@@ -0,0 +1,388 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Calculates skew angle """
2
+ """
3
+ This code is based on the following file:
4
+ https://github.com/kakul/Alyn/blob/master/alyn/skew_detect.py
5
+ """
6
+ import os
7
+ import optparse
8
+
9
+ import numpy as np
10
+ # import matplotlib.pyplot as plt
11
+ from skimage import io
12
+ from skimage.feature import canny
13
+ from skimage.transform import hough_line, hough_line_peaks
14
+ import cv2
15
+
16
+
17
class SkewDetect:
    """Estimate the skew angle of a document image with a Hough transform.

    Pipeline (see determine_skew / determine_skew_on_memory): crop a centered
    region of interest, downscale, run Canny edge detection, run a Hough line
    transform restricted to angles within ``skew_max`` degrees of -90, 0 and
    +90 degrees, then vote among the detected line angles to pick the
    dominant skew.

    Based on https://github.com/kakul/Alyn/blob/master/alyn/skew_detect.py
    """

    # 45 degrees in radians; reference value used by calculate_deviation().
    piby4 = np.pi / 4

    def __init__(
        self,
        input_file=None,
        output_file=None,
        sigma=0.50,
        display_output=None,
        num_peaks=20,
        skew_max=4.0,
        acc_deg=0.5,
        roi_w=1.0,
        roi_h=1.0,
    ):
        # sigma: Gaussian sigma for Canny edge detection.
        self.sigma = sigma
        self.input_file = input_file
        self.output_file = output_file
        # display_output: string flag; 'yes' (case-insensitive) turns on
        # printing of the result dict (converted to bool in run()).
        self.display_output = display_output
        # num_peaks: maximum number of Hough peaks considered in the vote.
        self.num_peaks = num_peaks
        # skew_max: largest skew angle searched, in degrees.
        self.skew_max = skew_max
        # acc_deg: angular search step, in degrees (smaller = slower, finer).
        self.acc_deg = acc_deg
        # roi_w / roi_h: centered fraction (0..1] of the image width/height
        # used for line detection.
        self.roi_w = roi_w
        self.roi_h = roi_h

    def write_to_file(self, wfile, data):
        """Write each ``key: value`` pair of ``data`` to the open file
        object ``wfile``, followed by a blank line."""
        for d in data:
            wfile.write(d + ': ' + str(data[d]) + '\n')
        wfile.write('\n')

    def get_max_freq_elem(self, arr):
        """Return a list of the element(s) of ``arr`` with the highest
        frequency (several elements may tie for the maximum)."""
        max_arr = []
        freqs = {}
        for i in arr:
            if i in freqs:
                freqs[i] += 1
            else:
                freqs[i] = 1

        sorted_keys = sorted(freqs, key=freqs.get, reverse=True)
        max_freq = freqs[sorted_keys[0]]

        for k in sorted_keys:
            if freqs[k] == max_freq:
                max_arr.append(k)

        return max_arr

    def compare_sum(self, value):
        """Return True when ``value`` lies in the band [44, 46], i.e. within
        one degree of 45."""
        if value >= 44 and value <= 46:
            return True
        else:
            return False

    def display(self, data):
        """Print each ``key: value`` pair of the result dict ``data``."""
        for i in data:
            print(str(i) + ": " + str(data[i]))

    def calculate_deviation(self, angle):
        """Return the absolute deviation of ``angle`` (radians) from pi/4.

        NOTE(review): the variable name says degrees but the Hough peak
        angles passed in are radians -- the comparison against ``piby4``
        (pi/4 rad) is consistent with radian input.
        """
        angle_in_degrees = np.abs(angle)
        deviation = np.abs(SkewDetect.piby4 - angle_in_degrees)

        return deviation

    def run(self):
        """CLI entry point: normalize ``display_output`` from its string
        form to a bool, then process the configured input file."""
        if self.display_output:
            if self.display_output.lower() == 'yes':
                self.display_output = True
            else:
                self.display_output = False

        if self.input_file is None:
            print("Invalid input, nothing to process.")
        else:
            self.process_single_file()

    def check_path(self, path):
        """Return ``path`` as an absolute path (relative paths are resolved
        against the current working directory)."""
        if os.path.isabs(path):
            full_path = path
        else:
            full_path = os.getcwd() + '/' + str(path)
        return full_path

    def process_single_file(self):
        """Estimate the skew of ``self.input_file`` and, when an output file
        is configured, write the result dict to it.  Returns the dict."""
        file_path = self.check_path(self.input_file)
        res = self.determine_skew(file_path)

        if self.output_file:
            output_path = self.check_path(self.output_file)
            wfile = open(output_path, 'w')
            self.write_to_file(wfile, res)
            wfile.close()

        return res

    def determine_skew(self, img_file):
        """Estimate the skew angle of the image at path ``img_file``.

        Returns a dict with the estimated angle [deg], the angle bins, the
        average deviation and a status message.
        """
        img_ori = io.imread(img_file, as_gray=True)
        height, width = img_ori.shape
        # Crop the centered region of interest (roi_w x roi_h fractions).
        img = img_ori[int(height*(0.5-self.roi_h/2.0)):int(height*(0.5+self.roi_h/2.0)),
                      int(width * (0.5-self.roi_w/2.0)):int(width * (0.5+self.roi_w/2.0))]

        # Downscale by 4 in each dimension to speed up Canny and Hough.
        img = cv2.resize(img, (img.shape[1]//4, img.shape[0]//4))

        edges = canny(img, sigma=self.sigma)
        # Restrict the Hough angle search to +/- skew_max degrees around
        # -90, 0 and +90 degrees, stepped by acc_deg.
        range_rad = np.arange(-np.pi/2, -np.pi/2+np.deg2rad(self.skew_max),
                              step=np.deg2rad(self.acc_deg))
        range_rad = np.concatenate(
            [range_rad,
             np.arange(-np.deg2rad(self.skew_max), np.deg2rad(self.skew_max),
                       step=np.deg2rad(self.acc_deg))],
            axis=0)
        range_rad = np.concatenate(
            [range_rad,
             np.arange(np.pi/2-np.deg2rad(self.skew_max), np.pi/2,
                       step=np.deg2rad(self.acc_deg))],
            axis=0)

        h, a, d = hough_line(edges, theta=range_rad)

        # Keep only peaks at least 20% as strong as the strongest one.
        th = 0.2 * h.max()
        _, ap, _ = hough_line_peaks(
            h, a, d, threshold=th, num_peaks=self.num_peaks)

        if len(ap) == 0:
            # No usable lines found: report zero skew with a warning message.
            data = {
                "Image File": img_file,
                "Average Deviation from pi/4": 0.0,
                "Estimated Angle": 0.0,
                "Angle bins": [[], [], [], []],
                "Message": "Bad Quality"}
            return data

        absolute_deviations = [self.calculate_deviation(k) for k in ap]
        average_deviation = np.mean(np.rad2deg(absolute_deviations))
        ap_deg = [np.rad2deg(x) for x in ap]

        # Fold all peak angles into the (-45, 45] degree range.
        for i in range(len(ap_deg)):
            if ap_deg[i] >= 45.0:
                ap_deg[i] -= 90.0
            elif ap_deg[i] <= -45.0:
                ap_deg[i] += 90.0

        # Sort the angles into four quadrant bins via compare_sum().
        bin_0_45 = []
        bin_45_90 = []
        bin_0_45n = []
        bin_45_90n = []

        for ang in ap_deg:

            deviation_sum = (90 - ang + average_deviation)
            if self.compare_sum(deviation_sum):
                bin_45_90.append(ang)
                continue

            deviation_sum = (ang + average_deviation)
            if self.compare_sum(deviation_sum):
                bin_0_45.append(ang)
                continue

            deviation_sum = (-ang + average_deviation)
            if self.compare_sum(deviation_sum):
                bin_0_45n.append(ang)
                continue

            deviation_sum = (90 + ang + average_deviation)
            if self.compare_sum(deviation_sum):
                bin_45_90n.append(ang)

        angles = [bin_0_45, bin_45_90, bin_0_45n, bin_45_90n]
        lmax = 0

        # Pick the most populated bin; ``maxi`` is only assigned when
        # lmax > 0 and is only read under the matching guard below.
        for j in range(len(angles)):
            tmp_l = len(angles[j])
            if tmp_l > lmax:
                lmax = tmp_l
                maxi = j

        if lmax:
            ans_arr = self.get_max_freq_elem(angles[maxi])  # most frequent angles
            ans_res = np.mean(ans_arr)  # average, since several may tie for the maximum

        else:  # all bins are empty: fall back to voting over every peak angle
            ans_arr = self.get_max_freq_elem(ap_deg)
            ans_res = np.mean(ans_arr)

        data = {
            "Image File": img_file,
            "Average Deviation from pi/4": average_deviation,
            "Estimated Angle": ans_res,
            "Angle bins": angles,
            "Message": "Successfully detected lines"}

        if self.display_output:
            self.display(data)

        return data

    def determine_skew_on_memory(self, img_data):
        """Estimate the skew angle of the in-memory BGR image ``img_data``.

        Same algorithm as determine_skew(), but operating on an array
        instead of a file path; the result dict therefore has no
        "Image File" entry and nothing is printed.
        """
        img_ori = cv2.cvtColor(img_data, cv2.COLOR_BGR2GRAY)
        height, width = img_ori.shape
        # Crop the centered region of interest (roi_w x roi_h fractions).
        img = img_ori[int(height*(0.5-self.roi_h/2.0)):int(height*(0.5+self.roi_h/2.0)),
                      int(width * (0.5-self.roi_w/2.0)):int(width * (0.5+self.roi_w/2.0))]

        # Downscale by 4 in each dimension to speed up Canny and Hough.
        img = cv2.resize(img, (img.shape[1]//4, img.shape[0]//4))

        edges = canny(img, sigma=self.sigma)
        # Restrict the Hough angle search to +/- skew_max degrees around
        # -90, 0 and +90 degrees, stepped by acc_deg.
        range_rad = np.arange(-np.pi/2, -np.pi/2+np.deg2rad(self.skew_max),
                              step=np.deg2rad(self.acc_deg))
        range_rad = np.concatenate([range_rad,
                                    np.arange(-np.deg2rad(self.skew_max),
                                              np.deg2rad(self.skew_max),
                                              step=np.deg2rad(self.acc_deg))],
                                   axis=0)
        range_rad = np.concatenate([range_rad,
                                    np.arange(np.pi/2-np.deg2rad(self.skew_max),
                                              np.pi/2,
                                              step=np.deg2rad(self.acc_deg))],
                                   axis=0)

        h, a, d = hough_line(edges, theta=range_rad)

        # Keep only peaks at least 20% as strong as the strongest one.
        th = 0.2 * h.max()
        _, ap, _ = hough_line_peaks(
            h, a, d, threshold=th, num_peaks=self.num_peaks)

        if len(ap) == 0:
            # No usable lines found: report zero skew with a warning message.
            data = {
                "Average Deviation from pi/4": 0.0,
                "Estimated Angle": 0.0,
                "Angle bins": [[], [], [], []],
                "Message": "Bad Quality"}
            return data

        absolute_deviations = [self.calculate_deviation(k) for k in ap]
        average_deviation = np.mean(np.rad2deg(absolute_deviations))
        ap_deg = [np.rad2deg(x) for x in ap]

        # Fold all peak angles into the (-45, 45] degree range.
        for i in range(len(ap_deg)):
            if ap_deg[i] >= 45.0:
                ap_deg[i] -= 90.0
            elif ap_deg[i] <= -45.0:
                ap_deg[i] += 90.0

        # Sort the angles into four quadrant bins via compare_sum().
        bin_0_45 = []
        bin_45_90 = []
        bin_0_45n = []
        bin_45_90n = []

        for ang in ap_deg:

            deviation_sum = (90 - ang + average_deviation)
            if self.compare_sum(deviation_sum):
                bin_45_90.append(ang)
                continue

            deviation_sum = (ang + average_deviation)
            if self.compare_sum(deviation_sum):
                bin_0_45.append(ang)
                continue

            deviation_sum = (-ang + average_deviation)
            if self.compare_sum(deviation_sum):
                bin_0_45n.append(ang)
                continue

            deviation_sum = (90 + ang + average_deviation)
            if self.compare_sum(deviation_sum):
                bin_45_90n.append(ang)

        angles = [bin_0_45, bin_45_90, bin_0_45n, bin_45_90n]
        lmax = 0

        # Pick the most populated bin; ``maxi`` is only assigned when
        # lmax > 0 and is only read under the matching guard below.
        for j in range(len(angles)):
            tmp_l = len(angles[j])
            if tmp_l > lmax:
                lmax = tmp_l
                maxi = j

        if lmax:
            ans_arr = self.get_max_freq_elem(angles[maxi])  # most frequent angles
            ans_res = np.mean(ans_arr)  # average, since several may tie for the maximum

        else:  # all bins are empty: fall back to voting over every peak angle
            ans_arr = self.get_max_freq_elem(ap_deg)
            ans_res = np.mean(ans_arr)

        data = {
            "Average Deviation from pi/4": average_deviation,
            "Estimated Angle": ans_res,
            "Angle bins": angles,
            "Message": "Successfully detected lines"}

        return data

    def determine_line(self, img_file):
        """Run the same ROI/Canny/Hough pass as determine_skew() on the
        image at ``img_file`` -- but without the 1/4 downscale -- and return
        the raw peak data ``(accumulator values, angles, distances)``."""
        img_ori = io.imread(img_file, as_gray=True)
        height, width = img_ori.shape
        # Crop the centered region of interest (roi_w x roi_h fractions).
        img = img_ori[int(height*(0.5-self.roi_h/2.0)):int(height*(0.5+self.roi_h/2.0)),
                      int(width * (0.5-self.roi_w/2.0)):int(width * (0.5+self.roi_w/2.0))]
        edges = canny(img, sigma=self.sigma)
        # Restrict the Hough angle search to +/- skew_max degrees around
        # -90, 0 and +90 degrees, stepped by acc_deg.
        range_rad = np.arange(-np.pi/2, -np.pi/2+np.deg2rad(self.skew_max),
                              step=np.deg2rad(self.acc_deg))
        range_rad = np.concatenate([range_rad,
                                    np.arange(-np.deg2rad(self.skew_max),
                                              np.deg2rad(self.skew_max),
                                              step=np.deg2rad(self.acc_deg))],
                                   axis=0)
        range_rad = np.concatenate([range_rad,
                                    np.arange(np.pi/2-np.deg2rad(self.skew_max), np.pi/2,
                                              step=np.deg2rad(self.acc_deg))],
                                   axis=0)

        h, a, d = hough_line(edges, theta=range_rad)

        # Keep only peaks at least 20% as strong as the strongest one.
        th = 0.2 * h.max()
        ac, ap, d = hough_line_peaks(
            h, a, d, threshold=th, num_peaks=self.num_peaks)

        return ac, ap, d
348
+
349
+
350
if __name__ == '__main__':

    parser = optparse.OptionParser()

    parser.add_option(
        '-d', '--display',
        default=None,
        dest='display_output',
        help='Display logs')
    parser.add_option(
        '-i', '--input',
        default=None,
        dest='input_file',
        help='Input file name')
    parser.add_option(
        '-o', '--output',
        default=None,
        dest='output_file',
        help='Output file name')
    parser.add_option(
        '-p', '--plot',
        default=None,
        dest='plot_hough',
        help='Plot the Hough Transform')
    parser.add_option(
        '-s', '--sigma',
        default=3.0,
        dest='sigma',
        help='Sigma for Canny Edge Detection',
        type=float)
    options, args = parser.parse_args()
    # BUG FIX: the previous call referenced ``options.num_peaks``, which
    # this parser never defines (AttributeError at startup), and passed
    # ``options.plot_hough`` into the ``num_peaks`` parameter slot.
    # Keyword arguments keep each option in its intended parameter;
    # ``num_peaks`` falls back to the constructor default.
    skew_obj = SkewDetect(
        input_file=options.input_file,
        output_file=options.output_file,
        sigma=options.sigma,
        display_output=options.display_output)
    skew_obj.run()
src/deskew_HT/run_deskew.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # !/usr/bin/env python3
2
+
3
+ # Copyright (c) 2022, National Diet Library, Japan
4
+ #
5
+ # This software is released under the CC BY 4.0.
6
+ # https://creativecommons.org/licenses/by/4.0/
7
+
8
+ import os
9
+ import cv2
10
+ import argparse
11
+ import numpy as np
12
+ from alyn3.deskew import Deskew
13
+ from alyn3.skew_detect import SkewDetect
14
+
15
+ os.environ["OPENCV_IO_ENABLE_JASPER"] = "true"
16
+
17
+
18
def deskew_image(input, output, r_angle=0,
                 skew_max=4.0, acc_deg=0.5, roi_w=1.0, roi_h=1.0,
                 method=1, gray=1.0, quality=100, short=None,
                 log=None):
    """Deskew a single image file and optionally append the angle to a log.

    Runs the Hough-transform based Deskew pipeline on ``input`` and writes
    the corrected image to ``output``. When ``log`` is given, a line of the
    form ``<image file>\\t<negated estimated angle>`` is appended to it.
    """
    print('process: ' + os.path.basename(input))
    deskewer = Deskew(input, output,
                      r_angle=r_angle,
                      skew_max=skew_max,
                      acc_deg=acc_deg,
                      method=method,
                      gray=gray,
                      quality=quality,
                      short=short,
                      roi_w=roi_w,
                      roi_h=roi_h)
    result = deskewer.run()

    if log:
        # Append-mode so repeated invocations accumulate into one log file.
        with open(log, mode='a') as f:
            f.write('{}\t{:.6f}\n'.format(
                result['Image File'], (-result['Estimated Angle'])))
42
+
43
+
44
def deskew_dir(input_dir_path, output_dir_path, r_angle=0,
               skew_max=4.0, acc_deg=0.5, roi_w=1.0, roi_h=1.0,
               method=1, gray=1.0, quality=100, short=None,
               log=None):
    """Deskew every regular file directly under ``input_dir_path``.

    Each file is written under ``output_dir_path`` with the same file name;
    subdirectories are skipped (no recursion). Per-image work, including the
    progress print and optional log append, is delegated to
    :func:`deskew_image` — previously this body was a verbatim copy of it.
    """
    for image_name in os.listdir(input_dir_path):
        input_path = os.path.join(input_dir_path, image_name)
        if os.path.isdir(input_path):
            continue
        output_path = os.path.join(output_dir_path, image_name)
        deskew_image(input_path, output_path,
                     r_angle=r_angle,
                     skew_max=skew_max,
                     acc_deg=acc_deg,
                     roi_w=roi_w,
                     roi_h=roi_h,
                     method=method,
                     gray=gray,
                     quality=quality,
                     short=short,
                     log=log)
73
+
74
+
75
def add_detected_lines(input_path, output_path,
                       skew_max=4.0, acc_deg=0.5,
                       roi_w=1.0, roi_h=1.0,
                       bgr=None):
    """Overlay the Hough-detected lines on the input image for debugging.

    Runs SkewDetect's line detection on ``input_path`` and draws each
    detected line onto the image, saving the result to ``output_path``.
    Stronger peaks are drawn closer to the base color ``bgr``
    (default pure red, [0, 0, 255] in BGR order); weaker peaks fade
    toward white.
    """
    # BUGFIX(best practice): a mutable list default ([0, 0, 255]) is shared
    # across calls; use a None sentinel instead. Behavior is unchanged.
    if bgr is None:
        bgr = [0, 0, 255]
    line_len = 4000  # long enough to span any page when drawing

    print("Add the detected lines to " + os.path.basename(input_path))
    sd = SkewDetect(input_path, skew_max=skew_max, acc_deg=acc_deg,
                    roi_w=roi_w, roi_h=roi_h)
    acc, ang_rad, distance = sd.determine_line(input_path)
    img = cv2.imread(input_path)
    if len(acc) == 0:
        print('Image file:{} has no lines detected'.format(input_path))
    else:
        max_val = max(acc)
        # Iterate in reverse so the strongest peaks are drawn last (on top).
        for val, theta, rho in zip(acc[::-1], ang_rad[::-1], distance[::-1]):
            a = np.cos(theta)
            b = np.sin(theta)
            # Shift (rho, theta) coordinates back from the ROI crop into
            # full-image coordinates.
            x0 = a * rho + int(img.shape[1] * (0.5-roi_w/2.0))
            y0 = b * rho + int(img.shape[0] * (0.5-roi_h/2.0))
            x1 = int(x0 + line_len*(-b))
            y1 = int(y0 + line_len*(a))
            x2 = int(x0 - line_len*(-b))
            y2 = int(y0 - line_len*(a))
            # Fade the blue/green channels by relative peak strength.
            tmp_bgr = bgr.copy()
            tmp_bgr[0] = 255.0 * (1.0 - val / max_val)
            tmp_bgr[1] = tmp_bgr[0]
            cv2.line(img, (x1, y1), (x2, y2), tmp_bgr, 2)

    cv2.imwrite(output_path, img)
106
+
107
+
108
def add_detected_lines_dir(input_dir_path, output_dir_path,
                           skew_max=4.0, acc_deg=0.1,
                           roi_w=1.0, roi_h=1.0,
                           bgr=None):
    """Add the lines detected by Hough Transform to every image in a directory.

    (Hough変換で検知したLineを元画像に書き加える。)
    Non-recursive: subdirectories of ``input_dir_path`` are skipped.
    ``bgr`` is the base line color in BGR order (default [0, 0, 255], red).
    """
    # BUGFIX(best practice): replace the shared mutable default with a
    # None sentinel.
    if bgr is None:
        bgr = [0, 0, 255]
    for image_name in os.listdir(input_dir_path):
        input_path = os.path.join(input_dir_path, image_name)
        if os.path.isdir(input_path):
            continue
        output_path = os.path.join(output_dir_path, image_name)
        # BUGFIX: the caller-supplied bgr was previously ignored — the
        # inner call hard-coded bgr=[0, 0, 255]. Forward the parameter.
        add_detected_lines(input_path, output_path,
                           skew_max=skew_max, acc_deg=acc_deg,
                           roi_w=roi_w, roi_h=roi_h,
                           bgr=bgr)
125
+
126
+
127
def parse_args():
    """Build and parse the command-line interface of the deskew tool.

    Returns the parsed ``argparse.Namespace`` with the input path, output
    path, ROI/skew parameters, interpolation method, output options, and
    debug flag.
    """
    usage = 'python3 {} INPUT [-o OUTPUT] [-s SKEW_MAX] [-a ANGLE_ACC] [-m METHOD]'.format(
        __file__)
    parser = argparse.ArgumentParser(
        usage=usage,
        description='Deskew image(when INPUT is an image) or images in INPUT(when INPUT is a directory).',
        formatter_class=argparse.RawTextHelpFormatter)
    # Positional input: a single image file or a directory of images.
    parser.add_argument('input', type=str,
                        help='input image file or directory path')
    parser.add_argument('-o', '--out', type=str, default='out.jpg',
                        help='output file or directory path')
    parser.add_argument('-l', '--log', default=None,
                        help='estimated skew log file path\n'
                             'output format:\n'
                             'Image_file_path <tab> Estimated_skew_angle[deg]')
    # Skew search window and angular resolution.
    parser.add_argument('-s', '--skew_max', type=float, default=4.0,
                        help='maximum expected skew angle[deg], default: 4.0')
    parser.add_argument('-a', '--angle_acc', type=float, default=0.5,
                        help='estimated skew angle accuracy[deg], default: 0.5')
    # Central region-of-interest cropping ratios.
    parser.add_argument('-rw', '--roi_width', type=float, default=1.0,
                        help='horizontal cropping ratio of the region of interest \n'
                             'to the whole image. (0.0, 1.0] default: 1.0(whole image)')
    parser.add_argument('-rh', '--roi_height', type=float, default=1.0,
                        help='vertical cropping ratio of the region of interest \n'
                             'to the whole image. (0.0, 1.0] default: 1.0(whole image)')
    parser.add_argument('-m', '--method', type=int, default=1,
                        help='interpolation method.\n'
                             '0: Nearest-neighbor 1: Bi-linear(default)\n'
                             '2: Bi-quadratic 3: Bi-cubic\n'
                             '4: Bi-quartic 5: Bi-quintic\n')
    parser.add_argument('-g', '--gray', dest='gray', type=float, default=1.0,
                        help='gray value outside the input image boundaries.\n'
                             '[0.0(black), 1.0(white)], default: 1.0')
    parser.add_argument('-q', '--quality', dest='quality', type=int, default=100,
                        help='output jpeg image quality.\n'
                             '1 is worst quality and smallest file size,\n'
                             'and 100 is best quality and largest file size.\n'
                             '[1, 100], default: 100')
    parser.add_argument('--short', dest='short', type=int, default=None,
                        help='the length of the short side of the output image.')
    parser.add_argument('-v', '--version', action='version',
                        version='deskew version 1.0.0')
    parser.add_argument('--debug', action='store_true')

    return parser.parse_args()
219
+
220
+
221
if __name__ == '__main__':
    # Entry point: deskew either a whole directory or a single image,
    # optionally dumping debug images with the detected Hough lines.
    args = parse_args()

    input = args.input
    output = args.out
    print('input directory/image: ' + input)

    # Keyword arguments shared by the directory and single-image paths.
    deskew_kwargs = dict(
        r_angle=0,
        skew_max=args.skew_max,
        acc_deg=args.angle_acc,
        roi_w=args.roi_width,
        roi_h=args.roi_height,
        method=args.method,
        gray=args.gray,
        quality=args.quality,
        short=args.short,
        log=args.log)
    debug_kwargs = dict(
        roi_w=args.roi_width,
        roi_h=args.roi_height,
        skew_max=args.skew_max,
        acc_deg=args.angle_acc)

    if os.path.isdir(input):  # directory
        # Strip a default '.jpg' suffix so the output becomes a directory
        # name (e.g. 'out.jpg' -> 'out').
        if output[-4:] == '.jpg':
            output = output[:-4]
        print('output: ' + output)
        os.makedirs(output, exist_ok=True)
        deskew_dir(input, output, **deskew_kwargs)
        if args.debug:
            print('[Debug] Dump input images with detected lines')
            os.makedirs(output + '_withL', exist_ok=True)
            add_detected_lines_dir(input, output + '_withL', **debug_kwargs)
    else:  # single image
        print('output: ' + output)
        deskew_image(input, output, **deskew_kwargs)
        if args.debug:
            print('[Debug] Dump input image with detected lines')
            add_detected_lines(input, output + '_withL.jpg', **debug_kwargs)
src/deskew_HT/setup.cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [flake8]
2
+ extend-ignore = E402, E501, E303
3
+ exclude =
src/ndl_layout/.gitmodules ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [submodule "mmdetection"]
2
+ path = mmdetection
3
+ url = https://github.com/ndl-lab/mmdetection.git
src/ndl_layout/LICENSE ADDED
@@ -0,0 +1,395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) 2022, National Diet Library, Japan.
2
+
3
+ Attribution 4.0 International
4
+
5
+ =======================================================================
6
+
7
+ Creative Commons Corporation ("Creative Commons") is not a law firm and
8
+ does not provide legal services or legal advice. Distribution of
9
+ Creative Commons public licenses does not create a lawyer-client or
10
+ other relationship. Creative Commons makes its licenses and related
11
+ information available on an "as-is" basis. Creative Commons gives no
12
+ warranties regarding its licenses, any material licensed under their
13
+ terms and conditions, or any related information. Creative Commons
14
+ disclaims all liability for damages resulting from their use to the
15
+ fullest extent possible.
16
+
17
+ Using Creative Commons Public Licenses
18
+
19
+ Creative Commons public licenses provide a standard set of terms and
20
+ conditions that creators and other rights holders may use to share
21
+ original works of authorship and other material subject to copyright
22
+ and certain other rights specified in the public license below. The
23
+ following considerations are for informational purposes only, are not
24
+ exhaustive, and do not form part of our licenses.
25
+
26
+ Considerations for licensors: Our public licenses are
27
+ intended for use by those authorized to give the public
28
+ permission to use material in ways otherwise restricted by
29
+ copyright and certain other rights. Our licenses are
30
+ irrevocable. Licensors should read and understand the terms
31
+ and conditions of the license they choose before applying it.
32
+ Licensors should also secure all rights necessary before
33
+ applying our licenses so that the public can reuse the
34
+ material as expected. Licensors should clearly mark any
35
+ material not subject to the license. This includes other CC-
36
+ licensed material, or material used under an exception or
37
+ limitation to copyright. More considerations for licensors:
38
+ wiki.creativecommons.org/Considerations_for_licensors
39
+
40
+ Considerations for the public: By using one of our public
41
+ licenses, a licensor grants the public permission to use the
42
+ licensed material under specified terms and conditions. If
43
+ the licensor's permission is not necessary for any reason--for
44
+ example, because of any applicable exception or limitation to
45
+ copyright--then that use is not regulated by the license. Our
46
+ licenses grant only permissions under copyright and certain
47
+ other rights that a licensor has authority to grant. Use of
48
+ the licensed material may still be restricted for other
49
+ reasons, including because others have copyright or other
50
+ rights in the material. A licensor may make special requests,
51
+ such as asking that all changes be marked or described.
52
+ Although not required by our licenses, you are encouraged to
53
+ respect those requests where reasonable. More_considerations
54
+ for the public:
55
+ wiki.creativecommons.org/Considerations_for_licensees
56
+
57
+ =======================================================================
58
+
59
+ Creative Commons Attribution 4.0 International Public License
60
+
61
+ By exercising the Licensed Rights (defined below), You accept and agree
62
+ to be bound by the terms and conditions of this Creative Commons
63
+ Attribution 4.0 International Public License ("Public License"). To the
64
+ extent this Public License may be interpreted as a contract, You are
65
+ granted the Licensed Rights in consideration of Your acceptance of
66
+ these terms and conditions, and the Licensor grants You such rights in
67
+ consideration of benefits the Licensor receives from making the
68
+ Licensed Material available under these terms and conditions.
69
+
70
+
71
+ Section 1 -- Definitions.
72
+
73
+ a. Adapted Material means material subject to Copyright and Similar
74
+ Rights that is derived from or based upon the Licensed Material
75
+ and in which the Licensed Material is translated, altered,
76
+ arranged, transformed, or otherwise modified in a manner requiring
77
+ permission under the Copyright and Similar Rights held by the
78
+ Licensor. For purposes of this Public License, where the Licensed
79
+ Material is a musical work, performance, or sound recording,
80
+ Adapted Material is always produced where the Licensed Material is
81
+ synched in timed relation with a moving image.
82
+
83
+ b. Adapter's License means the license You apply to Your Copyright
84
+ and Similar Rights in Your contributions to Adapted Material in
85
+ accordance with the terms and conditions of this Public License.
86
+
87
+ c. Copyright and Similar Rights means copyright and/or similar rights
88
+ closely related to copyright including, without limitation,
89
+ performance, broadcast, sound recording, and Sui Generis Database
90
+ Rights, without regard to how the rights are labeled or
91
+ categorized. For purposes of this Public License, the rights
92
+ specified in Section 2(b)(1)-(2) are not Copyright and Similar
93
+ Rights.
94
+
95
+ d. Effective Technological Measures means those measures that, in the
96
+ absence of proper authority, may not be circumvented under laws
97
+ fulfilling obligations under Article 11 of the WIPO Copyright
98
+ Treaty adopted on December 20, 1996, and/or similar international
99
+ agreements.
100
+
101
+ e. Exceptions and Limitations means fair use, fair dealing, and/or
102
+ any other exception or limitation to Copyright and Similar Rights
103
+ that applies to Your use of the Licensed Material.
104
+
105
+ f. Licensed Material means the artistic or literary work, database,
106
+ or other material to which the Licensor applied this Public
107
+ License.
108
+
109
+ g. Licensed Rights means the rights granted to You subject to the
110
+ terms and conditions of this Public License, which are limited to
111
+ all Copyright and Similar Rights that apply to Your use of the
112
+ Licensed Material and that the Licensor has authority to license.
113
+
114
+ h. Licensor means the individual(s) or entity(ies) granting rights
115
+ under this Public License.
116
+
117
+ i. Share means to provide material to the public by any means or
118
+ process that requires permission under the Licensed Rights, such
119
+ as reproduction, public display, public performance, distribution,
120
+ dissemination, communication, or importation, and to make material
121
+ available to the public including in ways that members of the
122
+ public may access the material from a place and at a time
123
+ individually chosen by them.
124
+
125
+ j. Sui Generis Database Rights means rights other than copyright
126
+ resulting from Directive 96/9/EC of the European Parliament and of
127
+ the Council of 11 March 1996 on the legal protection of databases,
128
+ as amended and/or succeeded, as well as other essentially
129
+ equivalent rights anywhere in the world.
130
+
131
+ k. You means the individual or entity exercising the Licensed Rights
132
+ under this Public License. Your has a corresponding meaning.
133
+
134
+
135
+ Section 2 -- Scope.
136
+
137
+ a. License grant.
138
+
139
+ 1. Subject to the terms and conditions of this Public License,
140
+ the Licensor hereby grants You a worldwide, royalty-free,
141
+ non-sublicensable, non-exclusive, irrevocable license to
142
+ exercise the Licensed Rights in the Licensed Material to:
143
+
144
+ a. reproduce and Share the Licensed Material, in whole or
145
+ in part; and
146
+
147
+ b. produce, reproduce, and Share Adapted Material.
148
+
149
+ 2. Exceptions and Limitations. For the avoidance of doubt, where
150
+ Exceptions and Limitations apply to Your use, this Public
151
+ License does not apply, and You do not need to comply with
152
+ its terms and conditions.
153
+
154
+ 3. Term. The term of this Public License is specified in Section
155
+ 6(a).
156
+
157
+ 4. Media and formats; technical modifications allowed. The
158
+ Licensor authorizes You to exercise the Licensed Rights in
159
+ all media and formats whether now known or hereafter created,
160
+ and to make technical modifications necessary to do so. The
161
+ Licensor waives and/or agrees not to assert any right or
162
+ authority to forbid You from making technical modifications
163
+ necessary to exercise the Licensed Rights, including
164
+ technical modifications necessary to circumvent Effective
165
+ Technological Measures. For purposes of this Public License,
166
+ simply making modifications authorized by this Section 2(a)
167
+ (4) never produces Adapted Material.
168
+
169
+ 5. Downstream recipients.
170
+
171
+ a. Offer from the Licensor -- Licensed Material. Every
172
+ recipient of the Licensed Material automatically
173
+ receives an offer from the Licensor to exercise the
174
+ Licensed Rights under the terms and conditions of this
175
+ Public License.
176
+
177
+ b. No downstream restrictions. You may not offer or impose
178
+ any additional or different terms or conditions on, or
179
+ apply any Effective Technological Measures to, the
180
+ Licensed Material if doing so restricts exercise of the
181
+ Licensed Rights by any recipient of the Licensed
182
+ Material.
183
+
184
+ 6. No endorsement. Nothing in this Public License constitutes or
185
+ may be construed as permission to assert or imply that You
186
+ are, or that Your use of the Licensed Material is, connected
187
+ with, or sponsored, endorsed, or granted official status by,
188
+ the Licensor or others designated to receive attribution as
189
+ provided in Section 3(a)(1)(A)(i).
190
+
191
+ b. Other rights.
192
+
193
+ 1. Moral rights, such as the right of integrity, are not
194
+ licensed under this Public License, nor are publicity,
195
+ privacy, and/or other similar personality rights; however, to
196
+ the extent possible, the Licensor waives and/or agrees not to
197
+ assert any such rights held by the Licensor to the limited
198
+ extent necessary to allow You to exercise the Licensed
199
+ Rights, but not otherwise.
200
+
201
+ 2. Patent and trademark rights are not licensed under this
202
+ Public License.
203
+
204
+ 3. To the extent possible, the Licensor waives any right to
205
+ collect royalties from You for the exercise of the Licensed
206
+ Rights, whether directly or through a collecting society
207
+ under any voluntary or waivable statutory or compulsory
208
+ licensing scheme. In all other cases the Licensor expressly
209
+ reserves any right to collect such royalties.
210
+
211
+
212
+ Section 3 -- License Conditions.
213
+
214
+ Your exercise of the Licensed Rights is expressly made subject to the
215
+ following conditions.
216
+
217
+ a. Attribution.
218
+
219
+ 1. If You Share the Licensed Material (including in modified
220
+ form), You must:
221
+
222
+ a. retain the following if it is supplied by the Licensor
223
+ with the Licensed Material:
224
+
225
+ i. identification of the creator(s) of the Licensed
226
+ Material and any others designated to receive
227
+ attribution, in any reasonable manner requested by
228
+ the Licensor (including by pseudonym if
229
+ designated);
230
+
231
+ ii. a copyright notice;
232
+
233
+ iii. a notice that refers to this Public License;
234
+
235
+ iv. a notice that refers to the disclaimer of
236
+ warranties;
237
+
238
+ v. a URI or hyperlink to the Licensed Material to the
239
+ extent reasonably practicable;
240
+
241
+ b. indicate if You modified the Licensed Material and
242
+ retain an indication of any previous modifications; and
243
+
244
+ c. indicate the Licensed Material is licensed under this
245
+ Public License, and include the text of, or the URI or
246
+ hyperlink to, this Public License.
247
+
248
+ 2. You may satisfy the conditions in Section 3(a)(1) in any
249
+ reasonable manner based on the medium, means, and context in
250
+ which You Share the Licensed Material. For example, it may be
251
+ reasonable to satisfy the conditions by providing a URI or
252
+ hyperlink to a resource that includes the required
253
+ information.
254
+
255
+ 3. If requested by the Licensor, You must remove any of the
256
+ information required by Section 3(a)(1)(A) to the extent
257
+ reasonably practicable.
258
+
259
+ 4. If You Share Adapted Material You produce, the Adapter's
260
+ License You apply must not prevent recipients of the Adapted
261
+ Material from complying with this Public License.
262
+
263
+
264
+ Section 4 -- Sui Generis Database Rights.
265
+
266
+ Where the Licensed Rights include Sui Generis Database Rights that
267
+ apply to Your use of the Licensed Material:
268
+
269
+ a. for the avoidance of doubt, Section 2(a)(1) grants You the right
270
+ to extract, reuse, reproduce, and Share all or a substantial
271
+ portion of the contents of the database;
272
+
273
+ b. if You include all or a substantial portion of the database
274
+ contents in a database in which You have Sui Generis Database
275
+ Rights, then the database in which You have Sui Generis Database
276
+ Rights (but not its individual contents) is Adapted Material; and
277
+
278
+ c. You must comply with the conditions in Section 3(a) if You Share
279
+ all or a substantial portion of the contents of the database.
280
+
281
+ For the avoidance of doubt, this Section 4 supplements and does not
282
+ replace Your obligations under this Public License where the Licensed
283
+ Rights include other Copyright and Similar Rights.
284
+
285
+
286
+ Section 5 -- Disclaimer of Warranties and Limitation of Liability.
287
+
288
+ a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
289
+ EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
290
+ AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
291
+ ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
292
+ IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
293
+ WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
294
+ PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
295
+ ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
296
+ KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
297
+ ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
298
+
299
+ b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
300
+ TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
301
+ NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
302
+ INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
303
+ COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
304
+ USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
305
+ ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
306
+ DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
307
+ IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
308
+
309
+ c. The disclaimer of warranties and limitation of liability provided
310
+ above shall be interpreted in a manner that, to the extent
311
+ possible, most closely approximates an absolute disclaimer and
312
+ waiver of all liability.
313
+
314
+
315
+ Section 6 -- Term and Termination.
316
+
317
+ a. This Public License applies for the term of the Copyright and
318
+ Similar Rights licensed here. However, if You fail to comply with
319
+ this Public License, then Your rights under this Public License
320
+ terminate automatically.
321
+
322
+ b. Where Your right to use the Licensed Material has terminated under
323
+ Section 6(a), it reinstates:
324
+
325
+ 1. automatically as of the date the violation is cured, provided
326
+ it is cured within 30 days of Your discovery of the
327
+ violation; or
328
+
329
+ 2. upon express reinstatement by the Licensor.
330
+
331
+ For the avoidance of doubt, this Section 6(b) does not affect any
332
+ right the Licensor may have to seek remedies for Your violations
333
+ of this Public License.
334
+
335
+ c. For the avoidance of doubt, the Licensor may also offer the
336
+ Licensed Material under separate terms or conditions or stop
337
+ distributing the Licensed Material at any time; however, doing so
338
+ will not terminate this Public License.
339
+
340
+ d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
341
+ License.
342
+
343
+
344
+ Section 7 -- Other Terms and Conditions.
345
+
346
+ a. The Licensor shall not be bound by any additional or different
347
+ terms or conditions communicated by You unless expressly agreed.
348
+
349
+ b. Any arrangements, understandings, or agreements regarding the
350
+ Licensed Material not stated herein are separate from and
351
+ independent of the terms and conditions of this Public License.
352
+
353
+
354
+ Section 8 -- Interpretation.
355
+
356
+ a. For the avoidance of doubt, this Public License does not, and
357
+ shall not be interpreted to, reduce, limit, restrict, or impose
358
+ conditions on any use of the Licensed Material that could lawfully
359
+ be made without permission under this Public License.
360
+
361
+ b. To the extent possible, if any provision of this Public License is
362
+ deemed unenforceable, it shall be automatically reformed to the
363
+ minimum extent necessary to make it enforceable. If the provision
364
+ cannot be reformed, it shall be severed from this Public License
365
+ without affecting the enforceability of the remaining terms and
366
+ conditions.
367
+
368
+ c. No term or condition of this Public License will be waived and no
369
+ failure to comply consented to unless expressly agreed to by the
370
+ Licensor.
371
+
372
+ d. Nothing in this Public License constitutes or may be interpreted
373
+ as a limitation upon, or waiver of, any privileges and immunities
374
+ that apply to the Licensor or You, including from the legal
375
+ processes of any jurisdiction or authority.
376
+
377
+
378
+ =======================================================================
379
+
380
+ Creative Commons is not a party to its public licenses.
381
+ Notwithstanding, Creative Commons may elect to apply one of its public
382
+ licenses to material it publishes and in those instances will be
383
+ considered the "Licensor." Except for the limited purpose of indicating
384
+ that material is shared under a Creative Commons public license or as
385
+ otherwise permitted by the Creative Commons policies published at
386
+ creativecommons.org/policies, Creative Commons does not authorize the
387
+ use of the trademark "Creative Commons" or any other trademark or logo
388
+ of Creative Commons without its prior written consent including,
389
+ without limitation, in connection with any unauthorized modifications
390
+ to any of its public licenses or any other arrangements,
391
+ understandings, or agreements concerning use of licensed material. For
392
+ the avoidance of doubt, this paragraph does not form part of the public
393
+ licenses.
394
+
395
+ Creative Commons may be contacted at creativecommons.org.
src/ndl_layout/LICENSE_DEPENDENCIES ADDED
The diff for this file is too large to render. See raw diff
 
src/ndl_layout/README.md ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # NDLOCR用レイアウト認識モジュール
2
+
3
+ レイアウト要素を抽出するためのモジュールのリポジトリです。
4
+
5
+ 本プログラムは、国立国会図書館が株式会社モルフォAIソリューションズに委託して作成したものです。
6
+
7
+ 本プログラムは、国立国会図書館がCC BY 4.0ライセンスで公開するものです。詳細については
8
+ [LICENSE](./LICENSE)をご覧ください。
10
+
11
+ # 環境構築
12
+
13
+ python3.7かつ、cuda 11.1をインストール済みの環境の場合
14
+ ndl_layoutディレクトリ直下で以下のコマンドを実行する。
15
+ ```
16
+ pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
17
+ wget https://lab.ndl.go.jp/dataset/ndlocr/ndl_layout/ndl_layout_config.py -P ./models
18
+ wget https://lab.ndl.go.jp/dataset/ndlocr/ndl_layout/epoch_140_all_eql_bt.pth -P ./models
19
+ ```
20
+
21
+ くわえて、元リポジトリ(https://github.com/open-mmlab/mmdetection)
22
+ をカスタマイズした[mmdetection](https://github.com/ndl-lab/mmdetection)
23
+ に依存しているため、下記のようにリポジトリの追加とインストールを行う。
24
+
25
+ ```bash
26
+ git clone https://github.com/ndl-lab/mmdetection
27
+ cd mmdetection
28
+ python setup.py bdist_wheel
29
+ pip install dist/*.whl
30
+ ```
31
+
32
+
33
+ # 使い方
34
+ ※スクリプトファイルはndl_layoutディレクトリ直下で実行すること
35
+
36
+ ## tools/process.py : 推論用モジュール + CLI
37
+
38
+ 学習結果を使って推論を実行する。学習済みのモデルは`ndl_layout/models` 以下にあるものとする。
39
+
40
+ 画像リストを引数で指定するには img_paths オプションを、画像リストをファイルから読み込む場合には list_path オプションを指定する。
41
+
42
+ output_path で出力 XML ファイルの格納先を変更することができる。(デフォルトは layout_prediction.xml)
43
+
44
+ use_show オプションを追加すると処理結果をGUI上で確認することができる。
45
+
46
+ img_pathsオプションで画像リストを指定する例
47
+ ```bash
48
+ python -m tools.process --img_paths image/dir/path/*.jpg --use_show --output_path layout_prediction.xml --config ./models/ndl_layout_config.py --checkpoint ./models/epoch_140_all_eql_bt.pth
49
+ ```
50
+
51
+ list_path オプションで画像リストを指定する例
52
+ ```bash
53
+ python -m tools.process --list_path image_list_file.list --use_show --output_path layout_prediction.xml --config ./models/ndl_layout_config.py --checkpoint ./models/epoch_140_all_eql_bt.pth
54
+ ```
55
+
56
+ ## tools/preprocess.py : 学習画像の追加&変換
57
+
58
+ 画像のファイル名の変換、縮小を行い、MS COCO 形式に整形。
59
+
60
+ ```bash
61
+ python -m tools.preprocess images_data_dir output_dir --use_link
62
+ ```
63
+
64
+ 出力解像度を下げる必要がない場合には、`--use_link`オプションを指定する。
65
+
66
+ 高解像の場合など、解像度を下げたい場合には `--use_shrink` を使うと画像サイズとアノテーションを半分のサイズに縮小して出力する。
67
+
68
+ 本リポジトリの追加学習に使用可能なファイル(アノテーション情報の含まれるjson及び、前処理後の画像)は `output_dir` で指定したディレクトリに出力される。
69
+
70
+
71
+ ## 学習時の手順
72
+ 1) ndl_layout/tools/preprocess.pyを使用し、NDLOCRXMLDataset形式の画像とアノテーションファイル(xml)をCOCO形式に変換し保存する。
73
+ ```
74
+ cd mmdetection
75
+ python -m tools.preprocess images_data_dir output_dir --use_link
76
+ ```
77
+ output_dir内に画像のシンボリックリンク(またはコピー)とCOCO形式のアノテーションファイル(.json)を保存する。
78
+
79
+ アノテーションファイルは、data.json(全データのアノテーション)、train.json(ランダムに全体の9割)、test.json(train以外の残る1割)を生成する。
80
+
81
+ 2) mmdetection/tools/train_ndl.py を使用し、モデルを学習する。
82
+ ```
83
+ cd mmdetection
84
+ python tools/train_ndl.py configs/ndl/cascade_rcnn_r50_fpn_1x_ndl_1024_eql.py
85
+ ```
86
+ 学習データ、work directory、初期値、学習回数等はconfigファイル内で指定するか、train_ndl.pyのオプションを使用する。オプションで指定されたものが優先される。
87
+
88
+ work directoryに、学習したモデル(epoch_XX.pth または latest.pth)とconfigファイル(train_ndl.pyのオプションを使用した場合その内容も反映)、学習時のログファイル(.logと.log.json)が保存される。
89
+
90
+ なお、このリポジトリで公開しているモデル(設定ファイルは`configs/ndl/cascade_rcnn_r50_fpn_1x_ndl_1024_eql.py`を参照)の学習時の初期重みには
91
+ https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco/cascade_rcnn_r50_fpn_1x_coco_20200316-3dc56deb.pth
92
+ を使用した。
src/ndl_layout/mmdetection/.dev_scripts/batch_test.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ some instructions
3
+ 1. Fill the models that needs to be checked in the modelzoo_dict
4
+ 2. Arange the structure of the directory as follows, the script will find the
5
+ corresponding config itself:
6
+ model_dir/model_family/checkpoints
7
+ e.g.: models/faster_rcnn/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth
8
+ models/faster_rcnn/faster_rcnn_r101_fpn_1x_coco_20200130-047c8118.pth
9
+ 3. Excute the batch_test.sh
10
+ """
11
+
12
+ import argparse
13
+ import json
14
+ import os
15
+ import subprocess
16
+
17
+ import mmcv
18
+ import torch
19
+ from mmcv import Config, get_logger
20
+ from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
21
+ from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,
22
+ wrap_fp16_model)
23
+
24
+ from mmdet.apis import multi_gpu_test, single_gpu_test
25
+ from mmdet.datasets import (build_dataloader, build_dataset,
26
+ replace_ImageToTensor)
27
+ from mmdet.models import build_detector
28
+
29
# Reference metrics for every config that must be verified.
# Keys are config paths (relative to the repo root); values map a metric
# name ('bbox', 'segm', 'AR@1000') to the expected model-zoo value.
modelzoo_dict = {
    'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py': {
        'bbox': 0.374
    },
    'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py': {
        'bbox': 0.382,
        'segm': 0.347
    },
    'configs/rpn/rpn_r50_fpn_1x_coco.py': {
        'AR@1000': 0.582
    }
}
41
+
42
+
43
def parse_args():
    """Build and parse the CLI options for the batch correctness check.

    Also mirrors ``--local_rank`` into the ``LOCAL_RANK`` environment
    variable when it is not already set, so distributed helpers that read
    the rank from the environment keep working.
    """
    parser = argparse.ArgumentParser(
        description='The script used for checking the correctness \
of batch inference')
    parser.add_argument('model_dir', help='directory of models')
    parser.add_argument(
        'json_out', help='the output json records test information like mAP')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    parsed = parser.parse_args()
    os.environ.setdefault('LOCAL_RANK', str(parsed.local_rank))
    return parsed
60
+
61
+
62
def check_finish(all_model_dict, result_file):
    """Return the next config that still needs to be tested.

    ``result_file`` holds one json record per line, each with a ``cfg``
    key naming an already-evaluated config.

    Returns the first untested config (in sorted key order), or ``None``
    after appending a ``finished`` marker line when every config in
    ``all_model_dict`` already has a result.
    """
    tested_cfgs = []
    # only reading here, so 'r' instead of the original 'r+'
    with open(result_file, 'r') as f:
        for line in f:
            record = json.loads(line)
            tested_cfgs.append(record['cfg'])
    for cfg in sorted(all_model_dict.keys()):
        if cfg not in tested_cfgs:
            return cfg
    # every config has a result line: mark the whole run as done
    # (the original kept a dead `is_finish` flag that was always True)
    with open(result_file, 'a+') as f:
        f.write('finished\n')
    return None
76
+
77
+
78
def dump_dict(record_dict, json_out):
    """Append one json-serialized record plus a trailing newline to ``json_out``."""
    with open(json_out, 'a+') as out_file:
        mmcv.dump(record_dict, out_file, file_format='json')
        out_file.write('\n')
83
+
84
+
85
def main():
    """Evaluate one checkpoint and compare its metrics to ``modelzoo_dict``.

    Scans ``args.model_dir`` for ``<family>/<checkpoint>.pth`` files, picks
    the next config that has no record in ``args.json_out`` yet, runs test
    inference (single- or multi-GPU depending on ``--launcher``), and
    appends the outcome as one json line.  Designed to be re-launched in a
    loop (see batch_test.sh) until every config is covered.
    """
    args = parse_args()
    # touch the output json if not exist
    with open(args.json_out, 'a+'):
        pass
    # init distributed env first, since logger depends on the dist
    # info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, backend='nccl')
    rank, world_size = get_dist_info()

    logger = get_logger('root')

    # read info of checkpoints and config
    result_dict = dict()
    for model_family_dir in os.listdir(args.model_dir):
        for model in os.listdir(
                os.path.join(args.model_dir, model_family_dir)):
            # cpt: rpn_r50_fpn_1x_coco_20200218-5525fa2e.pth
            # cfg: rpn_r50_fpn_1x_coco.py
            # NOTE(review): assumes the checkpoint basename ends with an
            # 18-character '_<date>-<hash>' suffix — confirm for new models.
            cfg = model.split('.')[0][:-18] + '.py'
            cfg_path = os.path.join('configs', model_family_dir, cfg)
            assert os.path.isfile(
                cfg_path), f'{cfg_path} is not valid config path'
            cpt_path = os.path.join(args.model_dir, model_family_dir, model)
            result_dict[cfg_path] = cpt_path
            assert cfg_path in modelzoo_dict, f'please fill the ' \
                f'performance of cfg: {cfg_path}'
    # next config without a record; None only when everything is finished
    cfg = check_finish(result_dict, args.json_out)
    cpt = result_dict[cfg]
    try:
        cfg_name = cfg
        logger.info(f'evaluate {cfg}')
        record = dict(cfg=cfg, cpt=cpt)
        cfg = Config.fromfile(cfg)
        # cfg.data.test.ann_file = 'data/val_0_10.json'
        # set cudnn_benchmark
        if cfg.get('cudnn_benchmark', False):
            torch.backends.cudnn.benchmark = True
        cfg.model.pretrained = None
        # drop pretrained weights (incl. RFP backbones) — the checkpoint
        # being tested supplies all parameters
        if cfg.model.get('neck'):
            if isinstance(cfg.model.neck, list):
                for neck_cfg in cfg.model.neck:
                    if neck_cfg.get('rfp_backbone'):
                        if neck_cfg.rfp_backbone.get('pretrained'):
                            neck_cfg.rfp_backbone.pretrained = None
            elif cfg.model.neck.get('rfp_backbone'):
                if cfg.model.neck.rfp_backbone.get('pretrained'):
                    cfg.model.neck.rfp_backbone.pretrained = None

        # in case the test dataset is concatenated
        if isinstance(cfg.data.test, dict):
            cfg.data.test.test_mode = True
        elif isinstance(cfg.data.test, list):
            for ds_cfg in cfg.data.test:
                ds_cfg.test_mode = True

        # build the dataloader
        samples_per_gpu = 2  # hack test with 2 image per gpu
        if samples_per_gpu > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.test.pipeline = replace_ImageToTensor(
                cfg.data.test.pipeline)
        dataset = build_dataset(cfg.data.test)
        data_loader = build_dataloader(
            dataset,
            samples_per_gpu=samples_per_gpu,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)

        # build the model and load checkpoint
        cfg.model.train_cfg = None
        model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
        fp16_cfg = cfg.get('fp16', None)
        if fp16_cfg is not None:
            wrap_fp16_model(model)

        checkpoint = load_checkpoint(model, cpt, map_location='cpu')
        # old versions did not save class info in checkpoints,
        # this workaround is for backward compatibility
        if 'CLASSES' in checkpoint.get('meta', {}):
            model.CLASSES = checkpoint['meta']['CLASSES']
        else:
            model.CLASSES = dataset.CLASSES

        if not distributed:
            model = MMDataParallel(model, device_ids=[0])
            outputs = single_gpu_test(model, data_loader)
        else:
            model = MMDistributedDataParallel(
                model.cuda(),
                device_ids=[torch.cuda.current_device()],
                broadcast_buffers=False)
            outputs = multi_gpu_test(model, data_loader, 'tmp')
        if rank == 0:
            ref_mAP_dict = modelzoo_dict[cfg_name]
            metrics = list(ref_mAP_dict.keys())
            # 'AR@1000' is evaluated through the 'proposal_fast' protocol
            metrics = [
                m if m != 'AR@1000' else 'proposal_fast' for m in metrics
            ]
            eval_results = dataset.evaluate(outputs, metrics)
            print(eval_results)
            for metric in metrics:
                if metric == 'proposal_fast':
                    ref_metric = modelzoo_dict[cfg_name]['AR@1000']
                    eval_metric = eval_results['AR@1000']
                else:
                    ref_metric = modelzoo_dict[cfg_name][metric]
                    eval_metric = eval_results[f'{metric}_mAP']
                # flag runs whose metric drifts from the zoo reference
                if abs(ref_metric - eval_metric) > 0.003:
                    record['is_normal'] = False
            dump_dict(record, args.json_out)
            check_finish(result_dict, args.json_out)
    except Exception as e:
        logger.error(f'rank: {rank} test fail with error: {e}')
        record['terminate'] = True
        dump_dict(record, args.json_out)
        check_finish(result_dict, args.json_out)
        # hack there to throw some error to prevent hang out
        subprocess.call('xxx')
209
+
210
+
211
# CLI entry point.
if __name__ == '__main__':
    main()
src/ndl_layout/mmdetection/.dev_scripts/batch_test.sh ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ export PYTHONPATH=${PWD}
2
+
3
+ partition=$1
4
+ model_dir=$2
5
+ json_out=$3
6
+ job_name=batch_test
7
+ gpus=8
8
+ gpu_per_node=8
9
+
10
+ touch $json_out
11
+ lastLine=$(tail -n 1 $json_out)
12
+ while [ "$lastLine" != "finished" ]
13
+ do
14
+ srun -p ${partition} --gres=gpu:${gpu_per_node} -n${gpus} --ntasks-per-node=${gpu_per_node} \
15
+ --job-name=${job_name} --kill-on-bad-exit=1 \
16
+ python .dev_scripts/batch_test.py $model_dir $json_out --launcher='slurm'
17
+ lastLine=$(tail -n 1 $json_out)
18
+ echo $lastLine
19
+ done
src/ndl_layout/mmdetection/.dev_scripts/benchmark_filter.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import os.path as osp
4
+
5
+ import mmcv
6
+
7
+
8
def parse_args():
    """Parse the CLI flags that select which benchmark groups to filter."""
    parser = argparse.ArgumentParser(description='Filter configs to train')
    parser.add_argument(
        '--basic-arch',
        action='store_true',
        help='to train models in basic arch')
    parser.add_argument(
        '--datasets', action='store_true', help='to train models in dataset')
    parser.add_argument(
        '--data-pipeline',
        action='store_true',
        help='to train models related to data pipeline, e.g. augmentations')
    parser.add_argument(
        '--nn-module',
        action='store_true',
        help='to train models related to neural network modules')
    parser.add_argument(
        '--model-options',
        nargs='+',
        help='custom options to special model benchmark')
    return parser.parse_args()
+ return args
31
+
32
+
33
# Config groups selectable from the command line ---------------------------

# detector architecture config directories under configs/
basic_arch_root = [
    'atss', 'cascade_rcnn', 'cascade_rpn', 'centripetalnet', 'cornernet',
    'detectors', 'detr', 'double_heads', 'dynamic_rcnn', 'faster_rcnn', 'fcos',
    'foveabox', 'fp16', 'free_anchor', 'fsaf', 'gfl', 'ghm', 'grid_rcnn',
    'guided_anchoring', 'htc', 'libra_rcnn', 'mask_rcnn', 'ms_rcnn',
    'nas_fcos', 'paa', 'pisa', 'point_rend', 'reppoints', 'retinanet', 'rpn',
    'sabl', 'ssd', 'tridentnet', 'vfnet', 'yolact', 'yolo', 'sparse_rcnn',
    'scnet'
]

# non-COCO dataset config directories
datasets_root = [
    'wider_face', 'pascal_voc', 'cityscapes', 'lvis', 'deepfashion'
]

# data pipeline / augmentation config directories
data_pipeline_root = ['albu_example', 'instaboost']

# neural-network module config directories
nn_module_root = [
    'carafe', 'dcn', 'empirical_attention', 'gcnet', 'gn', 'gn+ws', 'hrnet',
    'pafpn', 'nas_fpn', 'regnet', 'resnest', 'res2net', 'groie'
]

# whitelist of configs that actually take part in the regression benchmark;
# only files that are both in a selected group and in this pool are kept
benchmark_pool = [
    'configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py',
    'configs/atss/atss_r50_fpn_1x_coco.py',
    'configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py',
    'configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py',
    'configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py',
    'configs/centripetalnet/'
    'centripetalnet_hourglass104_mstest_16x6_210e_coco.py',
    'configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py',
    'configs/cornernet/'
    'cornernet_hourglass104_mstest_8x6_210e_coco.py',  # special
    'configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py',
    'configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py',
    'configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py',
    'configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py',
    'configs/detectors/detectors_htc_r50_1x_coco.py',
    'configs/detr/detr_r50_8x2_150e_coco.py',
    'configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py',
    'configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x.py',
    'configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py',  # noqa
    'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py',
    'configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py',
    'configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py',
    'configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py',
    'configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py',
    'configs/fcos/fcos_center_r50_caffe_fpn_gn-head_4x4_1x_coco.py',
    'configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py',
    'configs/fp16/retinanet_r50_fpn_fp16_1x_coco.py',
    'configs/fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py',
    'configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py',
    'configs/fsaf/fsaf_r50_fpn_1x_coco.py',
    'configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py',
    'configs/gfl/gfl_r50_fpn_1x_coco.py',
    'configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py',
    'configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py',
    'configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py',
    'configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py',
    'configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py',
    'configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py',
    'configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py',
    'configs/htc/htc_r50_fpn_1x_coco.py',
    'configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py',
    'configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py',
    'configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py',
    'configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py',
    'configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py',
    'configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py',
    'configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py',
    'configs/paa/paa_r50_fpn_1x_coco.py',
    'configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py',
    'configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py',
    'configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py',
    'configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py',
    'configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py',
    'configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py',
    'configs/resnest/'
    'mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py',
    'configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py',
    'configs/rpn/rpn_r50_fpn_1x_coco.py',
    'configs/sabl/sabl_retinanet_r50_fpn_1x_coco.py',
    'configs/ssd/ssd300_coco.py',
    'configs/tridentnet/tridentnet_r50_caffe_1x_coco.py',
    'configs/vfnet/vfnet_r50_fpn_1x_coco.py',
    'configs/yolact/yolact_r50_1x8_coco.py',
    'configs/yolo/yolov3_d53_320_273e_coco.py',
    'configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py',
    'configs/scnet/scnet_r50_fpn_1x_coco.py'
]
122
+
123
+
124
def main():
    """Collect the benchmark configs selected by the CLI flags.

    The chosen group roots are scanned under ``configs/`` and every file
    that belongs to ``benchmark_pool`` is recorded (without duplicates)
    into ``regression_test_configs.json``.
    """
    args = parse_args()

    benchmark_type = []
    if args.basic_arch:
        benchmark_type += basic_arch_root
    if args.datasets:
        benchmark_type += datasets_root
    if args.data_pipeline:
        benchmark_type += data_pipeline_root
    if args.nn_module:
        benchmark_type += nn_module_root

    special_model = args.model_options
    if special_model is not None:
        benchmark_type += special_model

    config_dpath = 'configs/'
    benchmark_configs = []
    for cfg_root in benchmark_type:
        cfg_dir = osp.join(config_dpath, cfg_root)
        # close the scandir iterator deterministically instead of leaking
        # the directory handle (the original never closed it)
        with os.scandir(cfg_dir) as configs:
            for cfg in configs:
                config_path = osp.join(cfg_dir, cfg.name)
                if (config_path in benchmark_pool
                        and config_path not in benchmark_configs):
                    benchmark_configs.append(config_path)

    print(f'Totally found {len(benchmark_configs)} configs to benchmark')
    config_dicts = dict(models=benchmark_configs)
    mmcv.dump(config_dicts, 'regression_test_configs.json')
155
+
156
+
157
+ if __name__ == '__main__':
158
+ main()
src/ndl_layout/mmdetection/.dev_scripts/convert_benchmark_script.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import os.path as osp
4
+
5
+ import mmcv
6
+
7
+
8
def parse_args():
    """Parse the CLI options for turning a benchmark json into a script."""
    parser = argparse.ArgumentParser(
        description='Convert benchmark model json to script')
    parser.add_argument(
        'json_path', type=str, help='json path output by benchmark_filter')
    parser.add_argument('partition', type=str, help='slurm partition name')
    parser.add_argument(
        '--max-keep-ckpts',
        type=int,
        default=1,
        help='The maximum checkpoints to keep')
    parser.add_argument(
        '--run', action='store_true', help='run script directly')
    parser.add_argument(
        '--out', type=str, help='path to save model benchmark script')
    return parser.parse_args()
26
+
27
+
28
def main():
    """Emit (and/or run) a shell script that trains every benchmark config.

    Each config listed in the input json becomes one backgrounded
    ``slurm_train.sh`` invocation.  The generated script text is written
    to ``--out`` and/or executed directly when ``--run`` is given.
    """
    args = parse_args()
    if args.out:
        out_suffix = args.out.split('.')[-1]
        assert args.out.endswith('.sh'), \
            f'Expected out file path suffix is .sh, but get .{out_suffix}'
    assert args.out or args.run, \
        ('Please specify at least one operation (save/run/ the '
         'script) with the argument "--out" or "--run"')

    json_data = mmcv.load(args.json_path)
    model_cfgs = json_data['models']

    partition = args.partition  # cluster name

    root_name = './tools'
    train_script_name = osp.join(root_name, 'slurm_train.sh')
    # stdout is no output
    stdout_cfg = '>/dev/null'

    max_keep_ckpts = args.max_keep_ckpts

    commands = []
    for cfg in model_cfgs:
        # print cfg name before launching its training job
        commands.append(f'echo \'{cfg}\' &')
        commands.append('\n')

        fname, _ = osp.splitext(osp.basename(cfg))
        out_fname = osp.join(root_name, fname)
        # default setting (constant prefix — no f-string needed)
        command_info = 'GPUS=8 GPUS_PER_NODE=8 ' \
                       f'CPUS_PER_TASK=2 {train_script_name} '
        command_info += f'{partition} '
        command_info += f'{fname} '
        command_info += f'{cfg} '
        command_info += f'{out_fname} '
        if max_keep_ckpts:
            command_info += '--cfg-options ' \
                            'checkpoint_config.max_keep_ckpts=' \
                            f'{max_keep_ckpts}' + ' '
        command_info += f'{stdout_cfg} &'

        commands.append(command_info)
        # the original guarded this with `if i < len(model_cfgs)`, which is
        # always true — a newline follows every entry, so append it plainly
        commands.append('\n')

    command_str = ''.join(commands)
    if args.out:
        with open(args.out, 'w') as f:
            f.write(command_str)
    if args.run:
        # NOTE(review): executes generated shell text via os.system; the
        # input json is assumed to be trusted developer data
        os.system(command_str)
83
+
84
+
85
# CLI entry point.
if __name__ == '__main__':
    main()
src/ndl_layout/mmdetection/.dev_scripts/gather_benchmark_metric.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import glob
3
+ import os.path as osp
4
+
5
+ import mmcv
6
+ from gather_models import get_final_results
7
+
8
+ try:
9
+ import xlrd
10
+ except ImportError:
11
+ xlrd = None
12
+ try:
13
+ import xlutils
14
+ from xlutils.copy import copy
15
+ except ImportError:
16
+ xlutils = None
17
+
18
+
19
def parse_args():
    """Parse the CLI options for gathering benchmarked model metrics."""
    parser = argparse.ArgumentParser(
        description='Gather benchmarked models metric')
    parser.add_argument(
        'root',
        type=str,
        help='root path of benchmarked models to be gathered')
    parser.add_argument(
        'benchmark_json', type=str, help='json path of benchmark models')
    parser.add_argument(
        '--out', type=str, help='output path of gathered metrics to be stored')
    parser.add_argument(
        '--not-show', action='store_true', help='not show metrics')
    parser.add_argument(
        '--excel', type=str, help='input path of excel to be recorded')
    parser.add_argument(
        '--ncol', type=int, help='Number of column to be modified or appended')
    return parser.parse_args()
+ return args
39
+
40
+
41
# Script body: read benchmark results, optionally record them into an Excel
# workbook, and save/print a json summary of the gathered metrics.
if __name__ == '__main__':
    args = parse_args()

    if args.excel:
        assert args.ncol, 'Please specify "--excel" and "--ncol" ' \
            'at the same time'
        if xlrd is None:
            raise RuntimeError(
                'xlrd is not installed,'
                'Please use “pip install xlrd==1.2.0” to install')
        if xlutils is None:
            raise RuntimeError(
                'xlutils is not installed,'
                'Please use “pip install xlutils==2.0.0” to install')
        readbook = xlrd.open_workbook(args.excel)
        sheet = readbook.sheet_by_name('Sheet1')
        # map config name (first column) -> row index; data rows start at 3
        sheet_info = {}
        total_nrows = sheet.nrows
        for i in range(3, sheet.nrows):
            sheet_info[sheet.row_values(i)[0]] = i
        xlrw = copy(readbook)
        table = xlrw.get_sheet(0)

    root_path = args.root
    metrics_out = args.out
    benchmark_json_path = args.benchmark_json
    model_configs = mmcv.load(benchmark_json_path)['models']

    result_dict = {}
    for config in model_configs:
        config_name = osp.split(config)[-1]
        config_name = osp.splitext(config_name)[0]
        result_path = osp.join(root_path, config_name)
        if osp.exists(result_path):
            # 1 read config
            cfg = mmcv.Config.fromfile(config)
            total_epochs = cfg.runner.max_epochs
            final_results = cfg.evaluation.metric
            if not isinstance(final_results, list):
                final_results = [final_results]
            final_results_out = []
            for key in final_results:
                if 'proposal_fast' in key:
                    final_results_out.append('AR@1000')  # RPN
                elif 'mAP' not in key:
                    final_results_out.append(key + '_mAP')

            # 2 determine whether total_epochs ckpt exists
            ckpt_path = f'epoch_{total_epochs}.pth'
            if osp.exists(osp.join(result_path, ckpt_path)):
                log_json_path = list(
                    sorted(glob.glob(osp.join(result_path, '*.log.json'))))[-1]

                # 3 read metric
                model_performance = get_final_results(log_json_path,
                                                      total_epochs,
                                                      final_results_out)
                if model_performance is None:
                    print(f'log file error: {log_json_path}')
                    continue
                for performance in model_performance:
                    if performance in ['AR@1000', 'bbox_mAP', 'segm_mAP']:
                        metric = round(model_performance[performance] * 100, 1)
                        model_performance[performance] = metric
                result_dict[config] = model_performance

                # update and append excel content
                if args.excel:
                    if 'AR@1000' in model_performance:
                        metrics = f'{model_performance["AR@1000"]}(AR@1000)'
                    elif 'segm_mAP' in model_performance:
                        metrics = f'{model_performance["bbox_mAP"]}/' \
                                  f'{model_performance["segm_mAP"]}'
                    else:
                        metrics = f'{model_performance["bbox_mAP"]}'

                    row_num = sheet_info.get(config, None)
                    if row_num:
                        table.write(row_num, args.ncol, metrics)
                    else:
                        table.write(total_nrows, 0, config)
                        table.write(total_nrows, args.ncol, metrics)
                        total_nrows += 1

            else:
                print(f'{config} not exist: {ckpt_path}')
        else:
            print(f'not exist: {config}')

    # 4 save or print results
    if metrics_out:
        mmcv.mkdir_or_exist(metrics_out)
        mmcv.dump(result_dict, osp.join(metrics_out, 'model_metric_info.json'))
    if not args.not_show:
        print('===================================')
        for config_name, metrics in result_dict.items():
            print(config_name, metrics)
        print('===================================')
    if args.excel:
        # BUGFIX: interpolate the computed workbook stem (it was previously
        # unused and a literal placeholder was written), e.g. foo.xls -> foo_o.xls
        filename, sufflx = osp.splitext(args.excel)
        xlrw.save(f'{filename}_o{sufflx}')
        print(f'>>> Output {filename}_o{sufflx}')
src/ndl_layout/mmdetection/.dev_scripts/gather_models.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import argparse
import glob
import hashlib
import json
import os
import os.path as osp
import shutil
import subprocess

import mmcv
import torch
10
+
11
+
12
def process_checkpoint(in_file, out_file):
    """Publish a checkpoint: strip optimizer state and add a sha256 tag.

    Args:
        in_file (str): path of the trained checkpoint to slim.
        out_file (str): path (normally ending in ``.pth``) to write the
            slimmed checkpoint to; ``-<8 hex chars>`` is inserted before
            the extension in the final name.

    Returns:
        str: path of the renamed, published checkpoint file.
    """
    checkpoint = torch.load(in_file, map_location='cpu')
    # remove optimizer for smaller file size
    if 'optimizer' in checkpoint:
        del checkpoint['optimizer']
    # if it is necessary to remove some sensitive data in checkpoint['meta'],
    # add the code here.
    torch.save(checkpoint, out_file)
    # hash in-process instead of shelling out to `sha256sum` (portable, and
    # avoids the race of the original fire-and-forget `mv` subprocess)
    hasher = hashlib.sha256()
    with open(out_file, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            hasher.update(chunk)
    sha = hasher.hexdigest()
    # BUGFIX: str.rstrip('.pth') strips any trailing '.', 'p', 't', 'h'
    # characters — remove the suffix exactly instead.
    stem = out_file[:-len('.pth')] if out_file.endswith('.pth') else out_file
    final_file = stem + '-{}.pth'.format(sha[:8])
    os.replace(out_file, final_file)
    return final_file
24
+
25
+
26
def get_final_epoch(config):
    """Return ``total_epochs`` declared in ``./configs/<config>``."""
    cfg = mmcv.Config.fromfile('./configs/' + config)
    return cfg.total_epochs
29
+
30
+
31
def get_final_results(log_json_path, epoch, results_lut):
    """Collect the metrics logged for *epoch* from an mmdet ``.log.json`` file.

    Lines without a ``mode`` key are skipped.  A ``train`` line for the
    epoch contributes its ``memory`` value; a ``val`` line contributes every
    key from ``results_lut`` it contains.  Returns the collected dict.
    """
    collected = dict()
    with open(log_json_path, 'r') as log_file:
        for raw_line in log_file:
            entry = json.loads(raw_line)
            if 'mode' not in entry:
                continue
            if entry['mode'] == 'train' and entry['epoch'] == epoch:
                collected['memory'] = entry['memory']
            if entry['mode'] == 'val' and entry['epoch'] == epoch:
                for key in results_lut:
                    if key in entry:
                        collected[key] = entry[key]
    return collected
48
+
49
+
50
def parse_args():
    """Parse the positional root/out paths for the gathering script."""
    parser = argparse.ArgumentParser(description='Gather benchmarked models')
    parser.add_argument(
        'root',
        type=str,
        help='root path of benchmarked models to be gathered')
    parser.add_argument(
        'out', type=str, help='output path of gathered models to be stored')
    return parser.parse_args()
61
+
62
+
63
def main():
    """Gather finished experiments under ``root`` and publish them to ``out``.

    For every config whose final-epoch checkpoint exists, the checkpoint is
    slimmed and renamed via :func:`process_checkpoint`, the latest logs and
    the config file are copied next to it, and a ``model_info.json`` summary
    of all published models is written into the output directory.
    """
    args = parse_args()
    models_root = args.root
    models_out = args.out
    mmcv.mkdir_or_exist(models_out)

    # find all models in the root directory to be gathered
    raw_configs = list(mmcv.scandir('./configs', '.py', recursive=True))

    # filter configs that is not trained in the experiments dir
    used_configs = []
    for raw_config in raw_configs:
        if osp.exists(osp.join(models_root, raw_config)):
            used_configs.append(raw_config)
    print(f'Find {len(used_configs)} models to be gathered')

    # find final_ckpt and log file for trained each config
    # and parse the best performance
    model_infos = []
    for used_config in used_configs:
        exp_dir = osp.join(models_root, used_config)
        # check whether the exps is finished
        final_epoch = get_final_epoch(used_config)
        final_model = 'epoch_{}.pth'.format(final_epoch)
        model_path = osp.join(exp_dir, final_model)

        # skip if the model is still training
        if not osp.exists(model_path):
            continue

        # get the latest logs
        log_json_path = list(
            sorted(glob.glob(osp.join(exp_dir, '*.log.json'))))[-1]
        log_txt_path = list(sorted(glob.glob(osp.join(exp_dir, '*.log'))))[-1]
        cfg = mmcv.Config.fromfile('./configs/' + used_config)
        results_lut = cfg.evaluation.metric
        if not isinstance(results_lut, list):
            results_lut = [results_lut]
        # case when using VOC, the evaluation key is only 'mAP'
        results_lut = [key + '_mAP' for key in results_lut if 'mAP' not in key]
        model_performance = get_final_results(log_json_path, final_epoch,
                                              results_lut)

        if model_performance is None:
            continue

        model_time = osp.split(log_txt_path)[-1].split('.')[0]
        model_infos.append(
            dict(
                config=used_config,
                results=model_performance,
                epochs=final_epoch,
                model_time=model_time,
                log_json_path=osp.split(log_json_path)[-1]))

    # publish model for each checkpoint
    publish_model_infos = []
    for model in model_infos:
        # BUGFIX: rstrip('.py') strips any trailing '.', 'p', 'y' characters
        # (mangling names like '..._copy.py') — remove the suffix exactly.
        model_publish_dir = osp.join(models_out,
                                     model['config'][:-len('.py')])
        mmcv.mkdir_or_exist(model_publish_dir)

        model_name = osp.split(model['config'])[-1].split('.')[0]

        model_name += '_' + model['model_time']
        publish_model_path = osp.join(model_publish_dir, model_name)
        trained_model_path = osp.join(models_root, model['config'],
                                      'epoch_{}.pth'.format(model['epochs']))

        # convert model
        final_model_path = process_checkpoint(trained_model_path,
                                              publish_model_path)

        # copy log
        shutil.copy(
            osp.join(models_root, model['config'], model['log_json_path']),
            osp.join(model_publish_dir, f'{model_name}.log.json'))
        # BUGFIX: likewise remove '.json' exactly instead of rstrip('.json')
        shutil.copy(
            osp.join(models_root, model['config'],
                     model['log_json_path'][:-len('.json')]),
            osp.join(model_publish_dir, f'{model_name}.log'))

        # copy config to guarantee reproducibility
        config_path = model['config']
        config_path = osp.join(
            'configs',
            config_path) if 'configs' not in config_path else config_path
        target_config_path = osp.split(config_path)[-1]
        shutil.copy(config_path,
                    osp.join(model_publish_dir, target_config_path))

        model['model_path'] = final_model_path
        publish_model_infos.append(model)

    models = dict(models=publish_model_infos)
    print(f'Totally gathered {len(publish_model_infos)} models')
    mmcv.dump(models, osp.join(models_out, 'model_info.json'))
159
+
160
+
161
# CLI entry point.
if __name__ == '__main__':
    main()
src/ndl_layout/mmdetection/.dev_scripts/linter.sh ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ yapf -r -i mmdet/ configs/ tests/ tools/
2
+ isort -rc mmdet/ configs/ tests/ tools/
3
+ flake8 .
src/ndl_layout/mmdetection/.gitignore ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+ MANIFEST
27
+
28
+ # PyInstaller
29
+ # Usually these files are written by a python script from a template
30
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
31
+ *.manifest
32
+ *.spec
33
+
34
+ # Installer logs
35
+ pip-log.txt
36
+ pip-delete-this-directory.txt
37
+
38
+ # Unit test / coverage reports
39
+ htmlcov/
40
+ .tox/
41
+ .coverage
42
+ .coverage.*
43
+ .cache
44
+ nosetests.xml
45
+ coverage.xml
46
+ *.cover
47
+ .hypothesis/
48
+ .pytest_cache/
49
+
50
+ # Translations
51
+ *.mo
52
+ *.pot
53
+
54
+ # Django stuff:
55
+ *.log
56
+ local_settings.py
57
+ db.sqlite3
58
+
59
+ # Flask stuff:
60
+ instance/
61
+ .webassets-cache
62
+
63
+ # Scrapy stuff:
64
+ .scrapy
65
+
66
+ # Sphinx documentation
67
+ docs/_build/
68
+
69
+ # PyBuilder
70
+ target/
71
+
72
+ # Jupyter Notebook
73
+ .ipynb_checkpoints
74
+
75
+ # pyenv
76
+ .python-version
77
+
78
+ # celery beat schedule file
79
+ celerybeat-schedule
80
+
81
+ # SageMath parsed files
82
+ *.sage.py
83
+
84
+ # Environments
85
+ .env
86
+ .venv
87
+ env/
88
+ venv/
89
+ ENV/
90
+ env.bak/
91
+ venv.bak/
92
+
93
+ # Spyder project settings
94
+ .spyderproject
95
+ .spyproject
96
+
97
+ # Rope project settings
98
+ .ropeproject
99
+
100
+ # mkdocs documentation
101
+ /site
102
+
103
+ # mypy
104
+ .mypy_cache/
105
+
106
+ data/
107
+ data
108
+ .vscode
109
+ .idea
110
+ .DS_Store
111
+
112
+ # custom
113
+ *.pkl
114
+ *.pkl.json
115
+ *.log.json
116
+ work_dirs/
117
+
118
+ # Pytorch
119
+ *.pth
120
+ *.py~
121
+ *.sh~
src/ndl_layout/mmdetection/.pre-commit-config.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://gitlab.com/pycqa/flake8.git
3
+ rev: 3.8.3
4
+ hooks:
5
+ - id: flake8
6
+ - repo: https://github.com/asottile/seed-isort-config
7
+ rev: v2.2.0
8
+ hooks:
9
+ - id: seed-isort-config
10
+ - repo: https://github.com/timothycrosley/isort
11
+ rev: 4.3.21
12
+ hooks:
13
+ - id: isort
14
+ - repo: https://github.com/pre-commit/mirrors-yapf
15
+ rev: v0.30.0
16
+ hooks:
17
+ - id: yapf
18
+ - repo: https://github.com/pre-commit/pre-commit-hooks
19
+ rev: v3.1.0
20
+ hooks:
21
+ - id: trailing-whitespace
22
+ - id: check-yaml
23
+ - id: end-of-file-fixer
24
+ - id: requirements-txt-fixer
25
+ - id: double-quote-string-fixer
26
+ - id: check-merge-conflict
27
+ - id: fix-encoding-pragma
28
+ args: ["--remove"]
29
+ - id: mixed-line-ending
30
+ args: ["--fix=lf"]
31
+ - repo: https://github.com/jumanjihouse/pre-commit-hooks
32
+ rev: 2.1.4
33
+ hooks:
34
+ - id: markdownlint
35
+ args: ["-r", "~MD002,~MD013,~MD024,~MD029,~MD033,~MD034,~MD036", "-t", "allow_different_nesting"]
36
+ - repo: https://github.com/myint/docformatter
37
+ rev: v1.3.1
38
+ hooks:
39
+ - id: docformatter
40
+ args: ["--in-place", "--wrap-descriptions", "79"]
src/ndl_layout/mmdetection/.readthedocs.yml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ version: 2
2
+
3
+ python:
4
+ version: 3.7
5
+ install:
6
+ - requirements: requirements/docs.txt
7
+ - requirements: requirements/readthedocs.txt
src/ndl_layout/mmdetection/LICENSE ADDED
@@ -0,0 +1,643 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This software is largely based on the following repository:
2
+
3
+ https://github.com/open-mmlab/mmdetection
4
+
5
+ The original software license is as follows.
6
+
7
+ -----------------------------------------------------------------------
8
+
9
+ Copyright 2018-2019 Open-MMLab. All rights reserved.
10
+
11
+ Apache License
12
+ Version 2.0, January 2004
13
+ http://www.apache.org/licenses/
14
+
15
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
16
+
17
+ 1. Definitions.
18
+
19
+ "License" shall mean the terms and conditions for use, reproduction,
20
+ and distribution as defined by Sections 1 through 9 of this document.
21
+
22
+ "Licensor" shall mean the copyright owner or entity authorized by
23
+ the copyright owner that is granting the License.
24
+
25
+ "Legal Entity" shall mean the union of the acting entity and all
26
+ other entities that control, are controlled by, or are under common
27
+ control with that entity. For the purposes of this definition,
28
+ "control" means (i) the power, direct or indirect, to cause the
29
+ direction or management of such entity, whether by contract or
30
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
31
+ outstanding shares, or (iii) beneficial ownership of such entity.
32
+
33
+ "You" (or "Your") shall mean an individual or Legal Entity
34
+ exercising permissions granted by this License.
35
+
36
+ "Source" form shall mean the preferred form for making modifications,
37
+ including but not limited to software source code, documentation
38
+ source, and configuration files.
39
+
40
+ "Object" form shall mean any form resulting from mechanical
41
+ transformation or translation of a Source form, including but
42
+ not limited to compiled object code, generated documentation,
43
+ and conversions to other media types.
44
+
45
+ "Work" shall mean the work of authorship, whether in Source or
46
+ Object form, made available under the License, as indicated by a
47
+ copyright notice that is included in or attached to the work
48
+ (an example is provided in the Appendix below).
49
+
50
+ "Derivative Works" shall mean any work, whether in Source or Object
51
+ form, that is based on (or derived from) the Work and for which the
52
+ editorial revisions, annotations, elaborations, or other modifications
53
+ represent, as a whole, an original work of authorship. For the purposes
54
+ of this License, Derivative Works shall not include works that remain
55
+ separable from, or merely link (or bind by name) to the interfaces of,
56
+ the Work and Derivative Works thereof.
57
+
58
+ "Contribution" shall mean any work of authorship, including
59
+ the original version of the Work and any modifications or additions
60
+ to that Work or Derivative Works thereof, that is intentionally
61
+ submitted to Licensor for inclusion in the Work by the copyright owner
62
+ or by an individual or Legal Entity authorized to submit on behalf of
63
+ the copyright owner. For the purposes of this definition, "submitted"
64
+ means any form of electronic, verbal, or written communication sent
65
+ to the Licensor or its representatives, including but not limited to
66
+ communication on electronic mailing lists, source code control systems,
67
+ and issue tracking systems that are managed by, or on behalf of, the
68
+ Licensor for the purpose of discussing and improving the Work, but
69
+ excluding communication that is conspicuously marked or otherwise
70
+ designated in writing by the copyright owner as "Not a Contribution."
71
+
72
+ "Contributor" shall mean Licensor and any individual or Legal Entity
73
+ on behalf of whom a Contribution has been received by Licensor and
74
+ subsequently incorporated within the Work.
75
+
76
+ 2. Grant of Copyright License. Subject to the terms and conditions of
77
+ this License, each Contributor hereby grants to You a perpetual,
78
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
79
+ copyright license to reproduce, prepare Derivative Works of,
80
+ publicly display, publicly perform, sublicense, and distribute the
81
+ Work and such Derivative Works in Source or Object form.
82
+
83
+ 3. Grant of Patent License. Subject to the terms and conditions of
84
+ this License, each Contributor hereby grants to You a perpetual,
85
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
86
+ (except as stated in this section) patent license to make, have made,
87
+ use, offer to sell, sell, import, and otherwise transfer the Work,
88
+ where such license applies only to those patent claims licensable
89
+ by such Contributor that are necessarily infringed by their
90
+ Contribution(s) alone or by combination of their Contribution(s)
91
+ with the Work to which such Contribution(s) was submitted. If You
92
+ institute patent litigation against any entity (including a
93
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
94
+ or a Contribution incorporated within the Work constitutes direct
95
+ or contributory patent infringement, then any patent licenses
96
+ granted to You under this License for that Work shall terminate
97
+ as of the date such litigation is filed.
98
+
99
+ 4. Redistribution. You may reproduce and distribute copies of the
100
+ Work or Derivative Works thereof in any medium, with or without
101
+ modifications, and in Source or Object form, provided that You
102
+ meet the following conditions:
103
+
104
+ (a) You must give any other recipients of the Work or
105
+ Derivative Works a copy of this License; and
106
+
107
+ (b) You must cause any modified files to carry prominent notices
108
+ stating that You changed the files; and
109
+
110
+ (c) You must retain, in the Source form of any Derivative Works
111
+ that You distribute, all copyright, patent, trademark, and
112
+ attribution notices from the Source form of the Work,
113
+ excluding those notices that do not pertain to any part of
114
+ the Derivative Works; and
115
+
116
+ (d) If the Work includes a "NOTICE" text file as part of its
117
+ distribution, then any Derivative Works that You distribute must
118
+ include a readable copy of the attribution notices contained
119
+ within such NOTICE file, excluding those notices that do not
120
+ pertain to any part of the Derivative Works, in at least one
121
+ of the following places: within a NOTICE text file distributed
122
+ as part of the Derivative Works; within the Source form or
123
+ documentation, if provided along with the Derivative Works; or,
124
+ within a display generated by the Derivative Works, if and
125
+ wherever such third-party notices normally appear. The contents
126
+ of the NOTICE file are for informational purposes only and
127
+ do not modify the License. You may add Your own attribution
128
+ notices within Derivative Works that You distribute, alongside
129
+ or as an addendum to the NOTICE text from the Work, provided
130
+ that such additional attribution notices cannot be construed
131
+ as modifying the License.
132
+
133
+ You may add Your own copyright statement to Your modifications and
134
+ may provide additional or different license terms and conditions
135
+ for use, reproduction, or distribution of Your modifications, or
136
+ for any such Derivative Works as a whole, provided Your use,
137
+ reproduction, and distribution of the Work otherwise complies with
138
+ the conditions stated in this License.
139
+
140
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
141
+ any Contribution intentionally submitted for inclusion in the Work
142
+ by You to the Licensor shall be under the terms and conditions of
143
+ this License, without any additional terms or conditions.
144
+ Notwithstanding the above, nothing herein shall supersede or modify
145
+ the terms of any separate license agreement you may have executed
146
+ with Licensor regarding such Contributions.
147
+
148
+ 6. Trademarks. This License does not grant permission to use the trade
149
+ names, trademarks, service marks, or product names of the Licensor,
150
+ except as required for reasonable and customary use in describing the
151
+ origin of the Work and reproducing the content of the NOTICE file.
152
+
153
+ 7. Disclaimer of Warranty. Unless required by applicable law or
154
+ agreed to in writing, Licensor provides the Work (and each
155
+ Contributor provides its Contributions) on an "AS IS" BASIS,
156
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
157
+ implied, including, without limitation, any warranties or conditions
158
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
159
+ PARTICULAR PURPOSE. You are solely responsible for determining the
160
+ appropriateness of using or redistributing the Work and assume any
161
+ risks associated with Your exercise of permissions under this License.
162
+
163
+ 8. Limitation of Liability. In no event and under no legal theory,
164
+ whether in tort (including negligence), contract, or otherwise,
165
+ unless required by applicable law (such as deliberate and grossly
166
+ negligent acts) or agreed to in writing, shall any Contributor be
167
+ liable to You for damages, including any direct, indirect, special,
168
+ incidental, or consequential damages of any character arising as a
169
+ result of this License or out of the use or inability to use the
170
+ Work (including but not limited to damages for loss of goodwill,
171
+ work stoppage, computer failure or malfunction, or any and all
172
+ other commercial damages or losses), even if such Contributor
173
+ has been advised of the possibility of such damages.
174
+
175
+ 9. Accepting Warranty or Additional Liability. While redistributing
176
+ the Work or Derivative Works thereof, You may choose to offer,
177
+ and charge a fee for, acceptance of support, warranty, indemnity,
178
+ or other liability obligations and/or rights consistent with this
179
+ License. However, in accepting such obligations, You may act only
180
+ on Your own behalf and on Your sole responsibility, not on behalf
181
+ of any other Contributor, and only if You agree to indemnify,
182
+ defend, and hold each Contributor harmless for any liability
183
+ incurred by, or claims asserted against, such Contributor by reason
184
+ of your accepting any such warranty or additional liability.
185
+
186
+ END OF TERMS AND CONDITIONS
187
+
188
+ APPENDIX: How to apply the Apache License to your work.
189
+
190
+ To apply the Apache License to your work, attach the following
191
+ boilerplate notice, with the fields enclosed by brackets "[]"
192
+ replaced with your own identifying information. (Don't include
193
+ the brackets!) The text should be enclosed in the appropriate
194
+ comment syntax for the file format. We also recommend that a
195
+ file or class name and description of purpose be included on the
196
+ same "printed page" as the copyright notice for easier
197
+ identification within third-party archives.
198
+
199
+ Copyright 2018-2019 Open-MMLab.
200
+
201
+ Licensed under the Apache License, Version 2.0 (the "License");
202
+ you may not use this file except in compliance with the License.
203
+ You may obtain a copy of the License at
204
+
205
+ http://www.apache.org/licenses/LICENSE-2.0
206
+
207
+ Unless required by applicable law or agreed to in writing, software
208
+ distributed under the License is distributed on an "AS IS" BASIS,
209
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
210
+ See the License for the specific language governing permissions and
211
+ limitations under the License.
212
+
213
+
214
+ -----------------------------------------------------------------------
215
+
216
+ The following files have been modified from their original version:
217
+
218
+ README.md,
219
+ configs/_base_/models/cascade_rcnn_r50_fpn.py,
220
+ configs/_base_/schedules/schedule_1x.py,
221
+ configs/ndl/cascade_mask_rcnn_r50_fpn_1x_ndl.py,
222
+ configs/ndl/cascade_mask_rcnn_r50_fpn_1x_ndl_1024.py,
223
+ configs/ndl/cascade_rcnn_r50_fpn_1x_ndl.py,
224
+ configs/ndl/cascade_rcnn_r50_fpn_1x_ndl_1024.py,
225
+ configs/ndl/cascade_rcnn_r50_fpn_1x_ndl_1024_eql.py,
226
+ configs/ndl/ndl.py,
227
+ configs/ndl/ndl_1024.py,
228
+ configs/ndl/ndl_instance.py,
229
+ configs/ndl/ndl_instance_1024.py,
230
+ mmdet/core/post_processing/__init__.py,
231
+ mmdet/core/post_processing/bbox_nms.py,
232
+ mmdet/core/post_processing/merge_augs.py,
233
+ mmdet/datasets/builder.py,
234
+ mmdet/datasets/class_balance_dataset_wrapper.py,
235
+ mmdet/datasets/coco.py,
236
+ mmdet/datasets/max_iter_dataset_wrapper.py,
237
+ mmdet/models/losses/__init__.py,
238
+ mmdet/models/losses/eql.py,
239
+ mmdet/models/losses/eqlv2.py,
240
+ mmdet/models/losses/group_softmax.py,
241
+ mmdet/utils/ndl_categories.py,
242
+ tools/analysis_tools/coco_error_analysis.py,
243
+ tools/train_ndl.py
244
+
245
+ The following license applies for those modifications:
246
+
247
+ -----------------------------------------------------------------------
248
+
249
+ Copyright (c) 2022, National Diet Library, Japan.
250
+
251
+ Attribution 4.0 International
252
+
253
+ =======================================================================
254
+
255
+ Creative Commons Corporation ("Creative Commons") is not a law firm and
256
+ does not provide legal services or legal advice. Distribution of
257
+ Creative Commons public licenses does not create a lawyer-client or
258
+ other relationship. Creative Commons makes its licenses and related
259
+ information available on an "as-is" basis. Creative Commons gives no
260
+ warranties regarding its licenses, any material licensed under their
261
+ terms and conditions, or any related information. Creative Commons
262
+ disclaims all liability for damages resulting from their use to the
263
+ fullest extent possible.
264
+
265
+ Using Creative Commons Public Licenses
266
+
267
+ Creative Commons public licenses provide a standard set of terms and
268
+ conditions that creators and other rights holders may use to share
269
+ original works of authorship and other material subject to copyright
270
+ and certain other rights specified in the public license below. The
271
+ following considerations are for informational purposes only, are not
272
+ exhaustive, and do not form part of our licenses.
273
+
274
+ Considerations for licensors: Our public licenses are
275
+ intended for use by those authorized to give the public
276
+ permission to use material in ways otherwise restricted by
277
+ copyright and certain other rights. Our licenses are
278
+ irrevocable. Licensors should read and understand the terms
279
+ and conditions of the license they choose before applying it.
280
+ Licensors should also secure all rights necessary before
281
+ applying our licenses so that the public can reuse the
282
+ material as expected. Licensors should clearly mark any
283
+ material not subject to the license. This includes other CC-
284
+ licensed material, or material used under an exception or
285
+ limitation to copyright. More considerations for licensors:
286
+ wiki.creativecommons.org/Considerations_for_licensors
287
+
288
+ Considerations for the public: By using one of our public
289
+ licenses, a licensor grants the public permission to use the
290
+ licensed material under specified terms and conditions. If
291
+ the licensor's permission is not necessary for any reason--for
292
+ example, because of any applicable exception or limitation to
293
+ copyright--then that use is not regulated by the license. Our
294
+ licenses grant only permissions under copyright and certain
295
+ other rights that a licensor has authority to grant. Use of
296
+ the licensed material may still be restricted for other
297
+ reasons, including because others have copyright or other
298
+ rights in the material. A licensor may make special requests,
299
+ such as asking that all changes be marked or described.
300
+ Although not required by our licenses, you are encouraged to
301
+ respect those requests where reasonable. More_considerations
302
+ for the public:
303
+ wiki.creativecommons.org/Considerations_for_licensees
304
+
305
+ =======================================================================
306
+
307
+ Creative Commons Attribution 4.0 International Public License
308
+
309
+ By exercising the Licensed Rights (defined below), You accept and agree
310
+ to be bound by the terms and conditions of this Creative Commons
311
+ Attribution 4.0 International Public License ("Public License"). To the
312
+ extent this Public License may be interpreted as a contract, You are
313
+ granted the Licensed Rights in consideration of Your acceptance of
314
+ these terms and conditions, and the Licensor grants You such rights in
315
+ consideration of benefits the Licensor receives from making the
316
+ Licensed Material available under these terms and conditions.
317
+
318
+
319
+ Section 1 -- Definitions.
320
+
321
+ a. Adapted Material means material subject to Copyright and Similar
322
+ Rights that is derived from or based upon the Licensed Material
323
+ and in which the Licensed Material is translated, altered,
324
+ arranged, transformed, or otherwise modified in a manner requiring
325
+ permission under the Copyright and Similar Rights held by the
326
+ Licensor. For purposes of this Public License, where the Licensed
327
+ Material is a musical work, performance, or sound recording,
328
+ Adapted Material is always produced where the Licensed Material is
329
+ synched in timed relation with a moving image.
330
+
331
+ b. Adapter's License means the license You apply to Your Copyright
332
+ and Similar Rights in Your contributions to Adapted Material in
333
+ accordance with the terms and conditions of this Public License.
334
+
335
+ c. Copyright and Similar Rights means copyright and/or similar rights
336
+ closely related to copyright including, without limitation,
337
+ performance, broadcast, sound recording, and Sui Generis Database
338
+ Rights, without regard to how the rights are labeled or
339
+ categorized. For purposes of this Public License, the rights
340
+ specified in Section 2(b)(1)-(2) are not Copyright and Similar
341
+ Rights.
342
+
343
+ d. Effective Technological Measures means those measures that, in the
344
+ absence of proper authority, may not be circumvented under laws
345
+ fulfilling obligations under Article 11 of the WIPO Copyright
346
+ Treaty adopted on December 20, 1996, and/or similar international
347
+ agreements.
348
+
349
+ e. Exceptions and Limitations means fair use, fair dealing, and/or
350
+ any other exception or limitation to Copyright and Similar Rights
351
+ that applies to Your use of the Licensed Material.
352
+
353
+ f. Licensed Material means the artistic or literary work, database,
354
+ or other material to which the Licensor applied this Public
355
+ License.
356
+
357
+ g. Licensed Rights means the rights granted to You subject to the
358
+ terms and conditions of this Public License, which are limited to
359
+ all Copyright and Similar Rights that apply to Your use of the
360
+ Licensed Material and that the Licensor has authority to license.
361
+
362
+ h. Licensor means the individual(s) or entity(ies) granting rights
363
+ under this Public License.
364
+
365
+ i. Share means to provide material to the public by any means or
366
+ process that requires permission under the Licensed Rights, such
367
+ as reproduction, public display, public performance, distribution,
368
+ dissemination, communication, or importation, and to make material
369
+ available to the public including in ways that members of the
370
+ public may access the material from a place and at a time
371
+ individually chosen by them.
372
+
373
+ j. Sui Generis Database Rights means rights other than copyright
374
+ resulting from Directive 96/9/EC of the European Parliament and of
375
+ the Council of 11 March 1996 on the legal protection of databases,
376
+ as amended and/or succeeded, as well as other essentially
377
+ equivalent rights anywhere in the world.
378
+
379
+ k. You means the individual or entity exercising the Licensed Rights
380
+ under this Public License. Your has a corresponding meaning.
381
+
382
+
383
+ Section 2 -- Scope.
384
+
385
+ a. License grant.
386
+
387
+ 1. Subject to the terms and conditions of this Public License,
388
+ the Licensor hereby grants You a worldwide, royalty-free,
389
+ non-sublicensable, non-exclusive, irrevocable license to
390
+ exercise the Licensed Rights in the Licensed Material to:
391
+
392
+ a. reproduce and Share the Licensed Material, in whole or
393
+ in part; and
394
+
395
+ b. produce, reproduce, and Share Adapted Material.
396
+
397
+ 2. Exceptions and Limitations. For the avoidance of doubt, where
398
+ Exceptions and Limitations apply to Your use, this Public
399
+ License does not apply, and You do not need to comply with
400
+ its terms and conditions.
401
+
402
+ 3. Term. The term of this Public License is specified in Section
403
+ 6(a).
404
+
405
+ 4. Media and formats; technical modifications allowed. The
406
+ Licensor authorizes You to exercise the Licensed Rights in
407
+ all media and formats whether now known or hereafter created,
408
+ and to make technical modifications necessary to do so. The
409
+ Licensor waives and/or agrees not to assert any right or
410
+ authority to forbid You from making technical modifications
411
+ necessary to exercise the Licensed Rights, including
412
+ technical modifications necessary to circumvent Effective
413
+ Technological Measures. For purposes of this Public License,
414
+ simply making modifications authorized by this Section 2(a)
415
+ (4) never produces Adapted Material.
416
+
417
+ 5. Downstream recipients.
418
+
419
+ a. Offer from the Licensor -- Licensed Material. Every
420
+ recipient of the Licensed Material automatically
421
+ receives an offer from the Licensor to exercise the
422
+ Licensed Rights under the terms and conditions of this
423
+ Public License.
424
+
425
+ b. No downstream restrictions. You may not offer or impose
426
+ any additional or different terms or conditions on, or
427
+ apply any Effective Technological Measures to, the
428
+ Licensed Material if doing so restricts exercise of the
429
+ Licensed Rights by any recipient of the Licensed
430
+ Material.
431
+
432
+ 6. No endorsement. Nothing in this Public License constitutes or
433
+ may be construed as permission to assert or imply that You
434
+ are, or that Your use of the Licensed Material is, connected
435
+ with, or sponsored, endorsed, or granted official status by,
436
+ the Licensor or others designated to receive attribution as
437
+ provided in Section 3(a)(1)(A)(i).
438
+
439
+ b. Other rights.
440
+
441
+ 1. Moral rights, such as the right of integrity, are not
442
+ licensed under this Public License, nor are publicity,
443
+ privacy, and/or other similar personality rights; however, to
444
+ the extent possible, the Licensor waives and/or agrees not to
445
+ assert any such rights held by the Licensor to the limited
446
+ extent necessary to allow You to exercise the Licensed
447
+ Rights, but not otherwise.
448
+
449
+ 2. Patent and trademark rights are not licensed under this
450
+ Public License.
451
+
452
+ 3. To the extent possible, the Licensor waives any right to
453
+ collect royalties from You for the exercise of the Licensed
454
+ Rights, whether directly or through a collecting society
455
+ under any voluntary or waivable statutory or compulsory
456
+ licensing scheme. In all other cases the Licensor expressly
457
+ reserves any right to collect such royalties.
458
+
459
+
460
+ Section 3 -- License Conditions.
461
+
462
+ Your exercise of the Licensed Rights is expressly made subject to the
463
+ following conditions.
464
+
465
+ a. Attribution.
466
+
467
+ 1. If You Share the Licensed Material (including in modified
468
+ form), You must:
469
+
470
+ a. retain the following if it is supplied by the Licensor
471
+ with the Licensed Material:
472
+
473
+ i. identification of the creator(s) of the Licensed
474
+ Material and any others designated to receive
475
+ attribution, in any reasonable manner requested by
476
+ the Licensor (including by pseudonym if
477
+ designated);
478
+
479
+ ii. a copyright notice;
480
+
481
+ iii. a notice that refers to this Public License;
482
+
483
+ iv. a notice that refers to the disclaimer of
484
+ warranties;
485
+
486
+ v. a URI or hyperlink to the Licensed Material to the
487
+ extent reasonably practicable;
488
+
489
+ b. indicate if You modified the Licensed Material and
490
+ retain an indication of any previous modifications; and
491
+
492
+ c. indicate the Licensed Material is licensed under this
493
+ Public License, and include the text of, or the URI or
494
+ hyperlink to, this Public License.
495
+
496
+ 2. You may satisfy the conditions in Section 3(a)(1) in any
497
+ reasonable manner based on the medium, means, and context in
498
+ which You Share the Licensed Material. For example, it may be
499
+ reasonable to satisfy the conditions by providing a URI or
500
+ hyperlink to a resource that includes the required
501
+ information.
502
+
503
+ 3. If requested by the Licensor, You must remove any of the
504
+ information required by Section 3(a)(1)(A) to the extent
505
+ reasonably practicable.
506
+
507
+ 4. If You Share Adapted Material You produce, the Adapter's
508
+ License You apply must not prevent recipients of the Adapted
509
+ Material from complying with this Public License.
510
+
511
+
512
+ Section 4 -- Sui Generis Database Rights.
513
+
514
+ Where the Licensed Rights include Sui Generis Database Rights that
515
+ apply to Your use of the Licensed Material:
516
+
517
+ a. for the avoidance of doubt, Section 2(a)(1) grants You the right
518
+ to extract, reuse, reproduce, and Share all or a substantial
519
+ portion of the contents of the database;
520
+
521
+ b. if You include all or a substantial portion of the database
522
+ contents in a database in which You have Sui Generis Database
523
+ Rights, then the database in which You have Sui Generis Database
524
+ Rights (but not its individual contents) is Adapted Material; and
525
+
526
+ c. You must comply with the conditions in Section 3(a) if You Share
527
+ all or a substantial portion of the contents of the database.
528
+
529
+ For the avoidance of doubt, this Section 4 supplements and does not
530
+ replace Your obligations under this Public License where the Licensed
531
+ Rights include other Copyright and Similar Rights.
532
+
533
+
534
+ Section 5 -- Disclaimer of Warranties and Limitation of Liability.
535
+
536
+ a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
537
+ EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
538
+ AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
539
+ ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
540
+ IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
541
+ WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
542
+ PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
543
+ ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
544
+ KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
545
+ ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
546
+
547
+ b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
548
+ TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
549
+ NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
550
+ INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
551
+ COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
552
+ USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
553
+ ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
554
+ DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
555
+ IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
556
+
557
+ c. The disclaimer of warranties and limitation of liability provided
558
+ above shall be interpreted in a manner that, to the extent
559
+ possible, most closely approximates an absolute disclaimer and
560
+ waiver of all liability.
561
+
562
+
563
+ Section 6 -- Term and Termination.
564
+
565
+ a. This Public License applies for the term of the Copyright and
566
+ Similar Rights licensed here. However, if You fail to comply with
567
+ this Public License, then Your rights under this Public License
568
+ terminate automatically.
569
+
570
+ b. Where Your right to use the Licensed Material has terminated under
571
+ Section 6(a), it reinstates:
572
+
573
+ 1. automatically as of the date the violation is cured, provided
574
+ it is cured within 30 days of Your discovery of the
575
+ violation; or
576
+
577
+ 2. upon express reinstatement by the Licensor.
578
+
579
+ For the avoidance of doubt, this Section 6(b) does not affect any
580
+ right the Licensor may have to seek remedies for Your violations
581
+ of this Public License.
582
+
583
+ c. For the avoidance of doubt, the Licensor may also offer the
584
+ Licensed Material under separate terms or conditions or stop
585
+ distributing the Licensed Material at any time; however, doing so
586
+ will not terminate this Public License.
587
+
588
+ d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
589
+ License.
590
+
591
+
592
+ Section 7 -- Other Terms and Conditions.
593
+
594
+ a. The Licensor shall not be bound by any additional or different
595
+ terms or conditions communicated by You unless expressly agreed.
596
+
597
+ b. Any arrangements, understandings, or agreements regarding the
598
+ Licensed Material not stated herein are separate from and
599
+ independent of the terms and conditions of this Public License.
600
+
601
+
602
+ Section 8 -- Interpretation.
603
+
604
+ a. For the avoidance of doubt, this Public License does not, and
605
+ shall not be interpreted to, reduce, limit, restrict, or impose
606
+ conditions on any use of the Licensed Material that could lawfully
607
+ be made without permission under this Public License.
608
+
609
+ b. To the extent possible, if any provision of this Public License is
610
+ deemed unenforceable, it shall be automatically reformed to the
611
+ minimum extent necessary to make it enforceable. If the provision
612
+ cannot be reformed, it shall be severed from this Public License
613
+ without affecting the enforceability of the remaining terms and
614
+ conditions.
615
+
616
+ c. No term or condition of this Public License will be waived and no
617
+ failure to comply consented to unless expressly agreed to by the
618
+ Licensor.
619
+
620
+ d. Nothing in this Public License constitutes or may be interpreted
621
+ as a limitation upon, or waiver of, any privileges and immunities
622
+ that apply to the Licensor or You, including from the legal
623
+ processes of any jurisdiction or authority.
624
+
625
+
626
+ =======================================================================
627
+
628
+ Creative Commons is not a party to its public licenses.
629
+ Notwithstanding, Creative Commons may elect to apply one of its public
630
+ licenses to material it publishes and in those instances will be
631
+ considered the "Licensor." Except for the limited purpose of indicating
632
+ that material is shared under a Creative Commons public license or as
633
+ otherwise permitted by the Creative Commons policies published at
634
+ creativecommons.org/policies, Creative Commons does not authorize the
635
+ use of the trademark "Creative Commons" or any other trademark or logo
636
+ of Creative Commons without its prior written consent including,
637
+ without limitation, in connection with any unauthorized modifications
638
+ to any of its public licenses or any other arrangements,
639
+ understandings, or agreements concerning use of licensed material. For
640
+ the avoidance of doubt, this paragraph does not form part of the public
641
+ licenses.
642
+
643
+ Creative Commons may be contacted at creativecommons.org.
src/ndl_layout/mmdetection/LICENSE_DEPENDENCIES ADDED
The diff for this file is too large to render. See raw diff
 
src/ndl_layout/mmdetection/README.md ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This software was developed by the National Diet Library under contract to Morpho AI Solutions, Inc.
2
+ This software is largely based on the following repositories.
3
+
4
+ - [open-mmlab/mmdetection](https://github.com/open-mmlab/mmdetection)
5
+
6
+ The following files are also based on [eqlv2](https://github.com/tztztztztz/eqlv2)
7
+
8
+ - [mmdet/core/post_processing/bbox_nms.py](mmdet/core/post_processing/bbox_nms.py)
9
+ - [mmdet/core/post_processing/merge_augs.py](mmdet/core/post_processing/merge_augs.py)
10
+ - [mmdet/datasets/builder.py](mmdet/datasets/builder.py)
11
+ - [mmdet/datasets/class_balance_dataset_wrapper.py](mmdet/datasets/class_balance_dataset_wrapper.py)
12
+ - [mmdet/datasets/max_iter_dataset_wrapper.py](mmdet/datasets/max_iter_dataset_wrapper.py)
13
+ - [mmdet/models/losses/eql.py](mmdet/models/losses/eql.py)
14
+ - [mmdet/models/losses/eqlv2.py](mmdet/models/losses/eqlv2.py)
15
+ - [mmdet/models/losses/group_softmax.py](mmdet/models/losses/group_softmax.py)
16
+
17
+ The newly developed portion of this program is released by the National Diet Library under a CC BY 4.0 license. For more information, see [LICENSE](./LICENSE).
18
+
19
+
20
+ <div align="center">
21
+ <img src="resources/mmdet-logo.png" width="600"/>
22
+ </div>
23
+
24
+ **News**: We released the technical report on [ArXiv](https://arxiv.org/abs/1906.07155).
25
+
26
+ Documentation: https://mmdetection.readthedocs.io/
27
+
28
+ ## Introduction
29
+
30
+ English | [简体中文](README_zh-CN.md)
31
+
32
+ MMDetection is an open source object detection toolbox based on PyTorch. It is
33
+ a part of the [OpenMMLab](https://openmmlab.com/) project.
34
+
35
+ The master branch works with **PyTorch 1.3+**.
36
+ The old v1.x branch works with PyTorch 1.1 to 1.4, but v2.0 is strongly recommended for faster speed, higher performance, better design and more friendly usage.
37
+
38
+ ![demo image](resources/coco_test_12510.jpg)
39
+
40
+ ### Major features
41
+
42
+ - **Modular Design**
43
+
44
+ We decompose the detection framework into different components and one can easily construct a customized object detection framework by combining different modules.
45
+
46
+ - **Support of multiple frameworks out of box**
47
+
48
+ The toolbox directly supports popular and contemporary detection frameworks, *e.g.* Faster RCNN, Mask RCNN, RetinaNet, etc.
49
+
50
+ - **High efficiency**
51
+
52
+ All basic bbox and mask operations run on GPUs. The training speed is faster than or comparable to other codebases, including [Detectron2](https://github.com/facebookresearch/detectron2), [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark) and [SimpleDet](https://github.com/TuSimple/simpledet).
53
+
54
+ - **State of the art**
55
+
56
+ The toolbox stems from the codebase developed by the *MMDet* team, who won [COCO Detection Challenge](http://cocodataset.org/#detection-leaderboard) in 2018, and we keep pushing it forward.
57
+
58
+ Apart from MMDetection, we also released a library [mmcv](https://github.com/open-mmlab/mmcv) for computer vision research, which is heavily depended on by this toolbox.
59
+
60
+ ## License
61
+
62
+ This project is released under the [Apache 2.0 license](LICENSE).
63
+
64
+ ## Changelog
65
+
66
+ v2.11.0 was released in 01/04/2021.
67
+ Please refer to [changelog.md](docs/changelog.md) for details and release history.
68
+ A comparison between v1.x and v2.0 codebases can be found in [compatibility.md](docs/compatibility.md).
69
+
70
+ ## Benchmark and model zoo
71
+
72
+ Results and models are available in the [model zoo](docs/model_zoo.md).
73
+
74
+ Supported backbones:
75
+
76
+ - [x] ResNet (CVPR'2016)
77
+ - [x] ResNeXt (CVPR'2017)
78
+ - [x] VGG (ICLR'2015)
79
+ - [x] HRNet (CVPR'2019)
80
+ - [x] RegNet (CVPR'2020)
81
+ - [x] Res2Net (TPAMI'2020)
82
+ - [x] ResNeSt (ArXiv'2020)
83
+
84
+ Supported methods:
85
+
86
+ - [x] [RPN (NeurIPS'2015)](configs/rpn)
87
+ - [x] [Fast R-CNN (ICCV'2015)](configs/fast_rcnn)
88
+ - [x] [Faster R-CNN (NeurIPS'2015)](configs/faster_rcnn)
89
+ - [x] [Mask R-CNN (ICCV'2017)](configs/mask_rcnn)
90
+ - [x] [Cascade R-CNN (CVPR'2018)](configs/cascade_rcnn)
91
+ - [x] [Cascade Mask R-CNN (CVPR'2018)](configs/cascade_rcnn)
92
+ - [x] [SSD (ECCV'2016)](configs/ssd)
93
+ - [x] [RetinaNet (ICCV'2017)](configs/retinanet)
94
+ - [x] [GHM (AAAI'2019)](configs/ghm)
95
+ - [x] [Mask Scoring R-CNN (CVPR'2019)](configs/ms_rcnn)
96
+ - [x] [Double-Head R-CNN (CVPR'2020)](configs/double_heads)
97
+ - [x] [Hybrid Task Cascade (CVPR'2019)](configs/htc)
98
+ - [x] [Libra R-CNN (CVPR'2019)](configs/libra_rcnn)
99
+ - [x] [Guided Anchoring (CVPR'2019)](configs/guided_anchoring)
100
+ - [x] [FCOS (ICCV'2019)](configs/fcos)
101
+ - [x] [RepPoints (ICCV'2019)](configs/reppoints)
102
+ - [x] [Foveabox (TIP'2020)](configs/foveabox)
103
+ - [x] [FreeAnchor (NeurIPS'2019)](configs/free_anchor)
104
+ - [x] [NAS-FPN (CVPR'2019)](configs/nas_fpn)
105
+ - [x] [ATSS (CVPR'2020)](configs/atss)
106
+ - [x] [FSAF (CVPR'2019)](configs/fsaf)
107
+ - [x] [PAFPN (CVPR'2018)](configs/pafpn)
108
+ - [x] [Dynamic R-CNN (ECCV'2020)](configs/dynamic_rcnn)
109
+ - [x] [PointRend (CVPR'2020)](configs/point_rend)
110
+ - [x] [CARAFE (ICCV'2019)](configs/carafe/README.md)
111
+ - [x] [DCNv2 (CVPR'2019)](configs/dcn/README.md)
112
+ - [x] [Group Normalization (ECCV'2018)](configs/gn/README.md)
113
+ - [x] [Weight Standardization (ArXiv'2019)](configs/gn+ws/README.md)
114
+ - [x] [OHEM (CVPR'2016)](configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py)
115
+ - [x] [Soft-NMS (ICCV'2017)](configs/faster_rcnn/faster_rcnn_r50_fpn_soft_nms_1x_coco.py)
116
+ - [x] [Generalized Attention (ICCV'2019)](configs/empirical_attention/README.md)
117
+ - [x] [GCNet (ICCVW'2019)](configs/gcnet/README.md)
118
+ - [x] [Mixed Precision (FP16) Training (ArXiv'2017)](configs/fp16/README.md)
119
+ - [x] [InstaBoost (ICCV'2019)](configs/instaboost/README.md)
120
+ - [x] [GRoIE (ICPR'2020)](configs/groie/README.md)
121
+ - [x] [DetectoRS (ArXiv'2020)](configs/detectors/README.md)
122
+ - [x] [Generalized Focal Loss (NeurIPS'2020)](configs/gfl/README.md)
123
+ - [x] [CornerNet (ECCV'2018)](configs/cornernet/README.md)
124
+ - [x] [Side-Aware Boundary Localization (ECCV'2020)](configs/sabl/README.md)
125
+ - [x] [YOLOv3 (ArXiv'2018)](configs/yolo/README.md)
126
+ - [x] [PAA (ECCV'2020)](configs/paa/README.md)
127
+ - [x] [YOLACT (ICCV'2019)](configs/yolact/README.md)
128
+ - [x] [CentripetalNet (CVPR'2020)](configs/centripetalnet/README.md)
129
+ - [x] [VFNet (ArXiv'2020)](configs/vfnet/README.md)
130
+ - [x] [DETR (ECCV'2020)](configs/detr/README.md)
131
+ - [x] [Deformable DETR (ICLR'2021)](configs/deformable_detr/README.md)
132
+ - [x] [CascadeRPN (NeurIPS'2019)](configs/cascade_rpn/README.md)
133
+ - [x] [SCNet (AAAI'2021)](configs/scnet/README.md)
134
+ - [x] [AutoAssign (ArXiv'2020)](configs/autoassign/README.md)
135
+ - [x] [YOLOF (CVPR'2021)](configs/yolof/README.md)
136
+
137
+
138
+ Some other methods are also supported in [projects using MMDetection](./docs/projects.md).
139
+
140
+ ## Installation
141
+
142
+ Please refer to [get_started.md](docs/get_started.md) for installation.
143
+
144
+ ## Getting Started
145
+
146
+ Please see [get_started.md](docs/get_started.md) for the basic usage of MMDetection.
147
+ We provide [colab tutorial](demo/MMDet_Tutorial.ipynb), and full guidance for quick run [with existing dataset](docs/1_exist_data_model.md) and [with new dataset](docs/2_new_data_model.md) for beginners.
148
+ There are also tutorials for [finetuning models](docs/tutorials/finetune.md), [adding new dataset](docs/tutorials/new_dataset.md), [designing data pipeline](docs/tutorials/data_pipeline.md), [customizing models](docs/tutorials/customize_models.md), [customizing runtime settings](docs/tutorials/customize_runtime.md) and [useful tools](docs/useful_tools.md).
149
+
150
+ Please refer to [FAQ](docs/faq.md) for frequently asked questions.
151
+
152
+ ## Contributing
153
+
154
+ We appreciate all contributions to improve MMDetection. Please refer to [CONTRIBUTING.md](.github/CONTRIBUTING.md) for the contributing guideline.
155
+
156
+ ## Acknowledgement
157
+
158
+ MMDetection is an open source project that is contributed by researchers and engineers from various colleges and companies. We appreciate all the contributors who implement their methods or add new features, as well as users who give valuable feedback.
159
+ We wish that the toolbox and benchmark could serve the growing research community by providing a flexible toolkit to reimplement existing methods and develop their own new detectors.
160
+
161
+ ## Citation
162
+
163
+ If you use this toolbox or benchmark in your research, please cite this project.
164
+
165
+ ```
166
+ @article{mmdetection,
167
+ title = {{MMDetection}: Open MMLab Detection Toolbox and Benchmark},
168
+ author = {Chen, Kai and Wang, Jiaqi and Pang, Jiangmiao and Cao, Yuhang and
169
+ Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and
170
+ Liu, Ziwei and Xu, Jiarui and Zhang, Zheng and Cheng, Dazhi and
171
+ Zhu, Chenchen and Cheng, Tianheng and Zhao, Qijie and Li, Buyu and
172
+ Lu, Xin and Zhu, Rui and Wu, Yue and Dai, Jifeng and Wang, Jingdong
173
+ and Shi, Jianping and Ouyang, Wanli and Loy, Chen Change and Lin, Dahua},
174
+ journal= {arXiv preprint arXiv:1906.07155},
175
+ year={2019}
176
+ }
177
+ ```
178
+
179
+ ## Projects in OpenMMLab
180
+
181
+ - [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab foundational library for computer vision.
182
+ - [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab image classification toolbox and benchmark.
183
+ - [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark.
184
+ - [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection.
185
+ - [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark.
186
+ - [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab's next-generation action understanding toolbox and benchmark.
187
+ - [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab video perception toolbox and benchmark.
188
+ - [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark.
189
+ - [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox.
190
+ - [MMOCR](https://github.com/open-mmlab/mmocr): A Comprehensive Toolbox for Text Detection, Recognition and Understanding.
191
+ - [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox.
src/ndl_layout/mmdetection/README_zh-CN.md ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+ <img src="resources/mmdet-logo.png" width="600"/>
3
+ </div>
4
+
5
+ **新闻**: 我们在 [ArXiv](https://arxiv.org/abs/1906.07155) 上公开了技术报告。
6
+
7
+ 文档: https://mmdetection.readthedocs.io/
8
+
9
+ ## 简介
10
+
11
+ [English](README.md) | 简体中文
12
+
13
+ MMDetection 是一个基于 PyTorch 的目标检测开源工具箱。它是 [OpenMMLab](https://openmmlab.com/) 项目的一部分。
14
+
15
+ 主分支代码目前支持 PyTorch 1.3 以上的版本。
16
+
17
+ v1.x 的历史版本支持 PyTorch 1.1 到 1.4,但是我们强烈建议用户使用新的 2.x 的版本,新的版本速度更快,性能更高,有更优雅的代码设计,对用户使用也更加友好。
18
+
19
+ ![demo image](resources/coco_test_12510.jpg)
20
+
21
+ ### 主要特性
22
+
23
+ - **模块化设计**
24
+
25
+ MMDetection 将检测框架解耦成不同的模块组件,通过组合不同的模块组件,用户可以便捷地构建自定义的检测模型
26
+
27
+ - **丰富的即插即用的算法和模型**
28
+
29
+ MMDetection 支持了众多主流的和最新的检测算法,例如 Faster R-CNN,Mask R-CNN,RetinaNet 等。
30
+
31
+ - **速度快**
32
+
33
+ 基本的框和 mask 操作都实现了 GPU 版本,训练速度比其他代码库更快或者相当,包括 [Detectron2](https://github.com/facebookresearch/detectron2), [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark) 和 [SimpleDet](https://github.com/TuSimple/simpledet)。
34
+
35
+ - **性能高**
36
+
37
+ MMDetection 这个算法库源自于 COCO 2018 目标检测竞赛的冠军团队 *MMDet* 团队开发的代码,我们在之后持续进行了改进和提升。
38
+
39
+ 除了 MMDetection 之外,我们还开源了计算机视觉基础库 [MMCV](https://github.com/open-mmlab/mmcv),MMCV 是 MMDetection 的主要依赖。
40
+
41
+ ## 开源许可证
42
+
43
+ 该项目采用 [Apache 2.0 开源许可证](LICENSE)。
44
+
45
+ ## 更新日志
46
+
47
+ 最新的月度版本 v2.11.0 在 2021.04.01 发布。
48
+ 如果想了解更多版本更新细节和历史信息,请阅读[更新日志](docs/changelog.md)。
49
+ 在[兼容性说明文档](docs/compatibility.md)中我们提供了 1.x 和 2.0 版本的详细比较。
50
+
51
+ ## 基准测试和模型库
52
+
53
+ 测试结果和模型可以在[模型库](docs/model_zoo.md)中找到。
54
+
55
+ 已支持的骨干网络:
56
+
57
+ - [x] ResNet (CVPR'2016)
58
+ - [x] ResNeXt (CVPR'2017)
59
+ - [x] VGG (ICLR'2015)
60
+ - [x] HRNet (CVPR'2019)
61
+ - [x] RegNet (CVPR'2020)
62
+ - [x] Res2Net (TPAMI'2020)
63
+ - [x] ResNeSt (ArXiv'2020)
64
+
65
+ 已支持的算法:
66
+
67
+ - [x] [RPN (NeurIPS'2015)](configs/rpn)
68
+ - [x] [Fast R-CNN (ICCV'2015)](configs/fast_rcnn)
69
+ - [x] [Faster R-CNN (NeurIPS'2015)](configs/faster_rcnn)
70
+ - [x] [Mask R-CNN (ICCV'2017)](configs/mask_rcnn)
71
+ - [x] [Cascade R-CNN (CVPR'2018)](configs/cascade_rcnn)
72
+ - [x] [Cascade Mask R-CNN (CVPR'2018)](configs/cascade_rcnn)
73
+ - [x] [SSD (ECCV'2016)](configs/ssd)
74
+ - [x] [RetinaNet (ICCV'2017)](configs/retinanet)
75
+ - [x] [GHM (AAAI'2019)](configs/ghm)
76
+ - [x] [Mask Scoring R-CNN (CVPR'2019)](configs/ms_rcnn)
77
+ - [x] [Double-Head R-CNN (CVPR'2020)](configs/double_heads)
78
+ - [x] [Hybrid Task Cascade (CVPR'2019)](configs/htc)
79
+ - [x] [Libra R-CNN (CVPR'2019)](configs/libra_rcnn)
80
+ - [x] [Guided Anchoring (CVPR'2019)](configs/guided_anchoring)
81
+ - [x] [FCOS (ICCV'2019)](configs/fcos)
82
+ - [x] [RepPoints (ICCV'2019)](configs/reppoints)
83
+ - [x] [Foveabox (TIP'2020)](configs/foveabox)
84
+ - [x] [FreeAnchor (NeurIPS'2019)](configs/free_anchor)
85
+ - [x] [NAS-FPN (CVPR'2019)](configs/nas_fpn)
86
+ - [x] [ATSS (CVPR'2020)](configs/atss)
87
+ - [x] [FSAF (CVPR'2019)](configs/fsaf)
88
+ - [x] [PAFPN (CVPR'2018)](configs/pafpn)
89
+ - [x] [Dynamic R-CNN (ECCV'2020)](configs/dynamic_rcnn)
90
+ - [x] [PointRend (CVPR'2020)](configs/point_rend)
91
+ - [x] [CARAFE (ICCV'2019)](configs/carafe/README.md)
92
+ - [x] [DCNv2 (CVPR'2019)](configs/dcn/README.md)
93
+ - [x] [Group Normalization (ECCV'2018)](configs/gn/README.md)
94
+ - [x] [Weight Standardization (ArXiv'2019)](configs/gn+ws/README.md)
95
+ - [x] [OHEM (CVPR'2016)](configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py)
96
+ - [x] [Soft-NMS (ICCV'2017)](configs/faster_rcnn/faster_rcnn_r50_fpn_soft_nms_1x_coco.py)
97
+ - [x] [Generalized Attention (ICCV'2019)](configs/empirical_attention/README.md)
98
+ - [x] [GCNet (ICCVW'2019)](configs/gcnet/README.md)
99
+ - [x] [Mixed Precision (FP16) Training (ArXiv'2017)](configs/fp16/README.md)
100
+ - [x] [InstaBoost (ICCV'2019)](configs/instaboost/README.md)
101
+ - [x] [GRoIE (ICPR'2020)](configs/groie/README.md)
102
+ - [x] [DetectoRS (ArXiv'2020)](configs/detectors/README.md)
103
+ - [x] [Generalized Focal Loss (NeurIPS'2020)](configs/gfl/README.md)
104
+ - [x] [CornerNet (ECCV'2018)](configs/cornernet/README.md)
105
+ - [x] [Side-Aware Boundary Localization (ECCV'2020)](configs/sabl/README.md)
106
+ - [x] [YOLOv3 (ArXiv'2018)](configs/yolo/README.md)
107
+ - [x] [PAA (ECCV'2020)](configs/paa/README.md)
108
+ - [x] [YOLACT (ICCV'2019)](configs/yolact/README.md)
109
+ - [x] [CentripetalNet (CVPR'2020)](configs/centripetalnet/README.md)
110
+ - [x] [VFNet (ArXiv'2020)](configs/vfnet/README.md)
111
+ - [x] [DETR (ECCV'2020)](configs/detr/README.md)
112
+ - [x] [Deformable DETR (ICLR'2021)](configs/deformable_detr/README.md)
113
+ - [x] [CascadeRPN (NeurIPS'2019)](configs/cascade_rpn/README.md)
114
+ - [x] [SCNet (AAAI'2021)](configs/scnet/README.md)
115
+ - [x] [AutoAssign (ArXiv'2020)](configs/autoassign/README.md)
116
+ - [x] [YOLOF (CVPR'2021)](configs/yolof/README.md)
117
+
118
+ 我们在[基于 MMDetection 的项目](./docs/projects.md)中列举了一些其他的支持的算法。
119
+
120
+ ## 安装
121
+
122
+ 请参考[快速入门文档](docs/get_started.md)进行安装。
123
+
124
+ ## 快速入门
125
+
126
+ 请参考[快速入门文档](docs/get_started.md)学习 MMDetection 的基本使用。
127
+ 我们提供了 [colab 教程](demo/MMDet_Tutorial.ipynb),也为新手提供了完整的运行教程,分别针对[已有数据集](docs/1_exist_data_model.md)和[新数据集](docs/2_new_data_model.md) 完整的使用指南
128
+
129
+ 我们也提供了一些进阶教程,内容覆盖了 [finetune 模型](docs/tutorials/finetune.md),[增加新数据集支持](docs/tutorials/new_dataset.md),[设计新的数据预处理流程](docs/tutorials/data_pipeline.md),[增加自定义模型](docs/tutorials/customize_models.md),[增加自定义的运行时配置](docs/tutorials/customize_runtime.md),[常用工具和脚本](docs/useful_tools.md)。
130
+
131
+ 如果遇到问题,请参考 [FAQ 页面](docs/faq.md)。
132
+
133
+ ## 贡献指南
134
+
135
+ 我们感谢所有的贡献者为改进和提升 MMDetection 所作出的努力。请参考[贡献指南](.github/CONTRIBUTING.md)来了解参与项目贡献的相关指引。
136
+
137
+ ## 致谢
138
+
139
+ MMDetection 是一款由来自不同高校和企业的研发人员共同参与贡献的开源项目。我们感谢所有为项目提供算法复现和新功能支持的贡献者,以及提供宝贵反馈的用户。 我们希望这个工具箱和基准测试可以为社区提供灵活的代码工具,供用户复现已有算法并开发自己的新模型,从而不断为开源社区提供贡献。
140
+
141
+ ## 引用
142
+
143
+ 如果你在研究中使用了本项目的代码或者性能基准,请参考如下 bibtex 引用 MMDetection。
144
+
145
+ ```
146
+ @article{mmdetection,
147
+ title = {{MMDetection}: Open MMLab Detection Toolbox and Benchmark},
148
+ author = {Chen, Kai and Wang, Jiaqi and Pang, Jiangmiao and Cao, Yuhang and
149
+ Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and
150
+ Liu, Ziwei and Xu, Jiarui and Zhang, Zheng and Cheng, Dazhi and
151
+ Zhu, Chenchen and Cheng, Tianheng and Zhao, Qijie and Li, Buyu and
152
+ Lu, Xin and Zhu, Rui and Wu, Yue and Dai, Jifeng and Wang, Jingdong
153
+ and Shi, Jianping and Ouyang, Wanli and Loy, Chen Change and Lin, Dahua},
154
+ journal= {arXiv preprint arXiv:1906.07155},
155
+ year={2019}
156
+ }
157
+ ```
158
+
159
+ ## OpenMMLab 的其他项目
160
+
161
+ - [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab 计算机视觉基础库
162
+ - [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab 图像分类工具箱
163
+ - [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab 目标检测工具箱
164
+ - [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab 新一代通用 3D 目标检测平台
165
+ - [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab 语义分割工具箱
166
+ - [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab 新一代视频理解工具箱
167
+ - [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab 一体化视频目标感知平台
168
+ - [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab 姿态估计工具箱
169
+ - [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab 图像视频编辑工具箱
170
+ - [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab 全流程文字检测识别理解工具包
171
+ - [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab 图片视频生成模型工具箱
172
+
173
+ ## 欢迎加入 OpenMMLab 社区
174
+
175
+ 扫描下方的二维码可关注 OpenMMLab 团队的 [知乎官方账号](https://www.zhihu.com/people/openmmlab),加入 OpenMMLab 团队的 [官方交流 QQ 群](https://jq.qq.com/?_wv=1027&k=aCvMxdr3)
176
+
177
+ <div align="center">
178
+ <img src="/resources/zhihu_qrcode.jpg" height="400" /> <img src="/resources/qq_group_qrcode.jpg" height="400" />
179
+ </div>
180
+
181
+ 我们会在 OpenMMLab 社区为大家
182
+
183
+ - 📢 分享 AI 框架的前沿核心技术
184
+ - 💻 解读 PyTorch 常用模块源码
185
+ - 📰 发布 OpenMMLab 的相关新闻
186
+ - 🚀 介绍 OpenMMLab 开发的前沿算法
187
+ - 🏃 获取更高效的问题答疑和意见反馈
188
+ - 🔥 提供与各行各业开发者充分交流的平台
189
+
190
+ 干货满满 📘,等你来撩 💗,OpenMMLab 社区期待您的加入 👬
src/ndl_layout/mmdetection/configs/_base_/datasets/cityscapes_detection.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dataset settings
2
+ dataset_type = 'CityscapesDataset'
3
+ data_root = 'data/cityscapes/'
4
+ img_norm_cfg = dict(
5
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6
+ train_pipeline = [
7
+ dict(type='LoadImageFromFile'),
8
+ dict(type='LoadAnnotations', with_bbox=True),
9
+ dict(
10
+ type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True),
11
+ dict(type='RandomFlip', flip_ratio=0.5),
12
+ dict(type='Normalize', **img_norm_cfg),
13
+ dict(type='Pad', size_divisor=32),
14
+ dict(type='DefaultFormatBundle'),
15
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
16
+ ]
17
+ test_pipeline = [
18
+ dict(type='LoadImageFromFile'),
19
+ dict(
20
+ type='MultiScaleFlipAug',
21
+ img_scale=(2048, 1024),
22
+ flip=False,
23
+ transforms=[
24
+ dict(type='Resize', keep_ratio=True),
25
+ dict(type='RandomFlip'),
26
+ dict(type='Normalize', **img_norm_cfg),
27
+ dict(type='Pad', size_divisor=32),
28
+ dict(type='ImageToTensor', keys=['img']),
29
+ dict(type='Collect', keys=['img']),
30
+ ])
31
+ ]
32
+ data = dict(
33
+ samples_per_gpu=1,
34
+ workers_per_gpu=2,
35
+ train=dict(
36
+ type='RepeatDataset',
37
+ times=8,
38
+ dataset=dict(
39
+ type=dataset_type,
40
+ ann_file=data_root +
41
+ 'annotations/instancesonly_filtered_gtFine_train.json',
42
+ img_prefix=data_root + 'leftImg8bit/train/',
43
+ pipeline=train_pipeline)),
44
+ val=dict(
45
+ type=dataset_type,
46
+ ann_file=data_root +
47
+ 'annotations/instancesonly_filtered_gtFine_val.json',
48
+ img_prefix=data_root + 'leftImg8bit/val/',
49
+ pipeline=test_pipeline),
50
+ test=dict(
51
+ type=dataset_type,
52
+ ann_file=data_root +
53
+ 'annotations/instancesonly_filtered_gtFine_test.json',
54
+ img_prefix=data_root + 'leftImg8bit/test/',
55
+ pipeline=test_pipeline))
56
+ evaluation = dict(interval=1, metric='bbox')
src/ndl_layout/mmdetection/configs/_base_/datasets/cityscapes_instance.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dataset settings
2
+ dataset_type = 'CityscapesDataset'
3
+ data_root = 'data/cityscapes/'
4
+ img_norm_cfg = dict(
5
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6
+ train_pipeline = [
7
+ dict(type='LoadImageFromFile'),
8
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
9
+ dict(
10
+ type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True),
11
+ dict(type='RandomFlip', flip_ratio=0.5),
12
+ dict(type='Normalize', **img_norm_cfg),
13
+ dict(type='Pad', size_divisor=32),
14
+ dict(type='DefaultFormatBundle'),
15
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
16
+ ]
17
+ test_pipeline = [
18
+ dict(type='LoadImageFromFile'),
19
+ dict(
20
+ type='MultiScaleFlipAug',
21
+ img_scale=(2048, 1024),
22
+ flip=False,
23
+ transforms=[
24
+ dict(type='Resize', keep_ratio=True),
25
+ dict(type='RandomFlip'),
26
+ dict(type='Normalize', **img_norm_cfg),
27
+ dict(type='Pad', size_divisor=32),
28
+ dict(type='ImageToTensor', keys=['img']),
29
+ dict(type='Collect', keys=['img']),
30
+ ])
31
+ ]
32
+ data = dict(
33
+ samples_per_gpu=1,
34
+ workers_per_gpu=2,
35
+ train=dict(
36
+ type='RepeatDataset',
37
+ times=8,
38
+ dataset=dict(
39
+ type=dataset_type,
40
+ ann_file=data_root +
41
+ 'annotations/instancesonly_filtered_gtFine_train.json',
42
+ img_prefix=data_root + 'leftImg8bit/train/',
43
+ pipeline=train_pipeline)),
44
+ val=dict(
45
+ type=dataset_type,
46
+ ann_file=data_root +
47
+ 'annotations/instancesonly_filtered_gtFine_val.json',
48
+ img_prefix=data_root + 'leftImg8bit/val/',
49
+ pipeline=test_pipeline),
50
+ test=dict(
51
+ type=dataset_type,
52
+ ann_file=data_root +
53
+ 'annotations/instancesonly_filtered_gtFine_test.json',
54
+ img_prefix=data_root + 'leftImg8bit/test/',
55
+ pipeline=test_pipeline))
56
+ evaluation = dict(metric=['bbox', 'segm'])
src/ndl_layout/mmdetection/configs/_base_/datasets/coco_detection.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dataset settings
2
+ dataset_type = 'CocoDataset'
3
+ data_root = 'data/coco/'
4
+ img_norm_cfg = dict(
5
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6
+ train_pipeline = [
7
+ dict(type='LoadImageFromFile'),
8
+ dict(type='LoadAnnotations', with_bbox=True),
9
+ dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
10
+ dict(type='RandomFlip', flip_ratio=0.5),
11
+ dict(type='Normalize', **img_norm_cfg),
12
+ dict(type='Pad', size_divisor=32),
13
+ dict(type='DefaultFormatBundle'),
14
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
15
+ ]
16
+ test_pipeline = [
17
+ dict(type='LoadImageFromFile'),
18
+ dict(
19
+ type='MultiScaleFlipAug',
20
+ img_scale=(1333, 800),
21
+ flip=False,
22
+ transforms=[
23
+ dict(type='Resize', keep_ratio=True),
24
+ dict(type='RandomFlip'),
25
+ dict(type='Normalize', **img_norm_cfg),
26
+ dict(type='Pad', size_divisor=32),
27
+ dict(type='ImageToTensor', keys=['img']),
28
+ dict(type='Collect', keys=['img']),
29
+ ])
30
+ ]
31
+ data = dict(
32
+ samples_per_gpu=2,
33
+ workers_per_gpu=2,
34
+ train=dict(
35
+ type=dataset_type,
36
+ ann_file=data_root + 'annotations/instances_train2017.json',
37
+ img_prefix=data_root + 'train2017/',
38
+ pipeline=train_pipeline),
39
+ val=dict(
40
+ type=dataset_type,
41
+ ann_file=data_root + 'annotations/instances_val2017.json',
42
+ img_prefix=data_root + 'val2017/',
43
+ pipeline=test_pipeline),
44
+ test=dict(
45
+ type=dataset_type,
46
+ ann_file=data_root + 'annotations/instances_val2017.json',
47
+ img_prefix=data_root + 'val2017/',
48
+ pipeline=test_pipeline))
49
+ evaluation = dict(interval=1, metric='bbox')
src/ndl_layout/mmdetection/configs/_base_/datasets/coco_instance.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dataset settings
2
+ dataset_type = 'CocoDataset'
3
+ data_root = 'data/coco/'
4
+ img_norm_cfg = dict(
5
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6
+ train_pipeline = [
7
+ dict(type='LoadImageFromFile'),
8
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
9
+ dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
10
+ dict(type='RandomFlip', flip_ratio=0.5),
11
+ dict(type='Normalize', **img_norm_cfg),
12
+ dict(type='Pad', size_divisor=32),
13
+ dict(type='DefaultFormatBundle'),
14
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
15
+ ]
16
+ test_pipeline = [
17
+ dict(type='LoadImageFromFile'),
18
+ dict(
19
+ type='MultiScaleFlipAug',
20
+ img_scale=(1333, 800),
21
+ flip=False,
22
+ transforms=[
23
+ dict(type='Resize', keep_ratio=True),
24
+ dict(type='RandomFlip'),
25
+ dict(type='Normalize', **img_norm_cfg),
26
+ dict(type='Pad', size_divisor=32),
27
+ dict(type='ImageToTensor', keys=['img']),
28
+ dict(type='Collect', keys=['img']),
29
+ ])
30
+ ]
31
+ data = dict(
32
+ samples_per_gpu=2,
33
+ workers_per_gpu=2,
34
+ train=dict(
35
+ type=dataset_type,
36
+ ann_file=data_root + 'annotations/instances_train2017.json',
37
+ img_prefix=data_root + 'train2017/',
38
+ pipeline=train_pipeline),
39
+ val=dict(
40
+ type=dataset_type,
41
+ ann_file=data_root + 'annotations/instances_val2017.json',
42
+ img_prefix=data_root + 'val2017/',
43
+ pipeline=test_pipeline),
44
+ test=dict(
45
+ type=dataset_type,
46
+ ann_file=data_root + 'annotations/instances_val2017.json',
47
+ img_prefix=data_root + 'val2017/',
48
+ pipeline=test_pipeline))
49
+ evaluation = dict(metric=['bbox', 'segm'])