KingHacker9000 committed
Commit 5e3465d · 1 Parent(s): 186b47c

Re-commit with fonts & image under Git LFS

Files changed (44)
  1. .gitattributes +2 -0
  2. .gitignore +5 -0
  3. =0.29 +168 -0
  4. Dockerfile +38 -0
  5. LICENSE +437 -0
  6. README.md +160 -10
  7. code/__init__.py +0 -0
  8. code/bezier.py +122 -0
  9. code/config.py +104 -0
  10. code/config/base.yaml +46 -0
  11. code/data/fonts/Bell MT.ttf +3 -0
  12. code/data/fonts/DeliusUnicase-Regular.ttf +3 -0
  13. code/data/fonts/HobeauxRococeaux-Sherman.ttf +3 -0
  14. code/data/fonts/IndieFlower-Regular.ttf +3 -0
  15. code/data/fonts/JosefinSans-Light.ttf +3 -0
  16. code/data/fonts/KaushanScript-Regular.ttf +3 -0
  17. code/data/fonts/LuckiestGuy-Regular.ttf +3 -0
  18. code/data/fonts/Noteworthy-Bold.ttf +3 -0
  19. code/data/fonts/Quicksand.ttf +3 -0
  20. code/data/fonts/Saira-Regular.ttf +3 -0
  21. code/data/init/KaushanScript-Regular_B.svg +5 -0
  22. code/data/init/KaushanScript-Regular_BUNNY.svg +14 -0
  23. code/data/init/KaushanScript-Regular_BUNNY_scaled.svg +11 -0
  24. code/data/init/KaushanScript-Regular_B_scaled.svg +7 -0
  25. code/data/init/KaushanScript-Regular_N.svg +5 -0
  26. code/data/init/KaushanScript-Regular_N_scaled.svg +7 -0
  27. code/data/init/KaushanScript-Regular_U.svg +5 -0
  28. code/data/init/KaushanScript-Regular_U_scaled.svg +7 -0
  29. code/data/init/KaushanScript-Regular_Y.svg +5 -0
  30. code/data/init/KaushanScript-Regular_Y_scaled.svg +7 -0
  31. code/generate.py +26 -0
  32. code/losses.py +193 -0
  33. code/main.py +188 -0
  34. code/save_svg.py +155 -0
  35. code/ttf.py +265 -0
  36. code/utils.py +221 -0
  37. coming_soon.png +3 -0
  38. images/HobeauxRococeaux-Sherman_NATURE_T.svg +51 -0
  39. images/KaushanScript-Regular_BUNNY_Y.svg +45 -0
  40. images/teaser.png +3 -0
  41. requirements.txt +148 -0
  42. rest_api.py +22 -0
  43. run_word_as_image.sh +25 -0
  44. wai_service.py +68 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.png filter=lfs diff=lfs merge=lfs -text
+ *.ttf filter=lfs diff=lfs merge=lfs -text
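The two added rules are what `git lfs track` typically writes; a sketch of reproducing this part of the commit (assuming the `git-lfs` extension is installed):

```bash
git lfs install
git lfs track "*.png"   # appends the *.png filter rule above to .gitattributes
git lfs track "*.ttf"   # appends the *.ttf filter rule
git add .gitattributes
```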
.gitignore ADDED
@@ -0,0 +1,5 @@
+ TOKEN
+ WriteToken
+ diffvg/
+ output/
+ __pycache__/
=0.29 ADDED
@@ -0,0 +1,168 @@
+ Channels:
+  - conda-forge
+  - defaults
+ Platform: linux-64
+ Collecting package metadata (repodata.json): done
+ Solving environment: done
+
+ ## Package Plan ##
+
+   environment location: /home/ashish/miniconda3/envs/word
+
+   added / updated specs:
+     - diffusers
+
+
+ The following packages will be downloaded:
+
+     package                    |            build
+     ---------------------------|-----------------
+     diffusers-0.30.3           |   pyhd8ed1ab_0         711 KB  conda-forge
+     huggingface_hub-0.26.5     |   pyhd8ed1ab_0         268 KB  conda-forge
+     libabseil-20240116.2       | cxx17_he02047a_1       1.2 MB  conda-forge
+     libprotobuf-4.25.3         |     he621ea3_0         2.8 MB
+     pytorch-2.3.0              | cpu_py38h08bb5f6_1    73.3 MB
+     typing_extensions-4.12.2   |   pyha770c72_0          39 KB  conda-forge
+     ------------------------------------------------------------
+                                            Total:      78.4 MB
+
+ The following NEW packages will be INSTALLED:
+
+   huggingface_hub    conda-forge/noarch::huggingface_hub-0.26.5-pyhd8ed1ab_0
+   libabseil          conda-forge/linux-64::libabseil-20240116.2-cxx17_he02047a_1
+   libprotobuf        pkgs/main/linux-64::libprotobuf-4.25.3-he621ea3_0
+   pytorch            pkgs/main/linux-64::pytorch-2.3.0-cpu_py38h08bb5f6_1
+   typing_extensions  conda-forge/noarch::typing_extensions-4.12.2-pyha770c72_0
+
+ The following packages will be UPDATED:
+
+   diffusers          pypi/pypi::diffusers-0.8.0-pypi_0 --> conda-forge/noarch::diffusers-0.30.3-pyhd8ed1ab_0
+
+
+ Proceed ([y]/n)?
+
+ Downloading and Extracting Packages: ...working... done
+ Preparing transaction: done
+ Verifying transaction: done
+ Executing transaction: done
Dockerfile ADDED
@@ -0,0 +1,38 @@
+ # ───────────────────────────────────────────────────────────────
+ #  Word-As-Image - FastAPI GPU Space
+ # ───────────────────────────────────────────────────────────────
+ FROM python:3.10-slim
+
+ # —— OS packages needed for compiling diffvg & running FastAPI ——
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+         build-essential git cmake ffmpeg \
+         libgl1-mesa-glx libglib2.0-0 libpng-dev libgomp1 \
+     && rm -rf /var/lib/apt/lists/*
+
+ # —— Non-root user (matches HF best practice) ——
+ RUN useradd -m -u 1000 appuser
+ USER appuser
+ WORKDIR /app
+
+ # —— Keep Hugging Face & Torch caches between container restarts ——
+ # (HF_HOME gets its own ENV instruction: assignments within a single ENV
+ #  cannot reference each other, so $HF_HOME below would otherwise be empty)
+ ENV HF_HOME=/home/appuser/.cache/huggingface
+ ENV TRANSFORMERS_CACHE=$HF_HOME \
+     TORCH_HOME=$HF_HOME/torch \
+     PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128
+
+ # —— Python deps ——
+ COPY --chown=appuser requirements.txt .
+ RUN pip install --upgrade pip && \
+     pip install --no-cache-dir -r requirements.txt && \
+     # diffvg must be built from source
+     pip install --no-cache-dir git+https://github.com/BachiLi/diffvg.git
+
+ # —— Project code ——
+ COPY --chown=appuser . /app
+
+ # —— Expose the FastAPI port ——
+ EXPOSE 7860
+
+ # —— Start the API (Swagger UI auto-appears at /docs) ——
+ CMD ["uvicorn", "rest_api:app", "--host", "0.0.0.0", "--port", "7860"]
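A quick usage sketch for the image above (the tag `word-as-image-api` is an arbitrary choice, not part of the commit; `--gpus all` assumes the NVIDIA container toolkit is installed):

```bash
docker build -t word-as-image-api .
docker run --gpus all -p 7860:7860 word-as-image-api
# Swagger UI is then served at http://localhost:7860/docs
```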
LICENSE ADDED
@@ -0,0 +1,437 @@
+ Attribution-NonCommercial-ShareAlike 4.0 International
+
+ =======================================================================
+
+ Creative Commons Corporation ("Creative Commons") is not a law firm and
+ does not provide legal services or legal advice. Distribution of
+ Creative Commons public licenses does not create a lawyer-client or
+ other relationship. Creative Commons makes its licenses and related
+ information available on an "as-is" basis. Creative Commons gives no
+ warranties regarding its licenses, any material licensed under their
+ terms and conditions, or any related information. Creative Commons
+ disclaims all liability for damages resulting from their use to the
+ fullest extent possible.
+
+ Using Creative Commons Public Licenses
+
+ Creative Commons public licenses provide a standard set of terms and
+ conditions that creators and other rights holders may use to share
+ original works of authorship and other material subject to copyright
+ and certain other rights specified in the public license below. The
+ following considerations are for informational purposes only, are not
+ exhaustive, and do not form part of our licenses.
+
+      Considerations for licensors: Our public licenses are
+      intended for use by those authorized to give the public
+      permission to use material in ways otherwise restricted by
+      copyright and certain other rights. Our licenses are
+      irrevocable. Licensors should read and understand the terms
+      and conditions of the license they choose before applying it.
+      Licensors should also secure all rights necessary before
+      applying our licenses so that the public can reuse the
+      material as expected. Licensors should clearly mark any
+      material not subject to the license. This includes other CC-
+      licensed material, or material used under an exception or
+      limitation to copyright. More considerations for licensors:
+      wiki.creativecommons.org/Considerations_for_licensors
+
+      Considerations for the public: By using one of our public
+      licenses, a licensor grants the public permission to use the
+      licensed material under specified terms and conditions. If
+      the licensor's permission is not necessary for any reason--for
+      example, because of any applicable exception or limitation to
+      copyright--then that use is not regulated by the license. Our
+      licenses grant only permissions under copyright and certain
+      other rights that a licensor has authority to grant. Use of
+      the licensed material may still be restricted for other
+      reasons, including because others have copyright or other
+      rights in the material. A licensor may make special requests,
+      such as asking that all changes be marked or described.
+      Although not required by our licenses, you are encouraged to
+      respect those requests where reasonable. More considerations
+      for the public:
+      wiki.creativecommons.org/Considerations_for_licensees
+
+ =======================================================================
+
+ Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International
+ Public License
+
+ By exercising the Licensed Rights (defined below), You accept and agree
+ to be bound by the terms and conditions of this Creative Commons
+ Attribution-NonCommercial-ShareAlike 4.0 International Public License
+ ("Public License"). To the extent this Public License may be
+ interpreted as a contract, You are granted the Licensed Rights in
+ consideration of Your acceptance of these terms and conditions, and the
+ Licensor grants You such rights in consideration of benefits the
+ Licensor receives from making the Licensed Material available under
+ these terms and conditions.
+
+
+ Section 1 -- Definitions.
+
+   a. Adapted Material means material subject to Copyright and Similar
+      Rights that is derived from or based upon the Licensed Material
+      and in which the Licensed Material is translated, altered,
+      arranged, transformed, or otherwise modified in a manner requiring
+      permission under the Copyright and Similar Rights held by the
+      Licensor. For purposes of this Public License, where the Licensed
+      Material is a musical work, performance, or sound recording,
+      Adapted Material is always produced where the Licensed Material is
+      synched in timed relation with a moving image.
+
+   b. Adapter's License means the license You apply to Your Copyright
+      and Similar Rights in Your contributions to Adapted Material in
+      accordance with the terms and conditions of this Public License.
+
+   c. BY-NC-SA Compatible License means a license listed at
+      creativecommons.org/compatiblelicenses, approved by Creative
+      Commons as essentially the equivalent of this Public License.
+
+   d. Copyright and Similar Rights means copyright and/or similar rights
+      closely related to copyright including, without limitation,
+      performance, broadcast, sound recording, and Sui Generis Database
+      Rights, without regard to how the rights are labeled or
+      categorized. For purposes of this Public License, the rights
+      specified in Section 2(b)(1)-(2) are not Copyright and Similar
+      Rights.
+
+   e. Effective Technological Measures means those measures that, in the
+      absence of proper authority, may not be circumvented under laws
+      fulfilling obligations under Article 11 of the WIPO Copyright
+      Treaty adopted on December 20, 1996, and/or similar international
+      agreements.
+
+   f. Exceptions and Limitations means fair use, fair dealing, and/or
+      any other exception or limitation to Copyright and Similar Rights
+      that applies to Your use of the Licensed Material.
+
+   g. License Elements means the license attributes listed in the name
+      of a Creative Commons Public License. The License Elements of this
+      Public License are Attribution, NonCommercial, and ShareAlike.
+
+   h. Licensed Material means the artistic or literary work, database,
+      or other material to which the Licensor applied this Public
+      License.
+
+   i. Licensed Rights means the rights granted to You subject to the
+      terms and conditions of this Public License, which are limited to
+      all Copyright and Similar Rights that apply to Your use of the
+      Licensed Material and that the Licensor has authority to license.
+
+   j. Licensor means the individual(s) or entity(ies) granting rights
+      under this Public License.
+
+   k. NonCommercial means not primarily intended for or directed towards
+      commercial advantage or monetary compensation. For purposes of
+      this Public License, the exchange of the Licensed Material for
+      other material subject to Copyright and Similar Rights by digital
+      file-sharing or similar means is NonCommercial provided there is
+      no payment of monetary compensation in connection with the
+      exchange.
+
+   l. Share means to provide material to the public by any means or
+      process that requires permission under the Licensed Rights, such
+      as reproduction, public display, public performance, distribution,
+      dissemination, communication, or importation, and to make material
+      available to the public including in ways that members of the
+      public may access the material from a place and at a time
+      individually chosen by them.
+
+   m. Sui Generis Database Rights means rights other than copyright
+      resulting from Directive 96/9/EC of the European Parliament and of
+      the Council of 11 March 1996 on the legal protection of databases,
+      as amended and/or succeeded, as well as other essentially
+      equivalent rights anywhere in the world.
+
+   n. You means the individual or entity exercising the Licensed Rights
+      under this Public License. Your has a corresponding meaning.
+
+
+ Section 2 -- Scope.
+
+   a. License grant.
+
+        1. Subject to the terms and conditions of this Public License,
+           the Licensor hereby grants You a worldwide, royalty-free,
+           non-sublicensable, non-exclusive, irrevocable license to
+           exercise the Licensed Rights in the Licensed Material to:
+
+             a. reproduce and Share the Licensed Material, in whole or
+                in part, for NonCommercial purposes only; and
+
+             b. produce, reproduce, and Share Adapted Material for
+                NonCommercial purposes only.
+
+        2. Exceptions and Limitations. For the avoidance of doubt, where
+           Exceptions and Limitations apply to Your use, this Public
+           License does not apply, and You do not need to comply with
+           its terms and conditions.
+
+        3. Term. The term of this Public License is specified in Section
+           6(a).
+
+        4. Media and formats; technical modifications allowed. The
+           Licensor authorizes You to exercise the Licensed Rights in
+           all media and formats whether now known or hereafter created,
+           and to make technical modifications necessary to do so. The
+           Licensor waives and/or agrees not to assert any right or
+           authority to forbid You from making technical modifications
+           necessary to exercise the Licensed Rights, including
+           technical modifications necessary to circumvent Effective
+           Technological Measures. For purposes of this Public License,
+           simply making modifications authorized by this Section 2(a)
+           (4) never produces Adapted Material.
+
+        5. Downstream recipients.
+
+             a. Offer from the Licensor -- Licensed Material. Every
+                recipient of the Licensed Material automatically
+                receives an offer from the Licensor to exercise the
+                Licensed Rights under the terms and conditions of this
+                Public License.
+
+             b. Additional offer from the Licensor -- Adapted Material.
+                Every recipient of Adapted Material from You
+                automatically receives an offer from the Licensor to
+                exercise the Licensed Rights in the Adapted Material
+                under the conditions of the Adapter's License You apply.
+
+             c. No downstream restrictions. You may not offer or impose
+                any additional or different terms or conditions on, or
+                apply any Effective Technological Measures to, the
+                Licensed Material if doing so restricts exercise of the
+                Licensed Rights by any recipient of the Licensed
+                Material.
+
+        6. No endorsement. Nothing in this Public License constitutes or
+           may be construed as permission to assert or imply that You
+           are, or that Your use of the Licensed Material is, connected
+           with, or sponsored, endorsed, or granted official status by,
+           the Licensor or others designated to receive attribution as
+           provided in Section 3(a)(1)(A)(i).
+
+   b. Other rights.
+
+        1. Moral rights, such as the right of integrity, are not
+           licensed under this Public License, nor are publicity,
+           privacy, and/or other similar personality rights; however, to
+           the extent possible, the Licensor waives and/or agrees not to
+           assert any such rights held by the Licensor to the limited
+           extent necessary to allow You to exercise the Licensed
+           Rights, but not otherwise.
+
+        2. Patent and trademark rights are not licensed under this
+           Public License.
+
+        3. To the extent possible, the Licensor waives any right to
+           collect royalties from You for the exercise of the Licensed
+           Rights, whether directly or through a collecting society
+           under any voluntary or waivable statutory or compulsory
+           licensing scheme. In all other cases the Licensor expressly
+           reserves any right to collect such royalties, including when
+           the Licensed Material is used other than for NonCommercial
+           purposes.
+
+
+ Section 3 -- License Conditions.
+
+ Your exercise of the Licensed Rights is expressly made subject to the
+ following conditions.
+
+   a. Attribution.
+
+        1. If You Share the Licensed Material (including in modified
+           form), You must:
+
+             a. retain the following if it is supplied by the Licensor
+                with the Licensed Material:
+
+                  i. identification of the creator(s) of the Licensed
+                     Material and any others designated to receive
+                     attribution, in any reasonable manner requested by
+                     the Licensor (including by pseudonym if
+                     designated);
+
+                  ii. a copyright notice;
+
+                  iii. a notice that refers to this Public License;
+
+                  iv. a notice that refers to the disclaimer of
+                      warranties;
+
+                  v. a URI or hyperlink to the Licensed Material to the
+                     extent reasonably practicable;
+
+             b. indicate if You modified the Licensed Material and
+                retain an indication of any previous modifications; and
+
+             c. indicate the Licensed Material is licensed under this
+                Public License, and include the text of, or the URI or
+                hyperlink to, this Public License.
+
+        2. You may satisfy the conditions in Section 3(a)(1) in any
+           reasonable manner based on the medium, means, and context in
+           which You Share the Licensed Material. For example, it may be
+           reasonable to satisfy the conditions by providing a URI or
+           hyperlink to a resource that includes the required
+           information.
+        3. If requested by the Licensor, You must remove any of the
+           information required by Section 3(a)(1)(A) to the extent
+           reasonably practicable.
+
+   b. ShareAlike.
+
+      In addition to the conditions in Section 3(a), if You Share
+      Adapted Material You produce, the following conditions also apply.
+
+        1. The Adapter's License You apply must be a Creative Commons
+           license with the same License Elements, this version or
+           later, or a BY-NC-SA Compatible License.
+
+        2. You must include the text of, or the URI or hyperlink to, the
+           Adapter's License You apply. You may satisfy this condition
+           in any reasonable manner based on the medium, means, and
+           context in which You Share Adapted Material.
+
+        3. You may not offer or impose any additional or different terms
+           or conditions on, or apply any Effective Technological
+           Measures to, Adapted Material that restrict exercise of the
+           rights granted under the Adapter's License You apply.
+
+
+ Section 4 -- Sui Generis Database Rights.
+
+ Where the Licensed Rights include Sui Generis Database Rights that
+ apply to Your use of the Licensed Material:
+
+   a. for the avoidance of doubt, Section 2(a)(1) grants You the right
+      to extract, reuse, reproduce, and Share all or a substantial
+      portion of the contents of the database for NonCommercial purposes
+      only;
+
+   b. if You include all or a substantial portion of the database
+      contents in a database in which You have Sui Generis Database
+      Rights, then the database in which You have Sui Generis Database
+      Rights (but not its individual contents) is Adapted Material,
+      including for purposes of Section 3(b); and
+
+   c. You must comply with the conditions in Section 3(a) if You Share
+      all or a substantial portion of the contents of the database.
+
+ For the avoidance of doubt, this Section 4 supplements and does not
+ replace Your obligations under this Public License where the Licensed
+ Rights include other Copyright and Similar Rights.
+
+
+ Section 5 -- Disclaimer of Warranties and Limitation of Liability.
+
+   a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
+      EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
+      AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
+      ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
+      IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
+      WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
+      PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
+      ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
+      KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
+      ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
+
+   b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
+      TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
+      NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
+      INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
+      COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
+      USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
+      ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
+      DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
+      IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
+
+   c. The disclaimer of warranties and limitation of liability provided
+      above shall be interpreted in a manner that, to the extent
+      possible, most closely approximates an absolute disclaimer and
+      waiver of all liability.
+
+
+ Section 6 -- Term and Termination.
+
+   a. This Public License applies for the term of the Copyright and
+      Similar Rights licensed here. However, if You fail to comply with
+      this Public License, then Your rights under this Public License
+      terminate automatically.
+
+   b. Where Your right to use the Licensed Material has terminated under
+      Section 6(a), it reinstates:
+
+        1. automatically as of the date the violation is cured, provided
+           it is cured within 30 days of Your discovery of the
+           violation; or
+
+        2. upon express reinstatement by the Licensor.
+
+      For the avoidance of doubt, this Section 6(b) does not affect any
+      right the Licensor may have to seek remedies for Your violations
+      of this Public License.
+
+   c. For the avoidance of doubt, the Licensor may also offer the
+      Licensed Material under separate terms or conditions or stop
+      distributing the Licensed Material at any time; however, doing so
+      will not terminate this Public License.
+
+   d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
+      License.
+
+
+ Section 7 -- Other Terms and Conditions.
+
+   a. The Licensor shall not be bound by any additional or different
+      terms or conditions communicated by You unless expressly agreed.
+
+   b. Any arrangements, understandings, or agreements regarding the
+      Licensed Material not stated herein are separate from and
+      independent of the terms and conditions of this Public License.
+
+
+ Section 8 -- Interpretation.
+
+   a. For the avoidance of doubt, this Public License does not, and
+      shall not be interpreted to, reduce, limit, restrict, or impose
+      conditions on any use of the Licensed Material that could lawfully
+      be made without permission under this Public License.
+
+   b. To the extent possible, if any provision of this Public License is
+      deemed unenforceable, it shall be automatically reformed to the
+      minimum extent necessary to make it enforceable. If the provision
+      cannot be reformed, it shall be severed from this Public License
+      without affecting the enforceability of the remaining terms and
+      conditions.
+
+   c. No term or condition of this Public License will be waived and no
+      failure to comply consented to unless expressly agreed to by the
+      Licensor.
+
+   d. Nothing in this Public License constitutes or may be interpreted
+      as a limitation upon, or waiver of, any privileges and immunities
+      that apply to the Licensor or You, including from the legal
+      processes of any jurisdiction or authority.
+
+ =======================================================================
+
+ Creative Commons is not a party to its public
+ licenses. Notwithstanding, Creative Commons may elect to apply one of
+ its public licenses to material it publishes and in those instances
+ will be considered the “Licensor.” The text of the Creative Commons
+ public licenses is dedicated to the public domain under the CC0 Public
+ Domain Dedication. Except for the limited purpose of indicating that
+ material is shared under a Creative Commons public license or as
+ otherwise permitted by the Creative Commons policies published at
+ creativecommons.org/policies, Creative Commons does not authorize the
+ use of the trademark "Creative Commons" or any other trademark or logo
+ of Creative Commons without its prior written consent including,
+ without limitation, in connection with any unauthorized modifications
+ to any of its public licenses or any other arrangements,
+ understandings, or agreements concerning use of licensed material. For
+ the avoidance of doubt, this paragraph does not form part of the
+ public licenses.
+
+ Creative Commons may be contacted at creativecommons.org.
README.md CHANGED
@@ -1,10 +1,160 @@
- ---
- title: Word As Image Api
- emoji: 🏃
- colorFrom: blue
- colorTo: yellow
- sdk: docker
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Word-As-Image for Semantic Typography (SIGGRAPH 2023 - Honorable Mention Award)
+
+
+ [![arXiv](https://img.shields.io/badge/📃-arXiv%20-red.svg)](https://arxiv.org/abs/2303.01818)
+ [![webpage](https://img.shields.io/badge/🌐-Website%20-blue.svg)](https://wordasimage.github.io/Word-As-Image-Page/)
+ [![Huggingface space](https://img.shields.io/badge/🤗-Demo%20-yellow.svg)](https://huggingface.co/spaces/SemanticTypography/Word-As-Image)
+ [![Youtube](https://img.shields.io/badge/📽️-Video%20-orchid.svg)](https://www.youtube.com/watch?v=9D12a6RCQaw)
+
+ <br>
+ <div align="center">
+ <img src="images/teaser.png" width="100%">
+ </div>
+ <br><br>
+ A few examples of our <b>W</b>ord-<b>A</b>s-<b>I</b>mage illustrations in various fonts and for different textual concepts. The semantically adjusted letters are created
+ completely automatically using our method, and can then be used for further creative design, as we illustrate here.<br><br>
+
+ > Shir Iluz*, Yael Vinker*, Amir Hertz, Daniel Berio, Daniel Cohen-Or, Ariel Shamir
+ > \* Denotes equal contribution
+ >
+ > A word-as-image is a semantic typography technique in which a word illustration
+ presents a visualization of the meaning of the word, while also
+ preserving its readability. We present a method to create word-as-image
+ illustrations automatically. This task is highly challenging, as it requires
+ semantic understanding of the word and a creative idea of where and how to
+ depict these semantics in a visually pleasing and legible manner. We rely on
+ the remarkable ability of recent large pretrained language-vision models to
+ distill textual concepts visually. We target simple, concise, black-and-white
+ designs that convey the semantics clearly. We deliberately do not change the
+ color or texture of the letters and do not use embellishments. Our method
+ optimizes the outline of each letter to convey the desired concept, guided by
+ a pretrained Stable Diffusion model. We incorporate additional loss terms
+ to ensure the legibility of the text and the preservation of the style of the
+ font. We show high-quality and engaging results on numerous examples
+ and compare to alternative techniques.
+
+
+ ## Description
+ Official implementation of the Word-As-Image for Semantic Typography paper.
+ <br>
+
+ ## Setup
+
+ 1. Clone the repo:
+ ```bash
+ git clone https://github.com/WordAsImage/Word-As-Image.git
+ cd Word-As-Image
+ ```
+ 2. Create a new conda environment and install the libraries:
+ ```bash
+ conda create --name word python=3.8.15
+ conda activate word
+ pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 --extra-index-url https://download.pytorch.org/whl/cu113
+ conda install -y numpy scikit-image
+ conda install -y -c anaconda cmake
+ conda install -y -c conda-forge ffmpeg
+ pip install svgwrite svgpathtools cssutils numba torch-tools scikit-fmm easydict visdom freetype-py shapely
+ pip install opencv-python==4.5.4.60
+ pip install kornia==0.6.8
+ pip install wandb
+ pip install shapely
+ ```
+
+ 3. Install diffusers:
+ ```bash
+ pip install diffusers==0.8
+ pip install transformers scipy ftfy accelerate
+ ```
+ 4. Install diffvg:
+ ```bash
+ git clone https://github.com/BachiLi/diffvg.git
+ cd diffvg
+ git submodule update --init --recursive
+ python setup.py install
+ ```
+
+ 5. Paste your HuggingFace [access token](https://huggingface.co/settings/tokens) for StableDiffusion in the TOKEN file, for example:
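A one-line sketch of step 5 (the token value is a placeholder; `code/config.py` reads this plain-text `TOKEN` file from the repo root):

```bash
echo "hf_XXXXXXXXXXXXXXXX" > TOKEN   # replace with your own Hugging Face access token
```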
+ ## Run Experiments
+ ```bash
+ conda activate word
+ cd Word-As-Image
+
+ # Please modify the parameters accordingly in the file and run:
+ bash run_word_as_image.sh
+
+ # Or run:
+ python code/main.py --experiment <experiment> --semantic_concept <concept> --optimized_letter <letter> --seed <seed> --font <font_name> --use_wandb <0/1> --wandb_user <user name>
+ ```
+ * ```--semantic_concept``` : the semantic concept to insert
+ * ```--optimized_letter``` : one letter in the word to optimize
+ * ```--font``` : font name; the <font name>.ttf file should be located in code/data/fonts/
+
+ Optional arguments:
+ * ```--word``` : the text to work on, default: the semantic concept
+ * ```--config``` : path to the config file, default: code/config/base.yaml
+ * ```--experiment``` : any experiment defined in the config file, default: conformal_0.5_dist_pixel_100_kernel201
+ * ```--log_dir``` : default: output folder
+ * ```--prompt_suffix``` : default: "minimal flat 2d vector. lineal color. trending on artstation"
+
+ ### Examples
+ ```bash
+ python code/main.py --semantic_concept "BUNNY" --optimized_letter "Y" --font "KaushanScript-Regular" --seed 0
+ ```
+ <br>
+ <div align="center">
+ <img src="images/KaushanScript-Regular_BUNNY_Y.svg" width="22%">
+ </div>
+
+
+ ```bash
+ python code/main.py --semantic_concept "LEAVES" --word "NATURE" --optimized_letter "T" --font "HobeauxRococeaux-Sherman" --seed 0
+ ```
+ <br>
+ <div align="center">
+ <img src="images/HobeauxRococeaux-Sherman_NATURE_T.svg" width="25%">
+ </div>
+
+ * Note that the arguments are case-sensitive; both upper- and lowercase letters are handled, matching the case of the input word.
+
+
+ ## Tips
+ If the outcome does not meet your quality expectations, you could try the following options (a config sketch follows the list):
+
+ 1. Adjusting the weight α of the L_acap loss, which preserves the letter's structure after deformation.
+ 2. Modifying the σ parameter of the low-pass filter used in the L_tone loss, which limits the degree of deviation from the original letter.
+ 3. Changing the number of control points, as this can influence the outputs.
+ 4. Experimenting with different seeds, as each may produce slightly different results.
+ 5. Changing the font type, as this can also result in various outputs.
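As a sketch of tips 1 and 2 under the config scheme in code/config/base.yaml (shown later in this commit): the experiment name and values below are illustrative only; the keys `angeles_w` (conformal/L_acap weight) and `pixel_dist_sigma` (tone-loss σ) come from that file:

```bash
# append a hypothetical derived experiment, then run it
cat >> code/config/base.yaml <<'EOF'

my_tuned_experiment:
  parent_config: conformal_0.5_dist_pixel_100_kernel201
  loss:
    tone:
      pixel_dist_sigma: 40   # illustrative: looser tone constraint than the default 30
    conformal:
      angeles_w: 1.0         # illustrative: stronger structure preservation than 0.5
EOF
python code/main.py --experiment my_tuned_experiment --semantic_concept "BUNNY" --optimized_letter "Y" --font "KaushanScript-Regular" --seed 0
```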
+
+
+
+ ## Acknowledgement
+ Our implementation is based on the Stable Diffusion text-to-image model from Hugging Face's [Diffusers](https://github.com/huggingface/diffusers) library, combined with [Diffvg](https://github.com/BachiLi/diffvg). The framework is built on [Live](https://github.com/Picsart-AI-Research/LIVE-Layerwise-Image-Vectorization).
+
+ ## Citation
+ If you use this code for your research, please cite the following work:
+ ```
+ @article{IluzVinker2023,
+     author = {Iluz, Shir and Vinker, Yael and Hertz, Amir and Berio, Daniel and Cohen-Or, Daniel and Shamir, Ariel},
+     title = {Word-As-Image for Semantic Typography},
+     year = {2023},
+     issue_date = {August 2023},
+     publisher = {Association for Computing Machinery},
+     address = {New York, NY, USA},
+     volume = {42},
+     number = {4},
+     issn = {0730-0301},
+     url = {https://doi.org/10.1145/3592123},
+     doi = {10.1145/3592123},
+     journal = {ACM Trans. Graph.},
+     month = {jul},
+     articleno = {151},
+     numpages = {11},
+     keywords = {semantic typography, SVG, stable diffusion, fonts}
+ }
+ ```
+
+ ## Licence
+ This work is licensed under a [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-nc-sa/4.0/).
code/__init__.py ADDED
File without changes
code/bezier.py ADDED
@@ -0,0 +1,122 @@
+ import numpy as np
+ import matplotlib.pyplot as plt
+ from scipy.special import binom
+ from numpy.linalg import norm
+
+ def num_bezier(n_ctrl, degree=3):
+     if type(n_ctrl) == np.ndarray:
+         n_ctrl = len(n_ctrl)
+     return int((n_ctrl - 1) / degree)
+
+ def bernstein(n, i):
+     bi = binom(n, i)
+     return lambda t, bi=bi, n=n, i=i: bi * t**i * (1 - t)**(n - i)
+
+ def bezier(P, t, d=0):
+     '''Bezier curve of degree len(P)-1. d is the derivative order (0 gives positions)'''
+     n = P.shape[0] - 1
+     if d > 0:
+         Q = np.diff(P, axis=0)*n
+         return bezier(Q, t, d-1)
+     B = np.vstack([bernstein(n, i)(t) for i, p in enumerate(P)])
+     return (P.T @ B).T
+
+ def cubic_bezier(P, t):
+     return (1.0-t)**3*P[0] + 3*(1.0-t)**2*t*P[1] + 3*(1.0-t)*t**2*P[2] + t**3*P[3]
+
+ def bezier_piecewise(Cp, subd=100, degree=3, d=0):
+     ''' sample a piecewise Bezier curve given a sequence of control points'''
+     num = num_bezier(Cp.shape[0], degree)
+     X = []
+     for i in range(num):
+         P = Cp[i*degree:i*degree+degree+1, :]
+         t = np.linspace(0, 1., subd)[:-1]
+         Y = bezier(P, t, d)
+         X += [Y]
+     X.append(Cp[-1])
+     X = np.vstack(X)
+     return X
+
+ def compute_beziers(beziers, subd=100, degree=3):
+     chain = beziers_to_chain(beziers)
+     return bezier_piecewise(chain, subd, degree)
+
+ def plot_control_polygon(Cp, degree=3, lw=0.5, linecolor=np.ones(3)*0.1):
+     n_bezier = num_bezier(len(Cp), degree)
+     for i in range(n_bezier):
+         cp = Cp[i*degree:i*degree+degree+1, :]
+         if degree == 3:
+             plt.plot(cp[0:2, 0], cp[0:2, 1], ':', color=linecolor, linewidth=lw)
+             plt.plot(cp[2:, 0], cp[2:, 1], ':', color=linecolor, linewidth=lw)
+             plt.plot(cp[:, 0], cp[:, 1], 'o', color=[0, 0.5, 1.], markersize=4)
+         else:
+             plt.plot(cp[:, 0], cp[:, 1], ':', color=linecolor, linewidth=lw)
+             plt.plot(cp[:, 0], cp[:, 1], 'o', color=[0, 0.5, 1.])
+
+
+ def chain_to_beziers(chain, degree=3):
+     ''' Convert Bezier chain to list of curve segments (4 control points each)'''
+     num = num_bezier(chain.shape[0], degree)
+     beziers = []
+     for i in range(num):
+         beziers.append(chain[i*degree:i*degree+degree+1, :])
+     return beziers
+
+
+ def beziers_to_chain(beziers):
+     ''' Convert list of Bezier curve segments to a piecewise bezier chain (shares vertices)'''
+     n = len(beziers)
+     chain = []
+     for i in range(n):
+         chain.append(list(beziers[i][:-1]))
+     chain.append([beziers[-1][-1]])
+     return np.array(sum(chain, []))
+
+
+ def split_cubic(bez, t):
+     p1, p2, p3, p4 = bez
+
+     p12 = (p2 - p1) * t + p1
+     p23 = (p3 - p2) * t + p2
+     p34 = (p4 - p3) * t + p3
+
+     p123 = (p23 - p12) * t + p12
+     p234 = (p34 - p23) * t + p23
+
+     p1234 = (p234 - p123) * t + p123
+
+     return np.array([p1, p12, p123, p1234]), np.array([p1234, p234, p34, p4])
+
+
+ def approx_arc_length(bez):
+     c0, c1, c2, c3 = bez
+     v0 = norm(c1-c0)*0.15
+     v1 = norm(-0.558983582205757*c0 + 0.325650248872424*c1 + 0.208983582205757*c2 + 0.024349751127576*c3)
+     v2 = norm(c3-c0+c2-c1)*0.26666666666666666
+     v3 = norm(-0.024349751127576*c0 - 0.208983582205757*c1 - 0.325650248872424*c2 + 0.558983582205757*c3)
+     v4 = norm(c3-c2)*.15
+     return v0 + v1 + v2 + v3 + v4
+
+
+ def subdivide_bezier(bez, thresh):
+     stack = [bez]
+     res = []
+     while stack:
+         bez = stack.pop()
+         l = approx_arc_length(bez)
+         if l < thresh:
+             res.append(bez)
+         else:
+             b1, b2 = split_cubic(bez, 0.5)
+             stack += [b2, b1]
+     return res
+
+ def subdivide_bezier_chain(C, thresh):
+     beziers = chain_to_beziers(C)
+     res = []
+     for bez in beziers:
+         res += subdivide_bezier(bez, thresh)
+     return beziers_to_chain(res)
code/config.py ADDED
@@ -0,0 +1,104 @@
+ import argparse
+ import os.path as osp
+ import yaml
+ import random
+ from easydict import EasyDict as edict
+ import numpy.random as npr
+ import torch
+ from utils import (
+     edict_2_dict,
+     check_and_create_dir,
+     update)
+ import wandb
+ import warnings
+ warnings.filterwarnings("ignore")
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--config", type=str, default="code/config/base.yaml")
+     parser.add_argument("--experiment", type=str, default="conformal_0.5_dist_pixel_100_kernel201")
+     parser.add_argument("--seed", type=int, default=0)
+     parser.add_argument('--log_dir', metavar='DIR', default="output")
+     parser.add_argument('--font', type=str, default="none", help="font name")
+     parser.add_argument('--semantic_concept', type=str, help="the semantic concept to insert")
+     parser.add_argument('--word', type=str, default="none", help="the text to work on")
+     parser.add_argument('--prompt_suffix', type=str, default="minimal flat 2d vector. lineal color."
+                                                              " trending on artstation")
+     parser.add_argument('--optimized_letter', type=str, default="none", help="the letter in the word to optimize")
+     parser.add_argument('--batch_size', type=int, default=1)
+     parser.add_argument('--use_wandb', type=int, default=0)
+     parser.add_argument('--wandb_user', type=str, default="none")
+
+     cfg = edict()
+     args = parser.parse_args()
+     with open('TOKEN', 'r') as f:
+         setattr(args, 'token', f.read().replace('\n', ''))
+     cfg.config = args.config
+     cfg.experiment = args.experiment
+     cfg.seed = args.seed
+     cfg.font = args.font
+     cfg.semantic_concept = args.semantic_concept
+     cfg.word = cfg.semantic_concept if args.word == "none" else args.word
+     if " " in cfg.word:
+         raise ValueError('no spaces are allowed')
+     cfg.caption = f"a {args.semantic_concept}. {args.prompt_suffix}"
+     cfg.log_dir = f"{args.log_dir}/{args.experiment}_{cfg.word}"
+     if args.optimized_letter in cfg.word:
+         cfg.optimized_letter = args.optimized_letter
+     else:
+         raise ValueError('letter should be in word')
+     cfg.batch_size = args.batch_size
+     cfg.token = args.token
+     cfg.use_wandb = args.use_wandb
+     cfg.wandb_user = args.wandb_user
+     cfg.letter = f"{args.font}_{args.optimized_letter}_scaled"
+     cfg.target = f"code/data/init/{cfg.letter}"
+
+     return cfg
+
+
+ def set_config():
+
+     cfg_arg = parse_args()
+     with open(cfg_arg.config, 'r') as f:
+         cfg_full = yaml.load(f, Loader=yaml.FullLoader)
+
+     # recursively traverse parent_config pointers in the config dicts
+     cfg_key = cfg_arg.experiment
+     cfgs = [cfg_arg]
+     while cfg_key:
+         cfgs.append(cfg_full[cfg_key])
+         cfg_key = cfgs[-1].get('parent_config', 'baseline')
+
+     # allowing children configs to override their parents
+     cfg = edict()
+     for options in reversed(cfgs):
+         update(cfg, options)
+     del cfgs
+
+     # set experiment dir
+     signature = f"{cfg.letter}_concept_{cfg.semantic_concept}_seed_{cfg.seed}"
+     cfg.experiment_dir = \
+         osp.join(cfg.log_dir, cfg.font, signature)
+     configfile = osp.join(cfg.experiment_dir, 'config.yaml')
+     print('Config:', cfg)
+
+     # create experiment dir and save config
+     check_and_create_dir(configfile)
+     with open(osp.join(configfile), 'w') as f:
+         yaml.dump(edict_2_dict(cfg), f)
+
+     if cfg.use_wandb:
+         wandb.init(project="Word-As-Image", entity=cfg.wandb_user,
+                    config=cfg, name=f"{signature}", id=wandb.util.generate_id())
+
+     if cfg.seed is not None:
+         random.seed(cfg.seed)
+         npr.seed(cfg.seed)
+         torch.manual_seed(cfg.seed)
+         torch.backends.cudnn.benchmark = False
+     else:
+         assert False
+
+     return cfg
code/config/base.yaml ADDED
@@ -0,0 +1,46 @@
+ baseline:
+   parent_config: ''
+   save:
+     init: true
+     image: true
+     video: true
+     video_frame_freq: 1
+   trainable:
+     point: true
+   lr_base:
+     point: 1
+   lr:
+     lr_init: 0.002
+     lr_final: 0.0008
+     lr_delay_mult: 0.1
+     lr_delay_steps: 100
+   num_iter: 500
+   render_size: 256
+   cut_size: 350
+   level_of_cc: 0  # 0 - original number of cc / 1 - recommended / 2 - more control points
+   seed: 0
+   diffusion:
+     model: "runwayml/stable-diffusion-v1-5"
+     timesteps: 1000
+     guidance_scale: 10
+   loss:
+     use_sds_loss: true
+     tone:
+       use_tone_loss: false
+     conformal:
+       use_conformal_loss: false
+
+ conformal_0.5_dist_pixel_100_kernel201:
+   parent_config: baseline
+   level_of_cc: 1
+   loss:
+     tone:
+       use_tone_loss: true
+       dist_loss_weight: 100
+       pixel_dist_kernel_blur: 201
+       pixel_dist_sigma: 30
+     conformal:
+       use_conformal_loss: true
+       angeles_w: 0.5
code/data/fonts/Bell MT.ttf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:986a5b8bb70238e3c896e3113ef581df26204131f72d59fc12d2deef7ef89e4c
+ size 84840
code/data/fonts/DeliusUnicase-Regular.ttf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05e56564fea31b721f49d24e6c4d7787f89ccad060a554d97ba132bd1f0e0f58
+ size 31504
code/data/fonts/HobeauxRococeaux-Sherman.ttf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6ce6fa2f04d1009b45231d774ca53a2d2927b0cf60520845591214023e5dc7a0
+ size 117452
code/data/fonts/IndieFlower-Regular.ttf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e6a139213baa54b2ff51d300f98cc1c16d690b5046a4b0e42435cbf791767853
+ size 55416
code/data/fonts/JosefinSans-Light.ttf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:90e56e9991974e7aa9efc7bec0e8916df5711c02370fab5a8560d481a4ed86c9
+ size 59308
code/data/fonts/KaushanScript-Regular.ttf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e4e5d5ae35aeef6d2a1f8ba99bad6d716cab67eb9a4cbf349b670008e2c086f5
+ size 183972
code/data/fonts/LuckiestGuy-Regular.ttf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fbe683091b0db0faa8f38e5227ad3bc17dc67f119f27ff10a63d72f5f9bb9da3
+ size 58324
code/data/fonts/Noteworthy-Bold.ttf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f3a6aac67ae5f5e0b98cd49db21fe675f60ad8cd7771a86500d000da515d980d
+ size 248052
code/data/fonts/Quicksand.ttf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:06927fae113c34dca0a33a2bc522da1f4cccc6dda735858090dcf48b0f280535
+ size 124196
code/data/fonts/Saira-Regular.ttf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1216b735c5fcfeffb511750b8c3cb78ee447bda24c51d58ef50055f2f7d0dd4d
+ size 82764
code/data/init/KaushanScript-Regular_B.svg ADDED
code/data/init/KaushanScript-Regular_BUNNY.svg ADDED
code/data/init/KaushanScript-Regular_BUNNY_scaled.svg ADDED
code/data/init/KaushanScript-Regular_B_scaled.svg ADDED
code/data/init/KaushanScript-Regular_N.svg ADDED
code/data/init/KaushanScript-Regular_N_scaled.svg ADDED
code/data/init/KaushanScript-Regular_U.svg ADDED
code/data/init/KaushanScript-Regular_U_scaled.svg ADDED
code/data/init/KaushanScript-Regular_Y.svg ADDED
code/data/init/KaushanScript-Regular_Y_scaled.svg ADDED
code/generate.py ADDED
@@ -0,0 +1,26 @@
+ # code/generate.py
+ import subprocess, json, tempfile, os, pathlib, torch
+
+ REPO = pathlib.Path(__file__).resolve().parents[1]
+
+ def generate_word_image(cfg, device):
+     """
+     cfg   : plain dict or EasyDict
+     device: torch.device (ignored here; CLI handles CUDA)
+     Returns absolute path of the rendered PNG.
+     """
+     with tempfile.TemporaryDirectory() as tmp:
+         cfg_path = pathlib.Path(tmp) / "cfg.json"
+         with open(cfg_path, "w") as f:
+             json.dump(cfg, f)
+
+         # Call the original CLI exactly like your bash script
+         cmd = [
+             "python", os.fspath(REPO / "code" / "main.py"),
+             "--config", os.fspath(cfg_path),
+         ]
+         subprocess.check_call(cmd)
+
+     # main.py saves into cfg['log_dir']/…/final.png – read it back
+     out_png = next((REPO / cfg["log_dir"]).rglob("*.png"))
+     return os.fspath(out_png)
code/losses.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch.nn as nn
2
+ import torchvision
3
+ from scipy.spatial import Delaunay
4
+ import torch
5
+ import numpy as np
6
+ from torch.nn import functional as nnf
7
+ from easydict import EasyDict
8
+ from shapely.geometry import Point
9
+ from shapely.geometry.polygon import Polygon
10
+
11
+ from diffusers import StableDiffusionPipeline
12
+
13
+ class SDSLoss(nn.Module):
14
+ def __init__(self, cfg, device):
15
+ super(SDSLoss, self).__init__()
16
+ self.cfg = cfg
17
+ self.device = device
18
+ self.pipe = StableDiffusionPipeline.from_pretrained(
19
+ cfg.diffusion.model,
20
+ torch_dtype=torch.float16,
21
+ token=cfg.token,
22
+ )
23
+
24
+ self.pipe.enable_xformers_memory_efficient_attention()
25
+ self.pipe.enable_attention_slicing(slice_size=1)
26
+ self.pipe.enable_vae_slicing()
27
+ self.pipe.enable_vae_tiling()
28
+ self.pipe.unet.enable_gradient_checkpointing()
29
+
30
+ alphas_cumprod = torch.tensor(self.pipe.scheduler.alphas_cumprod)
31
+ self.alphas = alphas_cumprod.to(device)
32
+ self.sigmas = torch.sqrt(1 - self.alphas)
33
+
34
+ # 1️⃣ embed text while all weights are still real tensors
35
+ self.embed_text()
36
+
37
+ # 2️⃣ NOW turn on off-loading (only UNet & VAE get meta tensors)
38
+ self.pipe.enable_model_cpu_offload()
39
+
40
+ # text-encoder is no longer needed
41
+ del self.pipe.text_encoder, self.pipe.tokenizer
42
+
43
+ def embed_text(self):
44
+ tok = self.pipe.tokenizer
45
+ txt = tok(self.cfg.caption, padding="max_length",
46
+ max_length=tok.model_max_length,
47
+ truncation=True, return_tensors="pt")
48
+ un = tok([""], padding="max_length",
49
+ max_length=tok.model_max_length,
50
+ return_tensors="pt")
51
+
52
+ with torch.no_grad():
53
+ te = self.pipe.text_encoder.eval() # still real tensors
54
+ em_txt = te(txt.input_ids ).last_hidden_state.to(torch.float16)
55
+ em_un = te(un .input_ids ).last_hidden_state.to(torch.float16)
56
+
57
+ self.text_embeddings = (
58
+ torch.cat([em_un, em_txt])
59
+ .repeat_interleave(self.cfg.batch_size, 0)
60
+ .to(self.device)
61
+ )
62
+
63
+
64
+
65
+ def forward(self, x_aug: torch.Tensor) -> torch.Tensor:
66
+ # ---------------------------------------------------- encode
67
+ x = (x_aug * 2.0 - 1.0).to(self.device, dtype=torch.float16)
68
+ with torch.cuda.amp.autocast():
69
+ latents = self.pipe.vae.encode(x).latent_dist.sample()
70
+         latents = 0.18215 * latents.to(self.device, dtype=torch.float16)
+         torch.cuda.empty_cache()
+
+         # ---------------------------------------------------- add noise
+         t = torch.randint(
+             50,
+             min(950, self.cfg.diffusion.timesteps) - 1,
+             (latents.size(0),),
+             device=self.device,
+         )
+         eps = torch.randn_like(latents)
+         z_t = self.pipe.scheduler.add_noise(latents, eps, t)
+
+         # ---------------------------------------------------- sequential CFG
+         emb_u, emb_c = self.text_embeddings.chunk(2)
+         with torch.cuda.amp.autocast():
+             eps_u = self.pipe.unet(z_t, t, encoder_hidden_states=emb_u).sample
+         torch.cuda.empty_cache()  # release ~500 MB
+
+         with torch.cuda.amp.autocast():
+             eps_c = self.pipe.unet(z_t, t, encoder_hidden_states=emb_c).sample
+
+         # UNet already ran in fp16 under autocast – avoid duplicating tensors
+         eps_t = eps_u + self.cfg.diffusion.guidance_scale * (eps_c - eps_u)
+
+         # ---------------------------------------------------- SDS grad & loss
+         alpha_t = self.alphas[t].to(self.device)
+         sigma_t = self.sigmas[t].to(self.device)
+         # detach: SDS treats the weighted noise residual as a constant, so the
+         # surrogate loss below back-propagates exactly `grad` into the latents
+         grad = (alpha_t**0.5 * sigma_t * (eps_t - eps)).nan_to_num_().detach()
+         return (grad * latents).sum(1).mean()
+
+
+ class ToneLoss(nn.Module):
+     def __init__(self, cfg):
+         super(ToneLoss, self).__init__()
+         self.dist_loss_weight = cfg.loss.tone.dist_loss_weight
+         self.im_init = None
+         self.cfg = cfg
+         self.mse_loss = nn.MSELoss()
+         self.blurrer = torchvision.transforms.GaussianBlur(
+             kernel_size=(cfg.loss.tone.pixel_dist_kernel_blur,
+                          cfg.loss.tone.pixel_dist_kernel_blur),
+             sigma=(cfg.loss.tone.pixel_dist_sigma))
+
+     def set_image_init(self, im_init):
+         self.im_init = im_init.permute(2, 0, 1).unsqueeze(0)
+         self.init_blurred = self.blurrer(self.im_init)
+
+     def get_scheduler(self, step=None):
+         if step is not None:
+             return self.dist_loss_weight * np.exp(-(1 / 5) * ((step - 300) / 20) ** 2)
+         else:
+             return self.dist_loss_weight
+
+     def forward(self, cur_raster, step=None):
+         blurred_cur = self.blurrer(cur_raster)
+         return self.mse_loss(self.init_blurred.detach(), blurred_cur) * self.get_scheduler(step)
+
+
+ class ConformalLoss:
+     def __init__(self, parameters: EasyDict, device: torch.device, target_letter: str, shape_groups):
+         self.parameters = parameters
+         self.target_letter = target_letter
+         self.shape_groups = shape_groups
+         self.faces = self.init_faces(device)
+         self.faces_roll_a = [torch.roll(self.faces[i], 1, 1) for i in range(len(self.faces))]
+
+         with torch.no_grad():
+             self.angles = []
+             self.reset()
+
+     def get_angles(self, points: torch.Tensor) -> list:
+         angles_ = []
+         for i in range(len(self.faces)):
+             triangles = points[self.faces[i]]
+             triangles_roll_a = points[self.faces_roll_a[i]]
+             edges = triangles_roll_a - triangles
+             length = edges.norm(dim=-1)
+             edges = edges / (length + 1e-1)[:, :, None]
+             edges_roll = torch.roll(edges, 1, 1)
+             cosine = torch.einsum('ned,ned->ne', edges, edges_roll)
+             angles = torch.arccos(cosine)
+             angles_.append(angles)
+         return angles_
+
+     def get_letter_inds(self, letter_to_insert):
+         for group, l in zip(self.shape_groups, self.target_letter):
+             if l == letter_to_insert:
+                 letter_inds = group.shape_ids
+                 return letter_inds[0], letter_inds[-1], len(letter_inds)
+
+     def reset(self):
+         points = torch.cat([point.clone().detach() for point in self.parameters.point]).to(self.faces[0].device)
+         self.angles = self.get_angles(points)
+
+     def init_faces(self, device: torch.device) -> list:
+         faces_ = []
+         for j, c in enumerate(self.target_letter):
+             points_np = [self.parameters.point[i].clone().detach().cpu().numpy() for i in range(len(self.parameters.point))]
+             start_ind, end_ind, shapes_per_letter = self.get_letter_inds(c)
+             print(c, start_ind, end_ind)
+             holes = []
+             if shapes_per_letter > 1:
+                 holes = points_np[start_ind + 1:end_ind]
+             poly = Polygon(points_np[start_ind], holes=holes)
+             poly = poly.buffer(0)
+             points_np = np.concatenate(points_np)
+             faces = Delaunay(points_np).simplices
+             is_intersect = np.array([poly.contains(Point(points_np[face].mean(0))) for face in faces], dtype=bool)
+             faces_.append(torch.from_numpy(faces[is_intersect]).to(device, dtype=torch.int64))
+         return faces_
+
+     def __call__(self) -> torch.Tensor:
+         loss_angles = 0
+         points = torch.cat(self.parameters.point).to(self.faces[0].device)
+         angles = self.get_angles(points)
+         for i in range(len(self.faces)):
+             loss_angles += nnf.mse_loss(angles[i], self.angles[i])
+         return loss_angles
+
+
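For reference, the `grad.detach() * latents` pattern above is the standard score-distillation surrogate: with the weighted residual treated as a constant, the surrogate loss back-propagates exactly that residual into the latents. A minimal autograd check of the trick (my own sketch, not part of this repo; the repo version adds a `sum(1).mean()` scaling on top):

```python
# Verify: d/dz [ (g.detach() * z).sum() ] == g
import torch

latents = torch.randn(2, 4, 64, 64, requires_grad=True)
grad = torch.randn_like(latents)  # stands in for alpha_t**0.5 * sigma_t * (eps_hat - eps)

loss = (grad.detach() * latents).sum()
loss.backward()

assert torch.allclose(latents.grad, grad)  # the "gradient" passes through unchanged
```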
code/main.py ADDED
@@ -0,0 +1,188 @@
+ from typing import Mapping
+ import os
+ import base64  # still here if you need it later
+ from tqdm import tqdm
+ from easydict import EasyDict as edict
+ import matplotlib.pyplot as plt
+ import torch
+ from torch.optim.lr_scheduler import LambdaLR
+ import pydiffvg
+ import save_svg
+ from losses import SDSLoss, ToneLoss, ConformalLoss
+ from config import set_config
+ from utils import (
+     check_and_create_dir,
+     get_data_augs,
+     save_image,
+     preprocess,
+     learning_rate_decay,
+     combine_word,
+     create_video,
+ )
+ import wandb
+ import warnings
+
+ warnings.filterwarnings("ignore")
+
+ pydiffvg.set_print_timing(False)
+ gamma = 1.0
+
+
+ def init_shapes(svg_path: str, trainable: Mapping[str, bool]):
+     """Load the initial SVG, mark trainable points, return shapes & params."""
+
+     svg = f"{svg_path}.svg"
+     _, _, shapes_init, shape_groups_init = pydiffvg.svg_to_scene(svg)
+
+     parameters = edict()
+
+     if trainable.point:
+         parameters.point = []
+         for path in shapes_init:
+             path.points.requires_grad = True
+             parameters.point.append(path.points)
+
+     return shapes_init, shape_groups_init, parameters
+
+
+ # -----------------------------------------------------------------------------
+ # Public entry-point that the CLI *and* FastAPI reuse
+ # -----------------------------------------------------------------------------
+
+ def generate_word_image(cfg, device: torch.device):
+     """Optimise a single word and return the path to the resulting PNG."""
+
+     # make sure we can access attributes whether `cfg` is a dict or an EasyDict
+     if isinstance(cfg, dict):
+         cfg = edict(cfg)
+
+     pydiffvg.set_use_gpu(device.type == "cuda")
+
+     print("preprocessing")
+     preprocess(cfg.font, cfg.word, cfg.optimized_letter, cfg.level_of_cc)
+
+     if cfg.loss.use_sds_loss:
+         sds_loss = SDSLoss(cfg, device)
+
+     h = w = cfg.render_size
+     data_augs = get_data_augs(cfg.cut_size)
+     render = pydiffvg.RenderFunction.apply
+
+     print("initializing shape")
+     shapes, shape_groups, parameters = init_shapes(svg_path=cfg.target, trainable=cfg.trainable)
+
+     scene_args = pydiffvg.RenderFunction.serialize_scene(w, h, shapes, shape_groups)
+     img_init = render(w, h, 2, 2, 0, None, *scene_args)
+     img_init = img_init[:, :, 3:4] * img_init[:, :, :3] + torch.ones_like(img_init[:, :, :3]) * (1 - img_init[:, :, 3:4])
+     img_init = img_init[:, :, :3]
+
+     if cfg.use_wandb:
+         plt.imshow(img_init.detach().cpu())
+         wandb.log({"init": wandb.Image(plt)}, step=0)
+         plt.close()
+
+     if cfg.loss.tone.use_tone_loss:
+         tone_loss = ToneLoss(cfg)
+         tone_loss.set_image_init(img_init)
+
+     if cfg.save.init:
+         print("saving init")
+         filename = os.path.join(cfg.experiment_dir, "svg-init", "init.svg")
+         check_and_create_dir(filename)
+         save_svg.save_svg(filename, w, h, shapes, shape_groups)
+
+     num_iter = cfg.num_iter
+     optim = torch.optim.Adam([
+         {"params": parameters["point"], "lr": cfg.lr_base["point"]}
+     ], betas=(0.9, 0.9), eps=1e-6)
+
+     if cfg.loss.conformal.use_conformal_loss:
+         conformal_loss = ConformalLoss(parameters, device, cfg.optimized_letter, shape_groups)
+
+     # LambdaLR multiplies the base lr, so normalise the schedule by lr_init
+     lr_lambda = lambda step: learning_rate_decay(
+         step,
+         cfg.lr.lr_init,
+         cfg.lr.lr_final,
+         num_iter,
+         lr_delay_steps=cfg.lr.lr_delay_steps,
+         lr_delay_mult=cfg.lr.lr_delay_mult,
+     ) / cfg.lr.lr_init
+
+     scheduler = LambdaLR(optim, lr_lambda=lr_lambda, last_epoch=-1)
+
+     print("start training")
+     for step in tqdm(range(num_iter)):
+         if cfg.use_wandb:
+             wandb.log({"learning_rate": optim.param_groups[0]["lr"]}, step=step)
+         optim.zero_grad()
+
+         scene_args = pydiffvg.RenderFunction.serialize_scene(w, h, shapes, shape_groups)
+         img = render(w, h, 2, 2, step, None, *scene_args)
+         img = img[:, :, 3:4] * img[:, :, :3] + torch.ones_like(img[:, :, :3]) * (1 - img[:, :, 3:4])
+         img = img[:, :, :3]
+
+         if cfg.save.video and (step % cfg.save.video_frame_freq == 0 or step == num_iter - 1):
+             save_image(img, os.path.join(cfg.experiment_dir, "video-png", f"iter{step:04d}.png"), gamma)
+             svg_frame = os.path.join(cfg.experiment_dir, "video-svg", f"iter{step:04d}.svg")
+             check_and_create_dir(svg_frame)
+             save_svg.save_svg(svg_frame, w, h, shapes, shape_groups)
+             if cfg.use_wandb:
+                 plt.imshow(img.detach().cpu())
+                 wandb.log({"img": wandb.Image(plt)}, step=step)
+                 plt.close()
+
+         x = img.unsqueeze(0).permute(0, 3, 1, 2).repeat(cfg.batch_size, 1, 1, 1)
+         x_aug = data_augs(x)
+
+         # `sds_loss` only exists when cfg.loss.use_sds_loss is set
+         loss = sds_loss(x_aug) if cfg.loss.use_sds_loss else torch.tensor(0.0, device=device)
+         if cfg.loss.tone.use_tone_loss:
+             loss = loss + tone_loss(x, step)
+         if cfg.loss.conformal.use_conformal_loss:
+             loss = loss + cfg.loss.conformal.angeles_w * conformal_loss()
+
+         if cfg.use_wandb:
+             wandb.log({"total_loss": loss.item()}, step=step)
+
+         loss.backward()
+         optim.step()
+         scheduler.step()
+
+     svg_out = os.path.join(cfg.experiment_dir, "output-svg", "output.svg")
+     check_and_create_dir(svg_out)
+     save_svg.save_svg(svg_out, w, h, shapes, shape_groups)
+
+     combine_word(cfg.word, cfg.optimized_letter, cfg.font, cfg.experiment_dir)
+
+     if cfg.save.image:
+         png_out = os.path.join(cfg.experiment_dir, "output-png", "output.png")
+         check_and_create_dir(png_out)
+         pydiffvg.imwrite(img.detach().cpu(), png_out, gamma=gamma)
+         if cfg.use_wandb:
+             plt.imshow(img.detach().cpu())
+             wandb.log({"img": wandb.Image(plt)}, step=num_iter)
+             plt.close()
+     else:
+         png_out = ""
+
+     if cfg.save.video:
+         print("saving video")
+         create_video(cfg.num_iter, cfg.experiment_dir, cfg.save.video_frame_freq)
+
+     if cfg.use_wandb:
+         wandb.finish()
+
+     return os.path.abspath(png_out)
+
+
+ # -----------------------------------------------------------------------------
+ # CLI entry-point – original behaviour when run directly
+ # -----------------------------------------------------------------------------
+
+ def cli_entry():
+     cfg = set_config()
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     generate_word_image(cfg, device)
+
+
+ if __name__ == "__main__":
+     cli_entry()
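The division by `cfg.lr.lr_init` in the schedule above exists because `LambdaLR` multiplies the optimizer's *base* learning rate by `lr_lambda(step)`, while `learning_rate_decay` returns an absolute rate. A small self-contained check of that contract (my own sketch, not from this repo):

```python
# LambdaLR semantics: lr(step) = base_lr * lr_lambda(step)
import torch

p = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.Adam([{"params": [p], "lr": 1.0}])
sched = torch.optim.lr_scheduler.LambdaLR(opt, lr_lambda=lambda s: 0.5 ** s)

for expected in (1.0, 0.5, 0.25):
    assert abs(opt.param_groups[0]["lr"] - expected) < 1e-12
    opt.step()     # optimizer step first, ...
    sched.step()   # ... then scheduler step, as in the training loop above
```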
code/save_svg.py ADDED
@@ -0,0 +1,155 @@
+ import torch
+ import pydiffvg
+ import xml.etree.ElementTree as etree
+ from xml.dom import minidom
+
+
+ def prettify(elem):
+     """Return a pretty-printed XML string for the Element."""
+     rough_string = etree.tostring(elem, 'utf-8')
+     reparsed = minidom.parseString(rough_string)
+     return reparsed.toprettyxml(indent="  ")
+
+
+ def save_svg(filename, width, height, shapes, shape_groups, use_gamma=False, background=None):
+     root = etree.Element('svg')
+     root.set('version', '1.1')
+     root.set('xmlns', 'http://www.w3.org/2000/svg')
+     root.set('width', str(width))
+     root.set('height', str(height))
+     if background is not None:
+         print(f"setting background to {background}")
+         root.set('style', str(background))
+     defs = etree.SubElement(root, 'defs')
+     g = etree.SubElement(root, 'g')
+     if use_gamma:
+         f = etree.SubElement(defs, 'filter')
+         f.set('id', 'gamma')
+         f.set('x', '0')
+         f.set('y', '0')
+         f.set('width', '100%')
+         f.set('height', '100%')
+         gamma = etree.SubElement(f, 'feComponentTransfer')
+         gamma.set('color-interpolation-filters', 'sRGB')
+         # identical gamma transfer function on all four channels
+         for channel in ('feFuncR', 'feFuncG', 'feFuncB', 'feFuncA'):
+             func = etree.SubElement(gamma, channel)
+             func.set('type', 'gamma')
+             func.set('amplitude', str(1))
+             func.set('exponent', str(1 / 2.2))
+         g.set('style', 'filter:url(#gamma)')
+     # Store color
+     for i, shape_group in enumerate(shape_groups):
+         def add_color(shape_color, name):
+             if isinstance(shape_color, pydiffvg.LinearGradient):
+                 lg = shape_color
+                 color = etree.SubElement(defs, 'linearGradient')
+                 color.set('id', name)
+                 color.set('x1', str(lg.begin[0].item() / width))
+                 color.set('y1', str(lg.begin[1].item() / height))
+                 color.set('x2', str(lg.end[0].item() / width))
+                 color.set('y2', str(lg.end[1].item() / height))
+                 offsets = lg.offsets.data.cpu().numpy()
+                 for j in range(offsets.shape[0]):
+                     stop = etree.SubElement(color, 'stop')
+                     stop.set('offset', str(offsets[j]))
+                     c = lg.stop_colors[j, :]
+                     stop.set('stop-color', 'rgb({}, {}, {})'.format(
+                         int(255 * c[0]), int(255 * c[1]), int(255 * c[2])))
+                     stop.set('stop-opacity', '{}'.format(c[3]))
+             if isinstance(shape_color, pydiffvg.RadialGradient):
+                 lg = shape_color
+                 color = etree.SubElement(defs, 'radialGradient')
+                 color.set('id', name)
+                 color.set('cx', str(lg.center[0].item() / width))
+                 color.set('cy', str(lg.center[1].item() / height))
+                 # this only supports width == height
+                 color.set('r', str(lg.radius[0].item() / width))
+                 offsets = lg.offsets.data.cpu().numpy()
+                 for j in range(offsets.shape[0]):
+                     stop = etree.SubElement(color, 'stop')
+                     stop.set('offset', str(offsets[j]))
+                     c = lg.stop_colors[j, :]
+                     stop.set('stop-color', 'rgb({}, {}, {})'.format(
+                         int(255 * c[0]), int(255 * c[1]), int(255 * c[2])))
+                     stop.set('stop-opacity', '{}'.format(c[3]))
+         if shape_group.fill_color is not None:
+             add_color(shape_group.fill_color, 'shape_{}_fill'.format(i))
+         if shape_group.stroke_color is not None:
+             add_color(shape_group.stroke_color, 'shape_{}_stroke'.format(i))
+
+     for i, shape_group in enumerate(shape_groups):
+         for j, id in enumerate(shape_group.shape_ids):
+             shape = shapes[id]
+             if isinstance(shape, pydiffvg.Path):
+                 if j == 0:
+                     shape_node = etree.SubElement(g, 'path')
+                     path_str = ''
+                 num_segments = shape.num_control_points.shape[0]
+                 num_control_points = shape.num_control_points.data.cpu().numpy()
+                 points = shape.points.data.cpu().numpy()
+                 num_points = shape.points.shape[0]
+                 path_str += 'M {} {}'.format(points[0, 0], points[0, 1])
+                 point_id = 1
+                 # 0, 1, 2 control points per segment map to L, Q, C commands
+                 for seg in range(num_segments):
+                     if num_control_points[seg] == 0:
+                         p = point_id % num_points
+                         path_str += ' L {} {}'.format(points[p, 0], points[p, 1])
+                         point_id += 1
+                     elif num_control_points[seg] == 1:
+                         p1 = (point_id + 1) % num_points
+                         path_str += ' Q {} {} {} {}'.format(
+                             points[point_id, 0], points[point_id, 1],
+                             points[p1, 0], points[p1, 1])
+                         point_id += 2
+                     elif num_control_points[seg] == 2:
+                         p2 = (point_id + 2) % num_points
+                         path_str += ' C {} {} {} {} {} {}'.format(
+                             points[point_id, 0], points[point_id, 1],
+                             points[point_id + 1, 0], points[point_id + 1, 1],
+                             points[p2, 0], points[p2, 1])
+                         point_id += 3
+                     else:
+                         assert False, 'unsupported number of control points'
+                 shape_node.set('stroke-width', str(0))  # no strokes
+                 if shape_group.fill_color is not None:
+                     if isinstance(shape_group.fill_color, (pydiffvg.LinearGradient, pydiffvg.RadialGradient)):
+                         shape_node.set('fill', 'url(#shape_{}_fill)'.format(i))
+                     else:
+                         c = shape_group.fill_color.data.cpu().numpy()
+                         shape_node.set('fill', 'rgb({}, {}, {})'.format(
+                             int(255 * c[0]), int(255 * c[1]), int(255 * c[2])))
+                         shape_node.set('opacity', str(c[3]))
+                 else:
+                     shape_node.set('fill', 'none')
+                 if shape_group.stroke_color is not None:
+                     if isinstance(shape_group.stroke_color, (pydiffvg.LinearGradient, pydiffvg.RadialGradient)):
+                         shape_node.set('stroke', 'url(#shape_{}_stroke)'.format(i))
+                     else:
+                         c = shape_group.stroke_color.data.cpu().numpy()
+                         shape_node.set('stroke', 'rgb({}, {}, {})'.format(
+                             int(255 * c[0]), int(255 * c[1]), int(255 * c[2])))
+                         shape_node.set('stroke-opacity', str(c[3]))
+                 shape_node.set('stroke-linecap', 'round')
+                 shape_node.set('stroke-linejoin', 'round')
+
+                 shape_node.set('d', path_str)
+
+     with open(filename, "w") as f:
+         f.write(prettify(root))
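A hedged usage sketch (my own, assuming the usual `pydiffvg.Path` / `pydiffvg.ShapeGroup` constructors rather than anything this repo pins down): serialising one closed triangle built from straight segments, which exercises the `L`-command branch above.

```python
import torch
import pydiffvg
import save_svg

tri = pydiffvg.Path(
    num_control_points=torch.zeros(3, dtype=torch.int32),  # 0 per segment => 'L' commands
    points=torch.tensor([[10.0, 10.0], [90.0, 10.0], [50.0, 90.0]]),
    is_closed=True,
)
group = pydiffvg.ShapeGroup(
    shape_ids=torch.tensor([0]),
    fill_color=torch.tensor([0.0, 0.0, 0.0, 1.0]),  # opaque black
)
save_svg.save_svg("triangle.svg", 100, 100, [tri], [group])
```

The closing segment comes from the `point_id % num_points` wrap-around, so no explicit `z` command is needed for closed paths.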
code/ttf.py ADDED
@@ -0,0 +1,265 @@
+ from importlib import reload
+ import os
+ import numpy as np
+ import bezier
+ import freetype as ft
+ import pydiffvg
+ import torch
+ import save_svg
+
+ device = torch.device("cuda" if (
+     torch.cuda.is_available() and torch.cuda.device_count() > 0) else "cpu")
+
+ reload(bezier)
+
+
+ def fix_single_svg(svg_path, all_word=False):
+     target_h_letter = 360
+     target_canvas_width, target_canvas_height = 600, 600
+
+     canvas_width, canvas_height, shapes, shape_groups = pydiffvg.svg_to_scene(svg_path)
+
+     letter_h = canvas_height
+     letter_w = canvas_width
+
+     if all_word:
+         if letter_w > letter_h:
+             scale_canvas_w = target_h_letter / letter_w
+             hsize = int(letter_h * scale_canvas_w)
+             scale_canvas_h = hsize / letter_h
+         else:
+             scale_canvas_h = target_h_letter / letter_h
+             wsize = int(letter_w * scale_canvas_h)
+             scale_canvas_w = wsize / letter_w
+     else:
+         scale_canvas_h = target_h_letter / letter_h
+         wsize = int(letter_w * scale_canvas_h)
+         scale_canvas_w = wsize / letter_w
+
+     for num, p in enumerate(shapes):
+         p.points[:, 0] = p.points[:, 0] * scale_canvas_w
+         p.points[:, 1] = p.points[:, 1] * scale_canvas_h + target_h_letter
+
+     w_min, w_max = min([torch.min(p.points[:, 0]) for p in shapes]), max([torch.max(p.points[:, 0]) for p in shapes])
+     h_min, h_max = min([torch.min(p.points[:, 1]) for p in shapes]), max([torch.max(p.points[:, 1]) for p in shapes])
+
+     for num, p in enumerate(shapes):
+         p.points[:, 0] = p.points[:, 0] + target_canvas_width / 2 - int(w_min + (w_max - w_min) / 2)
+         p.points[:, 1] = p.points[:, 1] + target_canvas_height / 2 - int(h_min + (h_max - h_min) / 2)
+
+     output_path = f"{svg_path[:-4]}_scaled.svg"
+     save_svg.save_svg(output_path, target_canvas_width, target_canvas_height, shapes, shape_groups)
+
+
+ def normalize_letter_size(dest_path, font, txt):
+     fontname = os.path.splitext(os.path.basename(font))[0]
+     for i, c in enumerate(txt):
+         fname = f"{dest_path}/{fontname}_{c}.svg"
+         fname = fname.replace(" ", "_")
+         fix_single_svg(fname)
+
+     fname = f"{dest_path}/{fontname}_{txt}.svg"
+     fname = fname.replace(" ", "_")
+     fix_single_svg(fname, all_word=True)
+
+
+ def glyph_to_cubics(face, x=0):
+     '''Convert the current font face glyph to cubic beziers.'''
+
+     def linear_to_cubic(Q):
+         a, b = Q
+         return [a + (b - a) * t for t in np.linspace(0, 1, 4)]
+
+     def quadratic_to_cubic(Q):
+         return [Q[0],
+                 Q[0] + (2 / 3) * (Q[1] - Q[0]),
+                 Q[2] + (2 / 3) * (Q[1] - Q[2]),
+                 Q[2]]
+
+     beziers = []
+     pt = lambda p: np.array([p.x + x, -p.y])  # flip here since freetype is y-up
+     last = lambda: beziers[-1][-1]
+
+     def move_to(a, beziers):
+         beziers.append([pt(a)])
+
+     def line_to(a, beziers):
+         Q = linear_to_cubic([last(), pt(a)])
+         beziers[-1] += Q[1:]
+
+     def conic_to(a, b, beziers):
+         Q = quadratic_to_cubic([last(), pt(a), pt(b)])
+         beziers[-1] += Q[1:]
+
+     def cubic_to(a, b, c, beziers):
+         beziers[-1] += [pt(a), pt(b), pt(c)]
+
+     face.glyph.outline.decompose(beziers, move_to=move_to, line_to=line_to, conic_to=conic_to, cubic_to=cubic_to)
+     beziers = [np.array(C).astype(float) for C in beziers]
+     return beziers
+
+
+ def font_string_to_beziers(font, txt, size=30, spacing=1.0, merge=True, target_control=None):
+     '''Load a font and convert the outlines for a given string to cubic bezier curves.
+     If merge is True, simply return a list of all bezier curves;
+     otherwise return a list of lists with the bezier curves for each glyph.'''
+
+     face = ft.Face(font)
+     face.set_char_size(64 * size)
+     slot = face.glyph
+
+     x = 0
+     beziers = []
+     previous = 0
+     for c in txt:
+         face.load_char(c, ft.FT_LOAD_DEFAULT | ft.FT_LOAD_NO_BITMAP)
+         bez = glyph_to_cubics(face, x)
+
+         # Subdivide the longest chains until the desired control-point count is reached
+         if target_control is not None:
+             if c in target_control.keys():
+                 nctrl = np.sum([len(C) for C in bez])
+                 while nctrl < target_control[c]:
+                     longest = np.max(
+                         sum([[bezier.approx_arc_length(b) for b in bezier.chain_to_beziers(C)] for C in bez], []))
+                     thresh = longest * 0.5
+                     bez = [bezier.subdivide_bezier_chain(C, thresh) for C in bez]
+                     nctrl = np.sum([len(C) for C in bez])
+                     print(nctrl)
+
+         if merge:
+             beziers += bez
+         else:
+             beziers.append(bez)
+
+         kerning = face.get_kerning(previous, c)
+         x += (slot.advance.x + kerning.x) * spacing
+         previous = c
+
+     return beziers
+
+
+ def bezier_chain_to_commands(C, closed=True):
+     curves = bezier.chain_to_beziers(C)
+     cmds = 'M %f %f ' % (C[0][0], C[0][1])
+     n = len(curves)
+     for i, bez in enumerate(curves):
+         if i == n - 1 and closed:
+             cmds += 'C %f %f %f %f %f %fz ' % (*bez[1], *bez[2], *bez[3])
+         else:
+             cmds += 'C %f %f %f %f %f %f ' % (*bez[1], *bez[2], *bez[3])
+     return cmds
+
+
+ def count_cp(file_name, font_name):
+     canvas_width, canvas_height, shapes, shape_groups = pydiffvg.svg_to_scene(file_name)
+     p_counter = 0
+     for path in shapes:
+         p_counter += path.points.shape[0]
+     print(f"TOTAL CP: [{p_counter}]")
+     return p_counter
+
+
+ def write_letter_svg(c, header, fontname, beziers, subdivision_thresh, dest_path):
+     cmds = ''
+     svg = header
+
+     path = '<g><path d="'
+     for C in beziers:
+         if subdivision_thresh is not None:
+             print('subd')
+             C = bezier.subdivide_bezier_chain(C, subdivision_thresh)
+         cmds += bezier_chain_to_commands(C, True)
+     path += cmds + '"/>\n'
+     svg += path + '</g></svg>\n'
+
+     fname = f"{dest_path}/{fontname}_{c}.svg"
+     fname = fname.replace(" ", "_")
+     with open(fname, 'w') as f:
+         f.write(svg)
+     return fname, path
+
+
+ def font_string_to_svgs(dest_path, font, txt, size=30, spacing=1.0, target_control=None, subdivision_thresh=None):
+     fontname = os.path.splitext(os.path.basename(font))[0]
+     glyph_beziers = font_string_to_beziers(font, txt, size, spacing, merge=False, target_control=target_control)
+     if not os.path.isdir(dest_path):
+         os.mkdir(dest_path)
+
+     # Compute bounding box
+     points = np.vstack(sum(glyph_beziers, []))
+     lt = np.min(points, axis=0)
+     rb = np.max(points, axis=0)
+     size = rb - lt
+
+     sizestr = 'width="%.1f" height="%.1f"' % (size[0], size[1])
+     boxstr = ' viewBox="%.1f %.1f %.1f %.1f"' % (lt[0], lt[1], size[0], size[1])
+     header = '''<?xml version="1.0" encoding="utf-8"?>
+ <svg xmlns="http://www.w3.org/2000/svg" xmlns:ev="http://www.w3.org/2001/xml-events" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" baseProfile="full" '''
+     header += sizestr
+     header += boxstr
+     header += '>\n<defs/>\n'
+
+     svg_all = header
+
+     for i, (c, beziers) in enumerate(zip(txt, glyph_beziers)):
+         print(f"==== {c} ====")
+         fname, path = write_letter_svg(c, header, fontname, beziers, subdivision_thresh, dest_path)
+
+         num_cp = count_cp(fname, fontname)
+         print(num_cp)
+         print(font, c)
+         # Add to global svg
+         svg_all += path + '</g>\n'
+
+     # Save global svg
+     svg_all += '</svg>\n'
+     fname = f"{dest_path}/{fontname}_{txt}.svg"
+     fname = fname.replace(" ", "_")
+     with open(fname, 'w') as f:
+         f.write(svg_all)
+
+
+ if __name__ == '__main__':
+     fonts = ["KaushanScript-Regular"]
+     level_of_cc = 1
+
+     if level_of_cc == 0:
+         target_cp = None
+     else:
+         target_cp = {"A": 120, "B": 120, "C": 100, "D": 100,
+                      "E": 120, "F": 120, "G": 120, "H": 120,
+                      "I": 35, "J": 80, "K": 100, "L": 80,
+                      "M": 100, "N": 100, "O": 100, "P": 120,
+                      "Q": 120, "R": 130, "S": 110, "T": 90,
+                      "U": 100, "V": 100, "W": 100, "X": 130,
+                      "Y": 120, "Z": 120,
+                      "a": 120, "b": 120, "c": 100, "d": 100,
+                      "e": 120, "f": 120, "g": 120, "h": 120,
+                      "i": 35, "j": 80, "k": 100, "l": 80,
+                      "m": 100, "n": 100, "o": 100, "p": 120,
+                      "q": 120, "r": 130, "s": 110, "t": 90,
+                      "u": 100, "v": 100, "w": 100, "x": 130,
+                      "y": 120, "z": 120}
+         target_cp = {k: v * level_of_cc for k, v in target_cp.items()}
+
+     for f in fonts:
+         print(f"======= {f} =======")
+         font_path = f"data/fonts/{f}.ttf"
+         output_path = "data/init"
+         txt = "BUNNY"
+         subdivision_thresh = None
+         font_string_to_svgs(output_path, font_path, txt, target_control=target_cp,
+                             subdivision_thresh=subdivision_thresh)
+         normalize_letter_size(output_path, font_path, txt)
+
+     print("DONE")
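The `quadratic_to_cubic` helper above is standard Bézier degree elevation: a quadratic with control points Q0, Q1, Q2 equals the cubic with C0 = Q0, C1 = Q0 + (2/3)(Q1 − Q0), C2 = Q2 + (2/3)(Q1 − Q2), C3 = Q2. A quick numeric check of that identity (my own sketch, not part of the repo):

```python
import numpy as np

Q = np.array([[0.0, 0.0], [1.0, 2.0], [2.0, 0.0]])          # quadratic control points
C = np.array([Q[0],
              Q[0] + (2 / 3) * (Q[1] - Q[0]),
              Q[2] + (2 / 3) * (Q[1] - Q[2]),
              Q[2]])                                          # elevated cubic

for t in np.linspace(0.0, 1.0, 11):
    quad = (1 - t) ** 2 * Q[0] + 2 * (1 - t) * t * Q[1] + t ** 2 * Q[2]
    cub = ((1 - t) ** 3 * C[0] + 3 * (1 - t) ** 2 * t * C[1]
           + 3 * (1 - t) * t ** 2 * C[2] + t ** 3 * C[3])
    assert np.allclose(quad, cub)  # both parameterisations trace the same curve
```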
code/utils.py ADDED
@@ -0,0 +1,221 @@
+ import collections.abc
+ import os
+ import os.path as osp
+ from torch import nn
+ import kornia.augmentation as K
+ import pydiffvg
+ import save_svg
+ import cv2
+ from ttf import font_string_to_svgs, normalize_letter_size
+ import torch
+ import numpy as np
+
+
+ def edict_2_dict(x):
+     if isinstance(x, dict):
+         xnew = {}
+         for k in x:
+             xnew[k] = edict_2_dict(x[k])
+         return xnew
+     elif isinstance(x, list):
+         xnew = []
+         for i in range(len(x)):
+             xnew.append(edict_2_dict(x[i]))
+         return xnew
+     else:
+         return x
+
+
+ def check_and_create_dir(path):
+     pathdir = osp.split(path)[0]
+     if not osp.isdir(pathdir):
+         os.makedirs(pathdir)
+
+
+ def update(d, u):
+     """https://stackoverflow.com/questions/3232943/update-value-of-a-nested-dictionary-of-varying-depth"""
+     for k, v in u.items():
+         if isinstance(v, collections.abc.Mapping):
+             d[k] = update(d.get(k, {}), v)
+         else:
+             d[k] = v
+     return d
+
+
+ def preprocess(font, word, letter, level_of_cc=1):
+     if level_of_cc == 0:
+         target_cp = None
+     else:
+         target_cp = {"A": 120, "B": 120, "C": 100, "D": 100,
+                      "E": 120, "F": 120, "G": 120, "H": 120,
+                      "I": 35, "J": 80, "K": 100, "L": 80,
+                      "M": 100, "N": 100, "O": 100, "P": 120,
+                      "Q": 120, "R": 130, "S": 110, "T": 90,
+                      "U": 100, "V": 100, "W": 100, "X": 130,
+                      "Y": 120, "Z": 120,
+                      "a": 120, "b": 120, "c": 100, "d": 100,
+                      "e": 120, "f": 120, "g": 120, "h": 120,
+                      "i": 35, "j": 80, "k": 100, "l": 80,
+                      "m": 100, "n": 100, "o": 100, "p": 120,
+                      "q": 120, "r": 130, "s": 110, "t": 90,
+                      "u": 100, "v": 100, "w": 100, "x": 130,
+                      "y": 120, "z": 120}
+         target_cp = {k: v * level_of_cc for k, v in target_cp.items()}
+
+     print(f"======= {font} =======")
+     font_path = f"code/data/fonts/{font}.ttf"
+     init_path = "code/data/init"
+     subdivision_thresh = None
+     font_string_to_svgs(init_path, font_path, word, target_control=target_cp,
+                         subdivision_thresh=subdivision_thresh)
+     normalize_letter_size(init_path, font_path, word)
+
+     # optimize two adjacent letters
+     if len(letter) > 1:
+         subdivision_thresh = None
+         font_string_to_svgs(init_path, font_path, letter, target_control=target_cp,
+                             subdivision_thresh=subdivision_thresh)
+         normalize_letter_size(init_path, font_path, letter)
+
+     print("Done preprocess")
+
+
+ def get_data_augs(cut_size):
+     augmentations = []
+     augmentations.append(K.RandomPerspective(distortion_scale=0.5, p=0.7))
+     augmentations.append(K.RandomCrop(size=(cut_size, cut_size), pad_if_needed=True, padding_mode='reflect', p=1.0))
+     return nn.Sequential(*augmentations)
+
+
+ '''pytorch adaptation of https://github.com/google/mipnerf'''
+ def learning_rate_decay(step,
+                         lr_init,
+                         lr_final,
+                         max_steps,
+                         lr_delay_steps=0,
+                         lr_delay_mult=1):
+     """Continuous learning rate decay function.
+     The returned rate is lr_init when step=0 and lr_final when step=max_steps, and
+     is log-linearly interpolated elsewhere (equivalent to exponential decay).
+     If lr_delay_steps > 0, the learning rate is scaled by a smooth function of
+     lr_delay_mult, such that the initial learning rate is lr_init * lr_delay_mult
+     at the beginning of optimization and eases back to the normal learning rate
+     once step > lr_delay_steps.
+     Args:
+         step: int, the current optimization step.
+         lr_init: float, the initial learning rate.
+         lr_final: float, the final learning rate.
+         max_steps: int, the number of steps during optimization.
+         lr_delay_steps: int, the number of steps to delay the full learning rate.
+         lr_delay_mult: float, the multiplier on the rate when delaying it.
+     Returns:
+         lr: the learning rate for the current step.
+     """
+     if lr_delay_steps > 0:
+         # A kind of reverse cosine decay.
+         delay_rate = lr_delay_mult + (1 - lr_delay_mult) * np.sin(
+             0.5 * np.pi * np.clip(step / lr_delay_steps, 0, 1))
+     else:
+         delay_rate = 1.
+     t = np.clip(step / max_steps, 0, 1)
+     log_lerp = np.exp(np.log(lr_init) * (1 - t) + np.log(lr_final) * t)
+     return delay_rate * log_lerp
+
+
+ def save_image(img, filename, gamma=1):
+     check_and_create_dir(filename)
+     imshow = img.detach().cpu()
+     pydiffvg.imwrite(imshow, filename, gamma=gamma)
+
+
+ def get_letter_ids(letter, word, shape_groups):
+     for group, l in zip(shape_groups, word):
+         if l == letter:
+             return group.shape_ids
+
+
+ def combine_word(word, letter, font, experiment_dir):
+     word_svg_scaled = f"./code/data/init/{font}_{word}_scaled.svg"
+     canvas_width_word, canvas_height_word, shapes_word, shape_groups_word = pydiffvg.svg_to_scene(word_svg_scaled)
+     letter_ids = []
+     for l in letter:
+         letter_ids += get_letter_ids(l, word, shape_groups_word)
+
+     w_min, w_max = min([torch.min(shapes_word[ids].points[:, 0]) for ids in letter_ids]), max(
+         [torch.max(shapes_word[ids].points[:, 0]) for ids in letter_ids])
+     h_min, h_max = min([torch.min(shapes_word[ids].points[:, 1]) for ids in letter_ids]), max(
+         [torch.max(shapes_word[ids].points[:, 1]) for ids in letter_ids])
+
+     c_w = (-w_min + w_max) / 2
+     c_h = (-h_min + h_max) / 2
+
+     svg_result = os.path.join(experiment_dir, "output-svg", "output.svg")
+     canvas_width, canvas_height, shapes, shape_groups = pydiffvg.svg_to_scene(svg_result)
+
+     out_w_min, out_w_max = min([torch.min(p.points[:, 0]) for p in shapes]), max(
+         [torch.max(p.points[:, 0]) for p in shapes])
+     out_h_min, out_h_max = min([torch.min(p.points[:, 1]) for p in shapes]), max(
+         [torch.max(p.points[:, 1]) for p in shapes])
+
+     out_c_w = (-out_w_min + out_w_max) / 2
+     out_c_h = (-out_h_min + out_h_max) / 2
+
+     scale_canvas_w = (w_max - w_min) / (out_w_max - out_w_min)
+     scale_canvas_h = (h_max - h_min) / (out_h_max - out_h_min)
+
+     if scale_canvas_h > scale_canvas_w:
+         wsize = int((out_w_max - out_w_min) * scale_canvas_h)
+         scale_canvas_w = wsize / (out_w_max - out_w_min)
+         shift_w = -out_c_w * scale_canvas_w + c_w
+     else:
+         hsize = int((out_h_max - out_h_min) * scale_canvas_w)
+         scale_canvas_h = hsize / (out_h_max - out_h_min)
+         shift_h = -out_c_h * scale_canvas_h + c_h
+
+     for num, p in enumerate(shapes):
+         p.points[:, 0] = p.points[:, 0] * scale_canvas_w
+         p.points[:, 1] = p.points[:, 1] * scale_canvas_h
+         if scale_canvas_h > scale_canvas_w:
+             p.points[:, 0] = p.points[:, 0] - out_w_min * scale_canvas_w + w_min + shift_w
+             p.points[:, 1] = p.points[:, 1] - out_h_min * scale_canvas_h + h_min
+         else:
+             p.points[:, 0] = p.points[:, 0] - out_w_min * scale_canvas_w + w_min
+             p.points[:, 1] = p.points[:, 1] - out_h_min * scale_canvas_h + h_min + shift_h
+
+     for j, s in enumerate(letter_ids):
+         shapes_word[s] = shapes[j]
+
+     save_svg.save_svg(
+         f"{experiment_dir}/{font}_{word}_{letter}.svg", canvas_width, canvas_height, shapes_word,
+         shape_groups_word)
+
+     render = pydiffvg.RenderFunction.apply
+     scene_args = pydiffvg.RenderFunction.serialize_scene(canvas_width, canvas_height, shapes_word, shape_groups_word)
+     img = render(canvas_width, canvas_height, 2, 2, 0, None, *scene_args)
+     img = img[:, :, 3:4] * img[:, :, :3] + \
+         torch.ones(img.shape[0], img.shape[1], 3, device="cuda:0") * (1 - img[:, :, 3:4])
+     img = img[:, :, :3]
+     save_image(img, f"{experiment_dir}/{font}_{word}_{letter}.png")
+
+
+ def create_video(num_iter, experiment_dir, video_frame_freq):
+     img_array = []
+     for ii in range(0, num_iter):
+         if ii % video_frame_freq == 0 or ii == num_iter - 1:
+             filename = os.path.join(
+                 experiment_dir, "video-png", f"iter{ii:04d}.png")
+             img = cv2.imread(filename)
+             img_array.append(img)
+
+     video_name = os.path.join(experiment_dir, "video.mp4")
+     check_and_create_dir(video_name)
+     out = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*'mp4v'), 30.0, (600, 600))
+     for iii in range(len(img_array)):
+         out.write(img_array[iii])
+     out.release()
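`update` above is a recursive merge, unlike `dict.update`, which would replace a nested mapping wholesale; this is what lets an experiment config override individual nested keys of the base YAML. A tiny illustration (hypothetical values, my own sketch):

```python
from utils import update  # assumes code/ is on sys.path

base = {"lr": {"lr_init": 0.002, "lr_final": 0.0008}, "seed": 0}
override = {"lr": {"lr_final": 0.001}}  # touch one nested key only

merged = update(base, override)
assert merged == {"lr": {"lr_init": 0.002, "lr_final": 0.001}, "seed": 0}
```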
coming_soon.png ADDED

Git LFS Details

  • SHA256: cccc22b97840a5007871339b4061934c6554f419f4366a60e22b009cc72b0bde
  • Pointer size: 131 Bytes
  • Size of remote file: 127 kB
images/HobeauxRococeaux-Sherman_NATURE_T.svg ADDED
images/KaushanScript-Regular_BUNNY_Y.svg ADDED
images/teaser.png ADDED

Git LFS Details

  • SHA256: 3e40d0508fb77a4c2a47cb8aa38c07b10eff75480af239a50eecba9479c1bcdc
  • Pointer size: 130 Bytes
  • Size of remote file: 69.7 kB
requirements.txt ADDED
@@ -0,0 +1,148 @@
+ accelerate==1.0.1
+ annotated-types==0.7.0
+ anyio==4.5.2
+ Brotli @ file:///croot/brotli-split_1714483155106/work
+ certifi==2025.6.15
+ charset-normalizer==3.4.2
+ click @ file:///croot/click_1698129812380/work
+ cloudpickle @ file:///croot/cloudpickle_1721657346512/work
+ coloredlogs==15.0.1
+ contourpy==1.1.1
+ cssutils==2.11.1
+ cycler==0.12.1
+ cytoolz @ file:///croot/cytoolz_1701723583781/work
+ dask @ file:///croot/dask-core_1683065217061/work
+ diffusers==0.29.2
+ diffvg==0.0.1
+ dnspython==2.6.1
+ easydict==1.13
+ email_validator==2.2.0
+ eval_type_backport==0.2.2
+ exceptiongroup==1.3.0
+ fastapi==0.115.13
+ fastapi-cli==0.0.7
+ filelock==3.16.1
+ fonttools==4.57.0
+ freetype-py==2.5.1
+ fsspec==2025.3.0
+ ftfy==6.2.3
+ git-lfs==1.6
+ gitdb==4.0.12
+ GitPython==3.1.44
+ greenlet==3.1.1
+ h11==0.16.0
+ hf-xet==1.1.5
+ httpcore==1.0.9
+ httptools==0.6.4
+ httpx==0.28.1
+ huggingface_hub @ file:///home/conda/feedstock_root/build_artifacts/huggingface_hub_1733636895686/work
+ humanfriendly==10.0
+ idna==3.10
+ imagecodecs @ file:///croot/imagecodecs_1695064943445/work
+ imageio @ file:///croot/imageio_1707247282708/work
+ imageio-ffmpeg==0.5.1
+ importlib_metadata==8.5.0
+ importlib_resources==6.4.5
+ itsdangerous==2.2.0
+ Jinja2==3.1.6
+ jsonpatch==1.33
+ jsonpointer==3.0.0
+ kiwisolver==1.4.7
+ kornia==0.6.8
+ llvmlite==0.41.1
+ locket @ file:///opt/conda/conda-bld/locket_1652903118915/work
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ matplotlib==3.7.5
+ mdurl==0.1.2
+ mkl-fft @ file:///croot/mkl_fft_1695058164594/work
+ mkl-random @ file:///croot/mkl_random_1695059800811/work
+ mkl-service==2.4.0
+ more-itertools==10.5.0
+ mpmath==1.3.0
+ networkx==3.1
+ numba==0.58.1
+ numpy==1.24.4
+ nvidia-cublas-cu12==12.1.3.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-cudnn-cu12==9.1.0.70
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-curand-cu12==10.3.2.106
+ nvidia-cusolver-cu12==11.4.5.107
+ nvidia-cusparse-cu12==12.1.0.106
+ nvidia-nccl-cu12==2.20.5
+ nvidia-nvjitlink-cu12==12.9.86
+ nvidia-nvtx-cu12==12.1.105
+ opencv-python==4.5.4.60
+ orjson==3.10.15
+ packaging==25.0
+ pandas==2.0.3
+ partd @ file:///croot/partd_1698702562572/work
+ pillow==10.4.0
+ platformdirs @ file:///croot/platformdirs_1692205439124/work
+ pooch @ file:///croot/pooch_1695850093751/work
+ protobuf==5.29.5
+ psutil==7.0.0
+ pyaml==25.5.0
+ pydantic==2.10.6
+ pydantic-extra-types==2.10.5
+ pydantic-settings==2.8.1
+ pydantic_core==2.27.2
+ Pygments==2.19.2
+ pyparsing==3.1.4
+ PySocks @ file:///tmp/build/80754af9/pysocks_1605305779399/work
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.0.1
+ python-multipart==0.0.20
+ pytz==2025.2
+ PyWavelets @ file:///croot/pywavelets_1670425177960/work
+ PyYAML==6.0.2
+ regex==2024.11.6
+ requests==2.32.4
+ rich==14.0.0
+ rich-toolkit==0.14.7
+ safetensors==0.5.3
+ scikit-fmm==2024.5.29
+ scikit-image @ file:///croot/scikit-image_1669241743693/work
+ scipy==1.10.1
+ seaborn==0.13.2
+ sentry-sdk==2.31.0
+ setproctitle==1.3.6
+ shapely==2.0.7
+ shellingham==1.5.4
+ six==1.17.0
+ smmap==5.0.2
+ sniffio==1.3.1
+ SQLAlchemy==2.0.41
+ starlette==0.44.0
+ svgpathtools==1.7.1
+ svgwrite==1.4.3
+ sympy==1.13.3
+ tifffile @ file:///croot/tifffile_1695107451082/work
+ tokenizers==0.20.3
+ toolz @ file:///croot/toolz_1667464077321/work
+ torch==2.4.1
+ torch-tools==0.1.5
+ torchaudio==2.4.1+cu121
+ torchvision==0.19.1+cu121
+ tornado==6.4.2
+ tqdm==4.67.1
+ transformers==4.46.3
+ triton==3.0.0
+ typer==0.16.0
+ typing_extensions==4.13.2
+ tzdata==2025.2
+ ujson==5.10.0
+ urllib3==2.2.3
+ uvicorn==0.33.0
+ uvloop==0.21.0
+ visdom==0.2.4
+ wandb==0.20.1
+ watchfiles==0.24.0
+ wcwidth==0.2.13
+ websocket-client==1.8.0
+ websockets==13.1
+ xformers==0.0.28.post1
+ zipp==3.20.2
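Note that the `+cu121` builds of torch/torchvision/torchaudio are not on PyPI; installing this file as-is generally needs PyTorch's CUDA 12.1 wheel index (e.g. `pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu121`), and `diffvg==0.0.1` appears to reflect a local source build of diffvg rather than a PyPI package.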
rest_api.py ADDED
@@ -0,0 +1,22 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel, ConfigDict
+ import wai_service
+
+ app = FastAPI()
+
+
+ class InferenceRequest(BaseModel):
+     # pydantic v2 style (pydantic==2.10.6 is pinned in requirements.txt);
+     # extra="allow" lets clients pass arbitrary config overrides through
+     model_config = ConfigDict(extra="allow")
+
+     word: str
+     optimized_letter: str
+     font: str = "KaushanScript-Regular"
+     seed: int = 0
+
+
+ @app.post("/generate")
+ def generate(req: InferenceRequest):
+     try:
+         # model_dump() replaces the deprecated .dict() under pydantic v2
+         img_b64 = wai_service.handler(req.model_dump())
+         return {"image_base64": img_b64}
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
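A hedged client sketch (my own, assuming the API is served locally, e.g. via `uvicorn rest_api:app --host 0.0.0.0 --port 8000`):

```python
import base64
import requests

resp = requests.post(
    "http://localhost:8000/generate",
    json={"word": "BUNNY", "optimized_letter": "Y", "seed": 0},
    timeout=3600,  # the optimisation can take many minutes, even on GPU
)
resp.raise_for_status()
with open("bunny.png", "wb") as f:
    f.write(base64.b64decode(resp.json()["image_base64"]))
```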
run_word_as_image.sh ADDED
@@ -0,0 +1,25 @@
+ #!/bin/bash
+
+ set -e
+
+ USE_WANDB=0 # CHANGE IF YOU WANT WANDB
+ WANDB_USER="none"
+
+ EXPERIMENT=conformal_0.5_dist_pixel_100_kernel201
+
+ CONCEPT=BUNNY
+ WORD=BUNNY
+ fonts=(KaushanScript-Regular)
+ for j in "${fonts[@]}"
+ do
+     letter_=("Y")
+     SEED=0
+     for i in "${letter_[@]}"
+     do
+         echo "$i"
+         font_name=$j
+         ARGS="--experiment $EXPERIMENT --optimized_letter ${i} --seed $SEED --font ${font_name} --use_wandb ${USE_WANDB} --wandb_user ${WANDB_USER}"
+         CUDA_VISIBLE_DEVICES=0 python code/main.py $ARGS --semantic_concept "${CONCEPT}" --word "${WORD}"
+     done
+ done
+
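The script is meant to be invoked from the repository root, e.g. `bash run_word_as_image.sh`; it loops over the `fonts` and `letter_` arrays, so adding entries there fans the optimisation out over more font/letter combinations.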
wai_service.py ADDED
@@ -0,0 +1,68 @@
+ # --- wai_service.py (final handler) ---------------------------------
+ import base64, sys, os, torch
+ sys.path.append(os.path.join(os.path.dirname(__file__), "code"))
+
+ from code.config import set_config
+ from code.main import generate_word_image
+ from easydict import EasyDict
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ # flags that *are* recognised by code/config.parse_args()
+ KNOWN_CLI_KEYS = {
+     "word",
+     "optimized_letter",
+     "font",
+     "seed",
+     "experiment",
+     "use_wandb",
+     "wandb_user",
+ }
+
+
+ def _sanitize(cfg):
+     """
+     Recursively walk an EasyDict / dict and replace every Ellipsis (`...`)
+     with None. Returns the same object (in-place).
+     """
+     if isinstance(cfg, dict):
+         for k, v in cfg.items():
+             if v is Ellipsis:
+                 cfg[k] = None
+             else:
+                 _sanitize(v)
+     return cfg
+
+
+ def handler(payload: dict) -> str:
+     # 1️⃣ Build fake argv *only* from recognised keys
+     cli_argv = [sys.argv[0]]
+     for k in KNOWN_CLI_KEYS & payload.keys():
+         cli_argv += [f"--{k}", str(payload[k])]
+
+     orig_argv = sys.argv[:]
+     try:
+         sys.argv = cli_argv
+         cfg = set_config()  # EasyDict with YAML + CLI-compatible fields
+     finally:
+         sys.argv = orig_argv
+
+     _sanitize(cfg)
+
+     # 2️⃣ Overlay ALL payload keys (new ones like render_size stick)
+     for k, v in payload.items():
+         setattr(cfg, k, v)
+
+     # sensible defaults
+     cfg.render_size = getattr(cfg, "render_size", 384)
+     cfg.word = cfg.word.upper()
+     cfg.optimized_letter = getattr(cfg, "optimized_letter", cfg.word[-1])
+     if getattr(cfg.diffusion, "model", ...) is Ellipsis:
+         cfg.diffusion.model = "runwayml/stable-diffusion-v1-5"
+
+     # 3️⃣ Run optimisation
+     out_path = generate_word_image(cfg, device)
+
+     # 4️⃣ Return base-64
+     with open(out_path, "rb") as f:
+         return base64.b64encode(f.read()).decode()
+ # --------------------------------------------------------------------
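A hedged usage sketch for calling the handler directly, without the HTTP layer (my own; assumes a CUDA-capable machine with the repo's dependencies installed and the default KaushanScript-Regular font under code/data/fonts):

```python
import base64
import wai_service

if __name__ == "__main__":
    img_b64 = wai_service.handler({"word": "BUNNY", "optimized_letter": "Y", "seed": 0})
    with open("bunny.png", "wb") as f:
        f.write(base64.b64decode(img_b64))
```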