+13
Cargo.lock
+13
Cargo.lock
···
699
699
checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd"
700
700
701
701
[[package]]
702
+
name = "ipld-core"
703
+
version = "0.4.2"
704
+
source = "registry+https://github.com/rust-lang/crates.io-index"
705
+
checksum = "104718b1cc124d92a6d01ca9c9258a7df311405debb3408c445a36452f9bf8db"
706
+
dependencies = [
707
+
"cid",
708
+
"serde",
709
+
"serde_bytes",
710
+
]
711
+
712
+
[[package]]
702
713
name = "is_terminal_polyfill"
703
714
version = "1.70.1"
704
715
source = "registry+https://github.com/rust-lang/crates.io-index"
···
722
733
name = "jacquard-common"
723
734
version = "0.1.0"
724
735
dependencies = [
736
+
"base64",
725
737
"bytes",
726
738
"chrono",
727
739
"cid",
728
740
"enum_dispatch",
741
+
"ipld-core",
729
742
"langtag",
730
743
"miette",
731
744
"multibase",
+2
crates/jacquard-common/Cargo.toml
+2
crates/jacquard-common/Cargo.toml
···
12
12
description.workspace = true
13
13
14
14
[dependencies]
15
+
base64 = "0.22.1"
15
16
bytes = "1.10.1"
16
17
chrono = "0.4.42"
17
18
cid = { version = "0.11.1", features = ["serde", "std"] }
18
19
enum_dispatch = "0.3.13"
20
+
ipld-core = { version = "0.4.2", features = ["serde"] }
19
21
langtag = { version = "0.4.0", features = ["serde"] }
20
22
miette = "7.6.0"
21
23
multibase = "0.9.1"
+2
crates/jacquard-common/src/types.rs
+2
crates/jacquard-common/src/types.rs
+413
-3
crates/jacquard-common/src/types/value.rs
+413
-3
crates/jacquard-common/src/types/value.rs
···
1
+
use base64::{
2
+
Engine,
3
+
prelude::{BASE64_STANDARD, BASE64_STANDARD_NO_PAD, BASE64_URL_SAFE, BASE64_URL_SAFE_NO_PAD},
4
+
};
1
5
use bytes::Bytes;
2
6
use serde::{Deserialize, Deserializer, Serialize, Serializer};
3
-
use smol_str::SmolStr;
4
-
use std::collections::BTreeMap;
7
+
use smol_str::{SmolStr, ToSmolStr};
8
+
use std::{collections::BTreeMap, str::FromStr};
9
+
use url::Url;
5
10
6
-
use crate::types::{blob::Blob, string::*};
11
+
use crate::types::{
12
+
DataModelType, LexiconStringType,
13
+
blob::{Blob, MimeType},
14
+
string::*,
15
+
};
7
16
8
17
#[derive(Debug, Clone, PartialEq, Eq)]
9
18
pub enum Data<'s> {
···
18
27
Blob(Blob<'s>),
19
28
}
20
29
30
+
impl<'s> Data<'s> {
31
+
pub fn from_json(json: &'s serde_json::Value) -> Self {
32
+
if let Some(value) = json.as_bool() {
33
+
Self::Boolean(value)
34
+
} else if let Some(value) = json.as_i64() {
35
+
Self::Integer(value)
36
+
} else if let Some(value) = json.as_str() {
37
+
Self::String(AtprotoStr::new(value))
38
+
} else if let Some(value) = json.as_array() {
39
+
Self::Array(Array::from_json(value))
40
+
} else if let Some(value) = json.as_object() {
41
+
Object::from_json(value)
42
+
} else if let Some(num) = json.as_number() {
43
+
// deliberately permissive here, just in case.
44
+
Self::String(AtprotoStr::new_owned(num.to_smolstr()))
45
+
} else {
46
+
Self::Null
47
+
}
48
+
}
49
+
}
50
+
21
51
#[derive(Debug, Clone, PartialEq, Eq)]
22
52
pub struct Array<'s>(pub Vec<Data<'s>>);
23
53
54
+
impl<'s> Array<'s> {
55
+
pub fn from_json(json: &'s Vec<serde_json::Value>) -> Self {
56
+
let mut array = Vec::with_capacity(json.len());
57
+
for item in json {
58
+
array.push(Data::from_json(item));
59
+
}
60
+
Self(array)
61
+
}
62
+
}
63
+
24
64
#[derive(Debug, Clone, PartialEq, Eq)]
25
65
pub struct Object<'s>(pub BTreeMap<SmolStr, Data<'s>>);
66
+
67
+
impl<'s> Object<'s> {
68
+
pub fn from_json(json: &'s serde_json::Map<String, serde_json::Value>) -> Data<'s> {
69
+
if let Some(type_field) = json.get("$type").and_then(|v| v.as_str()) {
70
+
if infer_from_type(type_field) == DataModelType::Blob {
71
+
if let Some(blob) = json_to_blob(json) {
72
+
return Data::Blob(blob);
73
+
}
74
+
}
75
+
}
76
+
let mut map = BTreeMap::new();
77
+
78
+
for (key, value) in json {
79
+
if key == "$type" {
80
+
continue; // skip, because we've already handled it
81
+
}
82
+
match string_key_type_guess(key) {
83
+
DataModelType::Null => {
84
+
if value.is_null() {
85
+
map.insert(key.to_smolstr(), Data::Null);
86
+
} else {
87
+
map.insert(key.to_smolstr(), Data::from_json(value));
88
+
}
89
+
}
90
+
DataModelType::Boolean => {
91
+
if let Some(value) = value.as_bool() {
92
+
map.insert(key.to_smolstr(), Data::Boolean(value));
93
+
} else {
94
+
map.insert(key.to_smolstr(), Data::from_json(value));
95
+
}
96
+
}
97
+
DataModelType::Integer => {
98
+
if let Some(int) = value.as_i64() {
99
+
map.insert(key.to_smolstr(), Data::Integer(int));
100
+
} else {
101
+
map.insert(key.to_smolstr(), Data::from_json(value));
102
+
}
103
+
}
104
+
DataModelType::Bytes => {
105
+
if let Some(value) = value.as_str() {
106
+
map.insert(key.to_smolstr(), decode_bytes(value));
107
+
} else {
108
+
map.insert(key.to_smolstr(), Data::from_json(value));
109
+
}
110
+
}
111
+
DataModelType::CidLink => {
112
+
if let Some(value) = value.as_str() {
113
+
map.insert(
114
+
key.to_smolstr(),
115
+
Data::String(AtprotoStr::Cid(Cid::Str(value.into()))),
116
+
);
117
+
} else {
118
+
map.insert(key.to_smolstr(), Data::from_json(value));
119
+
}
120
+
}
121
+
DataModelType::Blob => {
122
+
if let Some(value) = value.as_object() {
123
+
map.insert(key.to_smolstr(), Object::from_json(value));
124
+
} else {
125
+
map.insert(key.to_smolstr(), Data::from_json(value));
126
+
}
127
+
}
128
+
DataModelType::Array => {
129
+
if let Some(value) = value.as_array() {
130
+
map.insert(key.to_smolstr(), Data::Array(Array::from_json(value)));
131
+
} else {
132
+
map.insert(key.to_smolstr(), Data::from_json(value));
133
+
}
134
+
}
135
+
DataModelType::Object => {
136
+
if let Some(value) = value.as_object() {
137
+
map.insert(key.to_smolstr(), Object::from_json(value));
138
+
} else {
139
+
map.insert(key.to_smolstr(), Data::from_json(value));
140
+
}
141
+
}
142
+
DataModelType::String(string_type) => {
143
+
if let Some(value) = value.as_str() {
144
+
match string_type {
145
+
LexiconStringType::Datetime => {
146
+
if let Ok(datetime) = Datetime::from_str(value) {
147
+
map.insert(
148
+
key.to_smolstr(),
149
+
Data::String(AtprotoStr::Datetime(datetime)),
150
+
);
151
+
} else {
152
+
map.insert(
153
+
key.to_smolstr(),
154
+
Data::String(AtprotoStr::String(value.into())),
155
+
);
156
+
}
157
+
}
158
+
LexiconStringType::AtUri => {
159
+
if let Ok(value) = AtUri::new(value) {
160
+
map.insert(
161
+
key.to_smolstr(),
162
+
Data::String(AtprotoStr::AtUri(value)),
163
+
);
164
+
} else {
165
+
map.insert(
166
+
key.to_smolstr(),
167
+
Data::String(AtprotoStr::String(value.into())),
168
+
);
169
+
}
170
+
}
171
+
LexiconStringType::Did => {
172
+
if let Ok(value) = Did::new(value) {
173
+
map.insert(
174
+
key.to_smolstr(),
175
+
Data::String(AtprotoStr::Did(value)),
176
+
);
177
+
} else {
178
+
map.insert(
179
+
key.to_smolstr(),
180
+
Data::String(AtprotoStr::String(value.into())),
181
+
);
182
+
}
183
+
}
184
+
LexiconStringType::Handle => {
185
+
if let Ok(value) = Handle::new(value) {
186
+
map.insert(
187
+
key.to_smolstr(),
188
+
Data::String(AtprotoStr::Handle(value)),
189
+
);
190
+
} else {
191
+
map.insert(
192
+
key.to_smolstr(),
193
+
Data::String(AtprotoStr::String(value.into())),
194
+
);
195
+
}
196
+
}
197
+
LexiconStringType::AtIdentifier => {
198
+
if let Ok(value) = AtIdentifier::new(value) {
199
+
map.insert(
200
+
key.to_smolstr(),
201
+
Data::String(AtprotoStr::AtIdentifier(value)),
202
+
);
203
+
} else {
204
+
map.insert(
205
+
key.to_smolstr(),
206
+
Data::String(AtprotoStr::String(value.into())),
207
+
);
208
+
}
209
+
}
210
+
LexiconStringType::Nsid => {
211
+
if let Ok(value) = Nsid::new(value) {
212
+
map.insert(
213
+
key.to_smolstr(),
214
+
Data::String(AtprotoStr::Nsid(value)),
215
+
);
216
+
} else {
217
+
map.insert(
218
+
key.to_smolstr(),
219
+
Data::String(AtprotoStr::String(value.into())),
220
+
);
221
+
}
222
+
}
223
+
LexiconStringType::Cid => {
224
+
if let Ok(value) = Cid::new(value.as_bytes()) {
225
+
map.insert(
226
+
key.to_smolstr(),
227
+
Data::String(AtprotoStr::Cid(value)),
228
+
);
229
+
} else {
230
+
map.insert(
231
+
key.to_smolstr(),
232
+
Data::String(AtprotoStr::String(value.into())),
233
+
);
234
+
}
235
+
}
236
+
LexiconStringType::Language => {
237
+
if let Ok(value) = Language::new(value) {
238
+
map.insert(
239
+
key.to_smolstr(),
240
+
Data::String(AtprotoStr::Language(value)),
241
+
);
242
+
} else {
243
+
map.insert(
244
+
key.to_smolstr(),
245
+
Data::String(AtprotoStr::String(value.into())),
246
+
);
247
+
}
248
+
}
249
+
LexiconStringType::Tid => {
250
+
if let Ok(value) = Tid::new(value) {
251
+
map.insert(
252
+
key.to_smolstr(),
253
+
Data::String(AtprotoStr::Tid(value)),
254
+
);
255
+
} else {
256
+
map.insert(
257
+
key.to_smolstr(),
258
+
Data::String(AtprotoStr::String(value.into())),
259
+
);
260
+
}
261
+
}
262
+
LexiconStringType::RecordKey => {
263
+
if let Ok(value) = Rkey::new(value) {
264
+
map.insert(
265
+
key.to_smolstr(),
266
+
Data::String(AtprotoStr::RecordKey(RecordKey::from(value))),
267
+
);
268
+
} else {
269
+
map.insert(
270
+
key.to_smolstr(),
271
+
Data::String(AtprotoStr::String(value.into())),
272
+
);
273
+
}
274
+
}
275
+
LexiconStringType::Uri(_) => {
276
+
if let Ok(uri) = Uri::new(value) {
277
+
map.insert(
278
+
key.to_smolstr(),
279
+
Data::String(AtprotoStr::Uri(uri)),
280
+
);
281
+
} else {
282
+
map.insert(
283
+
key.to_smolstr(),
284
+
Data::String(AtprotoStr::String(value.into())),
285
+
);
286
+
}
287
+
}
288
+
LexiconStringType::String => {
289
+
map.insert(key.to_smolstr(), Data::String(parse_string(value)));
290
+
}
291
+
}
292
+
} else {
293
+
map.insert(key.to_smolstr(), Data::from_json(value));
294
+
}
295
+
}
296
+
}
297
+
}
298
+
299
+
Data::Object(Object(map))
300
+
}
301
+
302
+
//pub fn from_cbor(cbor: BTreeMap<String, ipld_core::ipld::Ipld>) -> Self {}
303
+
}
304
+
305
+
/// smarter parsing to avoid trying as many posibilities.
306
+
pub fn parse_string<'s>(string: &'s str) -> AtprotoStr<'s> {
307
+
if string.len() < 2048 && string.starts_with("did:") {
308
+
if let Ok(did) = Did::new(string) {
309
+
return AtprotoStr::Did(did);
310
+
}
311
+
} else if string.starts_with("20") && string.ends_with("Z") {
312
+
// probably a date (for the next 75 years)
313
+
if let Ok(datetime) = Datetime::from_str(string) {
314
+
return AtprotoStr::Datetime(datetime);
315
+
}
316
+
} else if string.starts_with("at://") {
317
+
if let Ok(uri) = AtUri::new(string) {
318
+
return AtprotoStr::AtUri(uri);
319
+
}
320
+
} else if string.starts_with("https://") {
321
+
if let Ok(uri) = Url::parse(string) {
322
+
return AtprotoStr::Uri(Uri::Https(uri));
323
+
}
324
+
} else if string.starts_with("wss://") {
325
+
if let Ok(uri) = Url::parse(string) {
326
+
return AtprotoStr::Uri(Uri::Https(uri));
327
+
}
328
+
} else if string.starts_with("ipfs://") {
329
+
return AtprotoStr::Uri(Uri::Cid(Cid::str(string)));
330
+
} else if string.contains('.') && !string.contains([' ', '\n']) {
331
+
if string.len() < 253 && Url::parse(string).is_ok() {
332
+
// probably a handle
333
+
if let Ok(handle) = AtIdentifier::new(string) {
334
+
return AtprotoStr::AtIdentifier(handle);
335
+
} else {
336
+
return AtprotoStr::Uri(Uri::Any(string.into()));
337
+
}
338
+
} else if let Ok(nsid) = Nsid::new(string) {
339
+
return AtprotoStr::Nsid(nsid);
340
+
}
341
+
} else if string.len() == 13 {
342
+
if let Ok(tid) = Tid::new(string) {
343
+
return AtprotoStr::Tid(tid);
344
+
}
345
+
} else if !string.contains([' ', '\n']) {
346
+
// cid?
347
+
if let Ok(cid) = Cid::new(string.as_bytes()) {
348
+
return AtprotoStr::Cid(cid);
349
+
}
350
+
}
351
+
352
+
AtprotoStr::String(string.into())
353
+
}
354
+
355
+
/// First-level guess at what we should parse the corresponding value as
356
+
/// Helps speed up parsing, avoids some ambiguities.
357
+
pub fn string_key_type_guess(key: &str) -> DataModelType {
358
+
match key {
359
+
"cid" => DataModelType::String(LexiconStringType::Cid),
360
+
"uri" => DataModelType::String(LexiconStringType::Uri(super::UriType::Any)),
361
+
"did" => DataModelType::String(LexiconStringType::Did),
362
+
"handle" => DataModelType::String(LexiconStringType::AtIdentifier),
363
+
"ref" => DataModelType::CidLink,
364
+
"list" => DataModelType::String(LexiconStringType::AtUri),
365
+
"blobref" => DataModelType::Blob,
366
+
"createdAt" | "created" | "indexedAt" | "issuedAt" | "updatedAt" | "playedTime" => {
367
+
DataModelType::String(LexiconStringType::Datetime)
368
+
}
369
+
"size" | "width" | "height" => DataModelType::Integer,
370
+
"value" | "record" | "embed" => DataModelType::Object,
371
+
"text" | "displayName" | "alt" | "name" | "description" => {
372
+
DataModelType::String(LexiconStringType::String)
373
+
}
374
+
"langs" | "blobs" | "images" | "labels" => DataModelType::Array,
375
+
"$bytes" => DataModelType::Bytes,
376
+
"$link" => DataModelType::String(LexiconStringType::Cid),
377
+
"$type" => DataModelType::String(LexiconStringType::String),
378
+
379
+
// we assume others are strings speficially because it's easy to check if a serde_json::Value
380
+
// or Ipld value is at least a string, and then we fall back to Object/Map.
381
+
_ => DataModelType::String(LexiconStringType::String),
382
+
}
383
+
}
384
+
385
+
pub fn json_to_blob<'b>(blob: &'b serde_json::Map<String, serde_json::Value>) -> Option<Blob<'b>> {
386
+
let mime_type = blob.get("mimeType").and_then(|v| v.as_str());
387
+
if let Some(value) = blob.get("ref") {
388
+
if let Some(value) = value
389
+
.as_object()
390
+
.and_then(|o| o.get("$link"))
391
+
.and_then(|v| v.as_str())
392
+
{
393
+
let size = blob.get("size").and_then(|v| v.as_u64());
394
+
if let (Some(mime_type), Some(size)) = (mime_type, size) {
395
+
return Some(Blob {
396
+
r#ref: Cid::str(value),
397
+
mime_type: MimeType::raw(mime_type),
398
+
size: size as usize,
399
+
});
400
+
}
401
+
}
402
+
} else if let Some(value) = blob.get("cid").and_then(|v| v.as_str()) {
403
+
if let Some(mime_type) = mime_type {
404
+
return Some(Blob {
405
+
r#ref: Cid::str(value),
406
+
mime_type: MimeType::raw(mime_type),
407
+
size: 0,
408
+
});
409
+
}
410
+
}
411
+
412
+
None
413
+
}
414
+
415
+
pub fn infer_from_type(type_field: &str) -> DataModelType {
416
+
match type_field {
417
+
"blob" => DataModelType::Blob,
418
+
_ => DataModelType::Object,
419
+
}
420
+
}
421
+
422
+
pub fn decode_bytes<'s>(bytes: &'s str) -> Data<'s> {
423
+
// First one should just work. rest are insurance.
424
+
if let Ok(bytes) = BASE64_STANDARD.decode(bytes) {
425
+
Data::Bytes(Bytes::from_owner(bytes))
426
+
} else if let Ok(bytes) = BASE64_STANDARD_NO_PAD.decode(bytes) {
427
+
Data::Bytes(Bytes::from_owner(bytes))
428
+
} else if let Ok(bytes) = BASE64_URL_SAFE.decode(bytes) {
429
+
Data::Bytes(Bytes::from_owner(bytes))
430
+
} else if let Ok(bytes) = BASE64_URL_SAFE_NO_PAD.decode(bytes) {
431
+
Data::Bytes(Bytes::from_owner(bytes))
432
+
} else {
433
+
Data::String(AtprotoStr::String(bytes.into()))
434
+
}
435
+
}