# Forking what is left of ZeroNet and hopefully adding an AT Proto Frontend/Proxy
1import json
2import time
3import re
4import os
5import copy
6import base64
7import sys
8
9import gevent
10
11from Debug import Debug
12from Crypt import CryptHash
13from Config import config
14from util import helper
15from util import Diff
16from util import SafeRe
17from Peer import PeerHashfield
18from .ContentDbDict import ContentDbDict
19from Plugin import PluginManager
20
21
class VerifyError(Exception):
    """Raised when a file or content.json fails verification."""
    pass
24
25
class SignError(Exception):
    """Raised when signing a content.json is not possible (bad path or key)."""
    pass
28
29
30@PluginManager.acceptPlugins
31class ContentManager(object):
32
33 def __init__(self, site):
34 self.site = site
35 self.log = self.site.log
36 self.contents = ContentDbDict(site)
37 self.hashfield = PeerHashfield()
38 self.has_optional_files = False
39
    # Load all content.json files
    def loadContents(self):
        """Populate self.contents on first run and restore the optional-file
        hashfield from the settings cache, then attach the site to the db."""
        if len(self.contents) == 0:
            # Content db empty: import every content.json from the filesystem
            self.log.info("ContentDb not initialized, load files from filesystem...")
            self.loadContent(add_bad_files=False, delete_removed_files=False)
            self.site.settings["size"], self.site.settings["size_optional"] = self.getTotalSize()

        # Load hashfield cache
        if "hashfield" in self.site.settings.get("cache", {}):
            # Restore the serialized hashfield, then drop the one-shot cache entry
            self.hashfield.frombytes(base64.b64decode(self.site.settings["cache"]["hashfield"]))
            del self.site.settings["cache"]["hashfield"]
        elif self.contents.get("content.json") and self.site.settings["size_optional"] > 0:
            self.site.storage.updateBadFiles()  # No hashfield cache created yet
        self.has_optional_files = bool(self.hashfield)

        self.contents.db.initSite(self.site)
56
57 def getFileChanges(self, old_files, new_files):
58 deleted = {key: val for key, val in old_files.items() if key not in new_files}
59 deleted_hashes = {val.get("sha512"): key for key, val in old_files.items() if key not in new_files}
60 added = {key: val for key, val in new_files.items() if key not in old_files}
61 renamed = {}
62 for relative_path, node in added.items():
63 hash = node.get("sha512")
64 if hash in deleted_hashes:
65 relative_path_old = deleted_hashes[hash]
66 renamed[relative_path_old] = relative_path
67 del(deleted[relative_path_old])
68 return list(deleted), renamed
69
70 # Load content.json to self.content
71 # Return: Changed files ["index.html", "data/messages.json"], Deleted files ["old.jpg"]
72 def loadContent(self, content_inner_path="content.json", add_bad_files=True, delete_removed_files=True, load_includes=True, force=False):
73 content_inner_path = content_inner_path.strip("/") # Remove / from beginning
74 old_content = self.contents.get(content_inner_path)
75 content_path = self.site.storage.getPath(content_inner_path)
76 content_dir = helper.getDirname(self.site.storage.getPath(content_inner_path))
77 content_inner_dir = helper.getDirname(content_inner_path)
78
79 if os.path.isfile(content_path):
80 try:
81 # Check if file is newer than what we have
82 if not force and old_content and not self.site.settings.get("own"):
83 for line in open(content_path):
84 if '"modified"' not in line:
85 continue
86 match = re.search(r"([0-9\.]+),$", line.strip(" \r\n"))
87 if match and float(match.group(1)) <= old_content.get("modified", 0):
88 self.log.debug("%s loadContent same json file, skipping" % content_inner_path)
89 return [], []
90
91 new_content = self.site.storage.loadJson(content_inner_path)
92 except Exception as err:
93 self.log.warning("%s load error: %s" % (content_path, Debug.formatException(err)))
94 return [], []
95 else:
96 self.log.debug("Content.json not exist: %s" % content_path)
97 return [], [] # Content.json not exist
98
99 try:
100 # Get the files where the sha512 changed
101 changed = []
102 deleted = []
103 # Check changed
104 for relative_path, info in new_content.get("files", {}).items():
105 if "sha512" in info:
106 hash_type = "sha512"
107 else: # Backward compatibility
108 hash_type = "sha1"
109
110 new_hash = info[hash_type]
111 if old_content and old_content["files"].get(relative_path): # We have the file in the old content
112 old_hash = old_content["files"][relative_path].get(hash_type)
113 else: # The file is not in the old content
114 old_hash = None
115 if old_hash != new_hash:
116 changed.append(content_inner_dir + relative_path)
117
118 # Check changed optional files
119 for relative_path, info in new_content.get("files_optional", {}).items():
120 file_inner_path = content_inner_dir + relative_path
121 new_hash = info["sha512"]
122 if old_content and old_content.get("files_optional", {}).get(relative_path):
123 # We have the file in the old content
124 old_hash = old_content["files_optional"][relative_path].get("sha512")
125 if old_hash != new_hash and self.site.isDownloadable(file_inner_path):
126 changed.append(file_inner_path) # Download new file
127 elif old_hash != new_hash and self.hashfield.hasHash(old_hash) and not self.site.settings.get("own"):
128 try:
129 old_hash_id = self.hashfield.getHashId(old_hash)
130 self.optionalRemoved(file_inner_path, old_hash_id, old_content["files_optional"][relative_path]["size"])
131 self.optionalDelete(file_inner_path)
132 self.log.debug("Deleted changed optional file: %s" % file_inner_path)
133 except Exception as err:
134 self.log.warning("Error deleting file %s: %s" % (file_inner_path, Debug.formatException(err)))
135 else: # The file is not in the old content
136 if self.site.isDownloadable(file_inner_path):
137 changed.append(file_inner_path) # Download new file
138
139 # Check deleted
140 if old_content:
141 old_files = dict(
142 old_content.get("files", {}),
143 **old_content.get("files_optional", {})
144 )
145
146 new_files = dict(
147 new_content.get("files", {}),
148 **new_content.get("files_optional", {})
149 )
150
151 deleted, renamed = self.getFileChanges(old_files, new_files)
152
153 for relative_path_old, relative_path_new in renamed.items():
154 self.log.debug("Renaming: %s -> %s" % (relative_path_old, relative_path_new))
155 if relative_path_new in new_content.get("files_optional", {}):
156 self.optionalRenamed(content_inner_dir + relative_path_old, content_inner_dir + relative_path_new)
157 if self.site.storage.isFile(relative_path_old):
158 try:
159 self.site.storage.rename(relative_path_old, relative_path_new)
160 if relative_path_new in changed:
161 changed.remove(relative_path_new)
162 self.log.debug("Renamed: %s -> %s" % (relative_path_old, relative_path_new))
163 except Exception as err:
164 self.log.warning("Error renaming file: %s -> %s %s" % (relative_path_old, relative_path_new, err))
165
166 if deleted and not self.site.settings.get("own"):
167 # Deleting files that no longer in content.json
168 for file_relative_path in deleted:
169 file_inner_path = content_inner_dir + file_relative_path
170 try:
171 # Check if the deleted file is optional
172 if old_content.get("files_optional") and old_content["files_optional"].get(file_relative_path):
173 self.optionalDelete(file_inner_path)
174 old_hash = old_content["files_optional"][file_relative_path].get("sha512")
175 if self.hashfield.hasHash(old_hash):
176 old_hash_id = self.hashfield.getHashId(old_hash)
177 self.optionalRemoved(file_inner_path, old_hash_id, old_content["files_optional"][file_relative_path]["size"])
178 else:
179 self.site.storage.delete(file_inner_path)
180
181 self.log.debug("Deleted file: %s" % file_inner_path)
182 except Exception as err:
183 self.log.debug("Error deleting file %s: %s" % (file_inner_path, Debug.formatException(err)))
184
185 # Cleanup empty dirs
186 tree = {root: [dirs, files] for root, dirs, files in os.walk(self.site.storage.getPath(content_inner_dir))}
187 for root in sorted(tree, key=len, reverse=True):
188 dirs, files = tree[root]
189 if dirs == [] and files == []:
190 root_inner_path = self.site.storage.getInnerPath(root.replace("\\", "/"))
191 self.log.debug("Empty directory: %s, cleaning up." % root_inner_path)
192 try:
193 self.site.storage.deleteDir(root_inner_path)
194 # Remove from tree dict to reflect changed state
195 tree[os.path.dirname(root)][0].remove(os.path.basename(root))
196 except Exception as err:
197 self.log.debug("Error deleting empty directory %s: %s" % (root_inner_path, err))
198
199 # Check archived
200 if old_content and "user_contents" in new_content and "archived" in new_content["user_contents"]:
201 old_archived = old_content.get("user_contents", {}).get("archived", {})
202 new_archived = new_content.get("user_contents", {}).get("archived", {})
203 self.log.debug("old archived: %s, new archived: %s" % (len(old_archived), len(new_archived)))
204 archived_changed = {
205 key: date_archived
206 for key, date_archived in new_archived.items()
207 if old_archived.get(key) != new_archived[key]
208 }
209 if archived_changed:
210 self.log.debug("Archived changed: %s" % archived_changed)
211 for archived_dirname, date_archived in archived_changed.items():
212 archived_inner_path = content_inner_dir + archived_dirname + "/content.json"
213 if self.contents.get(archived_inner_path, {}).get("modified", 0) < date_archived:
214 self.removeContent(archived_inner_path)
215 deleted += archived_inner_path
216 self.site.settings["size"], self.site.settings["size_optional"] = self.getTotalSize()
217
218 # Check archived before
219 if old_content and "user_contents" in new_content and "archived_before" in new_content["user_contents"]:
220 old_archived_before = old_content.get("user_contents", {}).get("archived_before", 0)
221 new_archived_before = new_content.get("user_contents", {}).get("archived_before", 0)
222 if old_archived_before != new_archived_before:
223 self.log.debug("Archived before changed: %s -> %s" % (old_archived_before, new_archived_before))
224
225 # Remove downloaded archived files
226 num_removed_contents = 0
227 for archived_inner_path in self.listModified(before=new_archived_before):
228 if archived_inner_path.startswith(content_inner_dir) and archived_inner_path != content_inner_path:
229 self.removeContent(archived_inner_path)
230 num_removed_contents += 1
231 self.site.settings["size"], self.site.settings["size_optional"] = self.getTotalSize()
232
233 # Remove archived files from download queue
234 num_removed_bad_files = 0
235 for bad_file in list(self.site.bad_files.keys()):
236 if bad_file.endswith("content.json"):
237 del self.site.bad_files[bad_file]
238 num_removed_bad_files += 1
239
240 if num_removed_bad_files > 0:
241 self.site.worker_manager.removeSolvedFileTasks(mark_as_good=False)
242 gevent.spawn(self.site.update, since=0)
243
244 self.log.debug("Archived removed contents: %s, removed bad files: %s" % (num_removed_contents, num_removed_bad_files))
245
246 # Load includes
247 if load_includes and "includes" in new_content:
248 for relative_path, info in list(new_content["includes"].items()):
249 include_inner_path = content_inner_dir + relative_path
250 if self.site.storage.isFile(include_inner_path): # Content.json exists, load it
251 include_changed, include_deleted = self.loadContent(
252 include_inner_path, add_bad_files=add_bad_files, delete_removed_files=delete_removed_files
253 )
254 if include_changed:
255 changed += include_changed # Add changed files
256 if include_deleted:
257 deleted += include_deleted # Add changed files
258 else: # Content.json not exist, add to changed files
259 self.log.debug("Missing include: %s" % include_inner_path)
260 changed += [include_inner_path]
261
262 # Load blind user includes (all subdir)
263 if load_includes and "user_contents" in new_content:
264 for relative_dir in os.listdir(content_dir):
265 include_inner_path = content_inner_dir + relative_dir + "/content.json"
266 if not self.site.storage.isFile(include_inner_path):
267 continue # Content.json not exist
268 include_changed, include_deleted = self.loadContent(
269 include_inner_path, add_bad_files=add_bad_files, delete_removed_files=delete_removed_files,
270 load_includes=False
271 )
272 if include_changed:
273 changed += include_changed # Add changed files
274 if include_deleted:
275 deleted += include_deleted # Add changed files
276
277 # Save some memory
278 new_content["signs"] = None
279 if "cert_sign" in new_content:
280 new_content["cert_sign"] = None
281
282 if new_content.get("files_optional"):
283 self.has_optional_files = True
284 # Update the content
285 self.contents[content_inner_path] = new_content
286 except Exception as err:
287 self.log.warning("%s parse error: %s" % (content_inner_path, Debug.formatException(err)))
288 return [], [] # Content.json parse error
289
290 # Add changed files to bad files
291 if add_bad_files:
292 for inner_path in changed:
293 self.site.bad_files[inner_path] = self.site.bad_files.get(inner_path, 0) + 1
294 for inner_path in deleted:
295 if inner_path in self.site.bad_files:
296 del self.site.bad_files[inner_path]
297 self.site.worker_manager.removeSolvedFileTasks()
298
299 if new_content.get("modified", 0) > self.site.settings.get("modified", 0):
300 # Dont store modifications in the far future (more than 10 minute)
301 self.site.settings["modified"] = min(time.time() + 60 * 10, new_content["modified"])
302
303 return changed, deleted
304
305 def removeContent(self, inner_path):
306 inner_dir = helper.getDirname(inner_path)
307 try:
308 content = self.contents[inner_path]
309 files = dict(
310 content.get("files", {}),
311 **content.get("files_optional", {})
312 )
313 except Exception as err:
314 self.log.debug("Error loading %s for removeContent: %s" % (inner_path, Debug.formatException(err)))
315 files = {}
316 files["content.json"] = True
317 # Deleting files that no longer in content.json
318 for file_relative_path in files:
319 file_inner_path = inner_dir + file_relative_path
320 try:
321 self.site.storage.delete(file_inner_path)
322 self.log.debug("Deleted file: %s" % file_inner_path)
323 except Exception as err:
324 self.log.debug("Error deleting file %s: %s" % (file_inner_path, err))
325 try:
326 self.site.storage.deleteDir(inner_dir)
327 except Exception as err:
328 self.log.debug("Error deleting dir %s: %s" % (inner_dir, err))
329
330 try:
331 del self.contents[inner_path]
332 except Exception as err:
333 self.log.debug("Error key from contents: %s" % inner_path)
334
    # Get total size of site
    # Return: 32819 (size of files in kb)
    def getTotalSize(self, ignore=None):
        # Delegated to the content db; "ignore" is an inner path to exclude
        # from the sum (e.g. a content.json currently being re-signed).
        return self.contents.db.getTotalSize(self.site, ignore)
339
    def listModified(self, after=None, before=None):
        # List content.json inner paths with a "modified" value inside the
        # given bounds (delegated to the content db).
        return self.contents.db.listModified(self.site, after=after, before=before)
342
343 def listContents(self, inner_path="content.json", user_files=False):
344 if inner_path not in self.contents:
345 return []
346 back = [inner_path]
347 content_inner_dir = helper.getDirname(inner_path)
348 for relative_path in list(self.contents[inner_path].get("includes", {}).keys()):
349 include_inner_path = content_inner_dir + relative_path
350 back += self.listContents(include_inner_path)
351 return back
352
353 # Returns if file with the given modification date is archived or not
354 def isArchived(self, inner_path, modified):
355 match = re.match(r"(.*)/(.*?)/", inner_path)
356 if not match:
357 return False
358 user_contents_inner_path = match.group(1) + "/content.json"
359 relative_directory = match.group(2)
360
361 file_info = self.getFileInfo(user_contents_inner_path)
362 if file_info:
363 time_archived_before = file_info.get("archived_before", 0)
364 time_directory_archived = file_info.get("archived", {}).get(relative_directory, 0)
365 if modified <= time_archived_before or modified <= time_directory_archived:
366 return True
367 else:
368 return False
369 else:
370 return False
371
372 def isDownloaded(self, inner_path, hash_id=None):
373 if not hash_id:
374 file_info = self.getFileInfo(inner_path)
375 if not file_info or "sha512" not in file_info:
376 return False
377 hash_id = self.hashfield.getHashId(file_info["sha512"])
378 return hash_id in self.hashfield
379
380 # Is modified since signing
381 def isModified(self, inner_path):
382 s = time.time()
383 if inner_path.endswith("content.json"):
384 try:
385 is_valid = self.verifyFile(inner_path, self.site.storage.open(inner_path), ignore_same=False)
386 if is_valid:
387 is_modified = False
388 else:
389 is_modified = True
390 except VerifyError:
391 is_modified = True
392 else:
393 try:
394 self.verifyFile(inner_path, self.site.storage.open(inner_path), ignore_same=False)
395 is_modified = False
396 except VerifyError:
397 is_modified = True
398 return is_modified
399
    # Find the file info line from self.contents
    # Return: { "sha512": "c29d73d...21f518", "size": 41 , "content_inner_path": "content.json"}
    def getFileInfo(self, inner_path, new_file=False):
        """Walk up the directory tree looking for the content.json that lists
        *inner_path* and return its file-info dict, or False if none found.

        The "content_inner_path", "optional" and "relative_path" keys are
        written into the dict before returning. Note that for the files /
        files_optional / user_contents cases this mutates the dict cached in
        self.contents. With new_file=True a minimal stub is returned for a
        path that is not listed yet but has a governing content.json.
        """
        dirs = inner_path.split("/")  # Parent dirs of content.json
        inner_path_parts = [dirs.pop()]  # Filename relative to content.json
        while True:
            content_inner_path = "%s/content.json" % "/".join(dirs)
            content_inner_path = content_inner_path.strip("/")
            content = self.contents.get(content_inner_path)

            # Check in files
            if content and "files" in content:
                back = content["files"].get("/".join(inner_path_parts))
                if back:
                    back["content_inner_path"] = content_inner_path
                    back["optional"] = False
                    back["relative_path"] = "/".join(inner_path_parts)
                    return back

            # Check in optional files
            if content and "files_optional" in content:  # Check if file in this content.json
                back = content["files_optional"].get("/".join(inner_path_parts))
                if back:
                    back["content_inner_path"] = content_inner_path
                    back["optional"] = True
                    back["relative_path"] = "/".join(inner_path_parts)
                    return back

            # Return the rules if user dir
            if content and "user_contents" in content:
                back = content["user_contents"]
                content_inner_path_dir = helper.getDirname(content_inner_path)
                relative_content_path = inner_path[len(content_inner_path_dir):]
                # First path component is the user's auth address (if present)
                user_auth_address_match = re.match(r"([A-Za-z0-9]+)/.*", relative_content_path)
                if user_auth_address_match:
                    user_auth_address = user_auth_address_match.group(1)
                    back["content_inner_path"] = "%s%s/content.json" % (content_inner_path_dir, user_auth_address)
                else:
                    back["content_inner_path"] = content_inner_path_dir + "content.json"
                back["optional"] = None
                back["relative_path"] = "/".join(inner_path_parts)
                return back

            if new_file and content:
                # Not listed yet, but this content.json governs the path
                back = {}
                back["content_inner_path"] = content_inner_path
                back["relative_path"] = "/".join(inner_path_parts)
                back["optional"] = None
                return back

            # No inner path in this dir, lets try the parent dir
            if dirs:
                inner_path_parts.insert(0, dirs.pop())
            else:  # No more parent dirs
                break

        # Not found
        return False
458
    # Get rules for the file
    # Return: The rules for the file or False if not allowed
    def getRules(self, inner_path, content=None):
        """Resolve the signing rules that govern *inner_path*.

        Non-content.json paths are first mapped to their governing
        content.json via getFileInfo. The root content.json gets a rules dict
        with just its valid signers; other paths are matched against the
        nearest ancestor content.json's "includes" or "user_contents".
        Returns False (or None from an includes miss) when not allowed.
        """
        if not inner_path.endswith("content.json"):  # Find the files content.json first
            file_info = self.getFileInfo(inner_path)
            if not file_info:
                return False  # File not found
            inner_path = file_info["content_inner_path"]

        if inner_path == "content.json":  # Root content.json
            rules = {}
            rules["signers"] = self.getValidSigners(inner_path, content)
            return rules

        dirs = inner_path.split("/")  # Parent dirs of content.json
        inner_path_parts = [dirs.pop()]  # Filename relative to content.json
        inner_path_parts.insert(0, dirs.pop())  # Dont check in self dir
        while True:
            content_inner_path = "%s/content.json" % "/".join(dirs)
            parent_content = self.contents.get(content_inner_path.strip("/"))
            if parent_content and "includes" in parent_content:
                return parent_content["includes"].get("/".join(inner_path_parts))
            elif parent_content and "user_contents" in parent_content:
                return self.getUserContentRules(parent_content, inner_path, content)
            else:  # No inner path in this dir, lets try the parent dir
                if dirs:
                    inner_path_parts.insert(0, dirs.pop())
                else:  # No more parent dirs
                    break

        return False
490
    # Get rules for a user file
    # Return: The rules of the file or False if not allowed
    def getUserContentRules(self, parent_content, inner_path, content):
        """Build the effective rules for a file inside a user_contents dir.

        Combines the address/cert-id specific "permissions" entry with every
        matching "permission_rules" pattern (larger ints, longer strings and
        concatenated lists win). A permissions value of False marks the user
        as banned: rules are still built but the user is not added as signer.
        """
        user_contents = parent_content["user_contents"]

        # Delivered for directory
        if "inner_path" in parent_content:
            parent_content_dir = helper.getDirname(parent_content["inner_path"])
            user_address = re.match(r"([A-Za-z0-9]*?)/", inner_path[len(parent_content_dir):]).group(1)
        else:
            user_address = re.match(r".*/([A-Za-z0-9]*?)/.*?$", inner_path).group(1)

        try:
            if not content:
                content = self.site.storage.loadJson(inner_path)  # Read the file if no content specified
            user_urn = "%s/%s" % (content["cert_auth_type"], content["cert_user_id"])  # web/nofish@zeroid.bit
            cert_user_id = content["cert_user_id"]
        except Exception:  # Content.json not exist
            user_urn = "n-a/n-a"
            cert_user_id = "n-a"

        if user_address in user_contents["permissions"]:
            rules = copy.copy(user_contents["permissions"].get(user_address, {}))  # Default rules based on address
        else:
            rules = copy.copy(user_contents["permissions"].get(cert_user_id, {}))  # Default rules based on username

        if rules is False:
            banned = True
            rules = {}
        else:
            banned = False
        if "signers" in rules:
            rules["signers"] = rules["signers"][:]  # Make copy of the signers
        for permission_pattern, permission_rules in list(user_contents["permission_rules"].items()):  # Regexp rules
            if not SafeRe.match(permission_pattern, user_urn):
                continue  # Rule is not valid for user
            # Update rules if its better than current recorded ones
            for key, val in permission_rules.items():
                if key not in rules:
                    if type(val) is list:
                        rules[key] = val[:]  # Make copy
                    else:
                        rules[key] = val
                elif type(val) is int:  # Int, update if larger
                    if val > rules[key]:
                        rules[key] = val
                elif hasattr(val, "startswith"):  # String, update if longer
                    if len(val) > len(rules[key]):
                        rules[key] = val
                elif type(val) is list:  # List, append
                    rules[key] += val

        # Accepted cert signers
        rules["cert_signers"] = user_contents.get("cert_signers", {})
        rules["cert_signers_pattern"] = user_contents.get("cert_signers_pattern")

        if "signers" not in rules:
            rules["signers"] = []

        if not banned:
            rules["signers"].append(user_address)  # Add user as valid signer
        rules["user_address"] = user_address
        rules["includes_allowed"] = False

        return rules
556
    # Get diffs for changed files
    def getDiffs(self, inner_path, limit=30 * 1024, update_files=True):
        """Produce line diffs for files of *inner_path* that have a "-new" or
        "-old" sibling on disk.

        Returns {relative_path: diff}. With update_files=True the "-new"
        version replaces the current file and "-old" copies are deleted, so
        the diffs describe the transition to the now-current content.
        """
        if inner_path not in self.contents:
            return {}
        diffs = {}
        content_inner_path_dir = helper.getDirname(inner_path)
        for file_relative_path in self.contents[inner_path].get("files", {}):
            file_inner_path = content_inner_path_dir + file_relative_path
            if self.site.storage.isFile(file_inner_path + "-new"):  # New version present
                diffs[file_relative_path] = Diff.diff(
                    list(self.site.storage.open(file_inner_path)),
                    list(self.site.storage.open(file_inner_path + "-new")),
                    limit=limit
                )
                if update_files:
                    # Promote the "-new" file to the canonical path
                    self.site.storage.delete(file_inner_path)
                    self.site.storage.rename(file_inner_path + "-new", file_inner_path)
            if self.site.storage.isFile(file_inner_path + "-old"):  # Old version present
                diffs[file_relative_path] = Diff.diff(
                    list(self.site.storage.open(file_inner_path + "-old")),
                    list(self.site.storage.open(file_inner_path)),
                    limit=limit
                )
                if update_files:
                    self.site.storage.delete(file_inner_path + "-old")
        return diffs
583
584 def hashFile(self, dir_inner_path, file_relative_path, optional=False):
585 back = {}
586 file_inner_path = dir_inner_path + "/" + file_relative_path
587
588 file_path = self.site.storage.getPath(file_inner_path)
589 file_size = os.path.getsize(file_path)
590 sha512sum = CryptHash.sha512sum(file_path) # Calculate sha512 sum of file
591 if optional and not self.hashfield.hasHash(sha512sum):
592 self.optionalDownloaded(file_inner_path, self.hashfield.getHashId(sha512sum), file_size, own=True)
593
594 back[file_relative_path] = {"sha512": sha512sum, "size": os.path.getsize(file_path)}
595 return back
596
597 def isValidRelativePath(self, relative_path):
598 if ".." in relative_path.replace("\\", "/").split("/"):
599 return False
600 elif len(relative_path) > 255:
601 return False
602 elif relative_path[0] in ("/", "\\"): # Starts with
603 return False
604 elif relative_path[-1] in (".", " "): # Ends with
605 return False
606 elif re.match(r".*(^|/)(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9]|CONOUT\$|CONIN\$)(\.|/|$)", relative_path, re.IGNORECASE): # Protected on Windows
607 return False
608 else:
609 return re.match(r"^[^\x00-\x1F\"*:<>?\\|]+$", relative_path)
610
611 def sanitizePath(self, inner_path):
612 return re.sub("[\x00-\x1F\"*:<>?\\|]", "", inner_path)
613
614 # Hash files in directory
615 def hashFiles(self, dir_inner_path, ignore_pattern=None, optional_pattern=None):
616 files_node = {}
617 files_optional_node = {}
618 db_inner_path = self.site.storage.getDbFile()
619 if dir_inner_path and not self.isValidRelativePath(dir_inner_path):
620 ignored = True
621 self.log.error("- [ERROR] Only ascii encoded directories allowed: %s" % dir_inner_path)
622
623 for file_relative_path in self.site.storage.walk(dir_inner_path, ignore_pattern):
624 file_name = helper.getFilename(file_relative_path)
625
626 ignored = optional = False
627 if file_name == "content.json":
628 ignored = True
629 elif file_name.startswith(".") or file_name.endswith("-old") or file_name.endswith("-new"):
630 ignored = True
631 elif not self.isValidRelativePath(file_relative_path):
632 ignored = True
633 self.log.error("- [ERROR] Invalid filename: %s" % file_relative_path)
634 elif dir_inner_path == "" and db_inner_path and file_relative_path.startswith(db_inner_path):
635 ignored = True
636 elif optional_pattern and SafeRe.match(optional_pattern, file_relative_path):
637 optional = True
638
639 if ignored: # Ignore content.json, defined regexp and files starting with .
640 self.log.info("- [SKIPPED] %s" % file_relative_path)
641 else:
642 if optional:
643 self.log.info("- [OPTIONAL] %s" % file_relative_path)
644 files_optional_node.update(
645 self.hashFile(dir_inner_path, file_relative_path, optional=True)
646 )
647 else:
648 self.log.info("- %s" % file_relative_path)
649 files_node.update(
650 self.hashFile(dir_inner_path, file_relative_path)
651 )
652 return files_node, files_optional_node
653
    # Create and sign a content.json
    # Return: The new content if filewrite = False
    def sign(self, inner_path="content.json", privatekey=None, filewrite=True, update_changed_files=False, extend=None, remove_missing_optional=False):
        """Create and sign the content.json at *inner_path* with *privatekey*.

        Hashes the directory's files, carries over (or initializes) metadata,
        signs the canonical JSON and verifies the result. Returns True when
        written to disk (filewrite=True) or the new content dict otherwise.
        Raises SignError for a non-content.json path or an unauthorized key.
        """
        if not inner_path.endswith("content.json"):
            raise SignError("Invalid file name, you can only sign content.json files")

        if inner_path in self.contents:
            content = self.contents.get(inner_path)
            if content and content.get("cert_sign", False) is None and self.site.storage.isFile(inner_path):
                # Recover cert_sign from file
                # (loadContent nulls it out to save memory)
                content["cert_sign"] = self.site.storage.loadJson(inner_path).get("cert_sign")
        else:
            content = None
        if not content:  # Content not exist yet, load default one
            self.log.info("File %s not exist yet, loading default values..." % inner_path)

            if self.site.storage.isFile(inner_path):
                content = self.site.storage.loadJson(inner_path)
                if "files" not in content:
                    content["files"] = {}
                if "signs" not in content:
                    content["signs"] = {}
            else:
                content = {"files": {}, "signs": {}}  # Default content.json

            if inner_path == "content.json":  # It's the root content.json, add some more fields
                content["title"] = "%s - ZeroNet_" % self.site.address
                content["description"] = ""
                content["signs_required"] = 1
                content["ignore"] = ""

        if extend:
            # Add extend keys if not exists
            for key, val in list(extend.items()):
                if not content.get(key):
                    content[key] = val
                    self.log.info("Extending content.json with: %s" % key)

        directory = helper.getDirname(self.site.storage.getPath(inner_path))
        inner_directory = helper.getDirname(inner_path)
        self.log.info("Opening site data directory: %s..." % directory)

        changed_files = [inner_path]
        files_node, files_optional_node = self.hashFiles(
            helper.getDirname(inner_path), content.get("ignore"), content.get("optional")
        )

        if not remove_missing_optional:
            # Keep optional entries whose file is no longer on disk
            for file_inner_path, file_details in content.get("files_optional", {}).items():
                if file_inner_path not in files_optional_node:
                    files_optional_node[file_inner_path] = file_details

        # Find changed files
        files_merged = files_node.copy()
        files_merged.update(files_optional_node)
        for file_relative_path, file_details in files_merged.items():
            # NOTE(review): old_hash is looked up in "files" only, so optional
            # files always end up in changed_files — confirm intended.
            old_hash = content.get("files", {}).get(file_relative_path, {}).get("sha512")
            new_hash = files_merged[file_relative_path]["sha512"]
            if old_hash != new_hash:
                changed_files.append(inner_directory + file_relative_path)

        self.log.debug("Changed files: %s" % changed_files)
        if update_changed_files:
            for file_path in changed_files:
                self.site.storage.onUpdated(file_path)

        # Generate new content.json
        self.log.info("Adding timestamp and sha512sums to new content.json...")

        new_content = content.copy()  # Create a copy of current content.json
        new_content["files"] = files_node  # Add files sha512 hash
        if files_optional_node:
            new_content["files_optional"] = files_optional_node
        elif "files_optional" in new_content:
            del new_content["files_optional"]

        new_content["modified"] = int(time.time())  # Add timestamp
        if inner_path == "content.json":
            new_content["zeronet_version"] = config.version
            new_content["signs_required"] = content.get("signs_required", 1)

        new_content["address"] = self.site.address
        new_content["inner_path"] = inner_path

        # Verify private key
        from Crypt import CryptBitcoin
        self.log.info("Verifying private key...")
        privatekey_address = CryptBitcoin.privatekeyToAddress(privatekey)
        valid_signers = self.getValidSigners(inner_path, new_content)
        if privatekey_address not in valid_signers:
            raise SignError(
                "Private key invalid! Valid signers: %s, Private key address: %s" %
                (valid_signers, privatekey_address)
            )
        self.log.info("Correct %s in valid signers: %s" % (privatekey_address, valid_signers))

        if inner_path == "content.json" and privatekey_address == self.site.address:
            # If signing using the root key, then sign the valid signers
            signers_data = "%s:%s" % (new_content["signs_required"], ",".join(valid_signers))
            new_content["signers_sign"] = CryptBitcoin.sign(str(signers_data), privatekey)
            if not new_content["signers_sign"]:
                self.log.info("Old style address, signers_sign is none")

        self.log.info("Signing %s..." % inner_path)

        # Signs must not be part of the signed payload
        if "signs" in new_content:
            del(new_content["signs"])  # Delete old signs
        if "sign" in new_content:
            del(new_content["sign"])  # Delete old sign (backward compatibility)

        sign_content = json.dumps(new_content, sort_keys=True)
        sign = CryptBitcoin.sign(sign_content, privatekey)
        # new_content["signs"] = content.get("signs", {}) # TODO: Multisig
        if sign:  # If signing is successful (not an old address)
            new_content["signs"] = {}
            new_content["signs"][privatekey_address] = sign

        self.verifyContent(inner_path, new_content)

        if filewrite:
            self.log.info("Saving to %s..." % inner_path)
            self.site.storage.writeJson(inner_path, new_content)
            self.contents[inner_path] = new_content

        self.log.info("File %s signed!" % inner_path)

        if filewrite:  # Written to file
            return True
        else:  # Return the new content
            return new_content
784
785 # The valid signers of content.json file
786 # Return: ["1KRxE1s3oDyNDawuYWpzbLUwNm8oDbeEp6", "13ReyhCsjhpuCVahn1DHdf6eMqqEVev162"]
787 def getValidSigners(self, inner_path, content=None):
788 valid_signers = []
789 if inner_path == "content.json": # Root content.json
790 if "content.json" in self.contents and "signers" in self.contents["content.json"]:
791 valid_signers += self.contents["content.json"]["signers"][:]
792 else:
793 rules = self.getRules(inner_path, content)
794 if rules and "signers" in rules:
795 valid_signers += rules["signers"]
796
797 if self.site.address not in valid_signers:
798 valid_signers.append(self.site.address) # Site address always valid
799 return valid_signers
800
801 # Return: The required number of valid signs for the content.json
802 def getSignsRequired(self, inner_path, content=None):
803 return 1 # Todo: Multisig
804
805 def verifyCertSign(self, user_address, user_auth_type, user_name, issuer_address, sign):
806 from Crypt import CryptBitcoin
807 cert_subject = "%s#%s/%s" % (user_address, user_auth_type, user_name)
808 return CryptBitcoin.verify(cert_subject, issuer_address, sign)
809
810 def verifyCert(self, inner_path, content):
811 rules = self.getRules(inner_path, content)
812
813 if not rules:
814 raise VerifyError("No rules for this file")
815
816 if not rules.get("cert_signers") and not rules.get("cert_signers_pattern"):
817 return True # Does not need cert
818
819 if "cert_user_id" not in content:
820 raise VerifyError("Missing cert_user_id")
821
822 if content["cert_user_id"].count("@") != 1:
823 raise VerifyError("Invalid domain in cert_user_id")
824
825 name, domain = content["cert_user_id"].rsplit("@", 1)
826 cert_address = rules["cert_signers"].get(domain)
827 if not cert_address: # Unknown Cert signer
828 if rules.get("cert_signers_pattern") and SafeRe.match(rules["cert_signers_pattern"], domain):
829 cert_address = domain
830 else:
831 raise VerifyError("Invalid cert signer: %s" % domain)
832
833 return self.verifyCertSign(rules["user_address"], content["cert_auth_type"], name, cert_address, content["cert_sign"])
834
    # Checks if the content.json content is valid
    # Return: True or False
    def verifyContent(self, inner_path, content):
        """Validate a content.json dict against the site's size limits and rules.

        On success updates self.site.settings["size"] / ["size_optional"] to
        reflect the new content and returns True; raises VerifyError on any
        violation. For non-root files the include rules are also enforced via
        verifyContentInclude.
        """
        content_size = len(json.dumps(content, indent=1)) + sum([file["size"] for file in list(content["files"].values()) if file["size"] >= 0]) # Size of new content
        # Calculate old content size (to be subtracted from the site total)
        old_content = self.contents.get(inner_path)
        if old_content:
            old_content_size = len(json.dumps(old_content, indent=1)) + sum([file["size"] for file in list(old_content.get("files", {}).values())])
            old_content_size_optional = sum([file["size"] for file in list(old_content.get("files_optional", {}).values())])
        else:
            old_content_size = 0
            old_content_size_optional = 0

        # Reset site size on first content.json
        if not old_content and inner_path == "content.json":
            self.site.settings["size"] = 0

        content_size_optional = sum([file["size"] for file in list(content.get("files_optional", {}).values()) if file["size"] >= 0])
        site_size = self.site.settings["size"] - old_content_size + content_size # Site size without old content plus the new
        site_size_optional = self.site.settings["size_optional"] - old_content_size_optional + content_size_optional # Site size without old content plus the new

        site_size_limit = self.site.getSizeLimit() * 1024 * 1024  # Limit is stored in MB

        # Check site address
        if content.get("address") and content["address"] != self.site.address:
            raise VerifyError("Wrong site address: %s != %s" % (content["address"], self.site.address))

        # Check file inner path
        if content.get("inner_path") and content["inner_path"] != inner_path:
            raise VerifyError("Wrong inner_path: %s" % content["inner_path"])

        # If our content.json file bigger than the size limit throw error
        if inner_path == "content.json":
            content_size_file = len(json.dumps(content, indent=1))
            if content_size_file > site_size_limit:
                # Save site size to display warning
                self.site.settings["size"] = site_size
                task = self.site.worker_manager.tasks.findTask(inner_path)
                if task: # Dont try to download from other peers
                    self.site.worker_manager.failTask(task)
                raise VerifyError("Content too large %s B > %s B, aborting task..." % (site_size, site_size_limit))

        # Verify valid filenames (both normal and optional files)
        for file_relative_path in list(content.get("files", {}).keys()) + list(content.get("files_optional", {}).keys()):
            if not self.isValidRelativePath(file_relative_path):
                raise VerifyError("Invalid relative path: %s" % file_relative_path)

        if inner_path == "content.json":
            self.site.settings["size"] = site_size
            self.site.settings["size_optional"] = site_size_optional
            return True # Root content.json is passed
        else:
            # Included content.json: must also satisfy its include rules
            if self.verifyContentInclude(inner_path, content, content_size, content_size_optional):
                self.site.settings["size"] = site_size
                self.site.settings["size_optional"] = site_size_optional
                return True
            else:
                raise VerifyError("Content verify error")
893
894 def verifyContentInclude(self, inner_path, content, content_size, content_size_optional):
895 # Load include details
896 rules = self.getRules(inner_path, content)
897 if not rules:
898 raise VerifyError("No rules")
899
900 # Check include size limit
901 if rules.get("max_size") is not None: # Include size limit
902 if content_size > rules["max_size"]:
903 raise VerifyError("Include too large %sB > %sB" % (content_size, rules["max_size"]))
904
905 if rules.get("max_size_optional") is not None: # Include optional files limit
906 if content_size_optional > rules["max_size_optional"]:
907 raise VerifyError("Include optional files too large %sB > %sB" % (
908 content_size_optional, rules["max_size_optional"])
909 )
910
911 # Filename limit
912 if rules.get("files_allowed"):
913 for file_inner_path in list(content["files"].keys()):
914 if not SafeRe.match(r"^%s$" % rules["files_allowed"], file_inner_path):
915 raise VerifyError("File not allowed: %s" % file_inner_path)
916
917 if rules.get("files_allowed_optional"):
918 for file_inner_path in list(content.get("files_optional", {}).keys()):
919 if not SafeRe.match(r"^%s$" % rules["files_allowed_optional"], file_inner_path):
920 raise VerifyError("Optional file not allowed: %s" % file_inner_path)
921
922 # Check if content includes allowed
923 if rules.get("includes_allowed") is False and content.get("includes"):
924 raise VerifyError("Includes not allowed")
925
926 return True # All good
927
928 # Verify file validity
929 # Return: None = Same as before, False = Invalid, True = Valid
930 def verifyFile(self, inner_path, file, ignore_same=True):
931 if inner_path.endswith("content.json"): # content.json: Check using sign
932 from Crypt import CryptBitcoin
933 try:
934 if type(file) is dict:
935 new_content = file
936 else:
937 try:
938 if sys.version_info.major == 3 and sys.version_info.minor < 6:
939 new_content = json.loads(file.read().decode("utf8"))
940 else:
941 new_content = json.load(file)
942 except Exception as err:
943 raise VerifyError("Invalid json file: %s" % err)
944 if inner_path in self.contents:
945 old_content = self.contents.get(inner_path, {"modified": 0})
946 # Checks if its newer the ours
947 if old_content["modified"] == new_content["modified"] and ignore_same: # Ignore, have the same content.json
948 return None
949 elif old_content["modified"] > new_content["modified"]: # We have newer
950 raise VerifyError(
951 "We have newer (Our: %s, Sent: %s)" %
952 (old_content["modified"], new_content["modified"])
953 )
954 if new_content["modified"] > time.time() + 60 * 60 * 24: # Content modified in the far future (allow 1 day+)
955 raise VerifyError("Modify timestamp is in the far future!")
956 if self.isArchived(inner_path, new_content["modified"]):
957 if inner_path in self.site.bad_files:
958 del self.site.bad_files[inner_path]
959 raise VerifyError("This file is archived!")
960 # Check sign
961 sign = new_content.get("sign")
962 signs = new_content.get("signs", {})
963 if "sign" in new_content:
964 del(new_content["sign"]) # The file signed without the sign
965 if "signs" in new_content:
966 del(new_content["signs"]) # The file signed without the signs
967
968 sign_content = json.dumps(new_content, sort_keys=True) # Dump the json to string to remove whitepsace
969
970 # Fix float representation error on Android
971 modified = new_content["modified"]
972 if config.fix_float_decimals and type(modified) is float and not str(modified).endswith(".0"):
973 modified_fixed = "{:.6f}".format(modified).strip("0.")
974 sign_content = sign_content.replace(
975 '"modified": %s' % repr(modified),
976 '"modified": %s' % modified_fixed
977 )
978
979 if signs: # New style signing
980 valid_signers = self.getValidSigners(inner_path, new_content)
981 signs_required = self.getSignsRequired(inner_path, new_content)
982
983 if inner_path == "content.json" and len(valid_signers) > 1: # Check signers_sign on root content.json
984 signers_data = "%s:%s" % (signs_required, ",".join(valid_signers))
985 if not CryptBitcoin.verify(signers_data, self.site.address, new_content["signers_sign"]):
986 raise VerifyError("Invalid signers_sign!")
987
988 if inner_path != "content.json" and not self.verifyCert(inner_path, new_content): # Check if cert valid
989 raise VerifyError("Invalid cert!")
990
991 valid_signs = 0
992 for address in valid_signers:
993 if address in signs:
994 valid_signs += CryptBitcoin.verify(sign_content, address, signs[address])
995 if valid_signs >= signs_required:
996 break # Break if we has enough signs
997 if valid_signs < signs_required:
998 raise VerifyError("Valid signs: %s/%s" % (valid_signs, signs_required))
999 else:
1000 return self.verifyContent(inner_path, new_content)
1001 else: # Old style signing
1002 raise VerifyError("Invalid old-style sign")
1003
1004 except Exception as err:
1005 self.log.warning("%s: verify sign error: %s" % (inner_path, Debug.formatException(err)))
1006 raise err
1007
1008 else: # Check using sha512 hash
1009 file_info = self.getFileInfo(inner_path)
1010 if file_info:
1011 if CryptHash.sha512sum(file) != file_info.get("sha512", ""):
1012 raise VerifyError("Invalid hash")
1013
1014 if file_info.get("size", 0) != file.tell():
1015 raise VerifyError(
1016 "File size does not match %s <> %s" %
1017 (inner_path, file.tell(), file_info.get("size", 0))
1018 )
1019
1020 return True
1021
1022 else: # File not in content.json
1023 raise VerifyError("File not in content.json")
1024
1025 def optionalDelete(self, inner_path):
1026 self.site.storage.delete(inner_path)
1027
1028 def optionalDownloaded(self, inner_path, hash_id, size=None, own=False):
1029 if size is None:
1030 size = self.site.storage.getSize(inner_path)
1031
1032 done = self.hashfield.appendHashId(hash_id)
1033 self.site.settings["optional_downloaded"] += size
1034 return done
1035
1036 def optionalRemoved(self, inner_path, hash_id, size=None):
1037 if size is None:
1038 size = self.site.storage.getSize(inner_path)
1039 done = self.hashfield.removeHashId(hash_id)
1040
1041 self.site.settings["optional_downloaded"] -= size
1042 return done
1043
1044 def optionalRenamed(self, inner_path_old, inner_path_new):
1045 return True