Forking what is left of ZeroNet and hopefully adding an AT Proto Frontend/Proxy
at main 1045 lines 52 kB view raw
1import json 2import time 3import re 4import os 5import copy 6import base64 7import sys 8 9import gevent 10 11from Debug import Debug 12from Crypt import CryptHash 13from Config import config 14from util import helper 15from util import Diff 16from util import SafeRe 17from Peer import PeerHashfield 18from .ContentDbDict import ContentDbDict 19from Plugin import PluginManager 20 21 22class VerifyError(Exception): 23 pass 24 25 26class SignError(Exception): 27 pass 28 29 30@PluginManager.acceptPlugins 31class ContentManager(object): 32 33 def __init__(self, site): 34 self.site = site 35 self.log = self.site.log 36 self.contents = ContentDbDict(site) 37 self.hashfield = PeerHashfield() 38 self.has_optional_files = False 39 40 # Load all content.json files 41 def loadContents(self): 42 if len(self.contents) == 0: 43 self.log.info("ContentDb not initialized, load files from filesystem...") 44 self.loadContent(add_bad_files=False, delete_removed_files=False) 45 self.site.settings["size"], self.site.settings["size_optional"] = self.getTotalSize() 46 47 # Load hashfield cache 48 if "hashfield" in self.site.settings.get("cache", {}): 49 self.hashfield.frombytes(base64.b64decode(self.site.settings["cache"]["hashfield"])) 50 del self.site.settings["cache"]["hashfield"] 51 elif self.contents.get("content.json") and self.site.settings["size_optional"] > 0: 52 self.site.storage.updateBadFiles() # No hashfield cache created yet 53 self.has_optional_files = bool(self.hashfield) 54 55 self.contents.db.initSite(self.site) 56 57 def getFileChanges(self, old_files, new_files): 58 deleted = {key: val for key, val in old_files.items() if key not in new_files} 59 deleted_hashes = {val.get("sha512"): key for key, val in old_files.items() if key not in new_files} 60 added = {key: val for key, val in new_files.items() if key not in old_files} 61 renamed = {} 62 for relative_path, node in added.items(): 63 hash = node.get("sha512") 64 if hash in deleted_hashes: 65 relative_path_old = deleted_hashes[hash] 66 renamed[relative_path_old] = relative_path 67 del(deleted[relative_path_old]) 68 return list(deleted), renamed 69 70 # Load content.json to self.content 71 # Return: Changed files ["index.html", "data/messages.json"], Deleted files ["old.jpg"] 72 def loadContent(self, content_inner_path="content.json", add_bad_files=True, delete_removed_files=True, load_includes=True, force=False): 73 content_inner_path = content_inner_path.strip("/") # Remove / from beginning 74 old_content = self.contents.get(content_inner_path) 75 content_path = self.site.storage.getPath(content_inner_path) 76 content_dir = helper.getDirname(self.site.storage.getPath(content_inner_path)) 77 content_inner_dir = helper.getDirname(content_inner_path) 78 79 if os.path.isfile(content_path): 80 try: 81 # Check if file is newer than what we have 82 if not force and old_content and not self.site.settings.get("own"): 83 for line in open(content_path): 84 if '"modified"' not in line: 85 continue 86 match = re.search(r"([0-9\.]+),$", line.strip(" \r\n")) 87 if match and float(match.group(1)) <= old_content.get("modified", 0): 88 self.log.debug("%s loadContent same json file, skipping" % content_inner_path) 89 return [], [] 90 91 new_content = self.site.storage.loadJson(content_inner_path) 92 except Exception as err: 93 self.log.warning("%s load error: %s" % (content_path, Debug.formatException(err))) 94 return [], [] 95 else: 96 self.log.debug("Content.json not exist: %s" % content_path) 97 return [], [] # Content.json not exist 98 99 try: 100 # Get the files where the sha512 changed 101 changed = [] 102 deleted = [] 103 # Check changed 104 for relative_path, info in new_content.get("files", {}).items(): 105 if "sha512" in info: 106 hash_type = "sha512" 107 else: # Backward compatibility 108 hash_type = "sha1" 109 110 new_hash = info[hash_type] 111 if old_content and old_content["files"].get(relative_path): # We have the file in the old content 112 old_hash = old_content["files"][relative_path].get(hash_type) 113 else: # The file is not in the old content 114 old_hash = None 115 if old_hash != new_hash: 116 changed.append(content_inner_dir + relative_path) 117 118 # Check changed optional files 119 for relative_path, info in new_content.get("files_optional", {}).items(): 120 file_inner_path = content_inner_dir + relative_path 121 new_hash = info["sha512"] 122 if old_content and old_content.get("files_optional", {}).get(relative_path): 123 # We have the file in the old content 124 old_hash = old_content["files_optional"][relative_path].get("sha512") 125 if old_hash != new_hash and self.site.isDownloadable(file_inner_path): 126 changed.append(file_inner_path) # Download new file 127 elif old_hash != new_hash and self.hashfield.hasHash(old_hash) and not self.site.settings.get("own"): 128 try: 129 old_hash_id = self.hashfield.getHashId(old_hash) 130 self.optionalRemoved(file_inner_path, old_hash_id, old_content["files_optional"][relative_path]["size"]) 131 self.optionalDelete(file_inner_path) 132 self.log.debug("Deleted changed optional file: %s" % file_inner_path) 133 except Exception as err: 134 self.log.warning("Error deleting file %s: %s" % (file_inner_path, Debug.formatException(err))) 135 else: # The file is not in the old content 136 if self.site.isDownloadable(file_inner_path): 137 changed.append(file_inner_path) # Download new file 138 139 # Check deleted 140 if old_content: 141 old_files = dict( 142 old_content.get("files", {}), 143 **old_content.get("files_optional", {}) 144 ) 145 146 new_files = dict( 147 new_content.get("files", {}), 148 **new_content.get("files_optional", {}) 149 ) 150 151 deleted, renamed = self.getFileChanges(old_files, new_files) 152 153 for relative_path_old, relative_path_new in renamed.items(): 154 self.log.debug("Renaming: %s -> %s" % (relative_path_old, relative_path_new)) 155 if relative_path_new in new_content.get("files_optional", {}): 156 self.optionalRenamed(content_inner_dir + relative_path_old, content_inner_dir + relative_path_new) 157 if self.site.storage.isFile(relative_path_old): 158 try: 159 self.site.storage.rename(relative_path_old, relative_path_new) 160 if relative_path_new in changed: 161 changed.remove(relative_path_new) 162 self.log.debug("Renamed: %s -> %s" % (relative_path_old, relative_path_new)) 163 except Exception as err: 164 self.log.warning("Error renaming file: %s -> %s %s" % (relative_path_old, relative_path_new, err)) 165 166 if deleted and not self.site.settings.get("own"): 167 # Deleting files that no longer in content.json 168 for file_relative_path in deleted: 169 file_inner_path = content_inner_dir + file_relative_path 170 try: 171 # Check if the deleted file is optional 172 if old_content.get("files_optional") and old_content["files_optional"].get(file_relative_path): 173 self.optionalDelete(file_inner_path) 174 old_hash = old_content["files_optional"][file_relative_path].get("sha512") 175 if self.hashfield.hasHash(old_hash): 176 old_hash_id = self.hashfield.getHashId(old_hash) 177 self.optionalRemoved(file_inner_path, old_hash_id, old_content["files_optional"][file_relative_path]["size"]) 178 else: 179 self.site.storage.delete(file_inner_path) 180 181 self.log.debug("Deleted file: %s" % file_inner_path) 182 except Exception as err: 183 self.log.debug("Error deleting file %s: %s" % (file_inner_path, Debug.formatException(err))) 184 185 # Cleanup empty dirs 186 tree = {root: [dirs, files] for root, dirs, files in os.walk(self.site.storage.getPath(content_inner_dir))} 187 for root in sorted(tree, key=len, reverse=True): 188 dirs, files = tree[root] 189 if dirs == [] and files == []: 190 root_inner_path = self.site.storage.getInnerPath(root.replace("\\", "/")) 191 self.log.debug("Empty directory: %s, cleaning up." % root_inner_path) 192 try: 193 self.site.storage.deleteDir(root_inner_path) 194 # Remove from tree dict to reflect changed state 195 tree[os.path.dirname(root)][0].remove(os.path.basename(root)) 196 except Exception as err: 197 self.log.debug("Error deleting empty directory %s: %s" % (root_inner_path, err)) 198 199 # Check archived 200 if old_content and "user_contents" in new_content and "archived" in new_content["user_contents"]: 201 old_archived = old_content.get("user_contents", {}).get("archived", {}) 202 new_archived = new_content.get("user_contents", {}).get("archived", {}) 203 self.log.debug("old archived: %s, new archived: %s" % (len(old_archived), len(new_archived))) 204 archived_changed = { 205 key: date_archived 206 for key, date_archived in new_archived.items() 207 if old_archived.get(key) != new_archived[key] 208 } 209 if archived_changed: 210 self.log.debug("Archived changed: %s" % archived_changed) 211 for archived_dirname, date_archived in archived_changed.items(): 212 archived_inner_path = content_inner_dir + archived_dirname + "/content.json" 213 if self.contents.get(archived_inner_path, {}).get("modified", 0) < date_archived: 214 self.removeContent(archived_inner_path) 215 deleted += archived_inner_path 216 self.site.settings["size"], self.site.settings["size_optional"] = self.getTotalSize() 217 218 # Check archived before 219 if old_content and "user_contents" in new_content and "archived_before" in new_content["user_contents"]: 220 old_archived_before = old_content.get("user_contents", {}).get("archived_before", 0) 221 new_archived_before = new_content.get("user_contents", {}).get("archived_before", 0) 222 if old_archived_before != new_archived_before: 223 self.log.debug("Archived before changed: %s -> %s" % (old_archived_before, new_archived_before)) 224 225 # Remove downloaded archived files 226 num_removed_contents = 0 227 for archived_inner_path in self.listModified(before=new_archived_before): 228 if archived_inner_path.startswith(content_inner_dir) and archived_inner_path != content_inner_path: 229 self.removeContent(archived_inner_path) 230 num_removed_contents += 1 231 self.site.settings["size"], self.site.settings["size_optional"] = self.getTotalSize() 232 233 # Remove archived files from download queue 234 num_removed_bad_files = 0 235 for bad_file in list(self.site.bad_files.keys()): 236 if bad_file.endswith("content.json"): 237 del self.site.bad_files[bad_file] 238 num_removed_bad_files += 1 239 240 if num_removed_bad_files > 0: 241 self.site.worker_manager.removeSolvedFileTasks(mark_as_good=False) 242 gevent.spawn(self.site.update, since=0) 243 244 self.log.debug("Archived removed contents: %s, removed bad files: %s" % (num_removed_contents, num_removed_bad_files)) 245 246 # Load includes 247 if load_includes and "includes" in new_content: 248 for relative_path, info in list(new_content["includes"].items()): 249 include_inner_path = content_inner_dir + relative_path 250 if self.site.storage.isFile(include_inner_path): # Content.json exists, load it 251 include_changed, include_deleted = self.loadContent( 252 include_inner_path, add_bad_files=add_bad_files, delete_removed_files=delete_removed_files 253 ) 254 if include_changed: 255 changed += include_changed # Add changed files 256 if include_deleted: 257 deleted += include_deleted # Add changed files 258 else: # Content.json not exist, add to changed files 259 self.log.debug("Missing include: %s" % include_inner_path) 260 changed += [include_inner_path] 261 262 # Load blind user includes (all subdir) 263 if load_includes and "user_contents" in new_content: 264 for relative_dir in os.listdir(content_dir): 265 include_inner_path = content_inner_dir + relative_dir + "/content.json" 266 if not self.site.storage.isFile(include_inner_path): 267 continue # Content.json not exist 268 include_changed, include_deleted = self.loadContent( 269 include_inner_path, add_bad_files=add_bad_files, delete_removed_files=delete_removed_files, 270 load_includes=False 271 ) 272 if include_changed: 273 changed += include_changed # Add changed files 274 if include_deleted: 275 deleted += include_deleted # Add changed files 276 277 # Save some memory 278 new_content["signs"] = None 279 if "cert_sign" in new_content: 280 new_content["cert_sign"] = None 281 282 if new_content.get("files_optional"): 283 self.has_optional_files = True 284 # Update the content 285 self.contents[content_inner_path] = new_content 286 except Exception as err: 287 self.log.warning("%s parse error: %s" % (content_inner_path, Debug.formatException(err))) 288 return [], [] # Content.json parse error 289 290 # Add changed files to bad files 291 if add_bad_files: 292 for inner_path in changed: 293 self.site.bad_files[inner_path] = self.site.bad_files.get(inner_path, 0) + 1 294 for inner_path in deleted: 295 if inner_path in self.site.bad_files: 296 del self.site.bad_files[inner_path] 297 self.site.worker_manager.removeSolvedFileTasks() 298 299 if new_content.get("modified", 0) > self.site.settings.get("modified", 0): 300 # Dont store modifications in the far future (more than 10 minute) 301 self.site.settings["modified"] = min(time.time() + 60 * 10, new_content["modified"]) 302 303 return changed, deleted 304 305 def removeContent(self, inner_path): 306 inner_dir = helper.getDirname(inner_path) 307 try: 308 content = self.contents[inner_path] 309 files = dict( 310 content.get("files", {}), 311 **content.get("files_optional", {}) 312 ) 313 except Exception as err: 314 self.log.debug("Error loading %s for removeContent: %s" % (inner_path, Debug.formatException(err))) 315 files = {} 316 files["content.json"] = True 317 # Deleting files that no longer in content.json 318 for file_relative_path in files: 319 file_inner_path = inner_dir + file_relative_path 320 try: 321 self.site.storage.delete(file_inner_path) 322 self.log.debug("Deleted file: %s" % file_inner_path) 323 except Exception as err: 324 self.log.debug("Error deleting file %s: %s" % (file_inner_path, err)) 325 try: 326 self.site.storage.deleteDir(inner_dir) 327 except Exception as err: 328 self.log.debug("Error deleting dir %s: %s" % (inner_dir, err)) 329 330 try: 331 del self.contents[inner_path] 332 except Exception as err: 333 self.log.debug("Error key from contents: %s" % inner_path) 334 335 # Get total size of site 336 # Return: 32819 (size of files in kb) 337 def getTotalSize(self, ignore=None): 338 return self.contents.db.getTotalSize(self.site, ignore) 339 340 def listModified(self, after=None, before=None): 341 return self.contents.db.listModified(self.site, after=after, before=before) 342 343 def listContents(self, inner_path="content.json", user_files=False): 344 if inner_path not in self.contents: 345 return [] 346 back = [inner_path] 347 content_inner_dir = helper.getDirname(inner_path) 348 for relative_path in list(self.contents[inner_path].get("includes", {}).keys()): 349 include_inner_path = content_inner_dir + relative_path 350 back += self.listContents(include_inner_path) 351 return back 352 353 # Returns if file with the given modification date is archived or not 354 def isArchived(self, inner_path, modified): 355 match = re.match(r"(.*)/(.*?)/", inner_path) 356 if not match: 357 return False 358 user_contents_inner_path = match.group(1) + "/content.json" 359 relative_directory = match.group(2) 360 361 file_info = self.getFileInfo(user_contents_inner_path) 362 if file_info: 363 time_archived_before = file_info.get("archived_before", 0) 364 time_directory_archived = file_info.get("archived", {}).get(relative_directory, 0) 365 if modified <= time_archived_before or modified <= time_directory_archived: 366 return True 367 else: 368 return False 369 else: 370 return False 371 372 def isDownloaded(self, inner_path, hash_id=None): 373 if not hash_id: 374 file_info = self.getFileInfo(inner_path) 375 if not file_info or "sha512" not in file_info: 376 return False 377 hash_id = self.hashfield.getHashId(file_info["sha512"]) 378 return hash_id in self.hashfield 379 380 # Is modified since signing 381 def isModified(self, inner_path): 382 s = time.time() 383 if inner_path.endswith("content.json"): 384 try: 385 is_valid = self.verifyFile(inner_path, self.site.storage.open(inner_path), ignore_same=False) 386 if is_valid: 387 is_modified = False 388 else: 389 is_modified = True 390 except VerifyError: 391 is_modified = True 392 else: 393 try: 394 self.verifyFile(inner_path, self.site.storage.open(inner_path), ignore_same=False) 395 is_modified = False 396 except VerifyError: 397 is_modified = True 398 return is_modified 399 400 # Find the file info line from self.contents 401 # Return: { "sha512": "c29d73d...21f518", "size": 41 , "content_inner_path": "content.json"} 402 def getFileInfo(self, inner_path, new_file=False): 403 dirs = inner_path.split("/") # Parent dirs of content.json 404 inner_path_parts = [dirs.pop()] # Filename relative to content.json 405 while True: 406 content_inner_path = "%s/content.json" % "/".join(dirs) 407 content_inner_path = content_inner_path.strip("/") 408 content = self.contents.get(content_inner_path) 409 410 # Check in files 411 if content and "files" in content: 412 back = content["files"].get("/".join(inner_path_parts)) 413 if back: 414 back["content_inner_path"] = content_inner_path 415 back["optional"] = False 416 back["relative_path"] = "/".join(inner_path_parts) 417 return back 418 419 # Check in optional files 420 if content and "files_optional" in content: # Check if file in this content.json 421 back = content["files_optional"].get("/".join(inner_path_parts)) 422 if back: 423 back["content_inner_path"] = content_inner_path 424 back["optional"] = True 425 back["relative_path"] = "/".join(inner_path_parts) 426 return back 427 428 # Return the rules if user dir 429 if content and "user_contents" in content: 430 back = content["user_contents"] 431 content_inner_path_dir = helper.getDirname(content_inner_path) 432 relative_content_path = inner_path[len(content_inner_path_dir):] 433 user_auth_address_match = re.match(r"([A-Za-z0-9]+)/.*", relative_content_path) 434 if user_auth_address_match: 435 user_auth_address = user_auth_address_match.group(1) 436 back["content_inner_path"] = "%s%s/content.json" % (content_inner_path_dir, user_auth_address) 437 else: 438 back["content_inner_path"] = content_inner_path_dir + "content.json" 439 back["optional"] = None 440 back["relative_path"] = "/".join(inner_path_parts) 441 return back 442 443 if new_file and content: 444 back = {} 445 back["content_inner_path"] = content_inner_path 446 back["relative_path"] = "/".join(inner_path_parts) 447 back["optional"] = None 448 return back 449 450 # No inner path in this dir, lets try the parent dir 451 if dirs: 452 inner_path_parts.insert(0, dirs.pop()) 453 else: # No more parent dirs 454 break 455 456 # Not found 457 return False 458 459 # Get rules for the file 460 # Return: The rules for the file or False if not allowed 461 def getRules(self, inner_path, content=None): 462 if not inner_path.endswith("content.json"): # Find the files content.json first 463 file_info = self.getFileInfo(inner_path) 464 if not file_info: 465 return False # File not found 466 inner_path = file_info["content_inner_path"] 467 468 if inner_path == "content.json": # Root content.json 469 rules = {} 470 rules["signers"] = self.getValidSigners(inner_path, content) 471 return rules 472 473 dirs = inner_path.split("/") # Parent dirs of content.json 474 inner_path_parts = [dirs.pop()] # Filename relative to content.json 475 inner_path_parts.insert(0, dirs.pop()) # Dont check in self dir 476 while True: 477 content_inner_path = "%s/content.json" % "/".join(dirs) 478 parent_content = self.contents.get(content_inner_path.strip("/")) 479 if parent_content and "includes" in parent_content: 480 return parent_content["includes"].get("/".join(inner_path_parts)) 481 elif parent_content and "user_contents" in parent_content: 482 return self.getUserContentRules(parent_content, inner_path, content) 483 else: # No inner path in this dir, lets try the parent dir 484 if dirs: 485 inner_path_parts.insert(0, dirs.pop()) 486 else: # No more parent dirs 487 break 488 489 return False 490 491 # Get rules for a user file 492 # Return: The rules of the file or False if not allowed 493 def getUserContentRules(self, parent_content, inner_path, content): 494 user_contents = parent_content["user_contents"] 495 496 # Delivered for directory 497 if "inner_path" in parent_content: 498 parent_content_dir = helper.getDirname(parent_content["inner_path"]) 499 user_address = re.match(r"([A-Za-z0-9]*?)/", inner_path[len(parent_content_dir):]).group(1) 500 else: 501 user_address = re.match(r".*/([A-Za-z0-9]*?)/.*?$", inner_path).group(1) 502 503 try: 504 if not content: 505 content = self.site.storage.loadJson(inner_path) # Read the file if no content specified 506 user_urn = "%s/%s" % (content["cert_auth_type"], content["cert_user_id"]) # web/nofish@zeroid.bit 507 cert_user_id = content["cert_user_id"] 508 except Exception: # Content.json not exist 509 user_urn = "n-a/n-a" 510 cert_user_id = "n-a" 511 512 if user_address in user_contents["permissions"]: 513 rules = copy.copy(user_contents["permissions"].get(user_address, {})) # Default rules based on address 514 else: 515 rules = copy.copy(user_contents["permissions"].get(cert_user_id, {})) # Default rules based on username 516 517 if rules is False: 518 banned = True 519 rules = {} 520 else: 521 banned = False 522 if "signers" in rules: 523 rules["signers"] = rules["signers"][:] # Make copy of the signers 524 for permission_pattern, permission_rules in list(user_contents["permission_rules"].items()): # Regexp rules 525 if not SafeRe.match(permission_pattern, user_urn): 526 continue # Rule is not valid for user 527 # Update rules if its better than current recorded ones 528 for key, val in permission_rules.items(): 529 if key not in rules: 530 if type(val) is list: 531 rules[key] = val[:] # Make copy 532 else: 533 rules[key] = val 534 elif type(val) is int: # Int, update if larger 535 if val > rules[key]: 536 rules[key] = val 537 elif hasattr(val, "startswith"): # String, update if longer 538 if len(val) > len(rules[key]): 539 rules[key] = val 540 elif type(val) is list: # List, append 541 rules[key] += val 542 543 # Accepted cert signers 544 rules["cert_signers"] = user_contents.get("cert_signers", {}) 545 rules["cert_signers_pattern"] = user_contents.get("cert_signers_pattern") 546 547 if "signers" not in rules: 548 rules["signers"] = [] 549 550 if not banned: 551 rules["signers"].append(user_address) # Add user as valid signer 552 rules["user_address"] = user_address 553 rules["includes_allowed"] = False 554 555 return rules 556 557 # Get diffs for changed files 558 def getDiffs(self, inner_path, limit=30 * 1024, update_files=True): 559 if inner_path not in self.contents: 560 return {} 561 diffs = {} 562 content_inner_path_dir = helper.getDirname(inner_path) 563 for file_relative_path in self.contents[inner_path].get("files", {}): 564 file_inner_path = content_inner_path_dir + file_relative_path 565 if self.site.storage.isFile(file_inner_path + "-new"): # New version present 566 diffs[file_relative_path] = Diff.diff( 567 list(self.site.storage.open(file_inner_path)), 568 list(self.site.storage.open(file_inner_path + "-new")), 569 limit=limit 570 ) 571 if update_files: 572 self.site.storage.delete(file_inner_path) 573 self.site.storage.rename(file_inner_path + "-new", file_inner_path) 574 if self.site.storage.isFile(file_inner_path + "-old"): # Old version present 575 diffs[file_relative_path] = Diff.diff( 576 list(self.site.storage.open(file_inner_path + "-old")), 577 list(self.site.storage.open(file_inner_path)), 578 limit=limit 579 ) 580 if update_files: 581 self.site.storage.delete(file_inner_path + "-old") 582 return diffs 583 584 def hashFile(self, dir_inner_path, file_relative_path, optional=False): 585 back = {} 586 file_inner_path = dir_inner_path + "/" + file_relative_path 587 588 file_path = self.site.storage.getPath(file_inner_path) 589 file_size = os.path.getsize(file_path) 590 sha512sum = CryptHash.sha512sum(file_path) # Calculate sha512 sum of file 591 if optional and not self.hashfield.hasHash(sha512sum): 592 self.optionalDownloaded(file_inner_path, self.hashfield.getHashId(sha512sum), file_size, own=True) 593 594 back[file_relative_path] = {"sha512": sha512sum, "size": os.path.getsize(file_path)} 595 return back 596 597 def isValidRelativePath(self, relative_path): 598 if ".." in relative_path.replace("\\", "/").split("/"): 599 return False 600 elif len(relative_path) > 255: 601 return False 602 elif relative_path[0] in ("/", "\\"): # Starts with 603 return False 604 elif relative_path[-1] in (".", " "): # Ends with 605 return False 606 elif re.match(r".*(^|/)(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9]|CONOUT\$|CONIN\$)(\.|/|$)", relative_path, re.IGNORECASE): # Protected on Windows 607 return False 608 else: 609 return re.match(r"^[^\x00-\x1F\"*:<>?\\|]+$", relative_path) 610 611 def sanitizePath(self, inner_path): 612 return re.sub("[\x00-\x1F\"*:<>?\\|]", "", inner_path) 613 614 # Hash files in directory 615 def hashFiles(self, dir_inner_path, ignore_pattern=None, optional_pattern=None): 616 files_node = {} 617 files_optional_node = {} 618 db_inner_path = self.site.storage.getDbFile() 619 if dir_inner_path and not self.isValidRelativePath(dir_inner_path): 620 ignored = True 621 self.log.error("- [ERROR] Only ascii encoded directories allowed: %s" % dir_inner_path) 622 623 for file_relative_path in self.site.storage.walk(dir_inner_path, ignore_pattern): 624 file_name = helper.getFilename(file_relative_path) 625 626 ignored = optional = False 627 if file_name == "content.json": 628 ignored = True 629 elif file_name.startswith(".") or file_name.endswith("-old") or file_name.endswith("-new"): 630 ignored = True 631 elif not self.isValidRelativePath(file_relative_path): 632 ignored = True 633 self.log.error("- [ERROR] Invalid filename: %s" % file_relative_path) 634 elif dir_inner_path == "" and db_inner_path and file_relative_path.startswith(db_inner_path): 635 ignored = True 636 elif optional_pattern and SafeRe.match(optional_pattern, file_relative_path): 637 optional = True 638 639 if ignored: # Ignore content.json, defined regexp and files starting with . 640 self.log.info("- [SKIPPED] %s" % file_relative_path) 641 else: 642 if optional: 643 self.log.info("- [OPTIONAL] %s" % file_relative_path) 644 files_optional_node.update( 645 self.hashFile(dir_inner_path, file_relative_path, optional=True) 646 ) 647 else: 648 self.log.info("- %s" % file_relative_path) 649 files_node.update( 650 self.hashFile(dir_inner_path, file_relative_path) 651 ) 652 return files_node, files_optional_node 653 654 # Create and sign a content.json 655 # Return: The new content if filewrite = False 656 def sign(self, inner_path="content.json", privatekey=None, filewrite=True, update_changed_files=False, extend=None, remove_missing_optional=False): 657 if not inner_path.endswith("content.json"): 658 raise SignError("Invalid file name, you can only sign content.json files") 659 660 if inner_path in self.contents: 661 content = self.contents.get(inner_path) 662 if content and content.get("cert_sign", False) is None and self.site.storage.isFile(inner_path): 663 # Recover cert_sign from file 664 content["cert_sign"] = self.site.storage.loadJson(inner_path).get("cert_sign") 665 else: 666 content = None 667 if not content: # Content not exist yet, load default one 668 self.log.info("File %s not exist yet, loading default values..." % inner_path) 669 670 if self.site.storage.isFile(inner_path): 671 content = self.site.storage.loadJson(inner_path) 672 if "files" not in content: 673 content["files"] = {} 674 if "signs" not in content: 675 content["signs"] = {} 676 else: 677 content = {"files": {}, "signs": {}} # Default content.json 678 679 if inner_path == "content.json": # It's the root content.json, add some more fields 680 content["title"] = "%s - ZeroNet_" % self.site.address 681 content["description"] = "" 682 content["signs_required"] = 1 683 content["ignore"] = "" 684 685 if extend: 686 # Add extend keys if not exists 687 for key, val in list(extend.items()): 688 if not content.get(key): 689 content[key] = val 690 self.log.info("Extending content.json with: %s" % key) 691 692 directory = helper.getDirname(self.site.storage.getPath(inner_path)) 693 inner_directory = helper.getDirname(inner_path) 694 self.log.info("Opening site data directory: %s..." % directory) 695 696 changed_files = [inner_path] 697 files_node, files_optional_node = self.hashFiles( 698 helper.getDirname(inner_path), content.get("ignore"), content.get("optional") 699 ) 700 701 if not remove_missing_optional: 702 for file_inner_path, file_details in content.get("files_optional", {}).items(): 703 if file_inner_path not in files_optional_node: 704 files_optional_node[file_inner_path] = file_details 705 706 # Find changed files 707 files_merged = files_node.copy() 708 files_merged.update(files_optional_node) 709 for file_relative_path, file_details in files_merged.items(): 710 old_hash = content.get("files", {}).get(file_relative_path, {}).get("sha512") 711 new_hash = files_merged[file_relative_path]["sha512"] 712 if old_hash != new_hash: 713 changed_files.append(inner_directory + file_relative_path) 714 715 self.log.debug("Changed files: %s" % changed_files) 716 if update_changed_files: 717 for file_path in changed_files: 718 self.site.storage.onUpdated(file_path) 719 720 # Generate new content.json 721 self.log.info("Adding timestamp and sha512sums to new content.json...") 722 723 new_content = content.copy() # Create a copy of current content.json 724 new_content["files"] = files_node # Add files sha512 hash 725 if files_optional_node: 726 new_content["files_optional"] = files_optional_node 727 elif "files_optional" in new_content: 728 del new_content["files_optional"] 729 730 new_content["modified"] = int(time.time()) # Add timestamp 731 if inner_path == "content.json": 732 new_content["zeronet_version"] = config.version 733 new_content["signs_required"] = content.get("signs_required", 1) 734 735 new_content["address"] = self.site.address 736 new_content["inner_path"] = inner_path 737 738 # Verify private key 739 from Crypt import CryptBitcoin 740 self.log.info("Verifying private key...") 741 privatekey_address = CryptBitcoin.privatekeyToAddress(privatekey) 742 valid_signers = self.getValidSigners(inner_path, new_content) 743 if privatekey_address not in valid_signers: 744 raise SignError( 745 "Private key invalid! Valid signers: %s, Private key address: %s" % 746 (valid_signers, privatekey_address) 747 ) 748 self.log.info("Correct %s in valid signers: %s" % (privatekey_address, valid_signers)) 749 750 if inner_path == "content.json" and privatekey_address == self.site.address: 751 # If signing using the root key, then sign the valid signers 752 signers_data = "%s:%s" % (new_content["signs_required"], ",".join(valid_signers)) 753 new_content["signers_sign"] = CryptBitcoin.sign(str(signers_data), privatekey) 754 if not new_content["signers_sign"]: 755 self.log.info("Old style address, signers_sign is none") 756 757 self.log.info("Signing %s..." % inner_path) 758 759 if "signs" in new_content: 760 del(new_content["signs"]) # Delete old signs 761 if "sign" in new_content: 762 del(new_content["sign"]) # Delete old sign (backward compatibility) 763 764 sign_content = json.dumps(new_content, sort_keys=True) 765 sign = CryptBitcoin.sign(sign_content, privatekey) 766 # new_content["signs"] = content.get("signs", {}) # TODO: Multisig 767 if sign: # If signing is successful (not an old address) 768 new_content["signs"] = {} 769 new_content["signs"][privatekey_address] = sign 770 771 self.verifyContent(inner_path, new_content) 772 773 if filewrite: 774 self.log.info("Saving to %s..." % inner_path) 775 self.site.storage.writeJson(inner_path, new_content) 776 self.contents[inner_path] = new_content 777 778 self.log.info("File %s signed!" % inner_path) 779 780 if filewrite: # Written to file 781 return True 782 else: # Return the new content 783 return new_content 784 785 # The valid signers of content.json file 786 # Return: ["1KRxE1s3oDyNDawuYWpzbLUwNm8oDbeEp6", "13ReyhCsjhpuCVahn1DHdf6eMqqEVev162"] 787 def getValidSigners(self, inner_path, content=None): 788 valid_signers = [] 789 if inner_path == "content.json": # Root content.json 790 if "content.json" in self.contents and "signers" in self.contents["content.json"]: 791 valid_signers += self.contents["content.json"]["signers"][:] 792 else: 793 rules = self.getRules(inner_path, content) 794 if rules and "signers" in rules: 795 valid_signers += rules["signers"] 796 797 if self.site.address not in valid_signers: 798 valid_signers.append(self.site.address) # Site address always valid 799 return valid_signers 800 801 # Return: The required number of valid signs for the content.json 802 def getSignsRequired(self, inner_path, content=None): 803 return 1 # Todo: Multisig 804 805 def verifyCertSign(self, user_address, user_auth_type, user_name, issuer_address, sign): 806 from Crypt import CryptBitcoin 807 cert_subject = "%s#%s/%s" % (user_address, user_auth_type, user_name) 808 return CryptBitcoin.verify(cert_subject, issuer_address, sign) 809 810 def verifyCert(self, inner_path, content): 811 rules = self.getRules(inner_path, content) 812 813 if not rules: 814 raise VerifyError("No rules for this file") 815 816 if not rules.get("cert_signers") and not rules.get("cert_signers_pattern"): 817 return True # Does not need cert 818 819 if "cert_user_id" not in content: 820 raise VerifyError("Missing cert_user_id") 821 822 if content["cert_user_id"].count("@") != 1: 823 raise VerifyError("Invalid domain in cert_user_id") 824 825 name, domain = content["cert_user_id"].rsplit("@", 1) 826 cert_address = rules["cert_signers"].get(domain) 827 if not cert_address: # Unknown Cert signer 828 if rules.get("cert_signers_pattern") and SafeRe.match(rules["cert_signers_pattern"], domain): 829 cert_address = domain 830 else: 831 raise VerifyError("Invalid cert signer: %s" % domain) 832 833 return self.verifyCertSign(rules["user_address"], content["cert_auth_type"], name, cert_address, content["cert_sign"]) 834 835 # Checks if the content.json content is valid 836 # Return: True or False 837 def verifyContent(self, inner_path, content): 838 content_size = len(json.dumps(content, indent=1)) + sum([file["size"] for file in list(content["files"].values()) if file["size"] >= 0]) # Size of new content 839 # Calculate old content size 840 old_content = self.contents.get(inner_path) 841 if old_content: 842 old_content_size = len(json.dumps(old_content, indent=1)) + sum([file["size"] for file in list(old_content.get("files", {}).values())]) 843 old_content_size_optional = sum([file["size"] for file in list(old_content.get("files_optional", {}).values())]) 844 else: 845 old_content_size = 0 846 old_content_size_optional = 0 847 848 # Reset site site on first content.json 849 if not old_content and inner_path == "content.json": 850 self.site.settings["size"] = 0 851 852 content_size_optional = sum([file["size"] for file in list(content.get("files_optional", {}).values()) if file["size"] >= 0]) 853 site_size = self.site.settings["size"] - old_content_size + content_size # Site size without old content plus the new 854 site_size_optional = self.site.settings["size_optional"] - old_content_size_optional + content_size_optional # Site size without old content plus the new 855 856 site_size_limit = self.site.getSizeLimit() * 1024 * 1024 857 858 # Check site address 859 if content.get("address") and content["address"] != self.site.address: 860 raise VerifyError("Wrong site address: %s != %s" % (content["address"], self.site.address)) 861 862 # Check file inner path 863 if content.get("inner_path") and content["inner_path"] != inner_path: 864 raise VerifyError("Wrong inner_path: %s" % content["inner_path"]) 865 866 # If our content.json file bigger than the size limit throw error 867 if inner_path == "content.json": 868 content_size_file = len(json.dumps(content, indent=1)) 869 if content_size_file > site_size_limit: 870 # Save site size to display warning 871 self.site.settings["size"] = site_size 872 task = self.site.worker_manager.tasks.findTask(inner_path) 873 if task: # Dont try to download from other peers 874 self.site.worker_manager.failTask(task) 875 raise VerifyError("Content too large %s B > %s B, aborting task..." % (site_size, site_size_limit)) 876 877 # Verify valid filenames 878 for file_relative_path in list(content.get("files", {}).keys()) + list(content.get("files_optional", {}).keys()): 879 if not self.isValidRelativePath(file_relative_path): 880 raise VerifyError("Invalid relative path: %s" % file_relative_path) 881 882 if inner_path == "content.json": 883 self.site.settings["size"] = site_size 884 self.site.settings["size_optional"] = site_size_optional 885 return True # Root content.json is passed 886 else: 887 if self.verifyContentInclude(inner_path, content, content_size, content_size_optional): 888 self.site.settings["size"] = site_size 889 self.site.settings["size_optional"] = site_size_optional 890 return True 891 else: 892 raise VerifyError("Content verify error") 893 894 def verifyContentInclude(self, inner_path, content, content_size, content_size_optional): 895 # Load include details 896 rules = self.getRules(inner_path, content) 897 if not rules: 898 raise VerifyError("No rules") 899 900 # Check include size limit 901 if rules.get("max_size") is not None: # Include size limit 902 if content_size > rules["max_size"]: 903 raise VerifyError("Include too large %sB > %sB" % (content_size, rules["max_size"])) 904 905 if rules.get("max_size_optional") is not None: # Include optional files limit 906 if content_size_optional > rules["max_size_optional"]: 907 raise VerifyError("Include optional files too large %sB > %sB" % ( 908 content_size_optional, rules["max_size_optional"]) 909 ) 910 911 # Filename limit 912 if rules.get("files_allowed"): 913 for file_inner_path in list(content["files"].keys()): 914 if not SafeRe.match(r"^%s$" % rules["files_allowed"], file_inner_path): 915 raise VerifyError("File not allowed: %s" % file_inner_path) 916 917 if rules.get("files_allowed_optional"): 918 for file_inner_path in list(content.get("files_optional", {}).keys()): 919 if not SafeRe.match(r"^%s$" % rules["files_allowed_optional"], file_inner_path): 920 raise VerifyError("Optional file not allowed: %s" % file_inner_path) 921 922 # Check if content includes allowed 923 if rules.get("includes_allowed") is False and content.get("includes"): 924 raise VerifyError("Includes not allowed") 925 926 return True # All good 927 928 # Verify file validity 929 # Return: None = Same as before, False = Invalid, True = Valid 930 def verifyFile(self, inner_path, file, ignore_same=True): 931 if inner_path.endswith("content.json"): # content.json: Check using sign 932 from Crypt import CryptBitcoin 933 try: 934 if type(file) is dict: 935 new_content = file 936 else: 937 try: 938 if sys.version_info.major == 3 and sys.version_info.minor < 6: 939 new_content = json.loads(file.read().decode("utf8")) 940 else: 941 new_content = json.load(file) 942 except Exception as err: 943 raise VerifyError("Invalid json file: %s" % err) 944 if inner_path in self.contents: 945 old_content = self.contents.get(inner_path, {"modified": 0}) 946 # Checks if its newer the ours 947 if old_content["modified"] == new_content["modified"] and ignore_same: # Ignore, have the same content.json 948 return None 949 elif old_content["modified"] > new_content["modified"]: # We have newer 950 raise VerifyError( 951 "We have newer (Our: %s, Sent: %s)" % 952 (old_content["modified"], new_content["modified"]) 953 ) 954 if new_content["modified"] > time.time() + 60 * 60 * 24: # Content modified in the far future (allow 1 day+) 955 raise VerifyError("Modify timestamp is in the far future!") 956 if self.isArchived(inner_path, new_content["modified"]): 957 if inner_path in self.site.bad_files: 958 del self.site.bad_files[inner_path] 959 raise VerifyError("This file is archived!") 960 # Check sign 961 sign = new_content.get("sign") 962 signs = new_content.get("signs", {}) 963 if "sign" in new_content: 964 del(new_content["sign"]) # The file signed without the sign 965 if "signs" in new_content: 966 del(new_content["signs"]) # The file signed without the signs 967 968 sign_content = json.dumps(new_content, sort_keys=True) # Dump the json to string to remove whitepsace 969 970 # Fix float representation error on Android 971 modified = new_content["modified"] 972 if config.fix_float_decimals and type(modified) is float and not str(modified).endswith(".0"): 973 modified_fixed = "{:.6f}".format(modified).strip("0.") 974 sign_content = sign_content.replace( 975 '"modified": %s' % repr(modified), 976 '"modified": %s' % modified_fixed 977 ) 978 979 if signs: # New style signing 980 valid_signers = self.getValidSigners(inner_path, new_content) 981 signs_required = self.getSignsRequired(inner_path, new_content) 982 983 if inner_path == "content.json" and len(valid_signers) > 1: # Check signers_sign on root content.json 984 signers_data = "%s:%s" % (signs_required, ",".join(valid_signers)) 985 if not CryptBitcoin.verify(signers_data, self.site.address, new_content["signers_sign"]): 986 raise VerifyError("Invalid signers_sign!") 987 988 if inner_path != "content.json" and not self.verifyCert(inner_path, new_content): # Check if cert valid 989 raise VerifyError("Invalid cert!") 990 991 valid_signs = 0 992 for address in valid_signers: 993 if address in signs: 994 valid_signs += CryptBitcoin.verify(sign_content, address, signs[address]) 995 if valid_signs >= signs_required: 996 break # Break if we has enough signs 997 if valid_signs < signs_required: 998 raise VerifyError("Valid signs: %s/%s" % (valid_signs, signs_required)) 999 else: 1000 return self.verifyContent(inner_path, new_content) 1001 else: # Old style signing 1002 raise VerifyError("Invalid old-style sign") 1003 1004 except Exception as err: 1005 self.log.warning("%s: verify sign error: %s" % (inner_path, Debug.formatException(err))) 1006 raise err 1007 1008 else: # Check using sha512 hash 1009 file_info = self.getFileInfo(inner_path) 1010 if file_info: 1011 if CryptHash.sha512sum(file) != file_info.get("sha512", ""): 1012 raise VerifyError("Invalid hash") 1013 1014 if file_info.get("size", 0) != file.tell(): 1015 raise VerifyError( 1016 "File size does not match %s <> %s" % 1017 (inner_path, file.tell(), file_info.get("size", 0)) 1018 ) 1019 1020 return True 1021 1022 else: # File not in content.json 1023 raise VerifyError("File not in content.json") 1024 1025 def optionalDelete(self, inner_path): 1026 self.site.storage.delete(inner_path) 1027 1028 def optionalDownloaded(self, inner_path, hash_id, size=None, own=False): 1029 if size is None: 1030 size = self.site.storage.getSize(inner_path) 1031 1032 done = self.hashfield.appendHashId(hash_id) 1033 self.site.settings["optional_downloaded"] += size 1034 return done 1035 1036 def optionalRemoved(self, inner_path, hash_id, size=None): 1037 if size is None: 1038 size = self.site.storage.getSize(inner_path) 1039 done = self.hashfield.removeHashId(hash_id) 1040 1041 self.site.settings["optional_downloaded"] -= size 1042 return done 1043 1044 def optionalRenamed(self, inner_path_old, inner_path_new): 1045 return True