region filter works

This commit is contained in:
2023-06-21 00:48:10 +02:00
commit dc94ea45fc
12 changed files with 1187 additions and 0 deletions

9
.cargo/config Normal file
View File

@@ -0,0 +1,9 @@
# Compile (and build docs) with code generation tuned to the CPU of the build machine.
# NOTE(review): binaries built with target-cpu=native may not run on CPUs lacking the
# build host's instruction-set extensions — confirm this is acceptable for distribution.
[build]
rustflags = ["-C", "target-cpu=native"]
rustdocflags = ["-C", "target-cpu=native"]
# WebAssembly targets get the simd128 target feature instead.
# NOTE(review): per the Cargo configuration reference, target-specific rustflags take the
# place of [build] rustflags rather than being appended — confirm this is the intent.
[target.wasm32-unknown-unknown]
rustflags = ["-C", "target-feature=+simd128"]
[target.wasm32-wasi]
rustflags = ["-C", "target-feature=+simd128"]

1
.envrc Normal file
View File

@@ -0,0 +1 @@
# Load the development environment from the flake in this repository.
# NOTE(review): presumably evaluated by direnv with flake support installed — confirm.
use flake

624
Cargo.lock generated Normal file
View File

@@ -0,0 +1,624 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
]
[[package]]
name = "ansi_term"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
dependencies = [
"winapi",
]
[[package]]
name = "anyhow"
version = "1.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
[[package]]
name = "atomic-polyfill"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3ff7eb3f316534d83a8a2c3d1674ace8a5a71198eba31e2e2b597833f699b28"
dependencies = [
"critical-section",
]
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "byteorder"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
dependencies = [
"jobserver",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clap"
version = "2.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
dependencies = [
"ansi_term",
"atty",
"bitflags",
"strsim",
"textwrap",
"unicode-width",
"vec_map",
]
[[package]]
name = "critical-section"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6548a0ad5d2549e111e1f6a11a6c2e2d00ce6a3dafe22948d67c2b443f775e52"
[[package]]
name = "edalyze"
version = "0.1.0"
dependencies = [
"anyhow",
"rstar",
"simd-json",
"structopt",
"zstd",
]
[[package]]
name = "float-cmp"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4"
dependencies = [
"num-traits",
]
[[package]]
name = "halfbrown"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f985624e90f861184145c13b736873a0f83cdb998a292dbb0653598ab03aecbf"
dependencies = [
"hashbrown",
"serde",
]
[[package]]
name = "hash32"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67"
dependencies = [
"byteorder",
]
[[package]]
name = "hashbrown"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
dependencies = [
"ahash",
]
[[package]]
name = "heapless"
version = "0.7.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db04bc24a18b9ea980628ecf00e6c0264f3c1426dac36c00cb49b6fbad8b0743"
dependencies = [
"atomic-polyfill",
"hash32",
"rustc_version",
"spin",
"stable_deref_trait",
]
[[package]]
name = "heck"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
"libc",
]
[[package]]
name = "itoa"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
[[package]]
name = "jobserver"
version = "0.1.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2"
dependencies = [
"libc",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lexical-core"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46"
dependencies = [
"lexical-parse-float",
"lexical-parse-integer",
"lexical-util",
"lexical-write-float",
"lexical-write-integer",
]
[[package]]
name = "lexical-parse-float"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f"
dependencies = [
"lexical-parse-integer",
"lexical-util",
"static_assertions",
]
[[package]]
name = "lexical-parse-integer"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9"
dependencies = [
"lexical-util",
"static_assertions",
]
[[package]]
name = "lexical-util"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc"
dependencies = [
"static_assertions",
]
[[package]]
name = "lexical-write-float"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862"
dependencies = [
"lexical-util",
"lexical-write-integer",
"static_assertions",
]
[[package]]
name = "lexical-write-integer"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446"
dependencies = [
"lexical-util",
"static_assertions",
]
[[package]]
name = "libc"
version = "0.2.146"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b"
[[package]]
name = "libm"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4"
[[package]]
name = "lock_api"
version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
"libm",
]
[[package]]
name = "once_cell"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "pkg-config"
version = "0.3.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
[[package]]
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
dependencies = [
"proc-macro-error-attr",
"proc-macro2",
"quote",
"syn 1.0.109",
"version_check",
]
[[package]]
name = "proc-macro-error-attr"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
dependencies = [
"proc-macro2",
"quote",
"version_check",
]
[[package]]
name = "proc-macro2"
version = "1.0.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rstar"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73111312eb7a2287d229f06c00ff35b51ddee180f017ab6dec1f69d62ac098d6"
dependencies = [
"heapless",
"num-traits",
"smallvec",
]
[[package]]
name = "rustc_version"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
dependencies = [
"semver",
]
[[package]]
name = "ryu"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "semver"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"
[[package]]
name = "serde"
version = "1.0.164"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.164"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
]
[[package]]
name = "serde_json"
version = "1.0.97"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bdf3bf93142acad5821c99197022e170842cdbc1c30482b98750c688c640842a"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "simd-json"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3d0815e7ff0f1f05e09d4b029f86d8a330f0ab15b35b28736f3758325f59e14"
dependencies = [
"halfbrown",
"lexical-core",
"serde",
"serde_json",
"simdutf8",
"value-trait",
]
[[package]]
name = "simdutf8"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a"
[[package]]
name = "smallvec"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
[[package]]
name = "spin"
version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
dependencies = [
"lock_api",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
[[package]]
name = "structopt"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10"
dependencies = [
"clap",
"lazy_static",
"structopt-derive",
]
[[package]]
name = "structopt-derive"
version = "0.4.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0"
dependencies = [
"heck",
"proc-macro-error",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
"unicode-width",
]
[[package]]
name = "unicode-ident"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"
[[package]]
name = "unicode-segmentation"
version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36"
[[package]]
name = "unicode-width"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "value-trait"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09a5b6c8ceb01263b969cac48d4a6705134d490ded13d889e52c0cfc80c6945e"
dependencies = [
"float-cmp",
"halfbrown",
"itoa",
"ryu",
]
[[package]]
name = "vec_map"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "zstd"
version = "0.11.2+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "5.0.2+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db"
dependencies = [
"libc",
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.8+zstd.1.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c"
dependencies = [
"cc",
"libc",
"pkg-config",
]

13
Cargo.toml Normal file
View File

@@ -0,0 +1,13 @@
[package]
name = "edalyze"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
# NOTE(review): no use of structopt is visible in the sources of this commit — confirm it is needed.
structopt = "0.3"
# JSON parsing into a flat tape (`simd_json::to_tape`), consumed by src/tapetool.rs.
simd-json = "0.10"
# NOTE(review): no use of rstar is visible in the sources of this commit — confirm it is needed.
rstar = "0.11"
# Streaming decompression of the `.json.zst` input in src/ioutil.rs.
zstd = "0.11"
# Error type returned by the binaries' `main` functions.
anyhow = "1"

41
flake.lock generated Normal file
View File

@@ -0,0 +1,41 @@
{
"nodes": {
"flake-utils": {
"locked": {
"lastModified": 1659877975,
"narHash": "sha256-zllb8aq3YO3h8B/U0/J1WBgAL8EX5yWf5pMj3G0NAmc=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "c0e246b9b83f637f4681389ecabcb2681b4f3af0",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1673606088,
"narHash": "sha256-wdYD41UwNwPhTdMaG0AIe7fE1bAdyHe6bB4HLUqUvck=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "37b97ae3dd714de9a17923d004a2c5b5543dfa6d",
"type": "github"
},
"original": {
"id": "nixpkgs",
"type": "indirect"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

21
flake.nix Normal file
View File

@@ -0,0 +1,21 @@
# Nix flake that provides the development shell for this project.
{
description = "A very basic flake";
# `nixpkgs` is an indirect (registry) reference; the exact revision is pinned in flake.lock.
inputs.nixpkgs.url = "nixpkgs";
inputs.flake-utils.url = "github:numtide/flake-utils";
# Produce the same outputs for every default system supported by flake-utils.
outputs = { self, nixpkgs, flake-utils }: flake-utils.lib.eachDefaultSystem (system:
let
pkgs = nixpkgs.legacyPackages.${system};
in {
# Development shell: rustup supplies the Rust toolchain, rust-analyzer the editor support.
devShells.default = pkgs.mkShell {
buildInputs = with pkgs; [
rustup
];
nativeBuildInputs = with pkgs; [
rust-analyzer
];
};
});
}

72
src/bin/body_types.rs Normal file
View File

@@ -0,0 +1,72 @@
use std::collections::HashSet;
use simd_json::Node;
use edalyze::tapetool::{TapeCursor, Value};
/// Extracts the `subType` string of a body object, but only for bodies whose
/// `type` is `"Star"`.
///
/// The two fields may appear in either order; the function returns as soon as
/// both have been seen. Returns `None` when `v` is not an object, when `type`
/// or `subType` is not a string, when `type` is anything other than `"Star"`,
/// or when the object ends before both fields were found.
fn parse_body_fueldist(v: Value) -> Option<String> {
    // Sub-type seen before the `type` field confirmed the body is a star.
    let mut pending_sub_type: Option<&str> = None;
    let mut seen_star = false;

    for (key, field) in v.as_object()? {
        if key == "type" {
            if field.as_str()? != "Star" {
                return None;
            }
            seen_star = true;
            if let Some(sub_type) = pending_sub_type {
                return Some(sub_type.to_owned());
            }
        } else if key == "subType" {
            let sub_type = field.as_str()?;
            if seen_star {
                return Some(sub_type.to_owned());
            }
            pending_sub_type = Some(sub_type);
        }
    }
    None
}
/// Walks one system record (a JSON object on `tape`) and adds the sub-type of
/// every star found in its `bodies` array to `coll`.
///
/// Returns `None` if the tape does not start with an object or if `bodies` is
/// present but is not an array; otherwise `Some(())`.
fn parse_system<'a>(coll: &mut HashSet<String>, tape: &'a [Node<'a>]) -> Option<()> {
    let root = TapeCursor::new(tape).next()?.as_object()?;
    for (key, value) in root {
        if key == "bodies" {
            coll.extend(value.as_array()?.filter_map(parse_body_fueldist));
        }
    }
    Some(())
}
/// Parses one input line as JSON and feeds it to [`parse_system`].
///
/// Yields `None` when the line is not valid JSON or when `parse_system`
/// rejects the record.
fn processor(coll: &mut HashSet<String>, line: &mut [u8]) -> Option<()> {
    match simd_json::to_tape(line) {
        Ok(tape) => parse_system(coll, &tape),
        Err(_) => None,
    }
}
/// Scans the whole galaxy dump and prints every distinct star sub-type to
/// standard error, one per line.
fn main() -> anyhow::Result<()> {
    let mut sub_types = HashSet::new();
    let records = edalyze::ioutil::Processor::open_file(
        "/space/data/ed-utils/spansh/galaxy.json.zst",
        |line| processor(&mut sub_types, line),
    )?;
    // The iterator is drained purely for its side effect of filling the set.
    records.for_each(drop);

    sub_types.into_iter().for_each(|typ| eprintln!("{typ}"));
    Ok(())
}

179
src/bin/regionfilter.rs Normal file
View File

@@ -0,0 +1,179 @@
use std::io::Write;
use anyhow::anyhow;
use simd_json::Node;
use edalyze::tapetool::{TapeCursor, TapeObject, Value};
/// Integer 3-D coordinate `(x, y, z)`.
///
/// `parse_coords` fills it with the JSON coordinate values multiplied by 32
/// and truncated to `i32`; the region corners in `main` use the same scale.
#[derive(Copy, Clone, Debug, Default)]
pub struct Point(i32, i32, i32);
/// Axis-aligned box described by two corner points.
///
/// When built through `Box::from_corners`, field 0 holds the per-axis minima
/// and field 1 the per-axis maxima.
#[derive(Copy, Clone, Debug)]
pub struct Box(Point, Point);
/// Flag bit: sub-type is one class letter from `OBAFGKM` followed by a space.
const IS_FUEL: u8 = 1;
/// Flag bit: sub-type starts with `"Wh"`.
const IS_WDRF: u8 = 2;
/// Flag bit: sub-type starts with `"N"`.
const IS_NEUT: u8 = 4;

/// Classifies a star `subType` string into one of the `IS_*` flag bits.
///
/// Checks are applied in order: a `"Wh"` prefix yields [`IS_WDRF`], an `"N"`
/// prefix yields [`IS_NEUT`], and a first byte from `OBAFGKM` immediately
/// followed by a space yields [`IS_FUEL`]. Anything else — including strings
/// shorter than two bytes, which previously caused an out-of-range slice
/// panic — yields `0`.
fn star_typ(sub_type: &str) -> u8 {
    if sub_type.starts_with("Wh") {
        return IS_WDRF;
    }
    if sub_type.starts_with('N') {
        return IS_NEUT;
    }
    // Match on the raw bytes so short inputs simply fall through to `0`.
    match sub_type.as_bytes() {
        [class, b' ', ..] if b"OBAFGKM".contains(class) => IS_FUEL,
        _ => 0,
    }
}
/// Returns the two integers as an ordered pair `(smaller, larger)`.
const fn isortp(a: i32, b: i32) -> (i32, i32) {
    if a >= b {
        (b, a)
    } else {
        (a, b)
    }
}
impl Box {
pub const fn from_corners(x: Point, y: Point) -> Box {
let (ax, bx) = isortp(x.0, y.0);
let (ay, by) = isortp(x.1, y.1);
let (az, bz) = isortp(x.2, y.2);
let pt1 = Point(ax, ay, az);
let pt2 = Point(bx, by, bz);
Box(pt1, pt2)
}
pub fn contains(&self, x: Point) -> bool {
return
(self.0 .0 .. self.1 .0).contains(&x.0) &&
(self.0 .1 .. self.1 .1).contains(&x.1) &&
(self.0 .2 .. self.1 .2).contains(&x.2);
}
}
/// The subset of one system record that the region filter keeps.
#[derive(Default)]
struct System {
// Position as produced by `parse_coords` (JSON values scaled by 32).
coords: Point,
// Value of the record's `name` field.
name: String,
// Bitwise OR of the `star_typ` flags of every star in the record's `bodies`.
stars: u8,
}
/// Converts a `coords` JSON object into a [`Point`].
///
/// Each value is narrowed to `f32`, multiplied by 32 and truncated to `i32`.
/// Axes missing from the object keep their default of `0`.
///
/// # Errors
///
/// Fails when a value is not numeric or when a key other than `x`, `y` or
/// `z` is present. The value is checked before the key.
fn parse_coords(c: TapeObject) -> Result<Point, &'static str> {
    let mut point = Point::default();
    for (axis, raw) in c {
        let scaled = match raw.as_f64() {
            Some(value) => (value as f32 * 32.) as i32,
            None => return Err("Coord value must be f64"),
        };
        let slot = match axis {
            "x" => &mut point.0,
            "y" => &mut point.1,
            "z" => &mut point.2,
            _ => return Err("Unexpected coord field"),
        };
        *slot = scaled;
    }
    Ok(point)
}
/// Reads a body object and, if it is a star, returns its arrival distance
/// together with its `star_typ` flags.
///
/// The `type`, `subType` and `distanceToArrival` fields may come in any
/// order; the function returns as soon as all three are known. `None` is
/// returned when `v` is not an object, when `type` is not `"Star"`, when one
/// of the three fields has the wrong JSON type, or when the object ends
/// before all three were seen.
fn parse_body(v: Value) -> Option<(f32, u8)> {
    let mut arrival_distance: Option<f32> = None;
    let mut flags: Option<u8> = None;
    let mut confirmed_star = false;

    for (key, field) in v.as_object()? {
        match key {
            "type" => {
                if field.as_str()? != "Star" {
                    return None;
                }
                confirmed_star = true;
            }
            "subType" => flags = Some(star_typ(field.as_str()?)),
            "distanceToArrival" => arrival_distance = Some(field.as_f64()? as f32),
            // Irrelevant fields cannot complete the triple, so skip the check.
            _ => continue,
        }
        if confirmed_star {
            if let (Some(distance), Some(flags)) = (arrival_distance, flags) {
                return Some((distance, flags));
            }
        }
    }
    None
}
/// Builds a [`System`] from the tape of one system record.
///
/// Only the `name`, `coords` and `bodies` fields are read; fields absent from
/// the record leave the corresponding `System` member at its default.
///
/// # Errors
///
/// Fails when the tape does not start with an object, or when one of the
/// three fields has an unexpected JSON type (including errors from
/// `parse_coords`).
fn parse_system<'a>(tape: &'a [Node<'a>]) -> Result<System, &'static str> {
    let root = TapeCursor::new(tape)
        .next()
        .and_then(|value| value.as_object())
        .ok_or("Expected a JSON object")?;

    let mut system = System::default();
    for (key, value) in root {
        match key {
            "name" => {
                system.name = value.as_str().ok_or("name must be str")?.to_owned();
            }
            "coords" => {
                let coords = value.as_object().ok_or("coords must be object")?;
                system.coords = parse_coords(coords)?;
            }
            "bodies" => {
                let bodies = value.as_array().ok_or("bodies should be array")?;
                // Combine the flags of every star; non-star bodies are filtered out.
                system.stars = bodies
                    .filter_map(parse_body)
                    .fold(0, |acc, (_, flags)| acc | flags);
            }
            _ => {}
        }
    }
    Ok(system)
}
/// Identity function: hands its argument back unchanged.
fn id<T>(value: T) -> T {
    value
}
/// Parses one input line into a [`System`], reporting failures as
/// `anyhow` errors.
///
/// # Errors
///
/// Fails when the line is not valid JSON or when `parse_system` rejects the
/// record.
fn processor_e(line: &mut [u8]) -> anyhow::Result<System> {
    let tape = simd_json::to_tape(line)?;
    match parse_system(&tape) {
        Ok(system) => Ok(system),
        Err(msg) => Err(anyhow!(msg)),
    }
}
/// Best-effort wrapper around [`processor_e`]: a failing line is reported on
/// standard error and turned into `None` instead of aborting the run.
fn processor(line: &mut [u8]) -> Option<System> {
    let outcome = processor_e(line);
    if let Err(e) = &outcome {
        // NOTE(review): the buffer was handed to the JSON parser mutably, so the
        // text echoed here may differ from the original input — confirm.
        let json = String::from_utf8_lossy(line);
        eprintln!("Error: {e}\nIn system: {}", json);
    }
    outcome.ok()
}
//static FILENAME: &str = "/space/data/ed-utils/spansh/galaxy_subset.json.zst";
static FILENAME: &str = "/space/data/ed-utils/spansh/galaxy.json.zst";
/// Filters the galaxy dump down to the systems inside a fixed region and
/// writes them as two binary files in the current directory.
///
/// * `region.nam` — for each system, one length byte followed by that many
///   bytes of the system name (names are cut off at 255 bytes).
/// * `region.dat` — for each system, 16 bytes: `x`, `y`, `z` as big-endian
///   `i32`, then a big-endian `u32` holding the star flags in the low four
///   bits and the byte offset of the system's entry in `region.nam` shifted
///   left by four.
///
/// # Errors
///
/// Fails when the input cannot be opened or when either output file cannot
/// be created, written or flushed.
fn main() -> anyhow::Result<()> {
    let f = edalyze::ioutil::Processor::open_file(FILENAME, processor)?;
    // Corners use the same scale as `parse_coords` (coordinate × 32).
    let region = Box::from_corners(
        Point(11900 * 32, -3800 * 32, -11510 * 32),
        Point(17400 * 32, 3800 * 32, -6500 * 32),
    );
    let mut reg_name = std::io::BufWriter::new(std::fs::File::create("region.nam")?);
    let mut reg_data = std::io::BufWriter::new(std::fs::File::create("region.dat")?);
    // Byte offset of the next entry in `region.nam`.
    // NOTE(review): only the low 28 bits survive the `<< 4` below, so offsets
    // beyond 256 MiB would be silently truncated — confirm this cannot happen.
    let mut name_pos = 0;
    for sys in f.filter_map(id).filter(|sys| region.contains(sys.coords)) {
        let hdr = [sys.name.len().min(255) as u8];
        if hdr[0] == 255 {
            eprintln!("WARNING: Long system name {}", sys.name);
        }
        // `write_all` rather than `write`: a short write must not go unnoticed,
        // otherwise the fixed-size record layout would be corrupted.
        reg_name.write_all(&hdr)?;
        reg_name.write_all(&sys.name.as_bytes()[..hdr[0] as usize])?;
        reg_data.write_all(&sys.coords.0.to_be_bytes())?;
        reg_data.write_all(&sys.coords.1.to_be_bytes())?;
        reg_data.write_all(&sys.coords.2.to_be_bytes())?;
        let rest = (sys.stars as u32) + (name_pos << 4);
        name_pos += hdr[0] as u32 + 1;
        reg_data.write_all(&rest.to_be_bytes())?;
    }
    // Flush explicitly: errors raised while flushing on drop are discarded.
    reg_name.flush()?;
    reg_data.flush()?;
    Ok(())
}

13
src/bin/sj_tape.rs Normal file
View File

@@ -0,0 +1,13 @@
use std::io::prelude::*;
/// Debug helper: reads all of standard input as one JSON document and prints
/// every node of its simd-json tape to standard error, one per line.
fn main() -> anyhow::Result<()> {
    let mut input = Vec::new();
    std::io::stdin().read_to_end(&mut input)?;
    simd_json::to_tape(input.as_mut_slice())?
        .into_iter()
        .for_each(|node| eprintln!("{node:?}"));
    Ok(())
}

57
src/ioutil.rs Normal file
View File

@@ -0,0 +1,57 @@
use std::{io::{BufRead, BufReader, self, Split}, fs::File, path::Path};
use zstd::Decoder;
/// An iterator that splits a buffered reader into newline-delimited records
/// and maps each non-empty record through a caller-supplied closure.
///
/// `T` is the closure's result type, `F` the closure and `R` the reader.
pub struct Processor<T: 'static, F: FnMut(&mut [u8]) -> T, R: BufRead> {
// Closure applied to every non-empty record.
proc_line: F,
// Input split on `\n`.
in_rdr: Split<R>,
// Bytes read since the last progress message (record length plus one per record).
qty_read: usize,
// Number of progress messages emitted so far, i.e. whole chunks of `CHUNK_SZ` bytes read.
nblocks: usize,
}
/// Reader stack built by `Processor::open_file`: a buffered zstd decoder over a file.
pub type ZstdReader = BufReader<Decoder<'static, BufReader<File>>>;
impl<T: 'static, F: FnMut(&mut [u8]) -> T> Processor<T, F, ZstdReader> {
pub fn open_file(f: impl AsRef<Path>, processor: F) -> io::Result<Self> {
let f = std::fs::File::open(f)?;
let reader = zstd::Decoder::new(f)?;
let reader = std::io::BufReader::new(reader);
eprintln!("");
Ok(Processor { proc_line: processor, in_rdr: reader.split(b'\n'), qty_read: 0, nblocks: 0 })
}
}
impl<T: 'static, F: FnMut(&mut [u8]) -> T, R: BufRead> Iterator for Processor<T, F, R> {
    type Item = T;

    /// Reads the next non-empty record and returns the closure's result for it.
    ///
    /// Trailing newline, space, tab, `[`, `]` and `,` bytes are stripped from
    /// each line first, so the brackets and separators of a
    /// one-element-per-line JSON array are ignored. Lines that end up empty
    /// are skipped. A progress message is written to standard error each time
    /// another `CHUNK_SZ` bytes have been read.
    ///
    /// Returns `None` at end of input. A read error also ends iteration, after
    /// being reported on standard error so that truncated input is not
    /// mistaken for a complete run.
    fn next(&mut self) -> Option<Self::Item> {
        // A loop rather than recursion: a long run of blank lines must not
        // grow the call stack.
        loop {
            let mut line = match self.in_rdr.next()? {
                Ok(line) => line,
                Err(err) => {
                    eprintln!("Error: failed to read input: {err}");
                    return None;
                }
            };
            // +1 accounts for the delimiter removed by `split`.
            self.qty_read += line.len() + 1;

            while matches!(
                line.last().copied(),
                Some(b'\n' | b' ' | b'\t' | b'[' | b']' | b',')
            ) {
                line.pop();
            }
            if line.is_empty() {
                continue;
            }

            if self.qty_read > CHUNK_SZ {
                // Notify each GiB: move the cursor up one line, clear it and
                // print the running total.
                self.nblocks += 1;
                self.qty_read -= CHUNK_SZ;
                eprintln!("\x1b[1A\x1b[K{} GiB", self.nblocks)
            }
            return Some((self.proc_line)(line.as_mut_slice()));
        }
    }
}
const CHUNK_SZ: usize = 1024 * 1024 * 1024; // 1GiB

2
src/lib.rs Normal file
View File

@@ -0,0 +1,2 @@
pub mod tapetool;
pub mod ioutil;

155
src/tapetool.rs Normal file
View File

@@ -0,0 +1,155 @@
use simd_json::{Node, StaticNode};
/// A read position within a simd-json tape.
#[derive(Copy, Clone)]
pub struct TapeCursor<'a> {
// Index, within the tape the cursor was created from, of the first node in `tape`.
base: usize,
// Nodes not yet consumed.
tape: &'a [Node<'a>],
}
/// Iterator over the `(key, value)` entries of a JSON object on the tape.
#[derive(Copy, Clone)]
pub struct TapeObject<'a>{
// Cursor restricted to the nodes belonging to this object.
csr: TapeCursor<'a>,
// Starts at the size recorded in the object's tape node; decremented per yielded entry.
remaining: usize,
}
/// Iterator over the elements of a JSON array on the tape.
#[derive(Copy, Clone)]
pub struct TapeArray<'a>{
// Cursor restricted to the nodes belonging to this array.
csr: TapeCursor<'a>,
// Starts at the size recorded in the array's tape node; decremented per yielded element.
remaining: usize,
}
/// One JSON value read from the tape by a `TapeCursor`.
#[derive(Copy, Clone)]
pub enum Value<'a> {
/// A scalar tape node (`StaticNode`), e.g. a number, boolean or null.
Static(StaticNode),
/// A string borrowed from the tape.
Str(&'a str),
/// An object, exposed as an iterator over its entries.
Object(TapeObject<'a>),
/// An array, exposed as an iterator over its elements.
Array(TapeArray<'a>),
}
impl<'a> Value<'a> {
    /// Returns the object iterator if this value is an object.
    pub fn as_object(self) -> Option<TapeObject<'a>> {
        match self {
            Value::Object(obj) => Some(obj),
            _ => None,
        }
    }

    /// Returns the array iterator if this value is an array.
    pub fn as_array(self) -> Option<TapeArray<'a>> {
        match self {
            Value::Array(arr) => Some(arr),
            _ => None,
        }
    }

    /// Returns the string slice if this value is a string.
    pub fn as_str(self) -> Option<&'a str> {
        match self {
            Value::Str(s) => Some(s),
            _ => None,
        }
    }

    /// Returns the value as `f64` if it is any kind of number.
    ///
    /// Integers are converted with `as`, so very large magnitudes are rounded
    /// to the nearest representable float.
    pub fn as_f64(self) -> Option<f64> {
        match self {
            Value::Static(StaticNode::F64(v)) => Some(v),
            Value::Static(StaticNode::I64(v)) => Some(v as f64),
            Value::Static(StaticNode::U64(v)) => Some(v as f64),
            _ => None,
        }
    }

    /// Returns the value as `i64` if it is an integer that fits.
    ///
    /// Unsigned values above `i64::MAX` yield `None` instead of wrapping to a
    /// negative number, mirroring the range check in [`Value::as_u64`].
    pub fn as_i64(self) -> Option<i64> {
        match self {
            Value::Static(StaticNode::I64(v)) => Some(v),
            Value::Static(StaticNode::U64(v)) => i64::try_from(v).ok(),
            _ => None,
        }
    }

    /// Returns the value as `u64` if it is a non-negative integer.
    pub fn as_u64(self) -> Option<u64> {
        match self {
            Value::Static(StaticNode::I64(v)) => u64::try_from(v).ok(),
            Value::Static(StaticNode::U64(v)) => Some(v),
            _ => None,
        }
    }

    /// Returns the boolean if this value is a boolean.
    pub fn as_bool(self) -> Option<bool> {
        match self {
            Value::Static(StaticNode::Bool(v)) => Some(v),
            _ => None,
        }
    }
}
impl<'a> TapeCursor<'a> {
    /// Creates a cursor at the start of `tape`.
    ///
    /// If the tape has at least two nodes and the first one is a null, that
    /// leading null is skipped.
    pub fn new(tape: &'a [Node<'a>]) -> Self {
        let leading_null = tape.len() >= 2 && tape[0] == Node::Static(StaticNode::Null);
        if leading_null {
            TapeCursor { tape: &tape[1..], base: 1 }
        } else {
            TapeCursor { tape, base: 0 }
        }
    }

    /// Consumes `qty` nodes and returns them, or returns `None` (leaving the
    /// cursor untouched) when fewer than `qty` nodes remain.
    fn advance(&mut self, qty: usize) -> Option<&'a [Node<'a>]> {
        let whole = self.tape;
        let taken = whole.get(..qty)?;
        self.tape = &whole[qty..];
        self.base += qty;
        Some(taken)
    }

    /// Consumes and returns a single node.
    fn advance1(&mut self) -> Option<Node<'a>> {
        self.advance(1).map(|nodes| nodes[0])
    }

    /// Splits off a child cursor covering everything from the current
    /// position up to (but excluding) tape index `end`, and moves this cursor
    /// past it.
    fn subobj(&mut self, end: usize) -> Option<TapeCursor<'a>> {
        let base = self.base;
        self.advance(end - base)
            .map(|tape| TapeCursor { tape, base })
    }
}
impl<'a> Iterator for TapeCursor<'a> {
    type Item = Value<'a>;

    /// Reads the next complete value.
    ///
    /// For objects and arrays the cursor jumps over all of the container's
    /// nodes, and the returned value carries its own cursor over them.
    fn next(&mut self) -> Option<Self::Item> {
        let value = match self.advance1()? {
            Node::String(text) => Value::Str(text),
            Node::Static(scalar) => Value::Static(scalar),
            Node::Object(size, end) => {
                let csr = self.subobj(end)?;
                Value::Object(TapeObject { csr, remaining: size })
            }
            Node::Array(size, end) => {
                let csr = self.subobj(end)?;
                Value::Array(TapeArray { csr, remaining: size })
            }
        };
        Some(value)
    }
}
impl<'a> Iterator for TapeArray<'a> {
    type Item = Value<'a>;

    /// Yields the next element, or `None` once the array's nodes are used up.
    fn next(&mut self) -> Option<Self::Item> {
        self.csr.next().map(|element| {
            self.remaining -= 1;
            element
        })
    }
}
impl<'a> Iterator for TapeObject<'a> {
    type Item = (&'a str, Value<'a>);

    /// Yields the next `(key, value)` entry.
    ///
    /// Returns `None` once the object's nodes are used up, and also when the
    /// node in key position is not a string (both nodes are consumed in that
    /// case).
    fn next(&mut self) -> Option<Self::Item> {
        // Evaluated left to right: key first, then value.
        let (key, value) = (self.csr.next()?, self.csr.next()?);
        match key {
            Value::Str(name) => {
                self.remaining -= 1;
                Some((name, value))
            }
            _ => None,
        }
    }
}