5 Commits

Author SHA1 Message Date
1a14d13303 build: Add SIMD optimizations to compare.c
Add SIMD (Single Instruction, Multiple Data) optimizations to the
compare.c file to improve performance. This includes vectorization
flags and architecture-specific optimizations to enhance the
processing of data in the comparison function. Additionally, the
implementation now dynamically selects an optimized code path based
on the size of the pattern data, ensuring efficient execution for
various scenarios.

By Lixfel
2024-04-27 20:07:46 +02:00
9c8551dc5a Move to i32 2023-12-27 20:49:21 +01:00
abb2084029 Move to i32 2023-12-27 20:44:37 +01:00
3eba89b119 Move to i32 2023-12-27 20:43:50 +01:00
511ce04f0b new Matcher 2023-12-26 21:28:46 +01:00
10 changed files with 228 additions and 34 deletions

2
.gitignore vendored Normal file → Executable file
View File

@@ -3,3 +3,5 @@
/.idea/
/SchemSearch.class
/SchemSearch.h
/schemsearch-lib/src/.idea/
/schemsearch-lib/src/cmake-build-debug/

View File

@@ -17,3 +17,4 @@ codegen-units = 1
[profile.release]
lto = true
opt-level = 3

View File

@@ -1,10 +1,10 @@
default:
@echo "Building (Release)...";
cargo rustc --release --color=always -p schemsearch-cli -- -C target-feature=+avx2
cargo rustc --release --color=always -p schemsearch-cli -- -C target-cpu=native
sql:
@echo "Building (Release)...";
cargo rustc --release --color=always -p schemsearch-cli --features sql -- -C target-feature=+avx2
cargo rustc --release --color=always -p schemsearch-cli --features sql -- -C target-cpu=native
debug:
@echo "Building (Debug)...";

View File

@@ -11,3 +11,6 @@ serde = { version = "1.0.160", features = ["derive"] }
schemsearch-files = { path = "../schemsearch-files" }
named-binary-tag = "0.6"
libmath = "0.2.1"
[build-dependencies]
cc = { version = "1.0.83", features = [] }

11
schemsearch-lib/build.rs Executable file
View File

@@ -0,0 +1,11 @@
use cc;
/// Build script: compiles `src/compare.c` into the static library `compare`
/// so the Rust crate can link against its C matching kernels.
fn main() {
    // Tell Cargo exactly which inputs matter. As soon as a build script prints
    // any `rerun-if-*` directive Cargo stops watching the rest of the package,
    // so the C source has to be listed explicitly or edits to it may not
    // trigger a rebuild.
    println!("cargo:rerun-if-changed=build.rs");
    println!("cargo:rerun-if-changed=src/compare.c");

    let mut build = cc::Build::new();
    build.file("src/compare.c");

    // These are GCC/Clang spellings. Probing each one keeps the build working
    // on toolchains that reject them (for example MSVC) instead of failing the
    // whole compilation on an unknown switch.
    for flag in [
        "-ftree-vectorize",
        "-march=native",
        "-mtune=native",
        "-ffast-math",
    ] {
        build.flag_if_supported(flag);
    }

    build.compile("compare");
}

View File

@@ -0,0 +1,7 @@
# Stand-alone CMake project for working on compare.c outside of Cargo
# (the Rust build compiles the same file through build.rs).
cmake_minimum_required(VERSION 3.27)
project(src C)

set(CMAKE_C_STANDARD 11)

# compare.c only provides library functions and defines no main(), so an
# executable target cannot link. Build it as a static library instead.
add_library(src STATIC
        compare.c)

110
schemsearch-lib/src/compare.c Executable file
View File

@@ -0,0 +1,110 @@
#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
/*
 * Counts how many cells of a pattern equal the schematic cells they cover
 * when the pattern is anchored at (x, y, z).
 *
 * Both volumes are addressed as  x + z * width + y * width * length.
 *
 * The pattern-sized window of the schematic is first copied into w_ptr so the
 * comparison itself runs over two contiguous arrays.
 *
 * Coordinates and dimensions are size_t: the Rust FFI declaration of this
 * function passes `usize` for them, and the widths must agree on both sides
 * of the call. Doing the index arithmetic in size_t also avoids the signed
 * overflow the previous `int` arithmetic could hit on large schematics.
 *
 * schem_data           schematic block ids
 * pattern_data         pattern block ids
 * pattern_data_length  number of entries of pattern_data / w_ptr to compare
 * x, y, z              anchor of the pattern inside the schematic
 * schem_width/length   schematic extents used for addressing
 * pattern_*            pattern extents
 * w_ptr                scratch buffer; receives
 *                      pattern_width * pattern_height * pattern_length cells
 *                      and must hold at least pattern_data_length entries
 *
 * Returns the number of indices i < pattern_data_length for which
 * w_ptr[i] == pattern_data[i].
 */
int32_t isMatching(
        const int32_t *schem_data,
        const int32_t *pattern_data,
        size_t pattern_data_length,
        size_t x,
        size_t y,
        size_t z,
        size_t schem_width,
        size_t schem_length,
        size_t pattern_width,
        size_t pattern_height,
        size_t pattern_length,
        int32_t *w_ptr
) {
    const size_t schem_layer = schem_width * schem_length;
    const size_t pattern_layer = pattern_width * pattern_length;

    /* Gather the window row by row; rows are contiguous along x. */
    for (size_t j = 0; j < pattern_height; ++j) {
        for (size_t k = 0; k < pattern_length; ++k) {
            const size_t dst_base = k * pattern_width + j * pattern_layer;
            const size_t src_base = x + (k + z) * schem_width + (j + y) * schem_layer;
            for (size_t i = 0; i < pattern_width; ++i) {
                w_ptr[dst_base + i] = schem_data[src_base + i];
            }
        }
    }

    /* Branch-free accumulation keeps this loop easy to vectorise. */
    int32_t matching = 0;
    for (size_t i = 0; i < pattern_data_length; ++i) {
        matching += w_ptr[i] == pattern_data[i];
    }
    return matching;
}
/*
 * Generates one counting kernel per cell type.
 *
 * For every pattern cell the kernel walks all anchor positions at which the
 * pattern still fits inside the schematic and adds 1 to counts[anchor] when
 * the schematic cell covered by that pattern cell has the same value. The
 * innermost loop runs over contiguous memory without branches so the compiler
 * can vectorise it; a narrower CELL type packs more lanes per vector.
 *
 * Volumes are addressed as  x + y * width + z * width * height.
 */
#define DEFINE_MATCH_COUNT_KERNEL(NAME, CELL)                                   \
static void NAME(                                                               \
        const CELL *__restrict__ cells,                                         \
        const int32_t *__restrict__ pattern_data,                               \
        int32_t schem_width,                                                    \
        int32_t schem_height,                                                   \
        int32_t schem_length,                                                   \
        int32_t pattern_width,                                                  \
        int32_t pattern_height,                                                 \
        int32_t pattern_length,                                                 \
        CELL *__restrict__ counts                                               \
) {                                                                             \
    const ptrdiff_t span_x = (ptrdiff_t)schem_width - pattern_width + 1;        \
    const ptrdiff_t span_y = (ptrdiff_t)schem_height - pattern_height + 1;      \
    const ptrdiff_t span_z = (ptrdiff_t)schem_length - pattern_length + 1;      \
    if (span_x <= 0 || span_y <= 0 || span_z <= 0) {                            \
        return;                                                                 \
    }                                                                           \
    const ptrdiff_t row = schem_width;                                          \
    const ptrdiff_t slab = row * schem_height;                                  \
    for (ptrdiff_t pz = 0; pz < pattern_length; ++pz) {                         \
        for (ptrdiff_t py = 0; py < pattern_height; ++py) {                     \
            for (ptrdiff_t px = 0; px < pattern_width; ++px) {                  \
                const CELL wanted = (CELL)pattern_data[                         \
                        px + pattern_width * (py + pattern_height * pz)];       \
                for (ptrdiff_t az = 0; az < span_z; ++az) {                     \
                    for (ptrdiff_t ay = 0; ay < span_y; ++ay) {                 \
                        const CELL *src = cells                                 \
                                + (az + pz) * slab + (ay + py) * row + px;      \
                        CELL *dst = counts + az * slab + ay * row;              \
                        for (ptrdiff_t ax = 0; ax < span_x; ++ax) {             \
                            dst[ax] += (CELL)(src[ax] == wanted);               \
                        }                                                       \
                    }                                                           \
                }                                                               \
            }                                                                   \
        }                                                                       \
    }                                                                           \
}

DEFINE_MATCH_COUNT_KERNEL(count_matches_i32, int32_t)
DEFINE_MATCH_COUNT_KERNEL(count_matches_u16, uint16_t)

#undef DEFINE_MATCH_COUNT_KERNEL

/*
 * Tries to run the search on 16-bit copies of the data.
 *
 * Returns 1 when result has been filled in. Returns 0, leaving result
 * untouched, when a scratch buffer could not be allocated or when some block
 * id does not fit into 16 bits (narrowing would then make different ids
 * compare equal), so the caller can fall back to the 32-bit kernel.
 */
static int count_matches_narrow(
        const int32_t *__restrict__ schem_data,
        const int32_t *__restrict__ pattern_data,
        size_t schem_size,
        size_t pattern_cells,
        int32_t schem_width,
        int32_t schem_height,
        int32_t schem_length,
        int32_t pattern_width,
        int32_t pattern_height,
        int32_t pattern_length,
        int32_t *__restrict__ result
) {
    uint16_t *cells = (uint16_t *)malloc(schem_size * sizeof *cells);
    uint16_t *counts = (uint16_t *)calloc(schem_size, sizeof *counts);
    int done = 0;

    if (cells != NULL && counts != NULL) {
        /* OR-ing all ids together reveals any bit above the low 16
         * (negative ids included) without branching inside the loop. */
        uint32_t seen_bits = 0;
        for (size_t i = 0; i < schem_size; ++i) {
            seen_bits |= (uint32_t)schem_data[i];
            cells[i] = (uint16_t)schem_data[i];
        }
        for (size_t i = 0; i < pattern_cells; ++i) {
            seen_bits |= (uint32_t)pattern_data[i];
        }

        if (seen_bits <= UINT16_MAX) {
            count_matches_u16(cells, pattern_data,
                              schem_width, schem_height, schem_length,
                              pattern_width, pattern_height, pattern_length,
                              counts);
            for (size_t i = 0; i < schem_size; ++i) {
                result[i] = counts[i];
            }
            done = 1;
        }
    }

    free(cells);
    free(counts);
    return done;
}

/*
 * For every position of the schematic, counts how many pattern cells match
 * when the pattern is anchored there.
 *
 * Volumes are addressed as  x + y * width + z * width * height.
 * NOTE(review): isMatching in this file addresses cells as
 * x + z * width + y * width * length; confirm which axis order the callers'
 * data actually uses.
 *
 * schem_data   schem_width * schem_height * schem_length block ids
 * pattern_data pattern_width * pattern_height * pattern_length block ids
 * result       output with one entry per schematic cell; it is overwritten.
 *              Anchors at which the pattern does not fit are set to 0.
 *
 * When the pattern has fewer than 65536 cells every count fits into 16 bits,
 * so the work is done on 16-bit copies, which lets twice as many cells fit in
 * one SIMD register. That path is only taken when all block ids fit into
 * 16 bits as well; otherwise, or if the scratch buffers cannot be allocated,
 * the 32-bit kernel runs directly on the caller's buffers.
 */
void is_matching_all(
        const int32_t *__restrict__ schem_data,
        const int32_t *__restrict__ pattern_data,
        int32_t schem_width,
        int32_t schem_height,
        int32_t schem_length,
        int32_t pattern_width,
        int32_t pattern_height,
        int32_t pattern_length,
        int32_t *__restrict__ result
) {
    if (schem_width <= 0 || schem_height <= 0 || schem_length <= 0) {
        return; /* empty schematic: there is nothing to read or write */
    }

    /* Widen before multiplying so large schematics cannot overflow int32. */
    const size_t schem_size =
            (size_t)schem_width * (size_t)schem_height * (size_t)schem_length;

    const int pattern_is_empty =
            pattern_width <= 0 || pattern_height <= 0 || pattern_length <= 0;

    /* The per-dimension checks bound the product below 2^48, so the 64-bit
     * multiplication cannot wrap. */
    const int counts_fit_u16 = pattern_is_empty
            || (pattern_width <= UINT16_MAX
                && pattern_height <= UINT16_MAX
                && pattern_length <= UINT16_MAX
                && (uint64_t)pattern_width * (uint64_t)pattern_height
                           * (uint64_t)pattern_length <= UINT16_MAX);

    if (counts_fit_u16) {
        const size_t pattern_cells = pattern_is_empty
                ? 0
                : (size_t)pattern_width * (size_t)pattern_height
                          * (size_t)pattern_length;
        if (count_matches_narrow(schem_data, pattern_data,
                                 schem_size, pattern_cells,
                                 schem_width, schem_height, schem_length,
                                 pattern_width, pattern_height, pattern_length,
                                 result)) {
            return;
        }
    }

    /* The kernel accumulates, so start from zero to give both paths the same
     * "result is overwritten" contract. */
    for (size_t i = 0; i < schem_size; ++i) {
        result[i] = 0;
    }
    count_matches_i32(schem_data, pattern_data,
                      schem_width, schem_height, schem_length,
                      pattern_width, pattern_height, pattern_length,
                      result);
}

View File

@@ -33,6 +33,36 @@ pub struct SearchBehavior {
pub threshold: f32,
}
// Foreign declarations for the C matching kernels (`isMatching` and
// `is_matching_all`, defined in `compare.c`).
//
// NOTE(review): every integer parameter declared here must have exactly the
// same width as the corresponding parameter of the C prototype; a mismatch is
// undefined behaviour across the FFI boundary. Confirm the `usize` parameters
// of `isMatching` against the C definition.
extern "C" {
/// Copies the pattern-sized window of `schem_data` anchored at (`x`, `y`, `z`)
/// into `w_ptr` and returns how many of the first `pattern_data_length`
/// entries of `w_ptr` equal the corresponding entry of `pattern_data`.
///
/// # Safety
///
/// * `schem_data` must be readable at every index
///   `x + i + (z + k) * schem_width + (y + j) * schem_width * schem_length`
///   for `i < pattern_width`, `k < pattern_length`, `j < pattern_height`.
/// * `pattern_data` must be readable for `pattern_data_length` elements.
/// * `w_ptr` must be writable for
///   `pattern_width * pattern_height * pattern_length` elements and readable
///   for `pattern_data_length` elements.
pub fn isMatching(
schem_data: *const i32,
pattern_data: *const i32,
pattern_data_length: usize,
x: usize,
y: usize,
z: usize,
schem_width: usize,
schem_length: usize,
pattern_width: usize,
pattern_height: usize,
pattern_length: usize,
w_ptr: *mut i32,
) -> i32;
/// Computes, for every anchor position in the schematic, how many pattern
/// cells match there, and stores the count in `result` at index
/// `x + y * schem_width + z * schem_width * schem_height`.
///
/// # Safety
///
/// * `schem_data` must be readable and `result` writable for
///   `schem_width * schem_height * schem_length` elements.
/// * `pattern_data` must be readable for
///   `pattern_width * pattern_height * pattern_length` elements.
/// * The three buffers must not overlap (the C side declares them
///   `__restrict__`).
/// * Pass a zero-filled `result`: the C implementation adds to the existing
///   contents when the pattern has 65536 or more cells.
pub fn is_matching_all(
schem_data: *const i32,
pattern_data: *const i32,
schem_width: i32,
schem_height: i32,
schem_length: i32,
pattern_width: i32,
pattern_height: i32,
pattern_length: i32,
result: *mut i32
);
}
pub fn search(
schem: SpongeSchematic,
pattern_schem: &SpongeSchematic,
@@ -62,7 +92,8 @@ pub fn search(
let air_id = if search_behavior.ignore_air || search_behavior.air_as_any { pattern_schem.palette.get("minecraft:air").unwrap_or(&-1) } else { &-1};
let pattern_blocks = pattern_schem.block_data.len() as f32;
let pattern_blocks_usize = pattern_schem.block_data.len();
let pattern_blocks = pattern_blocks_usize as f32;
let i_pattern_blocks = pattern_blocks as i32;
let pattern_width = pattern_schem.width as usize;
@@ -75,44 +106,73 @@ pub fn search(
let skip_amount = ceil((pattern_blocks * (1.0 - search_behavior.threshold)) as f64, 0) as i32;
for y in 0..=schem_height - pattern_height {
/*for y in 0..=schem_height - pattern_height {
for z in 0..=schem_length - pattern_length {
for x in 0..=schem_width - pattern_width {
let mut not_matching = 0;
'outer:
for j in 0..pattern_height {
for k in 0..pattern_length {
'inner:
for i in 0..pattern_width {
let index = (x + i) + schem_width * ((z + k) + (y + j) * schem_length);
let pattern_index = i + pattern_width * (k + j * pattern_length);
let data = unsafe { *schem_data.add(index) };
let pattern_data = unsafe { *pattern_data.add(pattern_index) };
if (search_behavior.ignore_air && data != *air_id) || (search_behavior.air_as_any && pattern_data != *air_id) {
continue 'inner;
}
if data != pattern_data {
not_matching += 1;
if not_matching >= skip_amount {
break 'outer;
}
}
}
}
}
let matching_count;
unsafe {
matching_count = isMatching(
schem_data,
pattern_data,
pattern_blocks_usize,
x,
y,
z,
schem_width,
schem_length,
pattern_width,
pattern_height,
pattern_length,
w_ptr,
);
};
if not_matching < skip_amount {
matches.push(Match {
x: x as u16,
y: y as u16,
z: z as u16,
percent: (i_pattern_blocks - not_matching) as f32 / pattern_blocks,
});
if matching_count >= i_pattern_blocks - skip_amount {
let percent = matching_count as f32 / pattern_blocks;
if percent >= search_behavior.threshold {
matches.push(Match {
x: x as u16,
y: y as u16,
z: z as u16,
percent,
});
}
}
}
}
}*/
let mut result = Vec::<i32>::with_capacity(schem_width * schem_height * schem_length);
result.resize(schem_width * schem_height * schem_length, 0);
unsafe {
is_matching_all(
schem_data,
pattern_data,
schem_width as i32,
schem_height as i32,
schem_length as i32,
pattern_width as i32,
pattern_height as i32,
pattern_length as i32,
result.as_mut_ptr()
);
}
result.into_iter().enumerate().filter(|(_, matching_count)| *matching_count >= i_pattern_blocks - skip_amount).for_each(|(i, matching_count)| {
let percent = matching_count as f32 / pattern_blocks;
let x = i % schem_width;
let y = (i / schem_width) % schem_height;
let z = i / (schem_width * schem_height);
matches.push(Match {
x: x as u16,
y: y as u16,
z: z as u16,
percent,
});
});
return matches;
}

BIN
tests/Pattern.nbt Normal file

Binary file not shown.

BIN
tests/Random.nbt Normal file

Binary file not shown.