mirror of
https://github.com/Chaoscaot/schemsearch.git
synced 2025-12-27 00:17:06 +01:00
Refactor code structure and improve performance by optimizing OpenCL kernel and adding timing macros; update Cargo.toml for release profile settings; enhance main.rs and sinks.rs for better readability and organization.
This commit is contained in:
@@ -1,51 +1,35 @@
|
||||
__kernel void add(__global int* result,
|
||||
__global uint* schem,
|
||||
__global uint* pattern,
|
||||
const int p_width,
|
||||
const int p_height,
|
||||
const int p_depth,
|
||||
const uint air_id,
|
||||
const int ignore_air,
|
||||
const int air_as_any,
|
||||
const int skipamount) {
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int z = get_global_id(2);
|
||||
|
||||
int width = get_global_size(0);
|
||||
int height = get_global_size(1);
|
||||
int depth = get_global_size(2);
|
||||
|
||||
if (x > width - p_width || y > height - p_height || z > depth - p_depth) {
|
||||
return;
|
||||
}
|
||||
|
||||
int wrong_blocks = 0;
|
||||
for (int py = 0; py < p_height; py++) {
|
||||
for (int pz = 0; pz < p_depth; pz++) {
|
||||
for (int px = 0; px < p_width; px++) {
|
||||
int s_idx = (x + px) + width * ((z + pz) + (y + py) * depth);
|
||||
int p_idx = px + p_width * (pz + py * p_depth);
|
||||
|
||||
uint schem_block = schem[s_idx];
|
||||
uint pattern_block = pattern[p_idx];
|
||||
// Use 3d_img
|
||||
// Fewer buffer allocations
|
||||
// Parallelize the pattern with local workers?
|
||||
// To Match on GPU
|
||||
// Fewer workers, more parameters!
|
||||
// Pattern as a kernel constant
|
||||
|
||||
if ((ignore_air && schem_block != air_id) || (air_as_any && pattern_block != air_id)) {
|
||||
continue;
|
||||
}
|
||||
__kernel void add(__global int *result, __global uint *schem,
|
||||
__constant uint *pattern, const int width, const int height,
|
||||
const int depth, const int p_width, const int p_height,
|
||||
const int p_depth, const uint air_id, const int ignore_air,
|
||||
const int air_as_any, const int skipamount) {
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(2);
|
||||
int z = get_global_id(1);
|
||||
|
||||
if (schem_block != pattern_block) {
|
||||
wrong_blocks++;
|
||||
if (wrong_blocks > skipamount) {
|
||||
int idx = x + z * width + y * width * depth;
|
||||
result[idx] = wrong_blocks;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
int wrong_blocks = 0;
|
||||
for (int py = 0; py < p_height; py++) {
|
||||
for (int pz = 0; pz < p_depth; pz++) {
|
||||
for (int px = 0; px < p_width; px++) {
|
||||
// if ((ignore_air && schem_block != air_id) || (air_as_any &&
|
||||
// pattern_block != air_id)) {
|
||||
// continue; // TODO: PROBLEM!
|
||||
// }
|
||||
|
||||
wrong_blocks +=
|
||||
schem[(x + px) + width * ((z + pz) + (y + py) * depth)] !=
|
||||
pattern[px + p_width * (pz + py * p_depth)];
|
||||
}
|
||||
}
|
||||
|
||||
int idx = x + z * width + y * width * depth;
|
||||
result[idx] = wrong_blocks;
|
||||
}
|
||||
|
||||
int idx = x + z * width + y * width * depth;
|
||||
result[idx] = wrong_blocks;
|
||||
}
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
use std::sync::OnceLock;
|
||||
use ocl::{Buffer, MemFlags, ProQue, core};
|
||||
use ocl::SpatialDims::Three;
|
||||
use schemsearch_common::{Match, SearchBehavior};
|
||||
use math::round::ceil;
|
||||
use ocl::SpatialDims::Three;
|
||||
use ocl::{core, Buffer, Image, MemFlags, ProQue};
|
||||
use schemsearch_common::{time, Match, SearchBehavior};
|
||||
use std::sync::OnceLock;
|
||||
use std::time;
|
||||
|
||||
const KERNEL: &str = include_str!("kernel.cl");
|
||||
|
||||
@@ -20,7 +21,15 @@ pub fn ocl_search(
|
||||
air_id: i32,
|
||||
search_behavior: SearchBehavior,
|
||||
) -> Result<Vec<Match>, String> {
|
||||
search_ocl(schem, schem_size, pattern, pattern_size, air_id, search_behavior).map_err(|e| e.to_string())
|
||||
search_ocl(
|
||||
schem,
|
||||
schem_size,
|
||||
pattern,
|
||||
pattern_size,
|
||||
air_id,
|
||||
search_behavior,
|
||||
)
|
||||
.map_err(|e| e.to_string())
|
||||
}
|
||||
|
||||
fn search_ocl(
|
||||
@@ -38,59 +47,79 @@ fn search_ocl(
|
||||
let schem_width = schem_size[0];
|
||||
let schem_height = schem_size[1];
|
||||
let schem_length = schem_size[2];
|
||||
|
||||
|
||||
let pattern_blocks = (pattern_width * pattern_height * pattern_length) as f32;
|
||||
|
||||
let skip_amount = ceil((pattern_blocks * (1.0 - search_behavior.threshold)) as f64, 0) as i32;
|
||||
|
||||
let skip_amount = ceil(
|
||||
(pattern_blocks * (1.0 - search_behavior.threshold)) as f64,
|
||||
0,
|
||||
) as i32;
|
||||
|
||||
let cell = &PRO_QUEU_CELL;
|
||||
let mut pro_que = cell.get_or_init(|| {
|
||||
ProQue::builder()
|
||||
.src(KERNEL)
|
||||
.build().unwrap()
|
||||
}).clone();
|
||||
let mut pro_que = time!(get_pro_que, {
|
||||
cell.get_or_init(|| ProQue::builder().src(KERNEL).build().unwrap())
|
||||
.clone()
|
||||
});
|
||||
|
||||
pro_que.set_dims(Three(schem_width, schem_length, schem_height));
|
||||
|
||||
pro_que.set_dims(Three(schem_width, schem_height, schem_length));
|
||||
let buffer = time!(create_result_buffer, {
|
||||
Buffer::builder()
|
||||
.queue(pro_que.queue().clone())
|
||||
.flags(MemFlags::new().read_write())
|
||||
.fill_val(-1)
|
||||
.len(schem.len())
|
||||
.build()
|
||||
})?;
|
||||
|
||||
let buffer = Buffer::builder()
|
||||
.queue(pro_que.queue().clone())
|
||||
.flags(MemFlags::new().read_write())
|
||||
.fill_val(-1)
|
||||
.len(schem.len())
|
||||
.build()?;
|
||||
let schem_buffer = time!(create_schen_buffer, {
|
||||
create_schem_buffer(schem, &pro_que)
|
||||
})?;
|
||||
|
||||
let schem_buffer = create_schem_buffer(schem, &pro_que)?;
|
||||
let pattern_buffer = time!(create_pattern_buffer, {
|
||||
create_schem_buffer(pattern, &pro_que)
|
||||
})?;
|
||||
|
||||
let pattern_buffer = create_schem_buffer(pattern, &pro_que)?;
|
||||
let kernel = time!(create_kernel, {
|
||||
pro_que
|
||||
.kernel_builder("add")
|
||||
.arg(&buffer)
|
||||
.arg(&schem_buffer)
|
||||
.arg(&pattern_buffer)
|
||||
.arg(schem_width as i32)
|
||||
.arg(schem_height as i32)
|
||||
.arg(schem_length as i32)
|
||||
.arg(pattern_width as i32)
|
||||
.arg(pattern_height as i32)
|
||||
.arg(pattern_length as i32)
|
||||
.arg(air_id)
|
||||
.arg(search_behavior.ignore_air as u32)
|
||||
.arg(search_behavior.air_as_any as u32)
|
||||
.arg(skip_amount)
|
||||
.build()
|
||||
})?;
|
||||
|
||||
let kernel = pro_que.kernel_builder("add")
|
||||
.arg(&buffer)
|
||||
.arg(&schem_buffer)
|
||||
.arg(&pattern_buffer)
|
||||
.arg(pattern_width as i32)
|
||||
.arg(pattern_height as i32)
|
||||
.arg(pattern_length as i32)
|
||||
.arg(air_id) // air_id
|
||||
.arg(search_behavior.ignore_air as u32) // ignore_air
|
||||
.arg(search_behavior.air_as_any as u32) // air_as_any
|
||||
.arg(skip_amount)
|
||||
.build()?;
|
||||
unsafe {
|
||||
time!(run_kernel, { kernel.enq() })?;
|
||||
}
|
||||
|
||||
unsafe { kernel.enq()?; }
|
||||
let mut vec = vec![0; buffer.len()];
|
||||
time!(read_buffer, {
|
||||
buffer.read(&mut vec).enq()?;
|
||||
});
|
||||
|
||||
let mut vec = vec![0i32; buffer.len()];
|
||||
buffer.read(&mut vec).enq()?;
|
||||
|
||||
Ok(vec.into_iter().enumerate().filter(|(_, v)| *v < skip_amount && *v != -1).map(|(i, v)| {
|
||||
Match {
|
||||
Ok(vec
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.filter(|(_, v)| *v < skip_amount && *v != -1)
|
||||
.map(|(i, v)| Match {
|
||||
x: (i % schem_width) as u16,
|
||||
y: ((i / (schem_width * schem_length)) % schem_height) as u16,
|
||||
z: ((i / schem_width) % schem_length) as u16,
|
||||
|
||||
percent: (pattern_blocks - v as f32) / pattern_blocks,
|
||||
}
|
||||
}).collect())
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
fn create_schem_buffer(pattern: &[i32], pro_que: &ProQue) -> ocl::Result<Buffer<i32>> {
|
||||
@@ -98,6 +127,7 @@ fn create_schem_buffer(pattern: &[i32], pro_que: &ProQue) -> ocl::Result<Buffer<
|
||||
.queue(pro_que.queue().clone())
|
||||
.flags(MemFlags::new().read_only())
|
||||
.len(pattern.len())
|
||||
// Host Memory Map?
|
||||
.copy_host_slice(pattern)
|
||||
.build()
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user