Source code for pysgems.dis.sgdis

#  Copyright (c) 2020. Robin Thibaut, Ghent University

import os
import shutil
import time
from os.path import join as jp

import numpy as np
import pandas as pd
from loguru import logger

from pysgems.base.packbase import Package


[docs]def blocks_from_rc( rows: np.array, columns: np.array, layers: np.array, xo: float = 0, yo: float = 0, zo: float = 0, ): """ Yields blocks defining grid cells :param rows: array of x-widths along a row :param columns: array of y-widths along a column :param layers: array of z-widths along a column :param xo: x origin :param yo: y origin :param zo: z origin :return: generator of (cell node number, block vertices coordinates, block center) """ nrow = len(rows) ncol = len(columns) nlay = len(layers) delr = rows delc = columns dell = layers r_sum = np.cumsum(delr) + yo c_sum = np.cumsum(delc) + xo l_sum = np.cumsum(dell) + zo def get_node(r: int, c: int, h: int) -> int: """ Get node index to fix hard data :param r: row number :param c: column number :param h: layer number :return: node number """ nrc = nrow * ncol return int((h * nrc) + (r * ncol) + c) for k in range(nlay): for i in range(nrow): for j in range(ncol): b = [ [c_sum[j] - delc[j], r_sum[i] - delr[i], l_sum[k] - dell[k]], [c_sum[j], r_sum[i] - delr[i], l_sum[k] - dell[k]], [c_sum[j] - delc[j], r_sum[i], l_sum[k] - dell[k]], [c_sum[j], r_sum[i], l_sum[k] - dell[k]], [c_sum[j] - delc[j], r_sum[i] - delr[i], l_sum[k]], [c_sum[j], r_sum[i] - delr[i], l_sum[k]], [c_sum[j] - delc[j], r_sum[i], l_sum[k]], [c_sum[j], r_sum[i], l_sum[k]], ] yield get_node(i, j, k), np.array(b), np.mean(b, axis=0)
[docs]class Discretize(Package):
[docs] def __init__( self, project, dx: float = 1, dy: float = 1, dz: float = 1, xo: float = None, yo: float = None, zo: float = None, x_lim: float = None, y_lim: float = None, z_lim: float = None, ): """ Constructs the grid geometry. The user can not control directly the number of rows and columns but instead inputs the cell size in x and y dimensions. xo, yo, x_lim, y_lim, defining the bounding box of the grid, are None by default, and are computed based on the data points distribution. :param project: sgems project :param dx: cell size in x dimension :param dy: cell size in y dimension :param dz: cell size in z dimension :param xo: x origin :param yo: y origin :param zo: z origin :param x_lim: x limit :param y_lim: y limit :param z_lim: z limit """ Package.__init__(self, project) self.cell_file = None self.dx = dx self.dy = dy self.dz = dz if self.parent.point_set is not None: if self.parent.point_set.dimension == 2: self.dz = 0 # Grid origins # If a PointSet is already loaded into the project, automatically defines the limits # (if no limits are given) if xo is None: if self.parent.point_set is not None: xs = self.parent.point_set.dataframe["x"] xo = np.min(xs) - 4 * self.dx else: xo = 0 if yo is None: if self.parent.point_set is not None: ys = self.parent.point_set.dataframe["y"] yo = np.min(ys) - 4 * self.dy else: yo = 0 if zo is None: if self.parent.point_set is not None: zs = self.parent.point_set.dataframe["z"] zo = np.min(zs) - 4 * self.dz else: zo = 0 # Grid limits if x_lim is None: if self.parent.point_set is not None: xs = self.parent.point_set.dataframe["x"] x_lim = np.max(xs) + 4 * self.dx else: x_lim = 1 if y_lim is None: if self.parent.point_set is not None: ys = self.parent.point_set.dataframe["y"] y_lim = np.max(ys) + 4 * self.dy else: y_lim = 1 if z_lim is None: if self.parent.point_set is not None: zs = self.parent.point_set.dataframe["z"] z_lim = np.max(zs) + 4 * self.dz else: z_lim = 1 # Cell dimensions if self.dy > 0: nrow = abs(int((y_lim - yo) // self.dy)) # Number of rows else: nrow = 1 if self.dx > 0: ncol = abs(int((x_lim - xo) // self.dx)) # Number of columns else: ncol = 1 if self.dz > 0: nlay = abs(int((z_lim - zo) // self.dz)) # Number of layers else: nlay = 1 # Size of each cell along y-dimension - rows along_r = np.ones(ncol) * self.dx * np.sign(x_lim) # Size of each cell along x-dimension - columns along_c = np.ones(nrow) * self.dy * np.sign(y_lim) # Size of each cell along x-dimension - columns along_l = np.ones(nlay) * self.dz * np.sign(z_lim) self.xo = xo self.yo = yo self.zo = zo self.x_lim = x_lim self.y_lim = y_lim self.z_lim = z_lim self.nrow = nrow self.ncol = ncol self.nlay = nlay self.along_r = along_r self.along_c = along_c self.along_l = along_l setattr(self.parent, "dis", self)
[docs] def my_cell(self, xyz: np.array): """ Given a point coordinate xyz [x, y, z], computes its cell number by computing the euclidean distance of each cell center. :param xyz: x, y, z coordinate of data point :return: cell number """ start = time.time() rn = np.array(xyz) # first check if point is within the grid # crit = 0 # if np.all(rn >= np.array([self.xo, self.yo, self.zo])) and np.all( # rn <= np.array([self.x_lim, self.y_lim, self.z_lim]) # ): # crit = 1 crit = 1 if crit: # if point inside if self.parent.point_set.dimension == 3: # if 3D # minimum distance under which a point is in a cell dmin = np.min([self.dx, self.dy, self.dz]) / 2 else: # if 2D # minimum distance under which a point is in a cell dmin = np.min([self.dx, self.dy]) / 2 blocks = blocks_from_rc( self.along_c, self.along_r, self.along_l, self.xo, self.yo, self.zo ) # cell generator # mapping data points to cells: # slow but memory-effective method vmin = np.inf cell = None for b in blocks: c = b[2] dc = np.linalg.norm(rn - c) # Euclidean distance if dc <= dmin: # If point is inside cell logger.info("found 1 cell id in {} s".format(time.time() - start)) return b[0] if dc < vmin: vmin = dc cell = b[0] logger.info("found 1 cell id in {} s".format(time.time() - start)) return cell else: return self.parent.nodata
[docs] def compute_cells(self, xyz: np.array): """ Determines cell location for each data point. It is necessary to know the cell number to assign the hard data property to the sgems grid. :param xyz: Data points x, y, z coordinates """ nodes = np.array([self.my_cell(c) for c in xyz]) # Save to nodes to avoid recomputing each time np.save(self.cell_file, nodes)
[docs] def write_hard_data( self, subdata: pd.DataFrame, dis_file: str = None, cell_file: str = None, output_dir: str = None, ): """ Removes no-data rows from data frame and compute the mean of data points sharing the same cell. Export the list of shape (n features, m cells, 2) containing the node of each point data with the corresponding value, for each feature. :param dis_file: path to the dis file :param cell_file: path to the cell file :param subdata: Pandas dataframe whose columns are the values of features to save as hard data. :param output_dir: Directory where hard data lists will be saved :return: Filtered list of each attribute """ if cell_file is not None: self.cell_file = cell_file # path to the cell file if self.cell_file is None: self.cell_file = jp( self.parent.res_dir, "cells.npy" ) # path to the cell file if output_dir is None: output_dir = self.parent.res_dir # path to the parent directory if dis_file is None: dis_file = jp( os.path.dirname(self.cell_file), "grid.dis" ) # path to the dis file xyz = subdata[["x", "y", "z"]].to_numpy() # coordinates of data points npar = np.array( [ self.dx, self.dy, self.dz, self.xo, self.yo, self.zo, self.x_lim, self.y_lim, self.z_lim, self.nrow, self.ncol, self.nlay, ] ) # grid parameters if os.path.isfile(dis_file): # Check previous grid info pdis = np.loadtxt(dis_file) # Load previous grid info # If different, recompute data points cell by deleting previous cell file if not np.array_equal(pdis, npar): logger.info("New grid found") try: os.remove(self.cell_file) except FileNotFoundError: pass finally: np.savetxt(dis_file, npar) self.compute_cells(xyz) # Compute cell number for each data point else: logger.info("Using previous grid") if not os.path.exists(self.cell_file): self.compute_cells(xyz) # Compute cell number for each data point else: np.savetxt(dis_file, npar) # Save grid info self.compute_cells(xyz) # Compute cell number for each data point data_nodes = np.load(self.cell_file) # Load cell number for each data point unique_nodes = list(set(data_nodes)) # List of unique cell numbers h = subdata.columns.values[-1] # Name of the attribute to save as hard data hd = [] # fixed nodes = [[node i, value i]....] fixed_nodes = np.array( [[data_nodes[dn], subdata[h][dn]] for dn in range(len(data_nodes))] ) # List of cell number and corresponding value # Deletes points where val == nodata hard_data = np.delete( fixed_nodes, np.where(fixed_nodes == self.parent.nodata)[0], axis=0 ) # Delete no-data points # If data points share the same cell, compute their mean and assign the value to the cell for n in unique_nodes: where = np.where(hard_data[:, 0] == n)[0] if len(where) > 1: # If more than 1 point per cell mean = np.mean(hard_data[where, 1]) hard_data[where, 1] = mean fn = hard_data.tolist() # For each, feature X, saves a file X.hard cell_values_name = jp(os.path.dirname(self.cell_file), f"{h}.hard") with open(cell_values_name, "w") as nd: nd.write(repr(fn)) # Save list of cell number and corresponding value try: shutil.copyfile( cell_values_name, cell_values_name.replace( os.path.dirname(cell_values_name), output_dir ), ) except shutil.SameFileError: pass setattr(self.parent, "hard_data", hd.append(h))