"""Miscellaneous functions used for analyses."""
import logging
import multiprocessing as mp
import os
import nibabel as nib
import numpy as np
from neuromaps.datasets import fetch_atlas
from nibabel.gifti import GiftiDataArray
from scipy.cluster.hierarchy import leaves_list, linkage, optimal_leaf_ordering
from sklearn.metrics import pairwise_distances
LGR = logging.getLogger(__name__)
# Number of vertices in total without the medial wall
N_VERTICES = {
"fsLR": {
"32k": 59412,
"164k": 298261,
},
"fsaverage": {
"3k": 4661,
"10k": 18715,
"41k": 74947,
"164k": 299881,
},
"civet": {
"41k": 76910,
},
}
# Number of vertices per hemisphere including the medial wall
N_VERTICES_PH = {
"fsLR": {
"32k": 32492,
"164k": 163842,
},
"fsaverage": {
"3k": 2562,
"10k": 10242,
"41k": 40962,
"164k": 163842,
},
"civet": {
"41k": 40962,
},
}
def _reorder_matrix(mat, reorder):
"""Reorder a matrix.
This function reorders the provided matrix. It was adaptes from
nilearn.plotting.plot_matrix._reorder_matrix.
License
-------
New BSD License
Copyright (c) 2007 - 2023 The nilearn developers.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
a. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
b. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
c. Neither the name of the nilearn developers nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
"""
reorder = "average" if reorder is True else reorder
linkage_matrix = linkage(mat, method=reorder)
ordered_linkage = optimal_leaf_ordering(linkage_matrix, mat)
return leaves_list(ordered_linkage)
[docs]
def get_data_dir(data_dir=None):
"""Get path to gradec data directory.
Parameters
----------
data_dir : str, optional
Path to use as data directory. If not specified, will check for
environmental variable 'GRADEC_DATA'; if that is not set, will
use `~/gradec-data` instead. Default: None
Returns
-------
data_dir : str
Path to use as data directory
Notes
-----
Taken from Neuromaps.
https://github.com/netneurolab/neuromaps/blob/abf5a5c3d3d011d644b56ea5c6a3953cedd80b37/
neuromaps/datasets/utils.py#LL91C1-L115C20
"""
if data_dir is None:
data_dir = os.environ.get("GRADEC_DATA", os.path.join("~", "gradec-data"))
data_dir = os.path.expanduser(data_dir)
if not os.path.exists(data_dir):
os.makedirs(data_dir)
return data_dir
def _rm_medial_wall(
data_lh,
data_rh,
space="fsLR",
density="32k",
join=True,
neuromaps_dir=None,
):
"""Remove medial wall from data in fsLR space.
Data in 32k fs_LR space (e.g., Human Connectome Project data) often in
GIFTI format include the medial wall in their data arrays, which results
in a total of 64984 vertices across hemispheres. This function removes
the medial wall vertices to produce a data array with the full 59412 vertices,
which is used to perform functional decoding.
This function was adapted from :func:`surfplot.utils.add_fslr_medial_wall`.
Parameters
----------
data : numpy.ndarray
Surface vertices. Must have exactly 32492 vertices per hemisphere.
join : bool
Return left and right hemipsheres in the same arrays. Default: True.
Returns
-------
numpy.ndarray
Vertices with medial wall excluded (59412 vertices total).
ValueError
------
`data` has the incorrect number of vertices (59412 or 64984 only
accepted)
"""
assert data_lh.shape[0] == N_VERTICES_PH[space][density]
assert data_rh.shape[0] == N_VERTICES_PH[space][density]
atlas = fetch_atlas(space, density, data_dir=neuromaps_dir, verbose=0)
medial_lh, medial_rh = atlas["medial"]
wall_lh = nib.load(medial_lh).agg_data()
wall_rh = nib.load(medial_rh).agg_data()
data_lh = data_lh[np.where(wall_lh != 0)]
data_rh = data_rh[np.where(wall_rh != 0)]
if not join:
return data_lh, data_rh
data = np.hstack((data_lh, data_rh))
assert data.shape[0] == N_VERTICES[space][density]
return data
def _zero_medial_wall(data_lh, data_rh, space="fsLR", density="32k", neuromaps_dir=None):
"""Remove medial wall from data in fsLR space."""
atlas = fetch_atlas(space, density, data_dir=neuromaps_dir, verbose=0)
medial_lh, medial_rh = atlas["medial"]
medial_arr_lh = nib.load(medial_lh).agg_data()
medial_arr_rh = nib.load(medial_rh).agg_data()
data_arr_lh = data_lh.agg_data()
data_arr_rh = data_rh.agg_data()
data_arr_lh[np.where(medial_arr_lh == 0)] = 0
data_arr_rh[np.where(medial_arr_rh == 0)] = 0
data_lh.remove_gifti_data_array(0)
data_rh.remove_gifti_data_array(0)
data_lh.add_gifti_data_array(GiftiDataArray(data_arr_lh))
data_rh.add_gifti_data_array(GiftiDataArray(data_arr_rh))
return data_lh, data_rh
def _affinity(matrix, sparsity):
"""Compute affinity matrix from a matrix of vectors."""
# Generate percentile thresholds for 90th percentile
perc = np.array([np.percentile(x, sparsity) for x in matrix])
# Threshold each row of the matrix by setting values below 90th percentile to 0
for i in range(matrix.shape[0]):
matrix[i, matrix[i, :] < perc[i]] = 0
matrix[matrix < 0] = 0
# Now we are dealing with sparse vectors. Cosine similarity is used as affinity metric
matrix = 1 - pairwise_distances(matrix, metric="cosine")
return matrix
def _get_resource_path():
"""Return the path to general resources, terminated with separator.
Resources are kept outside package folder in "datasets".
Based on function by Yaroslav Halchenko used in Neurosynth Python package.
"""
return os.path.abspath(os.path.join(os.path.dirname(__file__), "resources") + os.path.sep)
def _check_ncores(n_cores):
"""Check number of cores used for method. Taken from nimare.utils."""
if n_cores <= 0:
n_cores = mp.cpu_count()
elif n_cores > mp.cpu_count():
LGR.warning(
f"Desired number of cores ({n_cores}) greater than number "
f"available ({mp.cpu_count()}). Setting to {mp.cpu_count()}."
)
n_cores = mp.cpu_count()
return n_cores
def _conform_features(features, model_nm, n_top_words):
"""Conform features to a standard format."""
if model_nm in ["lda", "gclda"]:
# TODO: this need to be fixed for LDA, since the some topic are filtered by the threshold.
return [f"{i+1}_{'_'.join(feature[:n_top_words])}" for i, feature in enumerate(features)]
else:
return [feature[0] for feature in features]
def _decoding_filter(
corrs_df,
features,
classification,
pos_corr=True,
freq_by_topic=None,
class_by_topic=None,
class_to_keep=["Functional"],
):
"""Filter decoding results by classification."""
keep = np.array([c_i for c_i, class_ in enumerate(classification) if class_ in class_to_keep])
if pos_corr:
keep = np.intersect1d(keep, np.where(corrs_df > 0)[0])
filtered_df = corrs_df.iloc[keep]
if freq_by_topic is None and class_by_topic is None:
features_flat = [item for sublist in features for item in sublist]
filtered_features = np.array(features_flat, dtype=object)[keep]
return filtered_df, list(filtered_features)
features = np.array(features, dtype=object)[keep]
freq_by_topic = np.array(freq_by_topic, dtype=object)[keep]
class_by_topic = np.array(class_by_topic, dtype=object)[keep]
classification = np.array(classification, dtype=object)[keep]
filtered_features = []
filtered_frequencies = []
for features_wtt, frequencies_wtt, class_wtt, class_ in zip(
features, freq_by_topic, class_by_topic, classification
):
keep_wtt = np.array(
[c_i for c_i, sub_class in enumerate(class_wtt) if sub_class in class_to_keep]
)
if class_ in class_to_keep:
features_to_keep = np.array(features_wtt)[keep_wtt]
frequencies_to_keep = np.array(frequencies_wtt)[keep_wtt]
filtered_features.append(list(features_to_keep))
filtered_frequencies.append(list(frequencies_to_keep))
return filtered_df, filtered_features, filtered_frequencies