Source code for bmtk.utils.converters.sonata.edge_converters

import os
from functools import partial
import numpy as np
import pandas as pd
import h5py

from bmtk.utils.sonata.utils import add_hdf5_magic, add_hdf5_version


column_renames = {
    'params_file': 'dynamics_params',
    'level_of_detail': 'model_type',
    'morphology': 'morphology',
    'x_soma': 'x',
    'y_soma': 'y',
    'z_soma': 'z',
    'weight_max': 'syn_weight',
    'set_params_function': 'model_template'
}


def convert_edges(edges_file, edge_types_file, **params):
    # Determine the format of the edges file and dispatch to the matching converter.
    is_flat_h5 = False
    is_new_h5 = False
    try:
        h5file = h5py.File(edges_file, 'r')
        if 'edges' in h5file:
            is_new_h5 = True
        elif 'num_syns' in h5file:
            is_flat_h5 = True
    except Exception:
        # Not a readable hdf5 file; fall through and try the csv converter.
        pass

    if is_flat_h5:
        update_aibs_edges(edges_file, edge_types_file, **params)
        return
    elif is_new_h5:
        update_h5_edges(edges_file, edge_types_file, **params)
        return

    try:
        edges_csv2h5(edges_file, **params)
        return
    except Exception as exc:
        raise exc

    raise Exception('Could not parse edges file')

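A minimal usage sketch for the dispatcher above; the file names and network labels are hypothetical, and the keyword arguments are simply forwarded to whichever updater matches the detected format.

# Hypothetical example: convert a legacy edges file to SONATA. The file names
# and network names below are placeholders, not part of bmtk.
convert_edges(
    'lgn_v1_edges.h5',            # legacy edges file
    'lgn_v1_edge_types.csv',      # matching edge-types csv
    src_network='lgn',
    trg_network='v1',
    output_dir='network'
)
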
def update_edge_types_file(edge_types_file, src_network=None, trg_network=None, output_dir='network'):
    edge_types_csv = pd.read_csv(edge_types_file, sep=' ')

    # rename required columns
    edge_types_csv = edge_types_csv.rename(index=str, columns=column_renames)

    edge_types_output_fn = os.path.join(output_dir, '{}_{}_edge_types.csv'.format(src_network, trg_network))
    edge_types_csv.to_csv(edge_types_output_fn, sep=' ', index=False, na_rep='NONE')

def update_h5_edges(edges_file, edge_types_file, src_network=None, population_name=None, trg_network=None,
                    output_dir='network'):
    population_name = population_name if population_name is not None else '{}_to_{}'.format(src_network, trg_network)
    input_h5 = h5py.File(edges_file, 'r')

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    edges_output_fn = os.path.join(output_dir, '{}_{}_edges.h5'.format(src_network, trg_network))
    with h5py.File(edges_output_fn, 'w') as h5:
        edges_path = '/edges/{}'.format(population_name)
        h5.copy(input_h5['/edges'], edges_path)
        grp = h5[edges_path]
        grp.move('source_gid', 'source_node_id')
        grp.move('target_gid', 'target_node_id')
        grp.move('edge_group', 'edge_group_id')

        if 'network' in grp['source_node_id'].attrs:
            del grp['source_node_id'].attrs['network']
        grp['source_node_id'].attrs['node_population'] = src_network

        if 'network' in grp['target_node_id'].attrs:
            del grp['target_node_id'].attrs['network']
        grp['target_node_id'].attrs['node_population'] = trg_network

        create_index(input_h5['edges/target_gid'], grp, index_type=INDEX_TARGET)
        create_index(input_h5['edges/source_gid'], grp, index_type=INDEX_SOURCE)

    update_edge_types_file(edge_types_file, src_network, trg_network, output_dir)

def update_aibs_edges(edges_file, edge_types_file, trg_network, src_network, population_name=None,
                      output_dir='output'):
    population_name = population_name if population_name is not None else '{}_to_{}'.format(src_network, trg_network)

    edges_h5 = h5py.File(edges_file, 'r')
    src_gids = edges_h5['/src_gids']
    n_edges = len(src_gids)

    # Expand the CSR-style edge_ptr offsets into a per-edge array of target gids
    trg_gids = np.zeros(n_edges, dtype=np.uint64)
    start = edges_h5['/edge_ptr'][0]
    for trg_gid, end in enumerate(edges_h5['/edge_ptr'][1:]):
        trg_gids[start:end] = [trg_gid]*(end-start)
        start = end

    edges_output_fn = os.path.join(output_dir, '{}_{}_edges.h5'.format(src_network, trg_network))
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    with h5py.File(edges_output_fn, 'w') as hf:
        add_hdf5_magic(hf)
        add_hdf5_version(hf)

        grp = hf.create_group('/edges/{}'.format(population_name))

        grp.create_dataset('target_node_id', data=trg_gids, dtype='uint64')
        grp['target_node_id'].attrs['node_population'] = trg_network
        grp.create_dataset('source_node_id', data=edges_h5['src_gids'], dtype='uint64')
        grp['source_node_id'].attrs['node_population'] = src_network
        grp.create_dataset('edge_group_id', data=np.zeros(n_edges), dtype='uint32')
        grp.create_dataset('edge_group_index', data=np.arange(0, n_edges))
        grp.create_dataset('edge_type_id', data=edges_h5['edge_types'])
        grp.create_dataset('0/nsyns', data=edges_h5['num_syns'], dtype='uint32')
        grp.create_group('0/dynamics_params')

        create_index(trg_gids, grp, index_type=INDEX_TARGET)
        create_index(src_gids, grp, index_type=INDEX_SOURCE)

    update_edge_types_file(edge_types_file, src_network, trg_network, output_dir)

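For reference, a rough sketch of the flat AIBS-style hdf5 layout that update_aibs_edges() consumes. The dataset names are taken from the code above; the file name and toy values are invented.

# Toy example of the legacy (flat) edges layout read by update_aibs_edges().
import h5py

with h5py.File('legacy_edges.h5', 'w') as h5:                   # hypothetical file name
    h5.create_dataset('src_gids', data=[0, 1, 0, 2])            # source gid per edge
    h5.create_dataset('edge_ptr', data=[0, 2, 4])               # CSR-style offsets, one row per target gid
    h5.create_dataset('num_syns', data=[3, 1, 2, 5])            # synapse count per edge
    h5.create_dataset('edge_types', data=[100, 100, 101, 101])  # edge-type id per edge
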
def edges_csv2h5(edges_file, edge_types_file, src_network, src_nodes, src_node_types, trg_network, trg_nodes,
                 trg_node_types, output_dir='network', src_label='location', trg_label='pop_name'):
    """Used to convert oldest (isee engine) edges files

    :param edges_file:
    :param edge_types_file:
    :param src_network:
    :param src_nodes:
    :param src_node_types:
    :param trg_network:
    :param trg_nodes:
    :param trg_node_types:
    :param output_dir:
    :param src_label:
    :param trg_label:
    """
    column_renames = {
        'target_model_id': 'node_type_id',
        'weight': 'weight_max',
        'weight_function': 'weight_func',
    }

    columns_order = ['edge_type_id', 'target_query', 'source_query']

    edges_h5 = h5py.File(edges_file, 'r')
    edge_types_df = pd.read_csv(edge_types_file, sep=' ')
    n_edges = len(edges_h5['src_gids'])
    n_targets = len(edges_h5['indptr']) - 1

    # rename specified columns in edge-types
    edge_types_df = edge_types_df.rename(columns=column_renames)

    # Add a "target_query" and "source_query" columns from target_label and source_label
    def query_col(row, labels, search_col):
        return '&'.join("{}=='{}'".format(l, row[search_col]) for l in labels)
    trg_query_fnc = partial(query_col, labels=['node_type_id', trg_label], search_col='target_label')
    src_query_fnc = partial(query_col, labels=[src_label], search_col='source_label')

    edge_types_df['target_query'] = edge_types_df.apply(trg_query_fnc, axis=1)
    edge_types_df['source_query'] = edge_types_df.apply(src_query_fnc, axis=1)

    # Add an edge_type_id column
    edge_types_df['edge_type_id'] = np.arange(100, 100 + len(edge_types_df.index), dtype='uint32')

    nodes_tmp = pd.read_csv(src_nodes, sep=' ', index_col=['id'])
    node_types_tmp = pd.read_csv(src_node_types, sep=' ')
    src_nodes_df = pd.merge(nodes_tmp, node_types_tmp, on='model_id')

    nodes_tmp = pd.read_csv(trg_nodes, sep=' ', index_col=['id'])
    node_types_tmp = pd.read_csv(trg_node_types, sep=' ')
    trg_nodes_df = pd.merge(nodes_tmp, node_types_tmp, on='model_id')

    # For assigning edge types to each edge. For a given src --> trg pair we need to lookup source_label and
    # target_label values of the nodes, then use it to find the corresponding edge_types row.
    print('Processing edge_type_id dataset')
    edge_types_ids = np.zeros(n_edges, dtype='uint32')
    edge_types_df = edge_types_df.set_index(['node_type_id', 'target_label', 'source_label'])
    ten_percent = int(n_targets*.1)  # for keeping track of progress
    index = 0  # keeping track of row index
    for trg_gid in range(n_targets):
        # for the target find value node_type_id and target_label
        nodes_row = trg_nodes_df.loc[trg_gid]
        model_id = nodes_row['model_id']
        trg_label_val = nodes_row[trg_label]

        # iterate through all the sources
        idx_begin = edges_h5['indptr'][trg_gid]
        idx_end = edges_h5['indptr'][trg_gid+1]
        for src_gid in edges_h5['src_gids'][idx_begin:idx_end]:
            # find each source_label, use value to find edge_type_id
            # TODO: may be faster to filter by model_id, trg_label_val before iterating through the sources
            src_label_val = src_nodes_df.loc[src_gid][src_label]
            edge_type_id = edge_types_df.loc[model_id, trg_label_val, src_label_val]['edge_type_id']
            edge_types_ids[index] = edge_type_id
            index += 1

        if trg_gid % ten_percent == 0 and trg_gid != 0:
            print(' processed {} out of {} targets'.format(trg_gid, n_targets))

    # Create the target_gid table
    print('Creating target_gid dataset')
    trg_gids = np.zeros(n_edges)
    for trg_gid in range(n_targets):
        idx_begin = edges_h5['indptr'][trg_gid]
        idx_end = edges_h5['indptr'][trg_gid+1]
        trg_gids[idx_begin:idx_end] = [trg_gid]*(idx_end - idx_begin)

    # Save edges.h5
    edges_output_fn = '{}/{}_{}_edges.h5'.format(output_dir, src_network, trg_network)
    print('Saving edges to {}.'.format(edges_output_fn))
    with h5py.File(edges_output_fn, 'w') as hf:
        hf.create_dataset('edges/target_gid', data=trg_gids, dtype='uint64')
        hf['edges/target_gid'].attrs['node_population'] = trg_network
        hf.create_dataset('edges/source_gid', data=edges_h5['src_gids'], dtype='uint64')
        hf['edges/source_gid'].attrs['node_population'] = src_network
        hf.create_dataset('edges/index_pointer', data=edges_h5['indptr'])
        hf.create_dataset('edges/edge_group', data=np.zeros(n_edges), dtype='uint32')
        hf.create_dataset('edges/edge_group_index', data=np.arange(0, n_edges))
        hf.create_dataset('edges/edge_type_id', data=edge_types_ids)
        hf.create_dataset('edges/0/nsyns', data=edges_h5['nsyns'], dtype='uint32')

    # Save edge_types.csv
    update_edge_types_file(edge_types_file, src_network, trg_network, output_dir)
    '''
    edges_types_output_fn = '{}/{}_{}_edge_types.csv'.format(output_dir, src_network, trg_network)
    print('Saving edge-types to {}'.format(edges_types_output_fn))
    edge_types_df = edge_types_df[edge_types_df['edge_type_id'].isin(np.unique(edge_types_ids))]
    # reorder columns
    reorderd_cols = columns_order + [cn for cn in edge_types_df.columns.tolist() if cn not in columns_order]
    edge_types_df = edge_types_df[reorderd_cols]
    edge_types_df.to_csv(edges_types_output_fn, sep=' ', index=False, na_rep='NONE')
    '''

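A hedged example of invoking the csv converter above. Every path is a placeholder; the node and node-type csv files are assumed to be space-separated with 'id' and 'model_id' columns, matching the reads performed in the function.

# Hypothetical call converting an old isee-engine style network; all file names are made up.
edges_csv2h5(
    edges_file='v1_v1_connections.h5',
    edge_types_file='v1_v1_connection_types.csv',
    src_network='v1', src_nodes='v1_nodes.csv', src_node_types='v1_node_types.csv',
    trg_network='v1', trg_nodes='v1_nodes.csv', trg_node_types='v1_node_types.csv',
    output_dir='network'
)
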
INDEX_TARGET = 0
INDEX_SOURCE = 1

def create_index(node_ids_ds, output_grp, index_type=INDEX_TARGET):
    # Build a node-id --> edge-rows lookup index and write it under the population group.
    if index_type == INDEX_TARGET:
        edge_nodes = np.array(node_ids_ds, dtype=np.int64)
        output_grp = output_grp.create_group('indicies/target_to_source')
    elif index_type == INDEX_SOURCE:
        edge_nodes = np.array(node_ids_ds, dtype=np.int64)
        output_grp = output_grp.create_group('indicies/source_to_target')

    edge_nodes = np.append(edge_nodes, [-1])  # sentinel so the final range gets closed
    n_targets = np.max(edge_nodes)
    ranges_list = [[] for _ in range(n_targets + 1)]

    # Group consecutive edges that share the same node id into (begin, end) ranges
    n_ranges = 0
    begin_index = 0
    cur_trg = edge_nodes[begin_index]
    for end_index, trg_gid in enumerate(edge_nodes):
        if cur_trg != trg_gid:
            ranges_list[cur_trg].append((begin_index, end_index))
            cur_trg = int(trg_gid)
            begin_index = end_index
            n_ranges += 1

    # node_id_to_range[i] gives the [begin, end) rows of range_to_edge_id for node i;
    # each row of range_to_edge_id is a [begin, end) span of edge indices.
    node_id_to_range = np.zeros((n_targets+1, 2))
    range_to_edge_id = np.zeros((n_ranges, 2))
    range_index = 0
    for node_index, trg_ranges in enumerate(ranges_list):
        if len(trg_ranges) > 0:
            node_id_to_range[node_index, 0] = range_index
            for r in trg_ranges:
                range_to_edge_id[range_index, :] = r
                range_index += 1
            node_id_to_range[node_index, 1] = range_index

    output_grp.create_dataset('range_to_edge_id', data=range_to_edge_id, dtype='uint64')
    output_grp.create_dataset('node_id_to_range', data=node_id_to_range, dtype='uint64')
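
A sketch of how the index written by create_index() might be read back to find all edges terminating on a given target node. The group and dataset names come from the code above; the file name, population name, and node id are hypothetical.

# Look up the edges of one target node via the target_to_source index.
import h5py

with h5py.File('network/lgn_v1_edges.h5', 'r') as h5:   # hypothetical output file
    grp = h5['/edges/lgn_to_v1']                        # hypothetical population name
    idx = grp['indicies/target_to_source']
    node_id = 10                                        # arbitrary target node
    range_begin, range_end = (int(v) for v in idx['node_id_to_range'][node_id])
    for edge_begin, edge_end in idx['range_to_edge_id'][range_begin:range_end]:
        # Each row is a [begin, end) span of rows in the edge datasets
        print(grp['source_node_id'][int(edge_begin):int(edge_end)])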