CIMA-csv-converter.sh
This script is intended to
- pull the data group number from the CSV file title
- identify and pull the group index ("Index"), event number ("Event") and mass ("M" or "Mt") from the file contents
- construct the Masterclass.Events primary key o_no
- write these to CIMA-master.csv in a format mirroring that of the Events table in preparation for import into it.
#!/bin/bash
# CIMA-csv-converter.sh
#
# Collects the Index, Event and mass ("M" or "Mt") columns from every .csv
# file in a directory and writes them to CIMA-master.csv in the current
# directory, one row per data row, under the header
#   o_no,g_no,g_index,ev_no,mass
#
# Put all csv files to be evaluated in one directory.
# Run this from the parent directory, with the csv directory as an argument
# i.e.,
#   ~/cima/CIMA-csv-converter.sh
#   ~/cima/csv-files/(csv files)
# Running
#   username:~/cima$ ./CIMA-csv-converter.sh csv-files/
# will produce ~/cima/CIMA-master.csv
#
# Files that cannot be converted (wrong extension, no group number in the
# name, missing columns) are reported on stderr and skipped.

# The size of each data group (assumed uniform); used to build o_no.
groupSize=100

# Offset (0-based) in the file name at which the group number starts.
# The group number runs from here up to the next dash.
groupNoOffset=12

# Print a diagnostic on stderr, prefixed with the script name.
warn() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
}

#######################################
# Extract the group number from a csv file name.
# Arguments: $1 - file name without any directory part
# Outputs:   the group number (digits exactly as they appear in the name)
# Returns:   0 on success, 1 if no "<digits>-" is found at groupNoOffset
#######################################
group_number_from_name() {
  local name=$1 remainder group
  remainder=${name:$groupNoOffset}
  [[ $remainder == *-* ]] || return 1
  group=${remainder%%-*}
  [[ $group =~ ^[0-9]+$ ]] || return 1
  printf '%s\n' "$group"
}

#######################################
# Convert one csv file to Events rows.
# Arguments: $1 - path of the csv file
# Outputs:   one "o_no,g_no,g_index,ev_no,mass" line per usable data row
# Returns:   0 on success, 1 if the file had to be skipped
#######################################
convert_csv_file() {
  local path=$1
  local group header line gIndex i
  # Column positions are reset for every file so that a file lacking a
  # column can never reuse the position found in a previous file.
  local iEvent=-1 iMass=-1 iIndex=-1
  local -a columns values

  if ! group=$(group_number_from_name "${path##*/}"); then
    warn "skipping $path: no group number at offset $groupNoOffset of the file name"
    return 1
  fi

  {
    # First row: the column headers. Strip a trailing CR (Windows line
    # endings) so that the last header can still be matched.
    IFS= read -r header
    header=${header%$'\r'}
    IFS=, read -r -a columns <<< "$header"

    # Find where the Event, mass and Index values are. If a header occurs
    # more than once, the last occurrence wins.
    for i in "${!columns[@]}"; do
      case ${columns[i]} in
        Event) iEvent=$i ;;
        M | Mt) iMass=$i ;;
        Index) iIndex=$i ;;
      esac
    done
    if (( iEvent < 0 || iMass < 0 || iIndex < 0 )); then
      warn "skipping $path: header must contain Index, Event and M or Mt"
      return 1
    fi

    # Remaining rows: the data. The "|| [[ -n ... ]]" keeps a final row
    # that is not terminated by a newline.
    while IFS= read -r line || [[ -n $line ]]; do
      line=${line%$'\r'}
      [[ -n $line ]] || continue
      IFS=, read -r -a values <<< "$line"
      gIndex=${values[iIndex]:-}
      if [[ ! $gIndex =~ ^[0-9]+$ ]]; then
        warn "skipping row of $path without a numeric Index: $line"
        continue
      fi
      # o_no (primary key), g_no, g_index, ev_no, mass.
      # 10# forces base 10 so that values such as 08 are not read as octal.
      printf '%s,%s,%s,%s,%s\n' \
        "$(( (10#$group - 1) * groupSize + 10#$gIndex ))" \
        "$group" "$gIndex" "${values[iEvent]:-}" "${values[iMass]:-}"
    done
  } < "$path"
}

#######################################
# Entry point: convert every .csv file of a directory.
# Arguments: $1 - directory holding the csv files
# Outputs:   writes CIMA-master.csv into the current directory
# Returns:   0 on success, 1 if anything was skipped or could not be
#            written, 2 on a usage error
#######################################
main() {
  local dir=${1:-}
  local out=$PWD/CIMA-master.csv
  local path rc=0

  if [[ -z $dir ]]; then
    warn "usage: ${0##*/} csv-directory"
    return 2
  fi
  if [[ ! -d $dir ]]; then
    warn "$dir is not a directory"
    return 1
  fi

  # Clear the output file.
  if ! : > "$out"; then
    warn "cannot write $out"
    return 1
  fi

  {
    # Write the column header line.
    printf '%s\n' "o_no,g_no,g_index,ev_no,mass"

    # Run through all of the .csv files in the argument directory.
    for path in "${dir%/}"/*; do
      [[ -f $path ]] || continue
      # Never read the file that is being written.
      if [[ $path -ef $out ]]; then
        continue
      fi
      case $path in
        *.csv | *.CSV) ;;
        *)
          warn "skipping $path: not a .csv file"
          rc=1
          continue
          ;;
      esac
      convert_csv_file "$path" || rc=1
    done
  } >> "$out"

  return "$rc"
}

main "$@"
-- Main.JoelG - 2017-03-03