#!/usr/bin/env python3
# Above is the shebang line - which tells which program should interpret the rest of the file

# The libraries necessary for the exercises - use no other
import os, sys, re

####################################################################################
# You must fill the functions with code that does the job mentioned.               #
# All the functions must work together coherently for the exercise to be complete. #
# The main exercise function should call the other functions in its group.         #
####################################################################################

################################
### FUNCTIONS FOR EXERCISE 1 ###
################################

def ex1_loadFile(filename:str, data:dict) -> None:
    """Load the contents of ``filename`` into ``data`` (exercise 1 helper).

    Intended contract, per the exercise outline: open the file and add what
    it contains to the caller-supplied dict. The file layout is not defined
    in this template.

    Currently an unimplemented placeholder: nothing is read, ``data`` is
    left untouched and ``None`` is returned.
    """
    # TODO: open the file and fill ``data``.
    return None

def ex1_saveFile(filename: str, data:dict) -> None:
    """Write the dict ``data`` to ``filename`` (exercise 1 helper).

    Intended contract, per the exercise outline: save the dict data into the
    file. The output layout is not defined in this template.

    Currently an unimplemented placeholder: no file is created or written
    and ``None`` is returned.
    """
    # TODO: write ``data`` to the file.
    return None
    
def exercise_1(filelist:tuple, filenameout:str) -> None:
    """Run exercise 1: combine several input files into one averaged output.

    Intended steps, per the exercise outline:
      1. Load every file in ``filelist`` into a common data structure.
      2. Find the average for each gene.
      3. Save the data in ``filenameout``.

    Currently an unimplemented placeholder: no file is touched and ``None``
    is returned.
    """
    # TODO: call the ex1_* helpers to perform the steps above.
    return None

################################
### FUNCTIONS FOR EXERCISE 2 ###
################################

def ex2_loadMatrix(filename:str) -> list:
    """Read a matrix from ``filename`` (exercise 2 helper).

    Intended contract, per the exercise outline: open the file, read the
    matrix and return it as a list. The file layout is not defined in this
    template.

    Currently an unimplemented placeholder: nothing is read and ``None`` is
    returned instead of a list.
    """
    # TODO: open the file and build the matrix.
    return None

def ex2_transpose(matrix:list) -> list:
    """Return the transpose of ``matrix`` as a new list of lists.

    ``matrix`` is a sequence of equally long rows. Element ``[i][j]`` of the
    input becomes element ``[j][i]`` of the result. The input and its rows
    are not modified; the result is built from fresh list objects.

    An empty matrix (``[]``) transposes to ``[]``.

    Raises:
        ValueError: if the rows do not all have the same length.
    """
    if not matrix:
        return []

    width = len(matrix[0])
    # zip() would silently truncate to the shortest row, so reject ragged
    # input explicitly instead of dropping data.
    if any(len(row) != width for row in matrix):
        raise ValueError("cannot transpose a ragged matrix: rows differ in length")

    return [list(column) for column in zip(*matrix)]
    
def ex2_saveMatrix(filename:str, matrix:list) -> None:
    """Write ``matrix`` to ``filename`` (exercise 2 helper).

    Intended contract, per the exercise outline: save the matrix in the
    file. The output layout is not defined in this template.

    Currently an unimplemented placeholder: no file is created or written
    and ``None`` is returned.
    """
    # TODO: write the matrix to the file.
    return None
    
def exercise_2(filenamein:str, filenameout:str) -> None:
    """Run exercise 2: transpose a matrix file into a new matrix.

    Intended steps, per the exercise outline:
      1. Read a matrix from ``filenamein``.
      2. Transpose the matrix.
      3. Save the transposed matrix in ``filenameout``.

    Currently an unimplemented placeholder: no file is touched and ``None``
    is returned.
    """
    # TODO: call the ex2_* helpers to perform the steps above.
    return None

################################
### FUNCTIONS FOR EXERCISE 3 ###
################################

def ex3_transpose(matrix:list) -> None:
    """Transpose ``matrix`` in place, inside its original data structure.

    ``matrix`` must be a list of equally long, mutable rows (lists). After
    the call the same outer list holds the transposed matrix, so every
    reference the caller has to ``matrix`` sees the result. Nothing is
    returned.

    Rectangular matrices are supported: the matrix is temporarily padded to
    a square, elements are swapped across the main diagonal, and the padding
    is trimmed off again.

    Raises:
        ValueError: if the rows do not all have the same length. The check
            runs before any modification, so ``matrix`` is left unchanged.
    """
    row_count = len(matrix)
    col_count = len(matrix[0]) if matrix else 0
    if any(len(row) != col_count for row in matrix):
        raise ValueError("cannot transpose a ragged matrix: rows differ in length")

    # Pad to a size x size square so that every [i][j] has a [j][i] partner.
    size = max(row_count, col_count)
    for row in matrix:
        row.extend([None] * (size - col_count))
    matrix.extend([None] * size for _ in range(size - row_count))

    # Swap each element above the diagonal with its mirror below it.
    for i in range(size):
        for j in range(i + 1, size):
            matrix[i][j], matrix[j][i] = matrix[j][i], matrix[i][j]

    # Trim the padding: the result has col_count rows of row_count elements.
    del matrix[col_count:]
    for row in matrix:
        del row[row_count:]
        
def exercise_3(filenamein:str, filenameout:str) -> None:
    """Run exercise 3: transpose a matrix file, transposing in place.

    Intended steps, per the exercise outline (reusing ``ex2_loadMatrix`` and
    ``ex2_saveMatrix`` from exercise 2):
      1. Read a matrix from ``filenamein``.
      2. Transpose the matrix.
      3. Save the transposed matrix in ``filenameout``.

    Currently an unimplemented placeholder: no file is touched and ``None``
    is returned.
    """
    # TODO: load, transpose in place, and save.
    return None

################################
### FUNCTIONS FOR EXERCISE 4 ###
################################

def ex4_parseFile(accession:str, filename:str) -> tuple[list, list]: # classlist, datalist
    """Parse ``filename`` for ``accession`` (exercise 4 helper).

    Intended contract, per the exercise outline: read the file, parse data
    into a class list and the numbers into a data list, and return them as
    ``(classlist, datalist)``. The input layout is not defined in this
    template.

    Currently an unimplemented placeholder: nothing is read and ``None`` is
    returned instead of the two lists.
    """
    # TODO: read the file and build classlist and datalist.
    return None

def ex4_saveFile(classlist:list, datalist:list, filename:str) -> None:
    """Write ``datalist`` to ``filename`` in columns (exercise 4 helper).

    Intended contract, per the exercise outline: save the data list in
    columns based on the class list.

    Currently an unimplemented placeholder: no file is created or written
    and ``None`` is returned.
    """
    # TODO: write the columns to the file.
    return None

def exercise_4(accession:str, filenamein:str, filenameout:str) -> None:
    """Run exercise 4: extract one accession's data into a column file.

    Intended steps, per the exercise outline:
      1. Read ``filenamein`` and get a control/cancer class list plus a
         corresponding data list.
      2. Complain if there is no data.
      3. Save the data in the output file; the outline says its name is
         constructed from the accession plus the extension '.tab'.

    Currently an unimplemented placeholder: no file is touched and ``None``
    is returned.
    """
    # TODO: call the ex4_* helpers to perform the steps above.
    return None

################################
### FUNCTIONS FOR EXERCISE 5 ###
################################

def ex5_normalize(line:str) -> str:
    """Normalize the numbers on one data line (exercise 5 helper).

    Intended contract, per the exercise outline: split the line, normalize
    the numbers, then make and return a new line. The normalization rule is
    not defined in this template.

    Currently an unimplemented placeholder: ``line`` is ignored and ``None``
    is returned instead of a string.
    """
    # TODO: split, normalize and re-join the line.
    return None

def exercise_5(filenamein:str, filenameout:str) -> None:
    """Run exercise 5: write a normalized copy of the input file.

    Intended steps, per the exercise outline:
      1. Open the input and output files.
      2. Iterate through the lines, ignoring the control lines.
      3. Normalize the data lines, saving the lines as they are parsed.

    Currently an unimplemented placeholder: no file is touched and ``None``
    is returned.
    """
    # TODO: stream the input through ex5_normalize into the output.
    return None

################################
### FUNCTIONS FOR EXERCISE 6 ###
################################

def ex6_transform(line:str) -> tuple[str, list]: # accession, datalist
    """Turn one data line into an accession and 0/1 values (exercise 6 helper).

    Intended contract, per the exercise outline: transform the numbers on
    the line to a data list of 0/1 and return ``(accession, datalist)``. The
    rule deciding 0 versus 1 is not defined in this template.

    Currently an unimplemented placeholder: ``line`` is ignored and ``None``
    is returned instead of the tuple.
    """
    # TODO: parse the line and build the 0/1 list.
    return None

def ex6_averages(classlist:list, datalist:list) -> tuple[float, float]: # control_avg, cancer_avg
    """Compute the control and cancer averages (exercise 6 helper).

    Intended contract, per the exercise outline: compute the averages from
    ``classlist`` and ``datalist`` and return ``(control_avg, cancer_avg)``.
    The labels used in ``classlist`` are not defined in this template.

    Currently an unimplemented placeholder: both arguments are ignored and
    ``None`` is returned instead of the tuple.
    """
    # TODO: average the values per class.
    return None

def exercise_6(filenamein:str, filenameout:str) -> None:
    """Run exercise 6: report regulation per accession.

    Intended steps, per the exercise outline:
      1. Open the input and output files.
      2. Iterate through the lines and catch the class list.
      3. For each data line, transform the values to 0/1 and compute the
         control and cancer averages.
      4. If the distance is big, write the regulation for that accession
         to the output file.

    Currently an unimplemented placeholder: no file is touched and ``None``
    is returned.
    """
    # TODO: call the ex6_* helpers to perform the steps above.
    return None


############################
# Main program starts here #
############################

# Below if statement allows you to execute the main code when the file runs as a script,
# but not when it’s imported as a module.
if __name__ == "__main__":
    # Run the six exercises in order. Exercises 5 and 6 are chained through
    # 'dna-array-norm.dat': exercise 5 is given it as its output file name
    # and exercise 6 as its input file name.

    # Exercise 1: combine three data files into one result file.
    exercise_1(
        filelist=('test1.dat', 'test2.dat', 'test3.dat'),
        filenameout='combinedresult.dat',
    )

    # Exercise 2: transpose into a new data structure.
    exercise_2(filenamein='matrix.dat', filenameout='trans1matrix.dat')

    # Exercise 3: transpose within the original data structure.
    exercise_3(filenamein='matrix.dat', filenameout='trans2matrix.dat')

    # Exercise 4: try different accession numbers here.
    exercise_4(
        accession='H80240',
        filenamein='dna-array.dat',
        filenameout='column.tab',
    )

    # Exercise 5: normalize the DNA array data.
    exercise_5(filenamein='dna-array.dat', filenameout='dna-array-norm.dat')

    # Exercise 6: derive regulation from the normalized data.
    exercise_6(filenamein='dna-array-norm.dat', filenameout='regulation.txt')
