#################################
#                               #
#     A Digitisation Script     #
#    (For use in ganga 5.1.6    #
#  + patch for md5sum/adler32)  #
#                               #
#################################

## Import the random number generator used for the digitisation seeds
import random

## Define the input dataset name
dataset = 'users.craigwiglesworth.ganga.datafiles.H130zz4mu.ideal.HITS.14.2.25.3'

## Import the python DQ2 client and build the input file list
from dq2.clientapi.DQ2 import DQ2
dq2 = DQ2()
files = dq2.listFilesInDataset(dataset)[0]
fileNames = [f['lfn'] for f in files.itervalues()]

## Define the starting value for the output file partition number
outputFilePartitionNumber = 1

## Sort the input files according to their jobID and submit one job per jobID
for i in range(1229, 1244):
    newFileList = []
    print '\nlooking for files ending with', i
    for f in fileNames:
        jobID = f[-4:]  # the last four characters of the LFN encode the jobID
        if jobID == str(i):
            newFileList.append(f)
    print 'found', len(newFileList), 'files; created new file list'

    ## Create a new job
    j = Job()

    ## Send any local files that the job needs in the inputsandbox
    j.inputsandbox = ['/scratch/wiglesworth/DBRelease-6.3.1.tar.gz']

    ## Configure the job to be an MC production job
    j.application = AthenaMC()
    j.application.mode = 'template'

    ## Temporary workaround for a digitisation bug
    j.application.verbosity = 'INFO'

    ## Define the athena release to use
    j.application.atlas_release = '14.2.25.3'

    ## Define some job parameters from which the output files will be named
    j.application.process_name = 'TestAny21'
    j.application.production_name = 'RDO'
    j.application.run_number = '000000'
    j.application.version = j.application.atlas_release

    ## Define the number of events for each (sub)job
    j.application.number_events_job = 100

    ## Define the output dataset name
    j.outputdata = AthenaMCOutputDatasets()
    j.outputdata.output_firstfile = outputFilePartitionNumber
    j.outputdata.outrootfiles["RDO"] = j.application.process_name+'.'+j.application.production_name

    ## Define which transform to use
    j.application.transform_script = 'csc_digi_trf.py'

    ## Configure the transform
    j.application.extraArgs = 'inputHitsFile=$inputfile maxEvents=$number_events_job skipEvents=$skip geometryVersion=ATLAS-CSC-01-00-00 jobConfig=NONE DBRelease=DBRelease-6.3.1.tar.gz digiRndmSvc=AtRanluxGenSvc samplingFractionDbTag=QGSP_BERT triggerConfig=lumi1E31_no_Bphysics_no_prescale conditionsTag=OFLCOND-CSC-00-00-00'

    ## Generate two random digitisation seed offsets for this job
    digiSeedOffsets = 'digiSeedOffset1=%d digiSeedOffset2=%d' % (
        int(random.random()*100000000), int(random.random()*100000000))
    j.application.extraIncArgs = digiSeedOffsets

    ## Split the job into subjobs, one per input file
    j.splitter = AthenaMCSplitterJob()
    j.splitter.numsubjobs = len(newFileList)

    ## Define the input dataset name
    j.inputdata = AthenaMCInputDatasets()
    j.inputdata.datasetType = 'DQ2'
    j.inputdata.DQ2dataset = dataset

    ## Define the input files for this job
    j.inputdata.redefine_partitions = newFileList
    j.inputdata.number_events_file = 100

    ## Configure the job such that it is sent to the grid
    j.backend = LCG()
    j.backend.middleware = 'GLITE'

    ## Define where the job should run
    j.backend.requirements = AtlasLCGRequirements()
    j.backend.requirements.cloud = 'UK'

    ## Define the storage location for the output dataset
    j.application.se_name = 'UKI-NORTHGRID-LIV-HEP_LOCALGROUPDISK'

    ## Submit the job
    j.submit()

    ## Advance the partition number so the next job's output files do not clash
    outputFilePartitionNumber = outputFilePartitionNumber + j.splitter.numsubjobs
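
## ----------------------------------------------------------------
## Usage note (a sketch, not part of the original recipe): assuming
## a configured ganga 5.1.6 installation with the Atlas plugins, a
## valid Grid proxy, and a working DQ2 client, the script can
## typically be run non-interactively via
##
##     ganga digitisation_script.py
##
## or loaded from an interactive ganga session with
##
##     execfile('digitisation_script.py')
##
## The filename digitisation_script.py is a placeholder for wherever
## this script has been saved.
## ----------------------------------------------------------------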