#################################
#                               #
#     A Digitisation Script     #
#    (For use in ganga 5.1.6    #
#  + patch for md5sum/adler32)  #
#                               #
#################################

## Import the random number generator used for the digitisation seeds
import random

## Define the input dataset name
dataset = 'users.craigwiglesworth.ganga.datafiles.H130zz4mu.ideal.HITS.14.2.25.3'

## Import the python DQ2 client and build the input file list
from dq2.clientapi.DQ2 import DQ2
dq2 = DQ2()
files = dq2.listFilesInDataset(dataset)[0]
fileNames = [f['lfn'] for f in files.itervalues()]

## Define the starting value for the output file partition number
outputFilePartitionNumber = 1

## Sort the input files according to their jobID and submit one job per jobID
for i in range(1229, 1244):
    newFileList = []
    print '\nlooking for files ending with', i
    for f in fileNames:
        jobID = f[-4:]  # the last four characters of the LFN encode the jobID
        if jobID == str(i):
            newFileList.append(f)
    print 'found', len(newFileList), 'files; created new file list'

    ## Create a new job
    j = Job()

    ## Send any local files that the job needs in the inputsandbox
    j.inputsandbox = ['/scratch/wiglesworth/DBRelease-6.3.1.tar.gz']

    ## Configure the job to be an MC production job
    j.application = AthenaMC()
    j.application.mode = 'template'

    ## Temporary workaround for a digitisation bug
    j.application.verbosity = 'INFO'

    ## Define the athena release to use
    j.application.atlas_release = '14.2.25.3'

    ## Define some job parameters from which the output files will be named
    j.application.process_name = 'TestAny21'
    j.application.production_name = 'RDO'
    j.application.run_number = '000000'
    j.application.version = j.application.atlas_release

    ## Define the number of events for each (sub)job
    j.application.number_events_job = 100

    ## Define the output dataset name
    j.outputdata = AthenaMCOutputDatasets()
    j.outputdata.output_firstfile = outputFilePartitionNumber
    j.outputdata.outrootfiles["RDO"] = j.application.process_name+'.'+j.application.production_name

    ## Define which transform to use
    j.application.transform_script = 'csc_digi_trf.py'

    ## Configure the transform
    j.application.extraArgs = 'inputHitsFile=$inputfile maxEvents=$number_events_job skipEvents=$skip geometryVersion=ATLAS-CSC-01-00-00 jobConfig=NONE DBRelease=DBRelease-6.3.1.tar.gz digiRndmSvc=AtRanluxGenSvc samplingFractionDbTag=QGSP_BERT triggerConfig=lumi1E31_no_Bphysics_no_prescale conditionsTag=OFLCOND-CSC-00-00-00'

    ## Generate two random digitisation seed offsets for this job
    digiSeedOffsets = 'digiSeedOffset1=%d digiSeedOffset2=%d' % (
        int(random.random()*100000000), int(random.random()*100000000))
    j.application.extraIncArgs = digiSeedOffsets

    ## Split the job into subjobs, one per input file
    j.splitter = AthenaMCSplitterJob()
    j.splitter.numsubjobs = len(newFileList)

    ## Define the input dataset name
    j.inputdata = AthenaMCInputDatasets()
    j.inputdata.datasetType = 'DQ2'
    j.inputdata.DQ2dataset = dataset

    ## Define the input files for this job
    j.inputdata.redefine_partitions = newFileList
    j.inputdata.number_events_file = 100

    ## Configure the job such that it is sent to the grid
    j.backend = LCG()
    j.backend.middleware = 'GLITE'

    ## Define where the job should run
    j.backend.requirements = AtlasLCGRequirements()
    j.backend.requirements.cloud = 'UK'

    ## Define the storage location for the output dataset
    j.application.se_name = 'UKI-NORTHGRID-LIV-HEP_LOCALGROUPDISK'

    ## Submit the job
    j.submit()

    ## Advance the partition number so the next job's output files do not clash
    outputFilePartitionNumber = outputFilePartitionNumber + j.splitter.numsubjobs
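
## ----------------------------------------------------------------
## Usage note (a sketch, not part of the original recipe): assuming
## a configured ganga 5.1.6 installation with the Atlas plugins, a
## valid Grid proxy, and a working DQ2 client, the script can
## typically be run non-interactively via
##
##     ganga digitisation_script.py
##
## or loaded from an interactive ganga session with
##
##     execfile('digitisation_script.py')
##
## The filename digitisation_script.py is a placeholder for wherever
## this script has been saved.
## ----------------------------------------------------------------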