Skip to content

Issues with single dataset treated as multiple ones (for ROOT master) #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions proof/proof/inc/TDataSetManagerAliEn.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,13 @@ class TDataSetManagerAliEn : public TDataSetManager {
TDataSetManagerFile *fCache;
Long_t fCacheExpire_s;

std::vector<Int_t> *ExpandRunSpec(TString &runSpec);
static std::vector<Int_t> *ExpandRunSpec(TString &runSpec);

virtual Bool_t ParseCustomFindUri(TString &uri, TString &basePath,
static Bool_t ParseCustomFindUri(TString &uri, TString &basePath,
TString &fileName, TString &anchor, TString &treeName,
TString &regexp);

virtual Bool_t ParseOfficialDataUri(TString &uri, Bool_t sim,
static Bool_t ParseOfficialDataUri(TString &uri, Bool_t sim,
TString &period, Int_t &year, std::vector<Int_t> *&runList,
Bool_t &esd, Int_t &aodNum, TString &pass);

Expand Down
121 changes: 83 additions & 38 deletions proof/proof/src/TDataSetManagerAliEn.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
//////////////////////////////////////////////////////////////////////////

#include "TDataSetManagerAliEn.h"
#include "TError.h"

ClassImp(TAliEnFind);

Expand Down Expand Up @@ -541,43 +542,65 @@ Bool_t TDataSetManagerAliEn::ParseCustomFindUri(TString &uri,
TString &regexp)
{

// Work on a scratch copy of the URI: every recognized parameter is stripped
// from it, so whatever is left at the end is unrecognized. The leading
// "Find;" prefix is already known and is dropped here
TString checkUri = uri(5, uri.Length());

// Base path
TPMERegexp reBasePath("(^|;)BasePath=([^; ]+)(;|$)");
if (reBasePath.Match(uri) != 4) {
Error("ParseCustomFindUri", "Base path not specified");
TPMERegexp reBasePath("(^|;)(BasePath=([^; ]+))(;|$)");
if (reBasePath.Match(uri) != 5) {
::Error("TDataSetManagerAliEn::ParseCustomFindUri",
"Base path not specified");
return kFALSE;
}
basePath = reBasePath[2];
checkUri.ReplaceAll(reBasePath[2], "");
basePath = reBasePath[3];

// File name
TPMERegexp reFileName("(^|;)FileName=([^; ]+)(;|$)");
if (reFileName.Match(uri) != 4) {
Error("ParseCustomFindUri", "File name not specified");
TPMERegexp reFileName("(^|;)(FileName=([^; ]+))(;|$)");
if (reFileName.Match(uri) != 5) {
::Error("TDataSetManagerAliEn::ParseCustomFindUri",
"File name not specified");
return kFALSE;
}
fileName = reFileName[2];
checkUri.ReplaceAll(reFileName[2], "");
fileName = reFileName[3];

// Anchor (optional)
TPMERegexp reAnchor("(^|;)Anchor=([^; ]+)(;|$)");
if (reAnchor.Match(uri) != 4)
TPMERegexp reAnchor("(^|;)(Anchor=([^; ]+))(;|$)");
if (reAnchor.Match(uri) != 5)
anchor = "";
else
anchor = reAnchor[2];
else {
checkUri.ReplaceAll(reAnchor[2], "");
anchor = reAnchor[3];
}

// Tree name (optional)
TPMERegexp reTreeName("(^|;)Tree=(/[^; ]+)(;|$)");
if (reTreeName.Match(uri) != 4)
TPMERegexp reTreeName("(^|;)(Tree=(/[^; ]+))(;|$)");
if (reTreeName.Match(uri) != 5)
treeName = "";
else
treeName = reTreeName[2];
else {
checkUri.ReplaceAll(reTreeName[2], "");
treeName = reTreeName[3];
}

// Regexp (optional)
TPMERegexp reRegexp("(^|;)Regexp=([^; ]+)(;|$)");
if (reRegexp.Match(uri) != 4)
TPMERegexp reRegexp("(^|;)(Regexp=([^; ]+))(;|$)");
if (reRegexp.Match(uri) != 5)
regexp = "";
else
regexp = reRegexp[2];
else {
checkUri.ReplaceAll(reRegexp[2], "");
regexp = reRegexp[3];
}

// Check for unparsed stuff; parsed stuff has been stripped from checkUri
checkUri.ReplaceAll(";", "");
checkUri.ReplaceAll(" ", "");
if (!checkUri.IsNull()) {
::Error("TDataSetManagerAliEn::ParseCustomFindUri",
"There are unrecognized parameters in the dataset find string");
return kFALSE;
}
return kTRUE;
}

Expand All @@ -587,62 +610,84 @@ Bool_t TDataSetManagerAliEn::ParseOfficialDataUri(TString &uri, Bool_t sim,
Int_t &aodNum, TString &pass)
{

// Scratch copy of the URI: every recognized parameter is stripped from it,
// so whatever is left at the end is unrecognized
TString checkUri;

// Strip the initial part (either "Data;" or "Sim;")
{
Ssiz_t idx = uri.Index(";");
checkUri = uri(idx, uri.Length());
}

//
// Parse LHC period
//

TPMERegexp rePeriod("(^|;)Period=(LHC([0-9]{2})[^;]*)(;|$)");
if (rePeriod.Match(uri) != 5) {
Error("ParseOfficialDataUri",
TPMERegexp rePeriod("(^|;)(Period=(LHC([0-9]{2})[^;]*))(;|$)");
if (rePeriod.Match(uri) != 6) {
::Error("TDataSetManagerAliEn::ParseOfficialDataUri",
"LHC period not specified (e.g. Period=LHC10h)");
return kFALSE;
}

period = rePeriod[2];
year = rePeriod[3].Atoi() + 2000;
checkUri.ReplaceAll(rePeriod[2], "");
period = rePeriod[3];
year = rePeriod[4].Atoi() + 2000;

//
// Parse data format (ESDs or AODXXX)
//

TPMERegexp reFormat("(^|;)Variant=(ESDs?|AOD([0-9]{3}))(;|$)");
if (reFormat.Match(uri) != 5) {
Error("ParseOfficialDataUri",
TPMERegexp reFormat("(^|;)(Variant=(ESDs?|AOD([0-9]{3})))(;|$)");
if (reFormat.Match(uri) != 6) {
::Error("TDataSetManagerAliEn::ParseOfficialDataUri",
"Data variant (e.g., Variant=ESD or AOD079) not specified");
return kFALSE;
}

if (reFormat[2].BeginsWith("ESD")) esd = kTRUE;
checkUri.ReplaceAll(reFormat[2], "");
if (reFormat[3].BeginsWith("ESD")) esd = kTRUE;
else {
esd = kFALSE;
aodNum = reFormat[3].Atoi();
aodNum = reFormat[4].Atoi();
}

//
// Parse pass: mandatory on Data, useless on Sim
//

TPMERegexp rePass("(^|;)Pass=([a-zA-Z_0-9-]+)(;|$)");
if ((rePass.Match(uri) != 4) && (!sim)) {
Error("ParseOfficialDataUri",
TPMERegexp rePass("(^|;)(Pass=([a-zA-Z_0-9-]+))(;|$)");
if ((!sim) && (rePass.Match(uri) != 5)) {
::Error("TDataSetManagerAliEn::ParseOfficialDataUri",
"Pass (e.g., Pass=cpass1_muon) is mandatory on real data");
return kFALSE;
}
pass = rePass[2];
checkUri.ReplaceAll(rePass[2], "");
pass = rePass[3];

//
// Parse run list
//

TPMERegexp reRun("(^|;)Run=([0-9,-]+)(;|$)");
if (reRun.Match(uri) != 4) {
Error("ParseOfficialDataUri",
TPMERegexp reRun("(^|;)(Run=([0-9,-]+))(;|$)");
if (reRun.Match(uri) != 5) {
::Error("TDataSetManagerAliEn::ParseOfficialDataUri",
"Run or run range not specified (e.g., Run=139104-139107,139306)");
return kFALSE;
}
TString runListStr = reRun[2];
checkUri.ReplaceAll(reRun[2], "");
TString runListStr = reRun[3];
runList = ExpandRunSpec(runListStr); // must be freed by caller

// Check for unparsed stuff; parsed stuff has been stripped from checkUri
checkUri.ReplaceAll(";", "");
checkUri.ReplaceAll(" ", "");
if (!checkUri.IsNull()) {
::Error("TDataSetManagerAliEn::ParseOfficialDataUri",
"There are unrecognized parameters in dataset string");
return kFALSE;
}

return kTRUE;
}

Expand Down
122 changes: 84 additions & 38 deletions proof/proof/src/TProof.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -5568,7 +5568,7 @@ Long64_t TProof::Process(const char *dsetname, const char *selector,
} else if (fSelector) {
retval = Process(dset, fSelector, option, nentries, first);
} else {
Error("Process", "neither a selecrot file nor a selector object have"
Error("Process", "neither a selector file nor a selector object have"
" been specified: cannot process!");
}
// Cleanup
Expand Down Expand Up @@ -12446,49 +12446,95 @@ Int_t TProof::AssertDataSet(TDSet *dset, TList *input,
// defined: assume that a dataset, stored on the PROOF master by that
// name, should be processed.
if (!dataset) {
TString dsns(dsname.Data()), dsn1;
Int_t from1 = 0;
while (dsns.Tokenize(dsn1, from1, "[, ]")) {
TString dsn2, enl;
Int_t from2 = 0;
TFileCollection *fc = 0;
while (dsn1.Tokenize(dsn2, from2, "|")) {
enl = "";
Int_t ienl = dsn2.Index("?enl=");
if (ienl != kNPOS) {
enl = dsn2(ienl + 5, dsn2.Length());
dsn2.Remove(ienl);
}
if ((fc = mgr->GetDataSet(dsn2.Data()))) {
// Save dataset name in TFileInfo's title to use it in TDset
TIter nxfi(fc->GetList());
TFileInfo *fi = 0;
while ((fi = (TFileInfo *) nxfi())) { fi->SetTitle(dsn2.Data()); }
dsnparse = dsn2;
if (!dataset) {
// This is our dataset
dataset = fc;
} else {
// Add it to the dataset
dataset->Add(fc);
SafeDelete(fc);

// First of all check if the full string (except the "entry list" part)
// is the name of a single existing dataset: if it is, don't break it
// into parts
TString dsns( dsname.Data() ), enl;
Ssiz_t eli = dsns.Index("?enl=");
TFileCollection *fc;
if (eli != kNPOS) {
enl = dsns(eli+5, dsns.Length());
dsns.Remove(eli, dsns.Length()-eli);
}

// Check if the entry list is valid. If it contains spaces, commas, or pipes,
// it is not considered valid and we fall back to the "multiple datasets"
// case
Bool_t validEnl = ((enl.Index("|") == kNPOS) &&
(enl.Index(",") == kNPOS) && (enl.Index(" ") == kNPOS));

if (validEnl && (( fc = mgr->GetDataSet(dsns) ))) {

//
// String corresponds to ONE dataset only
//

TIter nxfi(fc->GetList());
TFileInfo *fi;
while (( fi = (TFileInfo *)nxfi() ))
fi->SetTitle(dsns.Data());
dataset = fc;
dsnparse = dsns; // without entry list

// Adds the entry list (or empty string if not specified)
datasets->Add( new TPair(dataset, new TObjString( enl.Data() )) );

}
else {

//
// String does NOT correspond to one dataset: check if many datasets
// were specified instead
//

dsns = dsname.Data();
TString dsn1;
Int_t from1 = 0;
while (dsns.Tokenize(dsn1, from1, "[, ]")) {
TString dsn2;
Int_t from2 = 0;
while (dsn1.Tokenize(dsn2, from2, "|")) {
enl = "";
Int_t ienl = dsn2.Index("?enl=");
if (ienl != kNPOS) {
enl = dsn2(ienl + 5, dsn2.Length());
dsn2.Remove(ienl);
}
if ((fc = mgr->GetDataSet(dsn2.Data()))) {
// Save dataset name in TFileInfo's title to use it in TDset
TIter nxfi(fc->GetList());
TFileInfo *fi;
while ((fi = (TFileInfo *) nxfi())) { fi->SetTitle(dsn2.Data()); }
dsnparse = dsn2;
if (!dataset) {
// This is our dataset
dataset = fc;
} else {
// Add it to the dataset
dataset->Add(fc);
SafeDelete(fc);
}
}
}
}
// The dataset name(s) in the first element
if (dataset) {
if (dataset->GetList()->First())
((TFileInfo *)(dataset->GetList()->First()))->SetTitle(dsn1.Data());
// Add it to the local list
if (enl.IsNull()) {
datasets->Add(new TPair(dataset, new TObjString("")));
} else {
// The dataset name(s) in the first element
if (dataset) {
if (dataset->GetList()->First())
((TFileInfo *)(dataset->GetList()->First()))->SetTitle(dsn1.Data());
// Add it to the local list
datasets->Add(new TPair(dataset, new TObjString(enl.Data())));
}
// Reset the pointer
dataset = 0;
}
// Reset the pointer
dataset = 0;

}

//
// At this point the dataset(s) to be processed, if any, are found in the
// "datasets" variable
//

if (!datasets || datasets->GetSize() <= 0) {
emsg.Form("no dataset(s) found on the master corresponding to: %s", dsname.Data());
return -1;
Expand Down