**************************************************************************
**************************************************************************
PROGRAM:  MFPTUS.SAS

DATE:	    12/30/10

CODER:	 J.D. Opdyke
          Managing Director, Quantitative Strategies
			 DataMineIt

PURPOSE:  Run and compare 6 different SAS Permutation Tests algorithms 
          including OPDN, OPDN_alt, PROCSS, PROCMT, PROCNPAR, and BEBB_SIM.
		    See "Permutation Tests (and Sampling with Replacement) Orders of
	       Magnitude Faster Using SAS" by J.D. Opdyke for detailed explanations
		    of the different algorithms.

INPUTS:	 Each macro is completely modular and accepts 6 macro parameter
          values (PROCMT accepts 7):
			 indata        = the input dataset (including libname)
			 outdata       = the input dataset (including libname)
			 byvars        = the "by variables" defining the strata (these are
			                 character variables)
			 samp2var      = the (character) variable defining the two samples in each
		                    stratum: TEST ("T") and CONTROL ("C"), with those values
			 permvar       = the variable to be tested with permutation tests
			 num_psmps     = number of Permutation Test samples
			 seed          = and optional random number seed (must be an integer or 
			                 blank)
          left_or_right = "left" or "right" depending on whether the user wants
			                 lower or upper one-tailed p-values, respectively, in
								  addition to the two-tailed p-value (only for PROCMT)

OUTPUTS:  A SAS dataset, named by the user via the macro variable outdata, 
          which contains the following variables:
			 1) the "by variables" defining the strata
			 2) the name of the permuted variable
			 3) the size (# of records) of the permutation samples (this should 
			    always be the smaller of the two samples (TEST v. CONTROL))
			 4) the number of permutation samples
			 5) the left, right, and two-tailed p-values corresponding to the 
			    following hypotheses:
	                   p_left  = Pr(x>X | Ho: Test>=Control)
	                   p_right = Pr(x<X | Ho: Test<=Control)
	                   p_both  = Pr(x=X | Ho: Test=Control)
	                where x is the sample permutation statistic from the Control 
			    sample (or the additive inverse of the Test sample if the Test
			    sample is smaller, which is atypical) and X is the distribution of
	                permutation statistic values.

			    Following standard convention, the two-tailed p-value is 
			    calculated based on the reflection method.

**************************************************************************
**************************************************************************
***;


options
label 
symbolgen 
fullstimer 
yearcutoff=1950 
nocenter 
ls = 256 
ps = 51 
msymtabmax=max 
mprint 
mlogic 
minoperator mindelimiter=' '
cleanup
;

libname MFPTUS "c:\MFPTUS";

%MACRO makedata(strata_size=, testproportion=, numsegs=, numgeogs=);

*** Since generating all the datasets below once takes only a couple of minutes,
    time was not wasted trying to optimize runtimes for the creation of mere
	 test data.
***;

%let numstrata = %eval(&numsegs.*&numgeogs.);

*** The variable "cntrl_test" identifies the test and control samples with values of "T" and "C" respectively.;

data MFPTUS.pricing_data_&numstrata.strata_&strata_size.(keep=geography segment cntrl_test price sortedby=geography segment);
  format segment geography $8. cntrl_test $1.;
  array seg{3} $ _TEMPORARY_ ('segment1' 'segment2' 'segment3');
  array geog{4} $ _TEMPORARY_ ('geog1' 'geog2' 'geog3' 'geog4');
  strata_size = 1* &strata_size.;
  do x=1 to &numgeogs.;
     geography=geog{x};
     do j=1 to &numsegs.;
        segment=seg{j};
        if j=1 then do i=1 to strata_size;
			  if mod(i,&testproportion.)=0 then do;
		        cntrl_test="T";
			     price=(rand('UNIFORM')+0.175/(&testproportion./(&testproportion./10)));
			  end;
			  else do;
		        cntrl_test="C";
			     price=rand('UNIFORM');
			  end;
			  output;
		  end;
		  else if j=2 then do i=1 to strata_size; 
			  if mod(i,&testproportion.)=0 then do;
		        cntrl_test="T";
			     price=rand('POISSON',1-0.75/(&testproportion./(&testproportion./10)));
			  end;
			  else do;
		        cntrl_test="C";
			     price=rand('POISSON',1.0);
			  end;
			  output;
		  end;
		  else if j=3 then do i=1 to strata_size; 
*** Make Control smaller to test code in algorithms.;
			  if mod(i,&testproportion.)=0 then cntrl_test="C";
			  else cntrl_test="T";
		     price=rand('NORMAL');
			  output;
		  end;
     end;
  end;
  run;
%MEND makedata;

%makedata(strata_size=10000, testproportion=10, numsegs=2, numgeogs=1);
%makedata(strata_size=10000, testproportion=10, numsegs=2, numgeogs=3);
%makedata(strata_size=10000, testproportion=10, numsegs=3, numgeogs=4);

%makedata(strata_size=100000, testproportion=100, numsegs=2, numgeogs=1);
%makedata(strata_size=100000, testproportion=100, numsegs=2, numgeogs=3);
%makedata(strata_size=100000, testproportion=100, numsegs=3, numgeogs=4);

%makedata(strata_size=1000000, testproportion=1000, numsegs=2, numgeogs=1);
%makedata(strata_size=1000000, testproportion=1000, numsegs=2, numgeogs=3);
%makedata(strata_size=1000000, testproportion=1000, numsegs=3, numgeogs=4);

%makedata(strata_size=10000000, testproportion=10000, numsegs=2, numgeogs=1);
%makedata(strata_size=10000000, testproportion=10000, numsegs=2, numgeogs=3);
%makedata(strata_size=10000000, testproportion=10000, numsegs=3, numgeogs=4);


%makedata(strata_size=7500000, testproportion=7500, numsegs=2, numgeogs=1);
%makedata(strata_size=7500000, testproportion=7500, numsegs=2, numgeogs=3);


%makedata(strata_size=25000000, testproportion=25000, numsegs=2, numgeogs=1);
%makedata(strata_size=50000000, testproportion=50000, numsegs=2, numgeogs=1);
%makedata(strata_size=100000000, testproportion=100000, numsegs=2, numgeogs=1);



*** OPDN_alt ***;
*** OPDN_alt ***;
*** OPDN_alt ***;


%MACRO OPDN_alt(num_psmps=, 
                indata=, 
					 outdata=, 
					 byvars=, 
					 samp2var=, 
					 permvar=, 
					 seed=
					 );

*** If user does not pass a value to the optional macro variable "seed," use -1
    based on the time of day.
***;
%if %sysevalf(%superq(seed)=,boolean) %then %let seed=-1;


*** To minimize intermediate memory requirements, initialize output data set 
    with missing variable values.
***;

data &outdata.(sortedby=&byvars.);
   stop;
   set &indata(keep=&byvars.);
   length permvar $32 n_psamp num_psmps p_left p_right p_both distance_right distance_left lastbinnum lastbinsize pmed pmean tot_FREQ_incr x 8;
   call missing(of _all_);
   run;

*** Obtain counts and cumulated counts for each strata.;

proc summary data=&indata. nway;
  class &byvars. &samp2var.;
  var &permvar.;
  output out=byvar_sum(keep = _FREQ_ &byvars. &samp2var. sumpvar) 
                        sum = sumpvar
                       ;
  run;

*** Identify and keep the smaller of the two samples for more efficient 
    sampling.  Below, invert the empirical permutation distribution if CONTROL
	 sample is smaller that TEST sample (which is not typical).  That will 
	 remain consistent with output variables corresponding to: 
	      p_left  = Pr(x>X | Ho: Test>=Control)
	      p_right = Pr(x<X | Ho: Test<=Control)
	      p_both  = Pr(x=X | Ho: Test=Control)
	 where x is the sample permutation statistic and X is the distribution of
	 permutation statistic values.
***;

%let last_byvar = %scan(&byvars.,-1);

data byvar_sum_min(keep=tot_FREQ _FREQ_ &byvars. &samp2var. sumpvar sortedby=&byvars.);
  set byvar_sum;
  format tot_FREQ _FREQ_ 16.;
  by &byvars.;
  retain lag_FREQ lag_sum lag_samp2;
  if first.&last_byvar. then do;
	  lag_FREQ = _FREQ_;
	  lag_sum  = sumpvar;
	  lag_samp2  = &samp2var.;
  end;
  else do;
     tot_FREQ = sum(lag_FREQ,_FREQ_);
     if _FREQ_<=lag_FREQ then output;
	  else do;
		  _FREQ_  = lag_FREQ;
		  sumpvar = lag_sum;
		  &samp2var. = lag_samp2;
		  output;
	  end;
  end;
  run;


*** Get size of largest stratum for efficient (re)use of _TEMPORARY_ array.;

proc sql noprint;
   select max(tot_FREQ) into :max_tot_freq from byvar_sum_min;
   quit;
*** In case number is large, avoid scientific notation.;
%let max_tot_freq = %sysfunc(putn(&max_tot_freq.,16.));


data &outdata.;
   if 0 then modify &outdata.;                                        

*** To view permutation distributions for each strata in .log file.,  
    comment out first line below this comment, uncomment the line below it, and 
    uncomment "put _ALL_" 35 lines below it.  Note that this will slow program
	 execution and often create large .log files.
***;
   array psums[&num_psmps.] _TEMPORARY_;                                                            
*  array psums{&num_psmps.} psmp1-psmp&num_psmps.;
   array temp[&max_tot_freq.]   _TEMPORARY_;                                                            
   retain num_psmps &num_psmps.;                                                
   do _n_ = 1 by 1 until(last.&last_byvar.);                                                   
      merge &indata.(keep=&byvars. &permvar. &samp2var.) 
		      byvar_sum_min
				;
      by &byvars.;                                                                                   
      temp[_n_]=&permvar.;                                                                               
   end;                                                                                              
   seed = 1*&seed.;
   do m=1 to num_psmps;                                                                              
      x=0;
      tot_FREQ_incr = tot_FREQ;                                                                                     
      do n=1 to _FREQ_;
	      cell = floor(ranuni(seed)*tot_FREQ_incr) + 1;
	      x = temp[cell] + x;
		   hold = temp[cell];
		   temp[cell]=temp[tot_FREQ_incr];
		   temp[tot_FREQ_incr] = hold;
		   tot_FREQ_incr+(-1);
	   end;
      psums[m] = x;
   end;  

   n_psamp = _FREQ_;
                                                                                      
   p_right = 0;
   p_left  = 0;
	p_both  = 0;
	call sortn(of psums[*]);
   pmed = median(of psums[*]);                                                                       
   pmean = mean(of psums[*]);                                                                       

* put _ALL_;

*** Efficiently handle extreme test sample values.;

   IF sumpvar<psums[1] THEN DO;
      p_left=0;
      p_right=num_psmps;
      p_both=0;
   END;
   ELSE IF sumpvar>psums[num_psmps] THEN DO;
      p_left=num_psmps;
      p_right=0;
      p_both=0;
   END;
   ELSE DO;

*** For non-extreme cases, start with shorter tail for less looping.;

   if pmed>=sumpvar then do;
      do z=1 to num_psmps;
         if sumpvar>=psums[z] then p_left+1;
         else do;
            lastbinnum = z-1;
            distance_left = pmean - psums[z-1];
            leave;
         end;
      end;

*** Avoid loop for other (larger) p-value.
    If sumpvar equals last bin, p_right = 1 - p_left + lastbinsize.
    Otherwise, p_right = 1 - p_left.
***;
     if sumpvar = psums[lastbinnum] then do;
        lastbinsize=1;
        do k=lastbinnum to 1 by -1;
           if psums[k]=psums[k-1] then lastbinsize+1;
           leave;
        end;
        p_right = num_psmps - p_left + lastbinsize;
     end;
     else p_right = num_psmps - p_left;
   end;

   else do; 
     do z=num_psmps to 1 by -1;
        if sumpvar<=psums[z] then p_right+1;
        else do;
           lastbinnum = z+1;
           distance_right = psums[z+1] - pmean;
           leave;
        end;
     end;

*** Avoid loop for other (larger) p-value.
    If psum equals last bin, p_left = 1 - p_right + lastbinsize.
    Otherwise, p_left = 1 - p_right.
***;
     if sumpvar = psums[lastbinnum] then do;
        lastbinsize=1;
        do k=lastbinnum to num_psmps;
           if psums[k]=psums[k+1] then lastbinsize+1;
           else leave;
        end;
        p_left = num_psmps - p_right + lastbinsize;
     end;
     else p_left = num_psmps - p_right;
   end;

*** Base 2-sided p-value on distance from mean of last (i.e. least extreme) bin 
    of smaller p-value.  This is common practice.
***;

   if p_left<p_right then do;
      p_both = p_left;
      do z=num_psmps to 1 by -1; 
         if (psums[z] - pmean) >= distance_left then p_both+1;
         else leave;
      end;
   end;
   else if p_left>p_right then do;
      p_both = p_right;
      do z=1 to num_psmps;
         if (pmean - psums[z]) >= distance_right then p_both+1;
         else leave;
      end;
   end;
   else p_both=num_psmps;

*** Account for possibility, due to psum=a particular bin value, that 
    p_both>num_psmps.
***;
   p_both = min(p_both,num_psmps);

   END;

*** If CONTROL sample is smaller than TEST (which is atypical), reverse 
*** p-values, as empirical distribution is mirror of itself.;

   if &samp2var.="C" then do;
      hold = p_left;
	   p_left = p_right;
	   p_right = hold;
	end;

   p_left  = p_left  / num_psmps;
   p_right = p_right / num_psmps;
   p_both  = p_both  / num_psmps;

   length permvar $32;                                                                               
   retain permvar "&permvar";                                                                        
   output;                                                                                           
   run;                                                                                              

data &outdata.;
   set &outdata.(keep=&byvars. permvar num_psmps n_psamp p_left p_right p_both);                                                       
   label permvar   = "Permuted Variable"
         n_psamp   = "Size of Permutation Samples"
         num_psmps = "# of Permutation Samples"
         p_left    = "Left p-value"
         p_right   = "Right p-value"
         p_both    = "Two-Tailed p-value"
         ;
   run;


*** Optional.;

proc datasets lib=work memtype=data kill nodetails;
   run;

%MEND OPDN_alt;																																										 

%OPDN_alt(num_psmps = 1000,  
          indata    = MFPTUS.pricing_data_2strata_100000, 
          outdata   = MFPTUS.OPDN_alt_100000_2strata,  
          byvars    = geography segment, 
          samp2var  = cntrl_test, 
          permvar   = price, 
          seed      =
         );



*** OPDN ***;
*** OPDN ***;
*** OPDN ***;

%MACRO OPDN(num_psmps=, 
            indata=, 
            outdata=, 
            byvars=, 
            samp2var=, 
            permvar=, 
            seed=
            );


*** The only assumption made within this macro is that the byvars are all 
    character variables and the input dataset is sorted by the "by variables" 
	 that define the strata.  Also, the "TEST" and "CONTROL" samples are defined
    by a variable "cntrl_test" formatted as $1. containing "T" and "C" 
	 character strings, respectively, as values.
***;

*** Obtain the last byvar, count the byvars, and assign each byvar into numbered
    macro variables for easy access/processing.
***;

%let last_byvar = %scan(&byvars.,-1);
%let num_byvars = %sysfunc(countw(&byvars.));
%do i=1 %to &num_byvars.;
   %let byvar&i. = %scan(&byvars.,&i.);
%end;

*** If user does not pass a value to the optional macro variable "seed," use -1
    based on the time of day.
***;
%if %sysevalf(%superq(seed)=,boolean) %then %let seed=-1;

*** Obtain counts and cumulated counts for each strata.;

proc summary data=&indata. nway;
  class &byvars. &samp2var.;
  var &permvar.;
  output out=byvar_sum(keep = _FREQ_ &byvars. &samp2var. sumpvar) 
                        sum = sumpvar
                       ;
  run;

*** Identify and keep the smaller of the two samples for more efficient 
    sampling.  Below, invert the empirical permutation distribution if CONTROL
	 sample is smaller that TEST sample (which is not typical).  That will 
	 remain consistent with output variables corresponding to: 
	      p_left  = Pr(x>X | Ho: Test>=Control)
	      p_right = Pr(x<X | Ho: Test<=Control)
	      p_both  = Pr(x=X | Ho: Test=Control)
	 where x is the sample permutation statistic and X is the distribution of
	 permutation statistic values.
***;

data byvar_sum_min(keep=tot_FREQ _FREQ_ &byvars. &samp2var. sumpvar sortedby=&byvars.);
  set byvar_sum;
  format tot_FREQ _FREQ_ 16.;
  by &byvars.;
  retain lag_FREQ lag_sum lag_samp2;
  if first.&last_byvar. then do;
	  lag_FREQ = _FREQ_;
	  lag_sum  = sumpvar;
	  lag_samp2  = &samp2var.;
  end;
  else do;
     tot_FREQ = sum(lag_FREQ,_FREQ_);
     if _FREQ_<=lag_FREQ then output;
	  else do;
	     _FREQ_  = lag_FREQ;
	     sumpvar = lag_sum;
	     &samp2var. = lag_samp2;
	     output;
	  end;
  end;
  run;


*** Obtain number of strata, and use this number to count and separately permute 
    each strata below.
***;

%let dsid = %sysfunc(open(byvar_sum_min));
%let n_byvals = %sysfunc(ifc(&dsid.
                                   ,%nrstr(%sysfunc(attrn(&dsid.,nobs))
                                           %let rc = %sysfunc(close(&dsid.));
							    )
                                   ,%nrstr(0) 
				        )
                        );
*** In case number is large, avoid scientific notation.;
%let n_byvals = %sysfunc(putn(&n_byvals.,16.));


*** Cumulate counts of strata for efficient (re)use of _TEMPORARY_ array.;

data byvar_sum_min(drop=prev_freq);
  set byvar_sum_min;
  format cum_prev_freq _FREQ_ 16.;
  retain cum_prev_freq 0;
  prev_freq = lag(tot_FREQ);
  if _n_=1 then prev_freq = 0;
  cum_prev_freq = sum(cum_prev_freq, prev_freq);
  run;


*** For access in data step below (that uses data _null_ for memory 
    conservation), put into macro strings a) smaller-of-two-sample counts, 
	 b) total counts for each strata, c) cumulated total counts, d) summed 
	 permutation variable, d) which of two samples is smaller, and e) byvar 
	 values. 
***;

proc sql noprint;
  select _freq_ into :freqs separated by ' ' from byvar_sum_min;
  quit;
proc sql noprint;
  select tot_FREQ into :tot_FREQs separated by ' ' from byvar_sum_min;
  quit;
proc sql noprint;
  select cum_prev_freq into :cum_prev_freqs separated by ' ' from byvar_sum_min;
  quit;
proc sql noprint;
  select sumpvar into :sumpvar separated by ' ' from byvar_sum_min;
  quit;
proc sql noprint;
  select &samp2var. into :samp2var separated by ' ' from byvar_sum_min;
  quit;

%do i=1 %to &num_byvars.;
   proc sql noprint;
     select &&byvar&i. into :byvals&i. separated by ' ' from byvar_sum_min;
     quit;
%end;


*** Get size of largest stratum for efficient (re)use of _TEMPORARY_ array.;

proc sql noprint;
  select max(tot_FREQ) into :max_tot_freq separated by ' ' from byvar_sum_min;
  quit;
*** In case number is large, avoid scientific notation.;
%let max_tot_freq = %sysfunc(putn(&max_tot_freq.,16.));


*** Save each stratums results in cumulated macro variables instead of 
    outputting to a dataset on the data step to lessen intermediate memory
	 requirements.
*** ;

*** Initialize macro variables used below.;
%let p_left =;
%let p_right =;
%let p_both =;
%let samp_small_size =;

data _null_;
  set &indata.;
  by &byvars.;
  format which_sample $1.;
*** To view permutation distributions for each strata in .log file.,  
    comment out first line below this comment, uncomment the two lines below 
	 it, and uncomment "put _ALL_" 37 lines below it.  Note that this will slow 
	 program execution and often create large .log files.
***;
  array psums{&num_psmps.} _TEMPORARY_;
*  array psums{&num_psmps.} psmp1-psmp&num_psmps.;
*  retain byval_counter 0 cum_prev_freq 0 psmp1-psmp&num_psmps.;
  array temp{&max_tot_freq.} _TEMPORARY_;
  retain byval_counter 0 cum_prev_freq 0;

  temp[_n_-cum_prev_freq]=&permvar.;

  if last.&last_byvar. then do;
     byval_counter+1;
     num_psmps = &num_psmps.*1;
     psmp_size = 1 * scan("&freqs.", byval_counter,' ');
	  which_sample = COMPRESS(UPCASE(scan("&samp2var.", byval_counter,' ')),' ');
     tot_FREQ_hold = 1 * scan("&tot_FREQs.", byval_counter,' ');
	  seed = 1*&seed.;

     do m=1 to num_psmps;
        x=0;
        tot_FREQ = tot_FREQ_hold;
        do n=1 to psmp_size;
	        cell = floor(ranuni(seed)*tot_FREQ) + 1;
	        x = temp[cell] + x;
		  hold = temp[cell];
		  temp[cell]=temp[tot_FREQ];
		  temp[tot_FREQ] = hold;
		  tot_FREQ+(-1);
	     end;
	     psums[m] = x;
     end;
     psum = 1*scan("&sumpvar.", byval_counter,' ');
     p_right = 0;
	  p_left  = 0;
	  p_both  = 0;
	  call sortn(of psums[*]);
     pmed = median(of psums[*]);                                                                       
     pmean = mean(of psums[*]);  
                                                                     
* put _ALL_;

*** Efficiently handle extreme test sample values.;

     IF psum<psums[1] THEN DO;
	     p_left=0;
	     p_right=num_psmps;
	     p_both=0;
	  END;
	  ELSE IF psum>psums[num_psmps] THEN DO;
	     p_left=num_psmps;
	     p_right=0;
	     p_both=0;
	  END;
	  ELSE DO;

*** For non-extreme cases, start with shorter tail for less looping.;

     if pmed>=psum then do;
	     do z=1 to num_psmps;
	        if psum>=psums[z] then p_left+1;
		  else do;
		     lastbinnum = z-1;
		     distance_left = pmean - psums[z-1];
	           leave;
		  end;
	     end;

*** Avoid loop for other (larger) p-value.
    If psum equals last bin, p_right = 1 - p_left + lastbinsize.
    Otherwise, p_right = 1 - p_left.
***;
	     if psum = psums[lastbinnum] then do;
		  lastbinsize=1;
	        do k=lastbinnum to 1 by -1;
		     if psums[k]=psums[k-1] then lastbinsize+1;
		     leave;
		  end;
		  p_right = num_psmps - p_left + lastbinsize;
	     end;
	     else p_right = num_psmps - p_left;
	  end;

	  else do; 
	     do z=num_psmps to 1 by -1;
	        if psum<=psums[z] then p_right+1;
		     else do;
		        lastbinnum = z+1;
		        distance_right = psums[z+1] - pmean;
	              leave;
		     end;
		  end;

*** Avoid loop for other (larger) p-value.
    If psum equals last bin, p_left = 1 - p_right + lastbinsize.
    Otherwise, p_left = 1 - p_right.
***;
		  if psum = psums[lastbinnum] then do;
		     lastbinsize=1;
	           do k=lastbinnum to num_psmps;
		        if psums[k]=psums[k+1] then lastbinsize+1;
			  else leave;
		     end;
		     p_left = num_psmps - p_right + lastbinsize;
		  end;
		  else p_left = num_psmps - p_right;
	     end;

*** Base 2-sided p-value on distance from mean of last (i.e. least extreme) bin 
    of smaller p-value.  This is common practice.
***;

	     if p_left<p_right then do;
	        p_both = p_left;
           do z=num_psmps to 1 by -1; 
	           if (psums[z] - pmean) >= distance_left then p_both+1;
		     else leave;
		  end;
	     end;
  	     else if p_left>p_right then do;
	        p_both = p_right;
           do z=1 to num_psmps;
	           if (pmean - psums[z]) >= distance_right then p_both+1;
		     else leave;
		  end;
	     end;
	     else p_both=num_psmps;

*** Account for possibility, due to psum=a particular bin value, that 
    p_both>num_psmps.
***;
	     p_both = min(p_both,num_psmps);

	  END;

	  p_left  = p_left  / num_psmps;
	  p_right = p_right / num_psmps;
	  p_both  = p_both  / num_psmps;

*** If CONTROL sample is smaller than TEST (which is atypical), reverse 
*** p-values, as empirical distribution is mirror of itself.;

     if "C"=COMPRESS(UPCASE(scan("&samp2var.", byval_counter,' ')),' ') then do;
	     hold = p_left;
	     p_left = p_right;
	     p_right = hold;
	  end;

*** Cumulate key macro variables to save results.;

     call symput('p_left',symget('p_left')||" "||compress(p_left));
     call symput('p_right',symget('p_right')||" "||compress(p_right));
     call symput('p_both',symget('p_both')||" "||compress(p_both));
	  cum_prev_freq = 1*scan("&cum_prev_freqs.",byval_counter+1,' ');
  end;
  run;

*** Obtain and assign the format of each byvar, all of which are assumed to be 
    character variables.
***;

data lens(keep=lens);
  set &indata.(keep=&byvars. firstobs=1 obs=1);
  do i=1 to &num_byvars.; 
     lens = vlengthx(scan("&byvars.",i));
	  output;
  end;
  run;
proc sql noprint;
  select lens into :alllens separated by ' ' from lens;
  quit;
%macro assign_formats;
   %do i=1 %to &num_byvars.;
      &&byvar&i. $%scan(&alllens.,&i.).
	%end;
%mend assign_formats;


*** Assign each byvar value for each stratum.;

%macro assign_byvar_vals(which_strata=);
   %do j=1 %to &num_byvars.;
		&&byvar&j. = scan("&&byvals&j.",&which_strata.,' ');
   %end;
%mend assign_byvar_vals;


*** Unwind and assign all the cumulated macro variables.;

data &outdata.(sortedby=&byvars. drop=n_byvals i);
  n_byvals = 1*&n_byvals.;
  format %assign_formats;
  do i=1 to n_byvals;
     length permvar $32;                                                                               
     permvar = "&permvar.";                                                                        
     n_psamp = 1 * scan("&freqs.", i,' ');
     num_psmps = &num_psmps.;
     p_left = 1*scan("&p_left.",i,' ');
     p_right = 1*scan("&p_right.",i,' ');
     p_both = 1*scan("&p_both.",i,' ');
	  %assign_byvar_vals(which_strata = i)
     label permvar   = "Permuted Variable"
           n_psamp   = "Size of Permutation Samples"
           num_psmps = "# of Permutation Samples"
           p_left    = "Left p-value"
           p_right   = "Right p-value"
           p_both    = "Two-Tailed p-value"
           ;
     output;
  end;
  run;


*** Optional.;

proc datasets lib=work memtype=data kill nodetails;
  run;

%MEND OPDN;

%OPDN(num_psmps = 1000, 
      indata    = MFPTUS.pricing_data_2strata_100000, 
      outdata   = MFPTUS.OPDN_100000_2strata,  
      byvars    = geography segment, 
      samp2var  = cntrl_test, 
      permvar   = price, 
      seed      =
     );



*** PROC_SS ***;
*** PROC_SS ***;
*** PROC_SS ***;

%MACRO PROCSS(num_psmps=, 
              indata=, 
              outdata=, 
              byvars=, 
              samp2var=, 
              permvar=, 
              seed=
             );


*** If user does not pass a value to the optional macro variable "seed," use -1
    based on the time of day.
***;
%if %sysevalf(%superq(seed)=,boolean) %then %let seed=-1;

*** Obtain counts and cumulated counts for each strata.;

proc summary data=&indata. nway;
  class &byvars. &samp2var.;
  var &permvar.;
  output out=byvar_sum(keep = _FREQ_ &byvars. &samp2var. sumpvar) 
                        sum = sumpvar
                       ;
  run;

*** Identify and keep the smaller of the two samples for more efficient 
    sampling.  Below, invert the empirical permutation distribution if CONTROL
	 sample is smaller that TEST sample (which is not typical).  That will 
	 remain consistent with output variables corresponding to: 
	      p_left  = Pr(x>X | Ho: Test>=Control)
	      p_right = Pr(x<X | Ho: Test<=Control)
	      p_both  = Pr(x=X | Ho: Test=Control)
	 where x is the sample permutation statistic and X is the distribution of
	 permutation statistic values.
***;

%let last_byvar = %scan(&byvars.,-1);
data byvar_sum(keep=&byvars. _NSIZE_ sumpvar &samp2var. sortedby=&byvars.);
  set byvar_sum(rename=(_FREQ_=_NSIZE_));
  by &byvars.;
  retain lag_NSIZE lag_sum lag_samp2;
  if first.&last_byvar. then do;
     lag_NSIZE = _NSIZE_;
	  lag_sum  = sumpvar;
	  lag_samp2  = &samp2var.;
  end;
  else do;
     if _NSIZE_<=lag_NSIZE then output;
	  else do;
		  _NSIZE_  = lag_NSIZE;
		  sumpvar = lag_sum;
		  &samp2var. = lag_samp2;
		  output;
	  end;
  end;
  run;

*** From SAS Online Documentation:
    For simple random sampling without replacement, if there is enough memory 
    for it PROC SURVEYSELECT uses Floyds ordered hash table algorithm (see 
	 Bentley and Floyd (1987) and Bentley and Knuth (1986) for details).  If 
	 there is not enough memory available for Floyds algorithm, PROC 
	 SURVEYSELECT switches to the sequential algorithm of Fan, Muller, and 
	 Rezucha (1962), which requires less memory but might require more time to 
	 select the sample. 
***;

proc surveyselect data     = &indata.(drop=&samp2var.) 
                  method   = srs 
			   sampsize = byvar_sum(keep=&byvars. _NSIZE_) 
			   rep      = &num_psmps. 
			   seed     = &seed. 
			   out      = PSS_perm_Samps(drop=SamplingWeight SelectionProb)
			   noprint;
  strata &byvars.;
  run;

proc summary data=PSS_perm_Samps nway;
  class &byvars. replicate;
  var &permvar.;
output out=PSS_perm_sums(sortedby=&byvars. replicate keep=&byvars. replicate &permvar.) sum=;
  run;

proc transpose data=PSS_perm_sums out=PSS_perm_sums_t(rename=(_NAME_=permvar)) prefix=psmp;
  var &permvar.;
  by &byvars.;
  id replicate;
  run;

data &outdata.(keep=&byvars. permvar n_psamp num_psmps p_left p_right p_both)
     error
	  ;
  merge PSS_perm_sums_t(in=insamps)
        byvar_sum(in=insummary)
		  ;
  by &byvars.;
  if insamps & insummary then do;
     array psums[&num_psmps.] psmp1-psmp&num_psmps.;                                                            
     n_psamp = _NSIZE_;
     num_psmps = 1*&num_psmps.;
     p_left  = 0;
     p_right = 0;
     p_both  = 0;
     call sortn(of psums[*]);
     pmed = median(of psums[*]);                                                                       
     pmean = mean(of psums[*]);                                                                       

*** Efficiently handle extreme test sample values.;

     IF sumpvar<psums[1] THEN DO;
        p_left=0;
        p_right=num_psmps;
        p_both=0;
     END;
     ELSE IF sumpvar>psums[num_psmps] THEN DO;
        p_left=num_psmps;
        p_right=0;
        p_both=0;
     END;
     ELSE DO;

*** For non-extreme cases, start with shorter tail for less looping.;

        if pmed>=sumpvar then do;
           do z=1 to num_psmps;
              if sumpvar>=psums[z] then p_left+1;
              else do;
                 lastbinnum = z-1;
                 distance_left = pmean - psums[z-1];
                 leave;
              end;
           end;

*** Avoid loop for other (larger) p-value.
    If sumpvar equals last bin, p_right = 1 - p_left + lastbinsize.
    Otherwise, p_right = 1 - p_left.
***;
           if sumpvar = psums[lastbinnum] then do;
              lastbinsize=1;
              do k=lastbinnum to 1 by -1;
                 if psums[k]=psums[k-1] then lastbinsize+1;
                 leave;
              end;
              p_right = num_psmps - p_left + lastbinsize;
           end;
           else p_right = num_psmps - p_left;
        end;

        else do; 
           do z=num_psmps to 1 by -1;
              if sumpvar<=psums[z] then p_right+1;
              else do;
                 lastbinnum = z+1;
                 distance_right = psums[z+1] - pmean;
                 leave;
              end;
           end;

*** Avoid loop for other (larger) p-value.
    If psum equals last bin, p_left = 1 - p_right + lastbinsize.
    Otherwise, p_left = 1 - p_right.
***;
           if sumpvar = psums[lastbinnum] then do;
              lastbinsize=1;
              do k=lastbinnum to num_psmps;
                 if psums[k]=psums[k+1] then lastbinsize+1;
                 else leave;
              end;
              p_left = num_psmps - p_right + lastbinsize;
           end;
           else p_left = num_psmps - p_right;
        end;

*** Base 2-sided p-value on distance from mean of last (i.e. least extreme) bin 
    of smaller p-value.  This is common practice.
***;

        if p_left<p_right then do;
           p_both = p_left;
           do z=num_psmps to 1 by -1; 
              if (psums[z] - pmean) >= distance_left then p_both+1;
              else leave;
           end;
        end;
        else if p_left>p_right then do;
           p_both = p_right;
           do z=1 to num_psmps;
              if (pmean - psums[z]) >= distance_right then p_both+1;
              else leave;
           end;
        end;
        else p_both=num_psmps;

*** Account for possibility, due to psum=a particular bin value, that 
    p_both>num_psmps.
***;
        p_both = min(p_both,num_psmps);

     END;

     p_left  = p_left  / num_psmps;
     p_right = p_right / num_psmps;
     p_both  = p_both  / num_psmps;

*** If CONTROL sample is smaller than TEST (which is atypical), reverse 
*** p-values, as empirical distribution is mirror of itself.;

     if &samp2var.="C" then do;
	     hold = p_left;
 	     p_left = p_right;
	     p_right = hold;
	  end;

     label permvar   = "Permuted Variable"
           n_psamp   = "Size of Permutation Samples"
           num_psmps = "# of Permutation Samples"
           p_left    = "Left p-value"
           p_right   = "Right p-value"
           p_both    = "Two-Tailed p-value"
           ;
     output &outdata.;
  end;
  else output error;
  run;

proc datasets lib=work memtype=data kill nodetails;
  run;

%MEND PROCSS;

%PROCSS(num_psmps = 1000, 
        indata    = MFPTUS.pricing_data_2strata_100000, 
        outdata   = MFPTUS.PROCSS_100000_2strata,  
        byvars    = geography segment, 
        samp2var  = cntrl_test, 
        permvar   = price, 
        seed      =
        );



*** PROC_MT ***;
*** PROC_MT ***;
*** PROC_MT ***;

%MACRO PROCMT(num_psmps=, 
              indata=, 
              outdata=, 
              byvars=, 
              samp2var=, 
              permvar=, 
              seed=,
				  left_or_right=
              );


*** If user does not pass a value to the optional macro variable "seed," 
    generate a random seed and use it for all three proc multtests below
	 (although SAS OnlineDoc says seed=-1 should work, it does not).
***;
%if %sysevalf(%superq(seed)=,boolean) 
%then %let seed=%sysfunc(ceil(%sysevalf(1000000000*%sysfunc(ranuni(-1)))));

*** Un/comment test statements below to perform permutation tests based 
    on different assumptions about the variance structure of the samples.;
***;

proc multtest    data = &indata. 
              nsample = &num_psmps. 
			  seed = &seed. 
			   out = mt_output_results_2t(keep=&byvars. perm_p rename=(perm_p=xp_both))
			  permutation
			    noprint;
  by &byvars.;
  class &samp2var.;
*  test mean (&permvar. / DDFM=SATTERTHWAITE);
  test mean (&permvar.);
  run;


*** To make runtime results comparable to PROC NPAR1WAY, which provides only
    two-tailed p-value and the smaller of the right or left p-values, run
	 two of the three PROC MULTTESTs and calculate the second tail as one
	 minus the given tail, which will usually be very close to the actual
	 value unless the data is highly discretized.
***;

proc multtest    data = &indata. 
              nsample = &num_psmps. 
	  	        seed = &seed. 
%if %UPCASE(%sysfunc(compress(&left_or_right.)))=RIGHT %then %do;
		         out = mt_output_results_up(keep=&byvars. perm_p rename=(perm_p=xp_right))
%end;
%if %UPCASE(%sysfunc(compress(&left_or_right.)))=LEFT %then %do;
   	         out = mt_output_results_low(keep=&byvars. perm_p rename=(perm_p=xp_left))
%end;
			  permutation
			    noprint;
  by &byvars.;
  class &samp2var.;
%if %UPCASE(%sysfunc(compress(&left_or_right.)))=RIGHT %then %do;
*  test mean (&permvar. / upper DDFM=SATTERTHWAITE);
  test mean (&permvar. / upper);
%end;
%if %UPCASE(%sysfunc(compress(&left_or_right.)))=LEFT %then %do;
*  test mean (&permvar. / lower DDFM=SATTERTHWAITE);
  test mean (&permvar. / lower);
%end;
  run;

proc summary data=&indata. nway;
  class &byvars. &samp2var.;
  var &permvar.;
  output out=byvar_frq(keep = _FREQ_ &byvars.) 
                          n = toss
                          ;
  run;

%let last_byvar = %scan(&byvars.,-1);
data byvar_frq(keep=&byvars. xn_psamp sortedby=&byvars.);
  set byvar_frq(rename=(_FREQ_=xn_psamp));
  by &byvars.;
  retain lag_FREQ;
  if first.&last_byvar. then lag_FREQ = xn_psamp;
  else do;
     if xn_psamp<=lag_FREQ then output;
	  else do;
		  xn_psamp  = lag_FREQ;
		  output;
	  end;
  end;
  run;


data &outdata.(drop=xn_psamp xp_left xp_both)
     error
	  ;
%if %UPCASE(%sysfunc(compress(&left_or_right.)))=LEFT %then %do;
  merge mt_output_results_low(in=inlow)
		  mt_output_results_2t(in=in2t)
        byvar_frq(in=infrq)
        ;
  if in2t & inlow & infrq then do;
     format permvar $32.;
     permvar = "&permvar.";
	  n_psamp = xn_psamp;
     num_psmps = 1*&num_psmps.;
	  p_left = xp_left;
	  p_right = .;
%end;
%if %UPCASE(%sysfunc(compress(&left_or_right.)))=RIGHT %then %do;
  merge mt_output_results_up(in=inup)
		  mt_output_results_2t(in=in2t)
        byvar_frq(in=infrq)
        ;
  if in2t & inup & infrq then do;
     format permvar $32.;
     permvar = "&permvar.";
	  n_psamp = xn_psamp;
     num_psmps = 1*&num_psmps.;
	  p_right = xp_right;
	  p_left = .;
%end;
	  p_both = xp_both;
     label permvar   = "Permuted Variable"
           n_psamp   = "Size of Permutation Samples"
           num_psmps = "# of Permutation Samples"
           p_left    = "Left p-value"
           p_right   = "Right p-value"
           p_both    = "Two-Tailed p-value"
           ;
     output &outdata.;
  end;
  else output error;
  run;


proc datasets lib=work memtype=data kill nodetails;
  run;

%MEND PROCMT;

%PROCMT(num_psmps     = 1000, 
        indata        = MFPTUS.pricing_data_2strata_100000, 
        outdata       = MFPTUS.PROCMT_100000_2strata,  
        byvars        = geography segment, 
        samp2var      = cntrl_test, 
        permvar       = price, 
        seed          = ,
		  left_or_right = left
        );




*** PROCNPAR ***;
*** PROCNPAR ***;
*** PROCNPAR ***;

%MACRO PROCNPAR(num_psmps=, 
                indata=, 
                outdata=, 
                byvars=, 
                samp2var=, 
                permvar=, 
                seed=
                );

*** If user does not pass a value to the optional macro variable "seed," use -1
    based on the time of day.
***;
%if %sysevalf(%superq(seed)=,boolean) %then %let seed=-1;

ods listing close;
proc npar1way    data = &indata. 
               scores = data
					;
  var &permvar.;
  by &byvars.;
  class &samp2var.;
  exact scores=data / n=&num_psmps. seed=&seed.;
  ods output DataScoresMC = hold(keep = &byvars. Name1 Label1 nValue1
                                where = (Label1 = "Estimate")
                                );
  run;
ods listing;

proc transpose data = hold(drop=Label1)
                out = &outdata.(drop = _NAME_);
  by &byvars.;
  id Name1;
  var nValue1;
  run;

proc summary data=&indata. nway;
  class &byvars. &samp2var.;
  var &permvar.;
  output out=byvar_frq(keep = _FREQ_ &byvars. &samp2var.) 
                          n = toss
                          ;
  run;

%let last_byvar = %scan(&byvars.,-1);
data byvar_frq(keep=&byvars. permvar n_psamp num_psmps &samp2var. sortedby=&byvars.);
  set byvar_frq;
  by &byvars.;
  format permvar $32.;
  retain permvar "&permvar." lag_FREQ lag_samp2 ;
  n_psamp = _FREQ_;
  num_psmps = 1*&num_psmps.;
  if first.&last_byvar. then do;
     lag_FREQ = n_psamp;
	  lag_samp2  = &samp2var.;
  end;
  else do;
     if n_psamp<=lag_FREQ then output;
	  else do;
		  n_psamp  = lag_FREQ;
		  &samp2var. = lag_samp2;
		  output;
	  end;
  end;
  run;

data &outdata.(drop=hold mcpl_data mcpr_data mcp2_data &samp2var.)
     error
	  ;
  merge byvar_frq(in=infrq)
		  &outdata.(in=inresults)
        ;
  by &byvars.;
  if inresults & infrq then do;
  p_left  = mcpl_data;
  p_right = mcpr_data;
  p_both  = mcp2_data;
  label permvar   = "Permuted Variable"
        n_psamp   = "Size of Permutation Samples"
        num_psmps = "# of Permutation Samples"
        p_left    = "Left p-value"
        p_right   = "Right p-value"
        p_both    = "Two-Tailed p-value"
        ;

*** If CONTROL sample is smaller than TEST (which is atypical), reverse 
*** p-values, as empirical distribution is mirror of itself.;

   if &samp2var.="C" then do;
      hold = p_left;
	   p_left = p_right;
	   p_right = hold;
   end;
	output &outdata.;
	end;
	else output error;
  run;

%MEND PROCNPAR;

%PROCNPAR(num_psmps = 1000, 
          indata    = MFPTUS.pricing_data_2strata_100000, 
          outdata   = MFPTUS.PROCNPAR_100000_2strata,  
          byvars    = geography segment, 
          samp2var  = cntrl_test, 
          permvar   = price, 
          seed      =
          );




*** BEBB_SIM ***;
*** BEBB_SIM ***;
*** BEBB_SIM ***;


%MACRO BEBB_SIM(num_psmps=, 
                indata=, 
                outdata=, 
                byvars=, 
                samp2var=, 
                permvar=, 
                seed=
                );

*** If user does not pass a value to the optional macro variable "seed," use -1
    based on the time of day.
***;
%if %sysevalf(%superq(seed)=,boolean) %then %let seed=-1;


*** Obtain counts for each strata.;

proc summary data=&indata. nway;
  class &byvars. &samp2var.;
  var &permvar.;
  output out=byvar_sum(keep = _FREQ_ &byvars. &samp2var. sumpvar) 
                        sum = sumpvar
                       ;
  run;

%let last_byvar = %scan(&byvars.,-1);
data byvar_sum_min(keep=tot_FREQ _FREQ_ &byvars. &samp2var. sumpvar sortedby=&byvars.);
  set byvar_sum;
  by &byvars.;
  retain lag_FREQ lag_sum lag_samp2;
  if first.&last_byvar. then do;
	  lag_FREQ = _FREQ_;
	  lag_sum  = sumpvar;
	  lag_samp2  = &samp2var.;
  end;
  else do;
     tot_FREQ = sum(lag_FREQ,_FREQ_);
     if _FREQ_<=lag_FREQ then output;
	  else do;
		  _FREQ_  = lag_FREQ;
		  sumpvar = lag_sum;
		  &samp2var. = lag_samp2;
		  output;
	  end;
  end;
  run;

*** Slightly faster to use macro variable value strings and scan() than to merge
    _FREQ_ etc. onto the "indata" dataset, especially for large "indata" 
	 datasets.
*** ;
proc sql noprint;
  select _freq_ into :freqs separated by ' ' from byvar_sum_min;
  quit;
proc sql noprint;
  select tot_FREQ into :tot_FREQs separated by ' ' from byvar_sum_min;
  quit;
proc sql noprint;
  select sumpvar into :sumpvar separated by ' ' from byvar_sum_min;
  quit;
proc sql noprint;
  select &samp2var. into :samp2var separated by ' ' from byvar_sum_min;
  quit;


*** simultaneously create all permstrap samples and summarize results of each 
    as the end of stratum is reached
***;

%let last_byvar = %scan(&byvars.,-1);

data &outdata.(keep=&byvars. permvar n_psamp num_psmps p_left p_right p_both);
  set &indata.(keep=&byvars. &permvar.) end=lastrec;
  by &byvars.;
  retain permvar "&permvar" n_psamp 0 num_psmps &num_psmps.;                                                                        

  array smalln_counter{&num_psmps.} _TEMPORARY_;
  array psums{&num_psmps.} _TEMPORARY_;

  if first.&last_byvar. then do;													    
	  byval_counter+1;

     _freq_ = 1*scan("&freqs.",byval_counter,' ');
     n_psamp = _FREQ_;
     tot_FREQ = 1*scan("&tot_FREQs.",byval_counter,' ');

	  bigN_counter = tot_FREQ+1;
	  do i=1 to num_psmps;
	     smalln_counter[i] = _FREQ_;
		  psums[i] = 0;
	  end;
  end;
  bigN_counter+(-1);	

  min_mac_res_inloop = &permvar.;

  seed=1*&seed.;
  if last.&last_byvar.~=1 then do i=1 to num_psmps;
     if ranuni(seed) <= smalln_counter[i]/bigN_counter then do;
        psums[i] = min_mac_res_inloop + psums[i];
		  smalln_counter[i]+(-1);
	  end;
  end;
  else do i=1 to num_psmps;
     if smalln_counter[i]>0 then psums[i] = min_mac_res_inloop + psums[i];
  end;
  if last.&last_byvar. then DO;													    

*** If CONTROL sample is smaller than TEST (which is atypical), reverse 
*** order of empirical distribution.;

     sumpvar = 1*scan("&sumpvar.",byval_counter,' ');

     p_left  = 0;
     p_right = 0;
     p_both  = 0;
     call sortn(of psums[*]);
     pmed = median(of psums[*]);                                                                       
     pmean = mean(of psums[*]);                                                                       

*** Efficiently handle extreme test sample values.;

     IF sumpvar<psums[1] THEN DO;
        p_left=0;
        p_right=num_psmps;
        p_both=0;
     END;
     ELSE IF sumpvar>psums[num_psmps] THEN DO;
        p_left=num_psmps;
        p_right=0;
        p_both=0;
     END;
     ELSE DO;

*** For non-extreme cases, start with shorter tail for less looping.;

     if pmed>=sumpvar then do;
        do z=1 to num_psmps;
           if sumpvar>=psums[z] then p_left+1;
           else do;
              lastbinnum = z-1;
              distance_left = pmean - psums[z-1];
              leave;
           end;
        end;

*** Avoid loop for other (larger) p-value.
    If sumpvar equals last bin, p_right = 1 - p_left + lastbinsize.
    Otherwise, p_right = 1 - p_left.
***;
        if sumpvar = psums[lastbinnum] then do;
           lastbinsize=1;
           do k=lastbinnum to 1 by -1;
              if psums[k]=psums[k-1] then lastbinsize+1;
              leave;
           end;
           p_right = num_psmps - p_left + lastbinsize;
        end;
        else p_right = num_psmps - p_left;
     end;

     else do; 
       do z=num_psmps to 1 by -1;
          if sumpvar<=psums[z] then p_right+1;
          else do;
             lastbinnum = z+1;
             distance_right = psums[z+1] - pmean;
             leave;
          end;
       end;

*** Avoid loop for other (larger) p-value.
    If psum equals last bin, p_left = 1 - p_right + lastbinsize.
    Otherwise, p_left = 1 - p_right.
***;
       if sumpvar = psums[lastbinnum] then do;
          lastbinsize=1;
          do k=lastbinnum to num_psmps;
             if psums[k]=psums[k+1] then lastbinsize+1;
             else leave;
          end;
          p_left = num_psmps - p_right + lastbinsize;
       end;
       else p_left = num_psmps - p_right;
     end;

*** Base 2-sided p-value on distance from mean of last (i.e. least extreme) bin 
    of smaller p-value.  This is common practice.
***;

     if p_left<p_right then do;
        p_both = p_left;
        do z=num_psmps to 1 by -1; 
           if (psums[z] - pmean) >= distance_left then p_both+1;
           else leave;
        end;
     end;
     else if p_left>p_right then do;
        p_both = p_right;
        do z=1 to num_psmps;
           if (pmean - psums[z]) >= distance_right then p_both+1;
           else leave;
        end;
     end;
     else p_both=num_psmps;

*** Account for possibility, due to psum=a particular bin value, that 
    p_both>num_psmps.
***;
     p_both = min(p_both,num_psmps);
	  END;

     p_left  = p_left  / num_psmps;
     p_right = p_right / num_psmps;
     p_both  = p_both  / num_psmps;

*** If CONTROL sample is smaller than TEST (which is atypical), reverse 
*** p-values, as empirical distribution is mirror of itself.;

     if "C"=COMPRESS(UPCASE(scan("&samp2var.", byval_counter,' ')),' ') then do;
	      hold = p_left;
		   p_left = p_right;
		   p_right = hold;
     end;

     label permvar   = "Permuted Variable"
           n_psamp   = "Size of Permutation Samples"
           num_psmps = "# of Permutation Samples"
           p_left    = "Left p-value"
           p_right   = "Right p-value"
           p_both    = "Two-Tailed p-value"
           ;
      output &outdata.;
  end;
  run;

proc datasets lib=work memtype=data kill nodetails;
  run;

%MEND BEBB_SIM;

%BEBB_SIM(num_psmps = 1000, 
          indata    = MFPTUS.pricing_data_2strata_100000, 
          outdata   = MFPTUS.BEBB_SIM_100000_2strata,  
          byvars    = geography segment, samp2var=cntrl_test,
          permvar   = price, 
          seed      =
           );