**********************************************************************
*                                                                    *
*                          SASGENE 1.1                               *
*                    Program for Analysis of                         *
*                 Gene Segregation and Linkage                       *
*                       November 5, 1997                             *
*                                                                    *
*********************************************************************;

%macro convert (ds=_last_, /* SAS dataset to analyze (default: uses last one) */
                genes=,    /* gene variable names                             */
                dsout=     /* name of new SAS dataset after conversion        */
               );
/*--------------------------------------------------------------------
   Name:      CONVERT

   Purpose:   Converts gene values to Dominant ('D') or Recessive ('R').
              For every family and gene the dominant code is taken to be
              the most frequent non-missing code observed in generation 3
              (F1).  Values equal to that code become 'D', other
              non-missing values become 'R', missing values stay missing.
              Gene/family combinations whose P1 and P2 codes are the same
              are excluded, so they get no dominant code and are set to
              missing in the output.

   Input:     &ds must contain the variables FAMILY and GNR
              (1=P1 2=P2 3=F1 4=F2 5=BC1P1 6=BC1P2) plus the character
              gene variables listed in GENES=.

   Output:    &dsout - copy of &ds (sorted by FAMILY) with the gene
              variables recoded; several diagnostic listings are printed.

   Written:   09/14/95

   Modified:  10/02/95
              03/05/97

   Products:  Base SAS

   Example:   %convert(ds=save.orig,
                       genes=BI RC DV SP LL DF F B D U TU SS NS,
                       dsout=new);

   Note:      This header is a slash-star comment on purpose.  A
              statement-style (asterisk) comment inside a macro is not
              ignored by the macro processor, so the example call above
              would be treated as a real macro invocation.
 --------------------------------------------------------------------*/

   %local nogenes word geneid i;

   /* create nogenes macro variable                                   */
   /* nogenes is the number of genes listed in &genes                 */
   /* %length() is used instead of "&x ne" so that gene names that    */
   /* look like operators (OR, NE, LT, ...) are not evaluated         */
   %let nogenes=0;
   %if %length(&genes) > 0 %then %do;
      %let word=%scan(&genes,1);
      %do %while (%length(&word) > 0);
         %let nogenes=%eval(&nogenes+1);
         %let word=%scan(&genes,%eval(&nogenes+1));
      %end;
   %end;

   /* nothing to convert: stop before any step is generated           */
   %if &nogenes = 0 %then %do;
      %put ERROR: (convert) GENES= is empty - list the gene variables to convert.;
      %goto finish;
   %end;

   proc format;
      value _gnrx 1='P1'
                  2='P2'
                  3='F1'
                  4='F2'
                  5='BC1P1'
                  6='BC1P2'
                  ;
   run;

   title2 'Gene Segregation and Linkage Analysis';

   /* create geneid macro variable                                    */
   /* geneid is the names of the genes in quotes                      */
   /* used in array for identification in output                      */
   %let word=%scan(&genes,1);
   %let geneid=%str(%'&word%');
   %do i=2 %to &nogenes;
      %let word=%scan(&genes,&i);
      %let geneid=%str(&geneid,%'&word%');
   %end;

   proc sort data=&ds out=_orig;
      by family;
   run;

   /* one observation per original observation and gene               */
   data _generat;
      set _orig;
      length id 3;
      array y{*} &genes;
      array yc{*} $ n1-n&nogenes (%unquote(&geneid));
      do _i_=1 to dim(y);
         id=_i_;               /* position of the gene in &genes      */
         code=y{_i_};
         gene=yc{_i_};
         output;
      end;
      keep family id gene gnr code;
   run;

   proc sort data=_generat;
      by family id;
   run;

   /* frequency of every non-missing code per family and gene         */
   proc freq data=_generat noprint;
      by family id gene;
      where code not=' ';
      tables code / out=_count;
   run;

   /* n = number of distinct codes per family and gene                */
   proc means data=_count noprint;
      by family id;
      var count;
      output out=_nocode n=n;
   run;

   data _look;
      merge _count _nocode;
      by family id;
      if n>2;
   run;

   proc print data=_look label;
      title3 'Observed frequencies for each gene locus and allele code';
      title4 'These genes in this table have more than 2 codes:';
      title5 ' some codes may have been misentered ';
      title6 'WARNING!!! Program will convert to 2 codes (D and R) ';
      title7 ' Dominant will be assigned, ';
      title8 ' other non-missing codes will be set to Recessive ';
      var family gene code count;
      label count='FREQUENCY';
   run;

   /* delete gene-family ids that do not make sense for analysis      */
   /* delete when the phenotype of P1 is the same as the              */
   /* phenotype of P2                                                 */
   title3 ' ';
   proc freq data=_generat noprint;
      by family id gene;
      tables code*gnr / out=_gnrcode(drop=percent);
   run;

   data _gnrcode;
      set _gnrcode;
      if code=' ' then delete;
   run;

   /* most frequent code of each generation comes first               */
   proc sort data=_gnrcode;
      by family id gene gnr descending count;
   run;

   data _delete(keep=family id gene);
      set _gnrcode;
      by family id gene gnr;
      /* Never executed: only makes d1/d2 inherit the type and length */
      /* of CODE.  Without it they are defined by d1=' ' as $1 and    */
      /* the parents would be compared on their first character only. */
      if 0 then do;
         d1=code;
         d2=code;
      end;
      retain d1;
      if first.id then do;
         d1=' ';
         d2=' ';
      end;
      if first.gnr then do;
         if gnr=1 then d1=code;          /* most frequent P1 code     */
         else if gnr=2 then do;
            d2=code;                     /* most frequent P2 code     */
            if d1=d2 then output _delete;
         end;
      end;
   run;

   proc print data=_delete(drop=id);
      title3 'These gene-family combinations will be deleted ';
      title4 'since the phenotype for P1 and P2 are the same ';
      title5 'and do not fit the assumptions of the analysis.';
   run;

   /* _generat keeps the usable combinations, _look the deleted ones  */
   data _generat _look;
      merge _generat _delete(in=yes);
      by family id gene;
      if yes then output _look;
      else output _generat;
   run;

   proc freq data=_look;
      by family id gene;
      tables code*gnr / missprint nocum nopercent norow nocol;
      label gnr='GENERATION';
      format gnr _gnrx.;
   run;

   /* find the dominant gene by looking at generation 3 (F1)          */
   title3 ' ';
   proc freq data=_generat noprint;
      by family id gene;
      where gnr=3;
      tables code / out=_count;
   run;

   /* ascending count: the last observation of each id is the most    */
   /* frequent F1 code                                                */
   proc sort data=_count;
      by family id count;
   run;

   /* one observation per family, c<i> = dominant code of gene i      */
   data _dom;
      set _count;
      by family id;
      length c1-c&nogenes $8;
      array c{*} $ c1-c&nogenes;
      retain c1-c&nogenes;
      if first.family then do;
         do _i_=1 to &nogenes;
            c{_i_}=' ';
         end;
      end;
      if last.id then c{id}=code;
      if last.family then output;
      keep family c1-c&nogenes;
   run;

   data &dsout;
      merge _orig _dom;
      by family;
      array genes{*} &genes;
      array dom{*} $ c1-c&nogenes;
      do _i_=1 to dim(genes);
         if dom{_i_}=' ' then genes{_i_}=' '; /*useless data- no dominant*/
         else do;
            if genes{_i_}=dom{_i_} then genes{_i_}='D';
            else if genes{_i_}=' ' then genes{_i_}=' ';
            else genes{_i_}='R';
         end;
      end;
      drop c1-c&nogenes _i_;
   run;

   /* same recoding, kept side by side with the old code for listing  */
   data _check;
      merge _orig _dom;
      by family;
      array genes{*} &genes;
      array dom{*} $ c1-c&nogenes;
      array yc{*} $ n1-n&nogenes (%unquote(&geneid));
      do _i_=1 to dim(genes);
         id=_i_;
         gene=yc{_i_};
         old_code=genes{_i_};
         if dom{_i_}=' ' then new_code=' '; /*useless data- no dominant */
         else do;
            if genes{_i_}=dom{_i_} then new_code='D';
            else if genes{_i_}=' ' then new_code=' ';
            else new_code='R';
         end;
         output;
      end;
      drop c1-c&nogenes n1-n&nogenes &genes;
   run;

   title4 "Conversion to 'D' or 'R' for each gene and family";
   proc freq data=_check;
      tables id*gene*family*new_code*old_code/list nopercent nocum nofreq;
   run;

   /* remove the work datasets created by this macro                  */
   proc datasets library=work memtype=data nolist;
      delete _check _count _dom _generat _look _nocode _orig _delete _gnrcode;
   quit;

%finish:
%mend convert;