/*************************************************************************/ /* PROGRAM DESCRIPTION: */ /* This program uses single locus genotype data from case-parent triads */ /* to fit a log-linear model with genotype and imprinting effects at a */ /* candidate gene (or at a marker gene in linkage disequilibrium with a */ /* candidate gene). The model, specified in Table 1 of Weinberg (1999), */ /* is fit to biallelic data with possibly missing genotype data for the */ /* mother or father. Likelihood ratio tests for parameter effects are */ /* obtained from program output as described below. */ /* */ /* SAS SOFTWARE: */ /* This code was written and tested using version 8 of SAS software (SAS */ /* Institute, Inc., Cary, NC). It may run under version 7, but will not */ /* run under version 6. */ /* */ /* DATA: */ /* Input data consist of the numbers of families observed in a genotypic */ /* class characterized as an ordered triplet of genotypes consisting of */ /* mother, father, and child. Individual genotypes are defined by the */ /* number of copies of a "target" (i.e. pre-specified) allele present in */ /* an individual. For example, the genotype triple (2 1 1) represents a */ /* homozygous mother, heterozygous father, and heterozygous child. See */ /* DATALINES below for specific examples. Missing genotype data for */ /* either the father or mother are allowed, as shown in the example data */ /* below. No other patterns of missing data are currently supported. */ /* */ /* Added 07/11/2013 by Min Shi */ /* Additional notes on input data format: */ /* Please keep all the records in the example file and keep the values */ /* of the first three variables. Only the values of the last variable: */ /* counts need to be changed. if a triad does not exist in your data */ /* put 0 as the count instead of removing the corresponding record. */ /* End of additional notes 7/11/2013 by Min Shi */ /* */ /* HOW TO FIT MODEL AND OBTAIN A LIKELIHOOD RATIO TEST: */ /* */ /* MACRO SYNTAX: */ /* Macro "fitit" is called by %fitit(effects=EFFECTLIST),where EFFECTLIST*/ /* represents a list of model effects to be included in the PROC GENMOD */ /* model statement. See the end of this code for two examples. */ /* */ /* EFFECTLIST may include the following effects: */ /* mm = 2 df Maternal genotype effect */ /* ff = 2 df Paternal genotype effect */ /* Rp = 1 df Single paternal copy in child effect */ /* R2 = 1 df Two copies in child genotype effect */ /* Im = 1 df Imprint effect */ /* */ /* See Weinberg (1999) for details of the parameterization used here. */ /* Note that Im equals 1 if there is no parent-of-origin effect and that */ /* ImRp is the single maternal copy in child effect. */ /* */ /* By default the model fits mating type. Hence a blank EFFECTLIST */ /* yields a value of -2*likelihood for mating type only. A likelihood */ /* ratio test for a parameter is obtained by referring the difference in */ /* -2 times the log of the real data likelihoods between a model */ /* including an effect and a model omitting that effect to a chi square */ /* distribution with degrees of freedom appropriate for the effect. */ /* Likelihood values reported along with the EFFECTLIST in the program */ /* output are -2 times the log of the real data likelihood. */ /* */ /* REFERENCES: */ /* */ /* Weinberg, C. R. (1999) Methods for detection of parent-of-origin */ /* effects in genetic studies of case-parents triads. */ /* Amer. J. Hum. Genet. 65:229-235. */ /* */ /* Weinberg, C. R., A. J. Wilcox, R. T. Lie (1998) A log-linear approach */ /* to case-parent-triad data: assessing effects of disease genes that act*/ /* either directly or through maternal effects and that may be subject to*/ /* partial imprinting. Amer. J. Hum. Genet. 62:969-978. */ /* */ /* */ /* DATE: May 2001 */ /* */ /* DISCLAIMER: */ /* This program is freeware. It may be distributed freely as long as this*/ /* header remains intact. The code has been validated against GLIM code */ /* for selected data sets. Nevertheless, this code is supplied "as is", */ /* with no warranties expressed or implied regarding its suitability for */ /* any particular purpose or regarding its accuracy. */ /*************************************************************************/ OPTIONS NONUMBER NODATE LS=80; TITLE1 ' '; /*********************************************/ /* Input Data: */ /* Column 1: Number of alleles in mother */ /* Column 2: Number of alleles in father */ /* Column 3: Number of alleles in child */ /* Column 4: Number of case-parent triads */ /*********************************************/ DATA rawdata; INPUT @1 datatype $1. @; IF datatype = '*' THEN RETURN; ELSE DO; INPUT @1 mother father child count; OUTPUT; END; DROP datatype; DATALINES; *** Complete Genotype Data *** 2 2 2 10 2 1 2 9 1 2 2 8 2 1 1 11 1 2 1 11 2 0 1 20 0 2 1 11 1 1 2 15 1 1 1 19 1 1 0 12 1 0 1 21 0 1 1 17 1 0 0 26 0 1 0 31 0 0 0 37 *** Missing Mother Genotype *** . 2 2 0 . 1 2 0 . 2 1 1 . 1 1 2 . 1 0 2 . 0 1 0 . 0 0 1 *** Missing Father Genotype *** 2 . 2 5 1 . 2 3 2 . 1 2 1 . 1 12 1 . 0 7 0 . 1 6 0 . 0 9 ; RUN; /*********************************************************/ /* Complete data set for missing parental genotype data */ /*********************************************************/ DATA rawdata; SET rawdata; /* Complete Data */ IF (mother NE .) AND (father NE .) THEN DO; complete = count; OUTPUT; END; /* Missing Mother */ ELSE IF (mother = .) THEN DO; missmother = count; mother = child; OUTPUT; IF (father > 0 AND child > 0) THEN DO; mother = child - 1; OUTPUT; END; IF (father < 2 AND child < 2) THEN DO; mother = child + 1; OUTPUT; END; END; /* Missing Father */ ELSE IF (father = .) THEN DO; missfather = count; father = child; OUTPUT; IF (mother > 0 AND child > 0) THEN DO; father = child - 1; OUTPUT; END; IF (mother < 2 AND child < 2) THEN DO; father = child + 1; OUTPUT; END; END; RUN; /**********************/ /* Summerize all data */ /**********************/ PROC MEANS DATA=rawdata NWAY NOPRINT; CLASS mother father child; VAR complete missmother missfather; OUTPUT OUT=summdata(drop=_TYPE_ _FREQ_) SUM=; RUN; /*****************************************************************/ /* Compute allele frequency in parents to initiate EM algorithm */ /*****************************************************************/ DATA allelefrq; SET summdata END=eof; /* Count parents */ parents + (2 * complete); /* Total "target" alleles in parents */ alleles + ((mother + father) * complete); /* Compute "target" allele frequency in parents */ IF eof THEN DO; p = ROUND(alleles/(2*parents),.001); OUTPUT; END; KEEP p parents; RUN; /*****************************************************************************/ /* Define mating type, imprinting indicators, and Hardy-Weinberg frequencies */ /*****************************************************************************/ DATA origdata(KEEP = mm ff cc Rp R2 Im complete missmother missfather matetype PseudoData); SET summdata END = eof; IF _N_=1 THEN SET allelefrq; /* Define arbitrary mating type index values used in Weinberg's manuscript */ matetype = (mother=father)*(6-(2*mother+(mother=2))) + (mother^=father)*(5-(mother+father)*(mother=2 OR father=2)); /* Define Im = risk effect with 1 copy of gene from mother (female imprinting) */ Im = (child = 1) * (mother > father); /* Define Rp = risk effect with 1 copy of gene from father (male imprinting) */ Rp = (child = 1); /* Define R2 = genotype risk for child with 1 copy of gene from each parent */ R2 = (child = 2); /* Define initial pseudo-data based on Hardy-Weinberg */ PseudoData = ROUND(parents * (p**(mother+father)*(1-p)**(4-mother-father))); /* Create character variables to be used in CLASS statement of GENMOD */ /* Character variables are needed to iterate with GENMOD because GENMOD */ /* outputs CLASS variables as character variables */ mm = PUT(mother,1.0); ff = PUT(father,1.0); cc = PUT(child,1.0); /* output data: note two classes for the triple heterozygote, one in which */ /* the mother transmits the allele and the other in which the father does so */ IF (mother=1 AND father=1 AND child=1) THEN DO; Im = 1; /* mother transmits allele */ OUTPUT; Im = 0; /* father transmits allele */ OUTPUT; END; ELSE OUTPUT; RUN; /*************************************************************************/ /* Summarize data to compute observed-data liklihood after fitting model */ /*************************************************************************/ PROC MEANS DATA=origdata NOPRINT; CLASS mm ff cc; VAR complete missmother missfather; OUTPUT OUT=realdata SUM=; RUN; /*****************************************************************/ /* Define MACRO to fit model. Pass model effects to PROC GENMOD */ /*****************************************************************/ %MACRO fitit(effects=); DATA Updata; SET origdata; RUN; %LET iter=0; %LET stop=no; %DO %UNTIL(&stop=yes OR &iter>99); ODS LISTING CLOSE; %LET iter = %eval(&iter + 1);/* count iterations */ /* Define output data sets */ ODS OUTPUT ParameterEstimates=parmests; ODS OUTPUT ObStats=ExpCell(KEEP=mm ff cc pred); ODS TRACE off; /* Fit log-linear model to PseudoData */ PROC GENMOD DATA=updata; CLASS matetype mm ff cc; MODEL PseudoData = matetype &effects / dist = poisson link = log predicted xvars type1; RUN; /****************************************************************************/ /* Summarize fitted cell frequencies */ /****************************************************************************/ PROC MEANS DATA=ExpCell NOPRINT; CLASS mm ff cc; VAR pred; OUTPUT OUT=Exp SUM=; /****************************************************************************/ /* Output sums for complete triads and for those missing some genotype data */ /****************************************************************************/ DATA ExpAll (KEEP=mm ff cc pred) Expmom (KEEP= ff cc pred RENAME=(pred = exptdmom)) Exppop (KEEP=mm cc pred RENAME=(pred = exptdpop)); SET Exp; IF _type_=3 /*'011'B*/ THEN OUTPUT Expmom; IF _type_=5 /*'101'B*/ THEN OUTPUT Exppop; IF _type_=7 /*'111'B*/ THEN OUTPUT ExpAll; RUN; /****************************************************************************/ /* Sort and merge data sets */ /****************************************************************************/ PROC SORT DATA=Expmom; BY ff cc; PROC SORT DATA=ExpAll; BY ff cc; DATA ExpAll; MERGE ExpAll Expmom; BY ff cc; PROC SORT DATA=Exppop; BY mm cc; PROC SORT DATA=ExpAll; BY mm cc; DATA ExpAll; MERGE ExpAll Exppop; BY mm cc; PROC SORT DATA=ExpAll; BY mm ff cc; /**************************************************/ /* Merge original data with fitted data */ /**************************************************/ DATA Updata; MERGE Updata(KEEP= mm ff cc Im Rp R2 complete missmother missfather matetype PseudoData) ExpAll(KEEP= mm ff cc pred exptdmom exptdpop); BY mm ff cc; RUN; *ODS LISTING; PROC PRINT DATA=updata NOOBS; *VAR mm ff cc matetype pred PseudoData complete missmother missfather; TITLE1 "Iteration: &iter"; RUN; /**********************/ /* Get estimated Im */ /**********************/ DATA updateIm(KEEP=ImEst); SET parmests END=eof; RETAIN ImEst 1; /* set ImEst to 1 when Im not in model */ IF Parameter = 'Im' THEN ImEst = EXP(Estimate); IF eof THEN OUTPUT; RUN; /****************************************************************************/ /* Update PseudoData values for next interation */ /****************************************************************************/ DATA Updata; IF _N_ = 1 THEN SET updateIm; SET Updata END=eof; newpseudo = complete + missmother * (pred / exptdmom) + missfather * (pred / exptdpop); IF (mm='1' AND ff='1' AND cc='1') THEN DO; IF Im = 1 THEN newpseudo = (ImEst/(1+ImEst))*newpseudo; IF Im = 0 THEN newpseudo = (1/(1+ImEst))*newpseudo; END; diff + ABS(PseudoData - newpseudo); PseudoData = newpseudo; IF (eof AND diff<.000001) THEN CALL SYMPUT('STOP','yes'); DROP newpseudo diff; RUN; %END; /* %DO */ ODS LISTING; /* Omit redundant record before computing observed-data likelihood */ DATA Updata; SET Updata; IF (mm='1' AND ff='1' AND cc='1' AND Im=1) THEN DELETE; RUN; /* PROC PRINT DATA=updata NOOBS; VAR mm ff cc matetype pred PseudoData complete missmother missfather; TITLE1 "final updata:Iteration: &iter"; RUN; PROC PRINT DATA=parmests NOOBS; TITLE2 "Parameter Estimates"; RUN; */ /* Summarize final fitted values */ PROC MEANS DATA=Updata NOPRINT; CLASS mm ff cc; VAR pred; ID matetype; OUTPUT OUT=finalfit SUM=; /* PROC PRINT DATA=finalfit NOOBS; TITLE1 "final fitted values: Iteration: &iter"; RUN; PROC PRINT DATA=realdata NOOBS; TITLE1 "real data: Iteration: &iter"; RUN; */ DATA real; MERGE finalfit(KEEP= _type_ mm ff cc pred) realdata (KEEP= _type_ _freq_ mm ff cc complete missmother missfather); BY _type_ mm ff cc; DATA real; SET real END=eof; RETAIN total; IF _type_=0 THEN total = pred; prob = pred/total;/* fitted cell probability */ /* sum elements of liklihood: note missing mother and father frequencies */ /* occur _freq_-times in the data from the means procedure above */ IF _type_ = 3 THEN loglik + ((missmother/_freq_)*LOG(prob)); IF _type_ = 5 THEN loglik + ((missfather/_freq_)*LOG(prob)); IF _type_ = 7 THEN loglik + ((complete/_freq_)*LOG(prob)); IF eof THEN DO; loglik = -2*loglik; put "Model: &effects"; put 'Log Liklihood:' loglik; OUTPUT; END; RUN; PROC PRINT DATA=real NOOBS SPLIT = '*'; VAR /*mm ff cc type observed missfather missmother pred prob*/ loglik; TITLE1 "Iteration: &iter"; TITLE2 "Effects in Model: mating type + &effects"; LABEL loglik = '-2 Loglikelihood'; RUN; ODS LISTING CLOSE; %MEND fitit; /* EXAMPLE MODELS */ %fitit(effects=R2 Rp); %fitit(effects=Im R2 Rp);