/* / Program : Time spent on Tasks (extracted from Documented.Cleaning.Script.RDE.Data) / Version : 0.2 / Editor(s) : Nicole M Lindner / Date : May 22, 2009 / Contact : nml5d AT email DOT virginia DOT edu /====================================================================================================== / Purpose : Calculate the time spent on a given task, as a difference score between the time a task / was begun and the time a later task was begun. / Notes : This is a little quirky / hard to figure out on your own because SAS examples usually / work with _either_ DATE or TIME formats, but our data are recorded in DATETIME / formatting. In SAS, that is stored as the number of seconds since January 1, 1960, but / when you look at it in the edit table, it shows up as the formatted value / (e.g., '01May2009:21:34:56'). / I've tried to simplify the learning curve needed to work with these / to help others understand what to do without spending days searching SAS help as you / learn how to interact with different date(time) formats that were designed in the / early days of computers (?). / /====================================================================================================== / AMENDMENT HISTORY: / init --date--- mod-id ----------------------description---------------------------------------------- / 0.1 Dec10,2008 Creation date of this script. / 0.2 May22,2009 Corrects DATE format of difference scores / /====================================================================================================== / This is public domain software. No guarantee as to suitability or accuracy is given or implied. User uses this code entirely at his/her own risk. 
/====================================================================================================*/

/************ ************ ************ ************ ************
 Section: SessionTask data -- optimized for Project Implicit datafiles
 ************ ************ ************ ************ ************/

/*** Read in the session_task data, which records each task, its order, etc.
     Important for identifying conditions, when Ps dropped out, etc.

     #*#* CHANGE SOMETHING HERE: make sure that you've changed the FILENAME
     statement below to reflect the folder where you've stored your raw datafiles.

     NOTE(review): the librefs Raw1 and Clean and the fileref WebOut are used
     further down but are not assigned anywhere in this script -- presumably
     they are set up elsewhere (e.g. an autoexec); confirm before running. ***/
FILENAME Raw 'C:\primary\dataweb\Demo.Age\MaxNet\raw';
GOPTIONS RESET=ALL;

*** Step 0 - Bringing in the file;
/*** Task_Creation_Date must NOT be dropped here: it is the timestamp that the
     TempTime / PROC TRANSPOSE steps below turn into the Start* variables.
     DSD keeps empty tab-delimited fields from collapsing into each other, and
     TRUNCOVER keeps a short record from pulling values off the next line. ***/
DATA Temp(DROP=User_Agent User_ID Session_Created_By Session_Last_Update_Date
               Session_Creation_Date Session_Status);
    INFORMAT Task_Creation_Date Session_Date Session_Creation_Date
             Session_Last_Update_Date DATETIME20.;
    FORMAT   Task_Creation_Date Session_Date Session_Creation_Date
             Session_Last_Update_Date DATETIME20.;
    INFILE Raw(sessionTasks.txt) DELIMITER='09'x DSD TRUNCOVER LRECL=2000 FIRSTOBS=2;
    INPUT Session_ID
          Task_Number
          Task_ID                  :$20.
          Task_URL                 :$128.
          User_Agent               :$16.
          Study_URL                :$48.
          Task_Status              $
          Task_Sequence            $
          Task_Creation_Date       :ANYDTDTM21.
          User_ID
          Study_Name               :$64.
          Session_Date             :ANYDTDTM21.
          Session_Status           :$4.
          Session_Creation_Date    :ANYDTDTM21.
          Session_Created_By       :$24.
          Session_Last_Update_Date :ANYDTDTM21.;
RUN;

ODS HTML CLOSE;
ODS HTML PATH=WebOut FILE="Prep.SessTask.01.Raw.TaskURL.htm";

*** Step 1 - Cleaning Repeat data;
*get rid of duplicate submissions to the dataset of the same data;
PROC SORT DATA=Temp;
    BY Session_ID Task_URL;   *sorting data;
RUN;

PROC FREQ DATA=Temp;
    TABLES Task_URL;
RUN;

/*** Mark every row after the first within a Session_ID x Task_URL group as a
     Repeat. BY-group FIRST. processing relies on the sort just above, and
     unlike comparing against LAG() it cannot flag the very first row. ***/
DATA Temp;
    SET Temp;
    BY Session_ID Task_URL;
    Repeat = (NOT FIRST.Task_URL);
RUN;

TITLE2 'Repeat SessionTask Observations';
PROC FREQ DATA=Temp;
    TABLES Repeat;
RUN;

/*** Drop duplicate lines of data ***/
DATA Temp;
    SET Temp;
    IF Repeat = 1 THEN DELETE;
    DROP Repeat;
RUN;

/************ ************ ************ ************ ************
 Simplify the task name to prepare for using it to label the time that task was started
 ************ ************ ************ ************ ************/
DATA TempSessTask;
    SET Temp;
    FORMAT TaskSimple $8.;

    /*** Don't care about these, so getting rid of them. They're the IAT tasks
         and the brief-IAT instructions, along with the page BEFORE the actual
         consent page ***/
    IF Task_ID IN ("startpage","binstruct","tcompoy","tcompyo") THEN DELETE;

    /*** bias/cert are labelled by their position in the session: Task_Number 2
         becomes "manip", Task_Number 5 becomes "manCntrl"; any other position
         leaves TaskSimple blank. ***/
    IF Task_ID IN ("bias","cert") THEN DO;
        IF Task_Number = 2 THEN TaskSimple = "manip";
        ELSE IF Task_Number = 5 THEN TaskSimple = "manCntrl";
    END;
    ELSE IF Task_ID = "debriefing" THEN TaskSimple = "end";
    ELSE IF Task_ID = "consent" THEN TaskSimple = "begin";
    ELSE IF Task_ID = "practiceinstruct" THEN TaskSimple = "endExp";
    ELSE IF Task_ID IN ("evalinstreql","evalinstrnon") THEN TaskSimple = "InsDV";
    ELSE IF Task_ID IN ('oldcand1eql', 'oldcand1non', 'oldcand2eql', 'oldcand2non',
                        'oldcand3eql', 'oldcand3non', 'oldcand4eql', 'oldcand4non',
                        'yngcand1eql', 'yngcand1non', 'yngcand2eql', 'yngcand2non',
                        'yngcand3eql', 'yngcand3non', 'yngcand4eql', 'yngcand4non')
        THEN TaskSimple = "DVCand";
    ELSE IF Task_ID = "ratefactors" THEN TaskSimple = "DVFact";
RUN;

PROC CONTENTS DATA=TempSessTask;
RUN;

/*** Lists the Task_IDs that have not been recoded into TaskSimple yet ***/
PROC FREQ DATA=TempSessTask;
    TABLES Task_ID;
    WHERE TaskSimple = "";
RUN;

PROC FREQ DATA=TempSessTask;
    TABLES TaskSimple;
RUN;

TITLE2 'Time for Each Task';
*** Only run this step once you are sure you have fully recoded Task into TaskSimple;
DATA TempTime(RENAME=(TaskSimple=Task));
    SET TempSessTask(KEEP=Session_ID TaskSimple Task_Number Task_Creation_Date);
RUN;

/************ ************ ************ ************ ************
 -------------------- Adapted from Documented.Cleaning.Script.RDE.Data.SAS --------------------
 - Calculating time spent on the entire study (or modify to assess time on specific Tasks);
 - Identifies time for each Task. This requires more recoding, but works better for
   long-running Demo-site studies, where the design often changes (so the max
   Task_number changes)
 - IF you have a simple design and just want time spent on the entire study, just
   transpose Task_number instead of Task
   if you do something; you can add the following 2 lines to TimeOnTasks datastep:
     * FORMAT TimeIAT TimeIatinstr TimeSurvey mmss.;
     * TimeSurvey = (Start2-Start1); TimeIatinstr = (Start3-Start2); TimeIAT = (Start4-Start3);
 ************ ************ ************ ************ ************/

/* STARTDVCand   Num 8 DATETIME20.
 6 STARTDVFact   Num 8 DATETIME20.
 5 STARTInsManip Num 8 DATETIME20.
 3 STARTbegin    Num 8 DATETIME20.
 4 STARTend      Num 8 DATETIME20.
 2 STARTmanip    Num 8 DATETIME20. */

/*** identify Task start time ***/
/*** One output row per Session_ID; each Task value becomes a Start<Task>
     column holding that task's Task_Creation_Date.
     NOTE(review): several Task_IDs map to "DVCand"; if one session can contain
     more than one of them, ID Task would see a repeated ID value within the BY
     group -- confirm against the data. ***/
PROC TRANSPOSE DATA=TempTime PREFIX=Start NAME=NAME OUT=TimeOnTasks(DROP=NAME _LABEL_);
    BY Session_ID;
    VAR Task_Creation_Date;
    ID Task;
RUN;

/*** Same layout, but each Num<Task> column holds the task's Task_Number ***/
PROC TRANSPOSE DATA=TempTime PREFIX=Num NAME=NAME OUT=TimeTNum(DROP=NAME _LABEL_);
    BY Session_ID;
    VAR Task_Number;
    ID Task;
RUN;

PROC CONTENTS DATA=TimeOnTasks;
RUN;

PROC CONTENTS DATA=TimeTNum;
RUN;

PROC SORT DATA=TimeTNum;
    BY Session_ID;
RUN;

PROC SORT DATA=TimeOnTasks;
    BY Session_ID;
RUN;

/*** NOTE: SAS's default Times/date formats are crazy: They're the number of seconds
     since January 1st, 1960. So if you do a difference score, you'll get the number
     of seconds, but you'll want to apply a format to make it easier to read.
     Check SAS help for mmss. format or time. format for more details about them.
     Here are a few, with the raw value first, then the displayed value that each
     time format will result in:
       _Format_  Raw #_  _Displays_
       HHMM.     53132   14:46
       HOUR.     53132   15
       MMSS.     53132   885
       TIME.     53132   14:45:32
       TOD.      53132   14:45:32 ***/
DATA Raw1.TimeOnTasks;
    MERGE TimeOnTasks
          TimeTNum(KEEP=Session_ID NumManCntrl NumManip);
    BY Session_ID;

    /*** By specifying the format for the new Time variables before I set them
         equal to something SAS keeps them in the format I'd like ***/
    FORMAT TimeTotal TimeDVCand TimeManip TimeDVFact TIME.;   /* Or MMSS.*/

    /*** Each duration is the start of a later task minus the start of the
         task being timed, in seconds. ***/
    TimeTotal  = StartEnd    - StartBegin;
    TimeDVCand = StartDVFact - StartDVCand;

    /*** Which tasks bracket TimeManip and TimeDVFact depends on whether the
         session has a manCntrl task, a manip task, or both (the manip branch
         runs last and so wins if both are present). ***/
    IF NumManCntrl NE . THEN DO;
        TimeManip  = StartEndExp   - StartManCntrl;
        TimeDVFact = StartManCntrl - StartDVFact;
    END;
    IF NumManip NE . THEN DO;
        TimeManip  = StartInsDV  - StartManip;
        TimeDVFact = StartEndExp - StartDVFact;
    END;
RUN;

PROC MEANS DATA=Raw1.TimeOnTasks;
    VAR TimeTotal TimeDVCand TimeManip;
RUN;

/*** Build the analysis copy from the dataset just computed (Raw1.TimeOnTasks),
     keeping only the session key and the four duration variables. ***/
DATA Clean.TimeOnTasks(KEEP=Session_ID TimeDVCand TimeDVFact TimeManip TimeTotal);
    SET Raw1.TimeOnTasks;
RUN;

/*** Now, I'm just looking at what the cleaned time data looks like. Can use this to
     check comparative histograms of time on a critical task in different conditions,
     to check whether outliers are obscuring any effects ***/
PROC FREQ DATA=Raw1.TimeOnTasks;
    TABLES TimeTotal;
    FORMAT TimeTotal TIME.;
RUN;

PROC MEANS DATA=Raw1.TimeOnTasks;
    VAR TimeTotal TimeDVCand TimeManip TimeDVFact;
    FORMAT TimeTotal TimeDVCand TimeManip TimeDVFact TIME.;
RUN;

/*** Same summary, restricted to sessions that have both a begin and an end task ***/
PROC MEANS DATA=Raw1.TimeOnTasks;
    VAR TimeTotal TimeDVCand TimeManip TimeDVFact;
    WHERE StartBegin NE . & StartEnd NE .;
RUN;

PROC UNIVARIATE DATA=Clean.TimeOnTasks NOPRINT;
    VAR TimeTotal TimeDVCand TimeManip TimeDVFact;
    HISTOGRAM TimeTotal TimeDVCand TimeManip TimeDVFact;
    FORMAT TimeTotal TimeDVCand TimeManip TimeDVFact TIME10.;
RUN;

/*** NOTE: In general, time formatting is hh:mm:ss.ss (hour, minute, second)
     So Time10 displays these with the format 11:23:07.4 ***/
/*** TimeDVCand is stored in seconds, so "< 360" keeps values under 6 minutes ***/
PROC UNIVARIATE DATA=Clean.TimeOnTasks NOPRINT;
    VAR TimeDVCand;
    HISTOGRAM TimeDVCand;
    FORMAT TimeDVCand TIME10.;
    WHERE TimeDVCand < 360;
RUN;

PROC UNIVARIATE DATA=Clean.TimeOnTasks;
    VAR TimeDVCand;
    HISTOGRAM TimeDVCand;
    FORMAT TimeDVCand TIME10.;
RUN;

PROC UNIVARIATE DATA=Raw1.TimeOnTasks NOPRINT;
    VAR TimeDVCand TimeManip TimeDVFact;
    HISTOGRAM TimeDVCand TimeManip TimeDVFact;
    FORMAT TimeDVCand TimeManip TimeDVFact MMSS.;
RUN;

PROC CONTENTS DATA=Clean.TimeOnTasks;
RUN;

PROC UNIVARIATE DATA=Clean.TimeOnTasks NOPRINT;
    VAR TimeDVCand;
    HISTOGRAM TimeDVCand / VSCALE=COUNT;
    FORMAT TimeDVCand 8.;
    /*** VSCALE=count makes the y-axis frequency, rather than %age ***/
RUN;