/*
/ Program   : Create common directory, format & label Project Implicit variables
/ Version   : 1.0.1
/ Editor(s) : Nicole M Lindner
/ Date      : February 2, 2009
/ Contact   : nml5d@virginia.edu
/========================================================================================
/ Purpose   : Read in, clean, and transpose all standard datafiles resulting from
/             data collected at Project Implicit and downloaded through the
/             RDE (https://rde.projectimplicit.net)
/ SubMacros : none
/ Notes     : This script:
/               - Creates and accesses common directory for saved formats,
/               - Creates format to label procedure output for Project Implicit Demonstration
/                 and Research Site demographics variables I've worked with
/               - Applies the formats to label procedure output
/               - Uses SAS's default formats to label countries (this can probably be expanded)
/ Usage     :
/
/=========================================================================================
/ PARAMETERS:
/-------name------- -------------------------description----------------------------------
/
/
/
/=========================================================================================
/ AMENDMENT HISTORY:
/ init --date-- ----------------------description-------------------------------------
/ 1.0.1 Aug28,2007  Simplifies country coding, using SAS map formats &
/                   Simplifies ethnicity/race coding, and uses Att2 names
/ 1.0.2 Feb09, 2009 Corrects subexclude labeling to correspond to those outlined in
/                   Nosek et al., 2007, the PCIAS manuscript
/=========================================================================================
/ This is public domain software. No guarantee as to suitability or accuracy is
/ given or implied. User uses this code entirely at their own risk.
/=======================================================================================*/
/*======================================================================================*/

/* -------- Creates and accesses common directory for saved formats ------------------- */

/* This creates a directory for common (i.e., demo/research-site wide) formatting for
   variables. In any script that you would like to use this formatting, just add the
   following LIBNAME and OPTIONS lines, then apply the appropriate format to whichever
   procedure you'd like.

   These notes are block comments on purpose: a statement-style comment (star ... semicolon)
   may not contain an unmatched quotation mark, so an apostrophe inside one can swallow
   the statements that follow it (here, the LIBNAME statement). */
LIBNAME Common 'C:\primary\dataweb\common';

/* The OPTIONS statement tells SAS to look in that common catalog for stored formats.
   SASHELP.MAPFMTS is added so that the SAS geographic formats can be used as well. */
OPTIONS FMTSEARCH=(Common.Formats SASHELP.MAPFMTS);

/* -------- Creates formats for PI Demo and Research site demographics ------------------ */

/* Ethnic_R recodes for data that has either or both
     a) new model ethnic variable prior to NIH changes to use census values and
     b) ethnicityOMB and raceOMB, which use the NIH census values.
   You could do something similar to break out Hispanic or Multi if necessary,
   but it won't be necessary for many analyses.
   *Just add it to your script, uncomment this DATA step (and make the DATA and
    SET statements refer to your data), then compile the format and you're in business!
*/

/**********************************************************************************
PLEASE NOTE: The following DATA step uses the variable names in the cleaned Att2 dataset
For new demo data or research site tasks, you will probably need to rename these to:
     IN CLEANED ATT2     IN RAW DATA
     EthnicCen           EthnicityOMB
     RaceCen             RaceOMB
     Race                Ethnic

DATA Temp; SET Temp;
FORMAT Ethnic_R 8.;
IF (EthnicCen = . & RaceCen = . & Race = .)
THEN Ethnic_R = .; *Missing data;
ELSE IF ((EthnicCen IN (.,2,3) & RaceCen = 1 ) | Race=1) THEN Ethnic_R = 1; *Amer Indian/Alaskan;
ELSE IF ((EthnicCen IN(.,2,3) & RaceCen IN(2,3,4)) | Race=2) THEN Ethnic_R = 2; *Asians;
ELSE IF ((EthnicCen IN (.,2,3) & RaceCen = 5) | Race=3) THEN Ethnic_R = 3; *Blacks;
/* ELSE IF (EthnicCen=1 | Race=4) THEN Ethnic_R = 4; *Hispanics-all; /*THis is superfluous: & RaceCen IN (.,1,2,3,4,5,6,7,8,9)*/
/* ELSE IF ((EthnicCen IN (.,2,3) & RaceCen = 6) | Race=5) THEN Ethnic_R = 5; *Whites;
ELSE IF ((EthnicCen IN (.,2,3) & RaceCen IN(7,8)) | Race IN(7,8)) THEN Ethnic_R = 7; *Multi(Black/White & NonHis) ;
ELSE IF ((EthnicCen IN (.,2,3) & RaceCen = 9) | Race=6) THEN Ethnic_R = 6; *Other/Unknown;
/*ELSE IF (EthnicCen = 1 & RaceCen = 6) THEN Ethnic_R = 9; *White Hispanics; */
/* ELSE Ethnic_R = .;RUN; */

/* Reminder: the disabled Ethnic_R DATA step above is split across several comment
   blocks (the inner markers close and reopen it), so every marker has to be removed
   when the step is enabled. */

/* =========================================================================================
   Init 1.0.2 -- Fixed labeling of SubExcl to correspond to PCIAS article exclusion
   criteria. See footnote 4 in the article
   ========================================================================================= */

/* Exclusion-code labels. NOTSORTED stores the values in the order written here so
   that a procedure can report them in this order (see the disabled PROC MEANS
   example that follows). */
PROC FORMAT LIBRARY=Common.Formats;
    VALUE SubExclFmt (NOTSORTED)
        0 = "Good Data"
        1 = "10%+ fast Ts, avging all critical blocks"
        7 = ">24% fast Ts, 1 critical block"
        8 = ">34% fast Ts, 1 practice block"
        3 = ">30% avg errors across all crit blocks"
        4 = ">39% errors in 1 critical block"
        5 = ">39% error rate, all practice blocks"
        6 = ">49% error in 1 practice block"
        2 = "missing data or >39% error rate, crit block"
        9 = ">missing trials, critical block"
    ;
RUN;

/*
proc means N maxdec=0 data=cr.cr1(where=(OrdLiberalIAT NE . & FullScale NE .));
    var session_id IAT;
    class subexcl / preloadfmt order=data;
    format subexcl subexclfmt.;
run;
* You need the preloadfmt order=data and format lines to output using the formatted
  order, rather than alphanumeric order;
*/

/* Labels for the older single ethnicity item; codes 7 and 8 share one label. */
PROC FORMAT LIBRARY=Common.Formats;
    VALUE OldEthnic
        1   = "NatAmer"
        2   = "Asian"
        3   = "Black"
        4   = "Hispanic"
        5   = "White"
        6   = "Other/Unknown"
        7-8 = "Multi"
    ;
RUN;

/* Same item with codes 7 and 8 labelled separately. No LIBRARY= option is given,
   so this format is not written to the Common.Formats catalog. */
PROC FORMAT;
    VALUE nml
        1 = "NatAmer"
        2 = "Asian"
        3 = "Black"
        4 = "Hispanic"
        5 = "White"
        6 = "Other/Unknown"
        7 = "Black+White"
        8 = "Multi"
    ;
RUN;

/* Labels for the recoded Ethnic_R variable and for the OMB ethnicity / race items. */
PROC FORMAT LIBRARY=Common.Formats;
    VALUE Ethnic_Rfmt
        1 = "NatAmer"
        2 = "Asian"
        3 = "Black"
        4 = "Hispanic"
        5 = "White"
        6 = "Other/Unknown"
        7 = "Multi"
    ;

    VALUE EthnicityOmbFmt   /* Or EthnicCen */
        1 = "Hispanic"
        2 = "NonHispanic"
        3 = "Not Known"
    ;

    VALUE RaceOmbFmt        /* Or RaceCen */
        1 = "Amer.Indian/Alaska Nat"
        2 = "East Asian"
        3 = "South Asian"
        4 = "Native Hawaiian/Pac.Islander"
        5 = "Black/AA"
        6 = "White"
        7 = "Multi-Black/White"
        8 = "Multi-Other"
        9 = "Other/Unknown"
    ;
RUN;

/* Age bands for reporting; ages 0-9 and 90 and above are labelled as outliers. */
PROC FORMAT LIBRARY=Common.FORMATS;
    VALUE Agefmt (NOTSORTED)
        10-17          = '10s'
        18-25          = "18-25"
        26-35          = "26-35"
        36-45          = "36-45"
        46-55          = "46-55"
        56-64          = "56-64"
        65-89          = "65+"
        0-9, 90-HIGH   = "Outliers"
    ;
RUN;

/* Decade grouping of age: each range is labelled with the first year of its decade;
   everything up to 9 is labelled with a period. Not written to Common.Formats. */
PROC FORMAT;
    VALUE AgeComp
        LOW - 9 = .
        10 - 19 = 10
        20 - 29 = 20
        30 - 39 = 30
        40 - 49 = 40
        50 - 59 = 50
        60 - 69 = 60
        70 - 79 = 70
        80 - 90 = 80
    ;
RUN;

/* Education formats. */
PROC FORMAT LIBRARY=Common.FORMATS;
    VALUE Edufmt (NOTSORTED)
        /* Add "notsorted" (and preload format in procedure?) if you don't want these
           to be arranged alphabetically in output.
           This is the new model education variable. For Att2, see below. */
        1  = "K-6"
        2  = "JR high"
        3  = "some HS"
        4  = "HS Grad"
        5  = "some col"
        6  = "associate"
        7  = "Bachelors"
        8  = "some grad"
        9  = "MA/MS"
        14 = "M.B.A."
        10 = "J.D."
        11 = "M.D."
        12 = "Ph.D."
        13 = "other adv"
    ;

    /* Five-level collapse of the new model education codes. */
    VALUE SimpleEduFmt (NOTSORTED)
        1-3  = "some HS or less"
        4    = "HS Grad"
        5-6  = "some college"
        7-8  = "BA/BS"
        9-14 = "grad degree"
    ;

    /* Labels for the five-level Att2 education variable. */
    VALUE Att2EduFmt (NOTSORTED)
        1 = "some HS or less"
        2 = "HS Grad"
        3 = "some college"
        4 = "BA/BS"
        5 = "grad degree"
    ;

    /* Brian's documentation from PCIAS analyses is:
       education fix;
       if session_id < 727612 | (session_id < 1146306 and study_name = 'SPLC.Age') then do;
           if edu in (1,2,3) then PVedu = 1;
           else if edu in (4) then PVedu = 2;
           else if edu in (5) then PVedu = 3;
           else if edu in (6) then PVedu = 4;
           else if edu in (7,8,9) then PVedu = 5;
       end;
       * PVedu: 1=some HS or less, 2=HS degree, 3=some college, 4=BA/BS, 5=grad degree;
       else if edu in (9, 10, 11, 12, 13, 14) then PVedu = 5;
       else if edu in (1, 2, 3) then PVedu = 1;
       else if edu in (4) then PVedu = 2;
       else if edu in (5,6) then PVedu = 3;
       else if edu in (7,8) then PVedu = 4;
    */
RUN;

/* English fluency, gender and income formats. */
PROC FORMAT LIBRARY=Common.FORMATS;
    VALUE ENGfluencyFmt (NOTSORTED)
        4 = "English primary"
        3 = "fluent-used freqly"
        2 = "fluent-used infreqly"
        1 = "knowledgable"
        0 = "not fluent"
    ;

    /* Character format: only the lowercase codes 'f' and 'm' are labelled. */
    VALUE $Genderfmt
        'f' = "Women"
        'm' = "Men"
    ;

    VALUE Incomefmt (NOTSORTED)
        1  = "<$25K (US$)"
        2  = "$25-49+K"
        3  = "$50-74+K"
        4  = "$75-149+K"
        5  = ">$150K"
        99 = "Don't Know"
    ;

/* VALUE Occfmt *This has not yet been added. Please feel free to add!
Here's what I've done so far: Check out SAS help's documentation on VALUE Statements or on PICTURE Statements (both in FORMAT procedure) 43-1000 = "Administrative Support - Supervisors" 43-3000 = "Administrative Support - Financial Clerks" 43-4000 = "Administrative Support - Information and Records" 43-5000 = "Administrative Support - Recording, Scheduling, Dispatching, Distributing" 43-6000 = "Administrative Support - Secretaries and Assistants" 43-9000 = "Administrative Support - Other Support (data entry, office clerk, proofreaders)" 27-1000 = "Arts/Design/Entertainment/Sports - Art and Design" 27-2000 = "Arts/Design/Entertainment/Sports - Entertainers and Performers" 27-3000 = "Arts/Design/Entertainment/Sports - Media and communication" 27-4000 = "Arts/Design/Entertainment/Sports - Media Equipment workers" 13-1000 = "Business - Business Operations" 13-2000 = "Business - Financial Specialists" 15-1000 = "Computer/Math - Computer Specialists" 15-2000 = "Computer/Math - Math Scientists" 15-3000 = "Computer/Math - Math Technicians" 47-1000 = "Construction/Extraction - Supervisors" 47-2000 = "Construction/Extraction - Construction Trades" 47-3000 = "Construction/Extraction - Helpers, Construction Trades" 47-5000 = "Construction/Extraction - Extraction (e.g., mining, oil)" 47-4000 = "Construction/Extraction - Other" 25-1000 = "Education - Postsecondary Teachers" 25-2000 = "Education - Primary, Secondary, and Special Ed Teachers" 25-3000 = "Education - Other teachers and instructors" 25-4000 = "Education - Librarians, Curators, Archvists" 25-9000 = "Education - Other education, training, and library occupations" 25-9999 = "Education - Student" 17-1000 = "Engineers/Architects - Architects, Surveyors, Cartographers" 17-2000 = "Engineers/Architects - Engineers" 17-3000 = "Engineers/Architects - Drafters, Engineering and Mapping Technicians" 45-1000 = "Farming, Fishing, Forestry - Supervisors" 45-2000 = "Farming, Fishing, Forestry - Agriculture" 45-3000 = "Farming, 
Fishing, Forestry - Fishing and Hunting"
45-4000 = "Farming, Fishing, Forestry - Forest, Conservation, Logging"
45-9000 = "Farming, Fishing, Forestry - Other"
35-1000 = "Food Service - Supervisors"
35-2000 = "Food Service - Cooks and food prep"
35-3000 = "Food Service - servers"
35-9000 = "Food Service - Other food service workers (e.g., dishwasher, host)"
29-1000 = "Healthcare - Diagnosing and Treating Practitioners (MD, Dentist, etc.)"
29-2000 = "Healthcare - Technologists and Technicians"
31-1000 = "Healthcare - Nursing and Home Health Assistants"
31-2000 = "Healthcare - Occupational and Physical Therapist Assistants"
31-9000 = "Healthcare - Other healthcare support"
00-0000 = "Homemaker or Parenting"
23-1000 = "Legal - Lawyers, Judges, and related workers"
23-2000 = "Legal - Legal support workers"
37-1000 = "Maintenance - Building and Grounds Supervisors"
37-2000 = "Maintenance - Building workers"
37-3000 = "Maintenance - Grounds Maintenance"
11-0000 = "Management - Top Executives"
11-2000 = "Management - Advertising, Sales, PR, Marketing"
11-3000 = "Management - Operations Specialists"
11-9000 = "Management - Other Management Occupations"
55-1000 = "Military - Officer and Tactical Leaders/Managers"
55-2000 = "Military - First-line enlisted supervisor/manager"
55-3000 = "Military - enlisted tactical, air/weapons, crew, other"
51-1000 = "Production - Supervisors"
51-2000 = "Production - Assemblers and Fabricators"
51-3000 = "Production - Food processing"
51-4000 = "Production - Metal and Plastic"
51-5000 = "Production - Printers"
51-6000 = "Production - Textile, Apparel, Furnishings"
51-7000 = "Production - Woodworkers"
51-8000 = "Production - Plant and System Operators"
51-9000 = "Production - Other"
33-1000 = "Protective Service - Supervisors"
33-2000 = "Protective Services - Fire fighting and prevention"
33-3000 = "Protective services - Law Enforcement"
33-9000 = "Protective Services - Other (e.g., security, lifeguards, crossing guards)"
49-1000 = "Repair/Installation - Supervisors"
49-2000 = "Repair/Installation - Electrical and Electronic"
49-3000 = "Repair/Installation - Vehicle and Mobile Equipment"
49-9000 = "Repair/Installation - Other"
99-0001 = "Retired"
41-1000 = "Sales - Supervisors"
41-2000 = "Sales - Retail"
41-3000 = "Sales - Sales Representatives and Services"
41-4000 = "Sales - Wholesale and Manufacturing"
41-9000 = "Sales - Other sales (e.g., telemarketers, real estate)"
19-1000 = "Science - Life Scientists"
19-2000 = "Science - Physical scientists"
19-3000 = "Science - Social Scientists"
19-4000 = "Science - Life, Physical, Social Science Technicians"
39-1000 = "Service and Personal Care - Supervisors"
39-2000 = "Service and Personal Care - Animal Care"
39-3000 = "Service and Personal Care - Entertainment attendants"
39-4000 = "Service and Personal Care - Funeral Service"
39-5000 = "Service and Personal Care - Personal Appearance"
39-6000 = "Service and Personal Care - Transportation, Tourism, Lodging"
39-9000 = "Service and Personal Care - Other service (e.g., child care, fitness)"
21-1000 = "Social Service - Counselors, Social Workers, Community specialists"
21-2000 = "Social Service - Religious Workers"
53-1000 = "Transportation - Supervisors"
53-2000 = "Transportation - Air Transportation"
53-3000 = "Transportation - Motor Vehicle Operators"
53-4000 = "Transportation - Rail Transport"
53-5000 = "Transportation - Water Transport"
53-7000 = "Transportation - Material Moving"
53-6000 = "Transportation - Other"
99-9999 = "Unemployed"*/
RUN;   /* explicitly ends the PROC FORMAT step that precedes the Occfmt notes */

/* Three-level political category labels for a 1-7 politics item. */
PROC FORMAT LIBRARY=Common.FORMATS;
    /* NOTE: I believe that this is appropriate for either ?new model demo data?
       or uncleaned raw data (Att2 and all of my data is center around 0) */
    VALUE polCatfmt (NOTSORTED)
        1-3 = "Conservative"
        4   = "Moderate"
        5-7 = "Liberal"
    ;
RUN;

/* Example use of polCatfmt. This step has to come after the PROC FORMAT above:
   referencing the format before it has been created fails on a clean catalog.
   The att libref is not assigned in this file; it must be assigned before running. */
PROC MEANS DATA=att.final;
    VAR session_id;
    CLASS politics;
    FORMAT politics polcatfmt.;
RUN;

PROC FORMAT LIBRARY=Common.FORMATS;
/* Note: Check your coding of the politics variable.
I always reverse-code politics so that increasing values = increasing rightwing/conservative */
    VALUE PoliticalIDFmt (NOTSORTED)
        -3 = "Str Lib"
        -2 = "Mod Lib"
        -1 = "Slt Lib"
         0 = "Moderat"
         1 = "Slt Con"
         2 = "Mod Con"
         3 = "Str Con"
    ;
RUN;

/* Religious affiliation categories; these are the RelCat codes assigned in the
   DATA Att.Final step below. */
PROC FORMAT LIBRARY=Common.FORMATS;
    VALUE RelCatFmt (NOTSORTED)
        1   = "Catholic/Orthodox"
        1.1 = "Protestant"
        1.2 = "Protestant-Other"
        1.3 = "Interfaith"
        2   = "Jewish"
        3   = "NonReligious"
        4   = "Muslim"
        5   = "Buddhist"
        6   = "Hindu"
        7   = "Sikh"
        0   = "Other"
    ;
RUN;

PROC FORMAT LIBRARY=Common.FORMATS;
    VALUE RelIDfmt (NOTSORTED)
        1 = "Not at All"
        2 = "Somewhat"
        3 = "Moderately"
        4 = "Very"
    ;
RUN;

/* ------------------------------------------------------------------------------------
   Rewrites Att.Final in place and
     1) recodes the free-text Religion value into the numeric RelCat category
        (many denomination names appear twice: once truncated, once longer);
     2) upper-cases State and strips punctuation from it, then uses the cleaned value
        to set CountryCitzn (US / CA / UK / AU / EU) or, for the remaining areas,
        region directly;
     3) falls back to countryX when CountryCitzn is still blank or '.'.
   The Att libref is not assigned in this file; it must be assigned before running.
   ------------------------------------------------------------------------------------ */
DATA Att.Final;
    SET Att.Final;

    FORMAT RelCat 7.1;
    LABEL Relcat = "Religious Affiliation";
    LABEL RelID  = "Religiousity";

    IF Religion IN (
        "ANTIOCHIAN ORTHODOX CHRISTIAN", "ANTIOCHIAN ORTHODOX CHRISTIAN DIOCESE OF",
        "ARMENIAN APOSTOLIC CHURCH OF A", "ARMENIAN APOSTOLIC CHURCH OF AMERICA",
        "COPTIC ORTHODOX CHURCH", "EASTERN ORTHODOX",
        "GREEK ORTHODOX ARCHDIOCESE OF", "GREEK ORTHODOX ARCHDIOCESE OF AMERICA",
        "ORTHODOX CHURCH IN AMERICA", "ROMAN CATHOLIC CHURCH, THE",
        "ROMANIAN ORTHODOX EPISCOPATE O", "ROMANIAN ORTHODOX EPISCOPATE OF AMERICA,",
        "SERBIAN ORTHODOX CHURCH IN THE", "SERBIAN ORTHODOX CHURCH IN THE U.S.A. AN",
        "21ROMAN CATHOLIC CHURCH, THE", "43-3000ROMAN CATHOLIC CHURCH, THE")
        THEN RelCat = 1;
    ELSE IF Religion IN (
        "AFRICAN METHODIST EPISCOPAL CH", "AFRICAN METHODIST EPISCOPAL CHURCH",
        "AFRICAN METHODIST EPISCOPAL ZI", "AFRICAN METHODIST EPISCOPAL ZION CHURCH",
        "AMERICAN BAPTIST ASSOCIATION",
        "AMERICAN BAPTIST CHURCHES IN T", "AMERICAN BAPTIST CHURCHES IN THE U.S.A.",
        "ASSEMBLIES OF GOD",
        "BAPTIST BIBLE FELLOWSHIP INTER", "BAPTIST BIBLE FELLOWSHIP INTERNATIONAL",
        "BAPTIST GENERAL CONFERENCE",
        "BAPTIST MISSIONARY ASSOCIATION", "BAPTIST MISSIONARY ASSOCIATION OF AMERIC",
        "CHRISTIAN AND MISSIONARY ALLIA", "CHRISTIAN AND MISSIONARY ALLIANCE, THE",
        "CHRISTIAN BRETHREN (PLYMOUTH B", "CHRISTIAN BRETHREN (PLYMOUTH BRETHREN)",
        "CHRISTIAN CHURCH (DISCIPLES OF", "CHRISTIAN CHURCH (DISCIPLES OF CHRIST)",
        "CHRISTIAN CHURCHES AND CHURCHE", "CHRISTIAN CHURCHES AND CHURCHES OF CHRIS",
        "CHRISTIAN METHODIST EPISCOPAL", "CHRISTIAN METHODIST EPISCOPAL CHURCH",
        "CHRISTIAN REFORMED CHURCH IN N", "CHRISTIAN REFORMED CHURCH IN NORTH AMERI",
        "CHURCH OF GOD (ANDERSON, IN)", "CHURCH OF GOD (CLEVELAND, TN)",
        "CHURCH OF GOD IN CHRIST", "CHURCH OF GOD OF PROPHECY",
        "CHURCH OF THE BRETHREN", "CHURCH OF THE NAZARENE", "CHURCHES OF CHRIST",
        "CONSERVATIVE BAPTIST ASSOCIATI", "CONSERVATIVE BAPTIST ASSOCIATION OF AMER",
        "CUMBERLAND PRESBYTERIAN CHURCH", "EPISCOPAL CHURCH",
        "EVANGELICAL COVENANT CHURCH, T", "EVANGELICAL COVENANT CHURCH, THE",
        "EVANGELICAL FREE CHURCH OF AME", "EVANGELICAL FREE CHURCH OF AMERICA, THE",
        "EVANGELICAL LUTHERAN CHURCH IN", "EVANGELICAL LUTHERAN CHURCH IN AMERICA",
        "EVANGELICAL PRESBYTERIAN CHURC", "EVANGELICAL PRESBYTERIAN CHURCH",
        "FREE METHODIST CHURCH OF NORTH", "FREE METHODIST CHURCH OF NORTH AMERICA",
        "FULL GOSPEL FELLOWSHIP OF CHUR", "FULL GOSPEL FELLOWSHIP OF CHURCHES AND M",
        "GENERAL ASSOCIATION OF GENERAL", "GENERAL ASSOCIATION OF GENERAL BAPTISTS",
        "GENERAL ASSOCIATION OF REGULAR", "GENERAL ASSOCIATION OF REGULAR BAPTIST C",
        "GENERAL CONFERENCE MENNONITE BRETHREN CH",
        "GRACE GOSPEL FELLOWSHIP",
        "INDEPENDENT FUNDAMENTAL CHURCH", "INDEPENDENT FUNDAMENTAL CHURCHES OF AMER",
        "INTERNATIONAL CHURCH OF THE FO", "INTERNATIONAL CHURCH OF THE FOURSQUARE G",
        "INTERNATIONAL COUNCIL OF COMMUNITY CHURC",
        "INTERNATIONAL PENTECOSTAL HOLINESS CHURC",
        "LUTHERAN CHURCH-MISSOURI SYNOD", "LUTHERAN CHURCH-MISSOURI SYNOD, THE",
        "MENNONITE CHURCH",
        "NATIONAL ASSOC OF CONGREGATION", "NATIONAL ASSOC OF CONGREGATIONAL CHRISTI",
        "NATIONAL ASSOCIATION OF FREE W", "NATIONAL ASSOCIATION OF FREE WILL BAPTIS",
        "NATIONAL BAPTIST CONVENTION OF", "NATIONAL BAPTIST CONVENTION OF AMERICA,",
        "NATIONAL BAPTIST CONVENTION, U", "NATIONAL BAPTIST CONVENTION, USA, INC.",
        "NATIONAL MISSIONARY BAPTIST CO", "NATIONAL MISSIONARY BAPTIST CONVENTION O",
        "PENTECOSTAL ASSEMBLIES OF THE", "PENTECOSTAL ASSEMBLIES OF THE WORLD, INC",
        "PENTECOSTAL CHURCH OF GOD",
        "PRESBYTERIAN CHURCH (U.S.A.)", "PRESBYTERIAN CHURCH IN AMERICA",
        "PROGRESSIVE NATIONAL BAPTIST C", "PROGRESSIVE NATIONAL BAPTIST CONVENTION,",
        "REFORMED CHURCH IN AMERICA",
        "RELIGIOUS SOCIETY OF FRIENDS (", "RELIGIOUS SOCIETY OF FRIENDS (CONSERVATI",
        "SALVATION ARMY, THE", "SOUTHERN BAPTIST CONVENTION",
        "UNITED CHURCH OF CHRIST", "UNITED METHODIST CHURCH, THE",
        "WESLEYAN CHURCH, THE",
        "WISCONSIN EVANGELICAL LUTHERAN", "WISCONSIN EVANGELICAL LUTHERAN SYNOD")
        THEN RelCat = 1.1;
    ELSE IF Religion IN (
        "CHRISTIAN CONGREGATION, INC.,", "CHRISTIAN CONGREGATION, INC., THE",
        "CHURCH OF JESUS CHRIST OF LATT", "CHURCH OF JESUS CHRIST OF LATTER-DAY SAI",
        "JEHOVAH'S WITNESSES",
        "REORGANIZED CHURCH OF JESUS CH", "REORGANIZED CHURCH OF JESUS CHRIST OF LA",
        "OLD ORDER AMISH CHURCH", "SEVENTH-DAY ADVENTIST CHURCH")
        THEN RelCat = 1.2;
    ELSE IF Religion IN ("BAHA'I", "ECUMENICAL", "UNITARIAN UNIVERSALIST") THEN RelCat = 1.3;
    ELSE IF Religion = "JEWISH" THEN RelCat = 2;
    ELSE IF Religion IN ("AGNOSTIC", "ATHEIST", "NONE") THEN RelCat = 3;
    ELSE IF Religion = "MUSLIM/ISLAMIC" THEN RelCat = 4;
    ELSE IF Religion = "BUDDHIST" THEN RelCat = 5;
    ELSE IF Religion = "HINDU" THEN RelCat = 6;
    ELSE IF Religion = "SIKH" THEN RelCat = 7;
    ELSE IF Religion = "OTHER" THEN RelCat = 0;
    ELSE RelCat = .;

    /* NOTE(review): continentcit is given a format here but is never assigned in this
       step -- confirm whether it is still needed. */
    FORMAT continentcit 8.;

    /* From Brian's international coding (and my recoding of State for PCIAS analyses).
       The State recoding are my own addition, since very old data from Project Implicit
       recoded only state (as a fillin) */
    FORMAT Region $15.;

    /* Normalise the free-text State value: upper-case it and remove punctuation.
       Every literal in the lists below therefore has to be upper case and free of the
       removed characters, otherwise it can never match. */
    State = UPCASE(State);
    State = TRIM(COMPRESS(State, "+-#'`/|\(),.?>:"));

    IF State IN (
        '0R', '0HIO', '01020', '02357', '05488', '10000', '10003', '10011', '11706', '13820',
        '16802', '17532', '21157', '23185', '24142', '35405', '37055', '60195', '65803',
        '78626', '78705', '86503', '90262', '93117', '94539', '95207', '97701', '931174279',
        'ALNY', 'ANTELOPE CA', 'AL', 'ALA', 'ALABAMA', 'ALBANY', 'ALASKA', 'AK', 'AMABAMA',
        'AR', 'ARIZONA', 'ARIZONIA', 'ARK', 'ARKANSAS', 'AZ', 'BRONX',
        'CA', 'CAL', 'CALF', 'CALFIORNIA', 'CALI', 'CALIF', 'CALIFORNAI', 'CALIFONIA',
        'CALIFORNIA', 'CALIFRONIA', 'CALIRORINIA', 'CA USA',
        'CO', 'COLO', 'COLORADO', 'CONN', 'CONNECTICUT', 'CONNETICUT', 'CT',
        'DC', 'DE', 'DELAWARE', 'DETROIT', 'DISTRICT OF COLUMBIA',
        'FL', 'FLA', 'FLORID', 'FLORIDA', 'GA', 'GEORGIA', 'GOERGIA',
        'HAWAII', 'HI', 'HILLSBORO', 'IA', 'ID', 'IDAHO', 'IDIANA',
        'IL', 'ILL', 'ILLINIOS', 'ILLINOI', 'ILLINOIS', 'ILLIONIOS',
        'IN', 'IND', 'INDANA', 'INDIANA', 'IOWA',
        'KALAMAZOO', 'KANSA', 'KANSAS', 'KANSAS CITY', 'KENTUCKY', 'KS', 'KY',
        'LA', 'LOUISIANA', 'MA', 'MA USA', 'MAHOPAC', 'MAINE', 'MARYLAND', 'MASS',
        'MASSACHUSETTS', 'MCLEAN', 'MD', 'ME', 'MI', 'MIAMI', 'MIAMIFL', 'MICH', 'MICHAN',
        'MICHIGANW', 'MICHIGAN', 'MICIGAN', 'MINN', 'MINNESOTA', 'MISSISSIPPI', 'MISSOURI',
        'MN USA', 'MNMN', 'MN', 'MO', 'MONTANA', 'MS', 'MT',
        'NAVADA', 'NC', 'N C', 'N CAROLINA', 'ND', 'NE', 'NEB', 'NEBR', 'NEBRASKA', 'NEVADA',
        'NH', 'NV', 'NEW HAMPSHIRE', 'NEW JERSEY', 'NEW MEXICO', 'NEW YORK', 'NEWYORK',
        'NJ', 'NM', 'NORFOLK', 'NORTH CAROLINA', 'NORTH DAKOTA', 'NORTHRIDGE CA', 'NUEVO',
        'NY', 'NYC', 'OH', 'OHIA', 'OHIO', 'OIHO', 'OK', 'OKLAHOMA', 'OR', 'ORE', 'OREGON',
        'PA', 'P A', 'PENN', 'PENNSYLVANIA', 'RHODE ISLAND', 'RI',
        'SC', 'SD', 'SOUTH CAROLINA', 'SOUTHCAROLINA', 'SOUTH DAKOTA',
        'T3XAS', 'TENN', 'TENNESEE', 'TENNESSE', 'TENNESSEE', 'TESAS', 'TEX', 'TEXAS', 'TN',
        'TOPEKA', 'TX', 'US', 'USA', 'UT', 'UTAH',
        'VA', 'VIRGINA', 'VIRGINIA', 'VIRIGINIA', 'VERMONT', 'VT',
        'WA', 'WARRENSBURG', 'WASH', 'WASHIHGTON', 'WASHINGON', 'WASHINGTOM', 'WASHINGTON',
        'WASHINGTON DC', 'WASHINGTON STATE', 'WASHINTON', 'WEST VIRGINIA',
        'WI', 'WIS', 'WISC', 'WISCONSIN', 'WISONSIN', 'WV', 'WYOMING', 'WY')
        THEN CountryCitzn = 'US';
    ELSE IF State IN (
        'CAN', 'CANACA', 'CANADA', 'CANADA BC', 'CANADIAN', 'CANANDA', 'CDA', 'CDN', 'BC',
        'ONTARIO', 'ONTARIO CANADA', 'MANITOBA CANADA', 'NOVA SCOTIA', 'PQ', 'QUEBEC', 'QUIBEC')
        THEN CountryCitzn = 'CA';
    ELSE IF State IN (
        'BRITAIN', 'BRITISH', 'ENG', 'ENGALND', 'ENGELAND', 'ENGLAND', 'GREAT BRITAIN', 'GB',
        'LONDON', 'NYORKSHIRE', 'SCOTLAND', 'UK', 'UNITED KIGDOM', 'UNITED KINGDOM', 'WALES')
        THEN CountryCitzn = 'UK';
    ELSE IF State IN (
        'AU', 'AUS', 'AUSRALIA', 'AUST', 'AUSTALIA', 'AUSTRAILA', 'AUSTRALAI', 'AUSTRALI',
        'AUSTRALIA', 'AUSTRALIA NSW', 'AUSTRALIA VIC', 'AUSTRALIAN',
        'N Z', 'NEW ZEALAND', 'NEW ZEALAND', 'NEW ZEWLAND', 'NSW', 'NZ', 'OZ', 'QLD')
        THEN CountryCitzn = 'AU';
    /* European free-text values get the pseudo-code "EU" (not an ISO country code). */
    ELSE IF State IN (
        'AACHEN', 'ALBANIA', 'AUSTRIA', 'AUSTRLIA', 'BELGIUM', 'BOSNIA AND HERZEGOWINA', 'BRD',
        'BULGARIA', 'CH', 'CROATIA', 'CY', 'CYPRUS', 'CZ', 'CZECH REP', 'CZECH REPUBLIC',
        'DUBLIN', 'EIRE', 'FI', 'FIN', 'FINLAND', 'ESTONIA', 'EUROPE',
        'GEMANY', 'GER', 'GERMAN', 'GERMANY', 'GR', 'GREECE', 'FRANCE', 'HOLLAND', 'HUNGARY',
        'IRE', 'IRELAND', 'IRELND', 'ITALY', 'LATVIA', 'LUXEMBOURG', 'N IRELAND',
        'NEDERLAND', 'NETH', 'NETHERLANDS', 'NL', 'NORTHERN IRELAND', 'NRW',
        'POLAND', 'PORTUGAL', 'PT', 'ROMANIA', 'SI', 'SLOVENIA', 'SLOVENIJA', 'SPAIN',
        'SUISSE', 'SWITZERLAND', 'TURKEI', 'TURKEY',
        'TH NETHERLANDS', 'THE NETHERLANDS', 'THE NNETHERLANDS', 'UKRAINE', 'YU')
        THEN CountryCitzn = "EU";
    /* MALAYSIA and PENANG are listed with Asia, in line with 'MY' / 'JOHOR' here and
       with the ISO code "MY" in the region step further down. */
    ELSE IF State IN (
        'AFGHANISTAN', 'ASIA', 'BHUTAN', 'CHINA', 'HONG KONG', 'HONG KONG', 'HONGKONG',
        'GU', 'GUAM', 'HK', 'INDIA', 'INDIAN', 'INDONESIA', 'JAPAN', 'JAPANE', 'JOHOR',
        'KAZAKHSTAN', 'KOREA', 'JP', 'JPN', 'MALAYSIA', 'MY', 'NEPAL', 'PENANG',
        'PH', 'PHIL', 'PHILIPPINES', 'RUSSIA', 'SG', 'SINGAPORE', 'SOUTH KOREA',
        'SRI LANKA', 'SYBERIA', 'TAIWAN', 'THAILAND', 'TOKYO')
        THEN region = "Asia";
    ELSE IF State IN (
        'ARG', 'ARGENTINA', 'ARUBA', 'BARBADOS', 'BOLIVIA', 'BR', 'BRASIL', 'BRASMLIA',
        'BRAZIL', 'CHILE', 'COLOMBIA', 'PERU', 'VENEZUELA', 'URUGUAY')
        THEN region = "South America";
    ELSE IF State IN (
        'BAHRAIN', 'DUBAI', 'ISRAEL', 'SAUDI', 'SAUDI ARABIA', 'JORDAN', 'KSA', 'KUWAIT',
        'LEBANON', 'OMAN', 'PAK', 'PAKISTAN', 'SA', 'SYRIA', 'UAE')
        THEN region = "Middle East";   /* EG is Africa, rest are Asia */
    ELSE IF State IN ('BELIZE', 'EL SALVADOR', 'MEXICO', 'MIXICO', 'PANAMA')
        THEN region = "Central America";
    ELSE IF State IN (
        'BOTSWANA', 'CAPEVERDE', 'EGYPT', 'MOZAMBIQUE', 'NIGERIA', 'RSA', 'SAFRICA', 'SOMALIA',
        'SOUTH AFRICA', 'SOUTHAFRICA', 'UGUANDA', 'ZA', 'ZIMBABWE')
        THEN region = "Africa";
    /* ("DK","FO","IS","NO","SJ","SE") Denmark, Faroe Islanders, Iceland, Norway,
       Svalbard And Jan Mayen Islands, Sweden */
    ELSE IF State IN ('DENMARK', 'DK', 'ICELAND', 'NO', 'NOR', 'NORWAY', 'SWEDEN')
        THEN region = "Scandanavia";
    ELSE IF State IN (
        'BAHAMAS', 'DOM REP', 'CUBA', 'JAMAICA', 'PR', 'PUERTO RICO', 'STLUCIA',
        'TININDAD AND TOBAGO', 'TRINIDAD', 'TRINIDAD & TOBAGO')
        THEN region = "Other";
    /* Andorra, Austria, belgium, Bosnia and Herzegovina, Bulgaria, Croatia, Cyprus,
       Czech, Estonia */
    ELSE ;
    /*'ESPAQA', 'PS','VIC', 'VICTORIA' */

    /* Fall back to countryX when the State text did not yield a country code,
       and treat the placeholder '0' as missing. */
    IF CountryCitzn IN ('', '.') THEN CountryCitzn = countryX;
    IF CountryCitzn = '0' THEN CountryCitzn = '';
    /* DROP countryX State*/;
RUN;

/* ------------------------------------------------------------------------------------
   Builds temp from Clean.Age: where region is still blank it is derived from the
   two-letter CountryCitzn code, and gregion is a coarser grouping that merges
   Central/South America and Scandanavia/Europe.
   The Clean libref is not assigned in this file; it must be assigned before running.
   ------------------------------------------------------------------------------------ */
DATA temp;
    SET Clean.Age;

    IF region = '' THEN DO;
        FORMAT Region $19.;
        IF CountryCitzn IN ("US") THEN region = "USA";
        ELSE IF CountryCitzn IN ("CA") THEN region = "Canada";
        ELSE IF CountryCitzn IN ("UK") THEN region = "UK";
        ELSE IF CountryCitzn IN ("AU", "CX", "HM", "NZ") THEN region = "Australia";
        ELSE IF CountryCitzn IN ("DK", "FO", "IS", "NO", "SJ", "SE") THEN region = "Scandanavia";
        /* "EU" is the pseudo-code the State recode assigns to European free-text values;
           without it here those records would fall through to "Other". */
        ELSE IF CountryCitzn IN (
            "AD", "AT", "BE", "BA", "BG", "HR", "CY", "CZ", "EE", "FI", "FR", "DE", "GI", "GR",
            "HU", "IE", "IT", "LV", "LI", "LT", "LU", "MK", "MT", "MD", "MC", "NL", "PL", "PT",
            "RO", "SM", "SK", "SI", "ES", "CH", "TR", "UA", "UK", "VA", "YU", "EU")
            THEN region = "Europe";
        ELSE IF CountryCitzn IN (
            "AF", "AL", "AM", "AZ", "BD", "BY", "BT", "IO", "BN", "KH", "CN", "TP", "PF", "GE",
            "GU", "HK", "IN", "ID", "JP", "KZ", "KR", "KP", "KG", "LA", "MO", "MY", "MV", "FM",
            "MN", "MM", "NP", "PH", "RU", "SG", "LK", "TW", "TJ", "TH", "TM", "UZ", "VN")
            THEN region = "Asia";
        ELSE IF CountryCitzn IN (
            "AR", "BO", "BR", "CL", "CO", "EC", "FK", "GF", "GY", "PY", "PE", "GS", "SR", "UY",
            "VE")
            THEN region = "South America";
        ELSE IF CountryCitzn IN (
            "DZ", "AO", "BJ", "BW", "BF", "BI", "CM", "CV", "CF", "TD", "KM", "CG", "CD", "CI",
            "DJ", "GQ", "ER", "ET", "GA", "GM", "GH", "GN", "GW", "KE", "LS", "LR", "LY", "MG",
            "MW", "ML", "MR", "MU", "YT", "MA", "MZ", "NA", "NE", "NG", "RE", "RW", "SH", "ST",
            "SN", "SC", "SL", "SO", "ZA", "SD", "SZ", "TZ", "TG", "TO", "TN", "UG", "ZM", "ZW")
            THEN region = "Africa";
        ELSE IF CountryCitzn IN ("BZ", "CC", "CR", "SV", "GT", "HN", "MX", "NI", "PA")
            THEN region = "Central America";
        /* EG is Africa, rest are Asia. This note must be a block comment: a statement
           comment inside the IN list would end the IF statement at its semicolon. */
        ELSE IF CountryCitzn IN (
            "BH", "IR", "IQ", "IL", "JO", "KW", "LB", "OM",
            "PK", "QA", "SA", "SY", "AE", "YE", "EG")
            THEN region = "Middle East";
        ELSE IF CountryCitzn NOT IN ("", ".") THEN region = "Other";
    END;

    IF region IN ("Central America", "South America") THEN gregion = "C/S America";
    ELSE IF region IN ("Scandanavia", "Europe") THEN gregion = "Europe";
    ELSE gregion = region;
RUN;

/* -------- Applies the formats to label procedure output ------------------ */

/* NOTE(review): PolCatFmt labels the values 1-7, while PoliticalIDFmt labels -3 to 3;
   confirm which one matches the coding of PoliticalID in Clean.Age. */
PROC MEANS DATA=Clean.Age;
    FORMAT PoliticalID PolCatFmt.;
    CLASS PoliticalID;
    VAR Iat;
RUN;

PROC MEANS DATA=Clean.Age;
    FORMAT RaceOmb RaceOMBFmt.;
    CLASS RaceOMB;
    VAR Iat;
RUN;

/* BY-group processing in PROC GLM needs the data sorted by Gender first. */
PROC SORT DATA=Clean.Age;
    BY Gender;
RUN;

PROC GLM DATA=Clean.Age;
    FORMAT Gender $GenderFmt.;
    BY Gender;
    MODEL IAT = Att / SS3 SOLUTION;
RUN;
QUIT;

/* -------- Uses SAS's default formats to label countries ------------------ */
/* Note: To apply the country name format, the easiest way is to add the SAS help
/  directory to your formats directory and apply it to output, like this:
/  The citizenship and residence variables use the countries' ISO alpha2 codes
/  To make all countryIDs correspond to this standard coding need to recode two:
/     change "UK" to "GB",
/     change "CD" to "CG"; (Not exactly, but no ISO for Democratic Rep. of Congo
*/
/* For details about all of the available geographic codes, search SAS help for
/  one of the formats below, or for the help document titled "Using Formats for Maps"
*/
OPTIONS FMTSEARCH=(Common.Formats sashelp.mapfmts);

PROC FREQ DATA=temp;
    TABLES countrycitzn;
    FORMAT countrycitzn $isoa2lu.;
RUN;

PROC FREQ DATA=temp;
    TABLES countryresd;
    FORMAT countryresd $isoa2lu.;
RUN;