Data processing scheme

We tried to improve our analysis by applying the advanced programming skills obtained as students at a computer science high school.

The data processing was done with a C++ program written by our team, together with the charting facilities offered by Excel. Because our script is highly general, the analysis can be applied to other countries as well. Within the project, this generality is important, considering that the data collected from stations in nearby countries can be analysed in order to look for other causes of internal phenomena. In addition, we created our own website at vianuclimatedetectives.com, where we shared the gathered data, the processing, and the results.

Moreover, we extended the period to 60 years and dedicated a special chapter to data quality. Therefore, we processed data from all 26 meteorological stations in Romania, eliminating 7 stations that did not pass the data quality test.

The rover was a challenge because we wanted to measure the temperature very accurately. Since we had used thermometers in the previous project, this time we thought of using a temperature sensor, especially since a year earlier we had participated in the Astro Pi Mission Zero project, in which we sent a message to the International Space Station.

The rover was especially useful in the preliminary stage of the project, when the latest meteorological data from the weather stations were not yet available (posting is done with a delay of 1-3 months). It also helped us to explore a very technical area of climatology, leaving our comfort zone (algorithms and automatic data processing), which we consider a great gain.

OUR C++ PROGRAM:

//Vianu Climate Detectives 2020
#include <cstdio>
#include <cstring>
#include <string>
#include <cctype>
#include <cstdlib>
#include <algorithm>
using namespace std;
//FROM 1990: 11315
//NOTE(review): the meaning of the note above is not visible in this file; presumably a row count when BEGIN_DATE is 1990 - confirm.
//Sizes and limits used by the whole program:
//  NMAX        - size of the buffer `s` (one header line of a data file)
//  BEGIN_DATE  - first date kept, compared against the DATE field of the data files (YYYYMMDD as an integer)
//  DATE_MAX    - number of rows of `data`
//  VALUES      - number of columns written per row (columns 1..VALUES of `data`)
//  DIGITS      - width of the zero-padded station number in the data file names
//  LIMIT       - not used in the visible code
//  LMAX        - size of the buffer `line` (one line of stations.txt)
//  NR_STATIONS - size of `name`, `ID` and `error`; `name` and `error` are indexed by STAID
//  LETTERS     - size of one station name in `name`
const int NMAX=205,BEGIN_DATE=19610101,DATE_MAX=440000,VALUES=10,DIGITS=6,LIMIT=25,LMAX=85,NR_STATIONS=20325,LETTERS=25;
//s: header-line buffer for the data files; STATION: only cleared in data_for_a_station, never read in the visible code;
//line: line buffer for stations.txt; name[STAID]: station name copied from stations.txt.
char s[NMAX],STATION[DIGITS+5],line[LMAX],name[NR_STATIONS][LETTERS];
//data[row][col], rows start at 1. Columns: 1 STAID, 2 DATE, 3 year, 4 month, 5 day,
//6 RR, 7 TG, 8 TN, 9 TX (printed divided by 10), 10 MMDD.
//NOTE(review): with `using namespace std;` an unqualified use of `data` is ambiguous with std::data
//when the file is compiled as C++17 or later; such uses need to be written as ::data.
//ID[0] is the number of registered stations, ID[1..ID[0]] are their STAIDs.
int data[DATE_MAX][VALUES+5],ID[NR_STATIONS];
//error[STAID] is set when a station is eliminated for having too many missing values.
bool error[NR_STATIONS];
void data_for_a_station(int station,int pos)
{
    string file_RR="RR_STAID",file_TG="TG_STAID",file_TN="TN_STAID",file_TX="TX_STAID";
    int STAID,SOUID,DATE,RR,Q_RR,TG,Q_TG,TN,Q_TN,TX,Q_TX,ind,i,j,digits=0,pow_10=1,copy_station=station,error_days=0;
    bool text,elim_day;
    char new_digit;
    //File name for a station
    memset(STATION,0,sizeof(STATION));
    do
    {
        digits++;
        station=station/10;
    }
    while(station);
    for(i=1; i<=DIGITS-digits; i++)
    {
        file_RR=file_RR+'0';
        file_TG=file_TG+'0';
        file_TN=file_TN+'0';
        file_TX=file_TX+'0';
    }
    for(i=1; i<=digits-1; i++)
        pow_10=pow_10*10;
    station=copy_station;
    do
    {
        new_digit=station/pow_10+'0';
        file_RR=file_RR+new_digit;
        file_TG=file_TG+new_digit;
        file_TN=file_TN+new_digit;
        file_TX=file_TX+new_digit;
        station=station%pow_10;
        pow_10=pow_10/10;
    }
    while(pow_10);
    file_RR=file_RR+".txt";
    file_TG=file_TG+".txt";
    file_TN=file_TN+".txt";
    file_TX=file_TX+".txt";
    //
    freopen(file_RR.c_str(),"r",stdin);
    text=1;
    while(text)
    {
        fgets(s,NMAX,stdin);
        if(s[0]=='S' && s[1]=='T' && s[2]=='A' && s[3]=='I' && s[4]=='D')
            text=0;
    }
    scanf("%d,%d,%d,%d,%d,",&STAID,&SOUID,&DATE,&RR,&Q_RR);
    while(DATE<BEGIN_DATE)
        scanf("%d,%d,%d,%d,%d,",&STAID,&SOUID,&DATE,&RR,&Q_RR);
    ind=1;
    data[ind][1]=STAID;
    data[ind][2]=DATE;
    data[ind][3]=DATE/10000;
    data[ind][4]=(DATE/100)%100;
    data[ind][5]=DATE%100;
    data[ind][6]=RR;
    data[ind][10]=DATE%10000;
    while(scanf("%d,%d,%d,%d,%d,",&STAID,&SOUID,&DATE,&RR,&Q_RR)!=EOF)
    {
        ind++;
        data[ind][1]=STAID;
        data[ind][2]=DATE;
        data[ind][3]=DATE/10000;
        data[ind][4]=(DATE/100)%100;
        data[ind][5]=DATE%100;
        data[ind][6]=RR;
        data[ind][10]=DATE%10000;
    }
    fclose(stdin);
    freopen(file_TG.c_str(),"r",stdin);
    text=1;
    while(text)
    {
        fgets(s,NMAX,stdin);
        if(s[0]=='S' && s[1]=='T' && s[2]=='A' && s[3]=='I' && s[4]=='D')
            text=0;
    }
    scanf("%d,%d,%d,%d,%d,",&STAID,&SOUID,&DATE,&TG,&Q_TG);
    while(DATE<BEGIN_DATE)
        scanf("%d,%d,%d,%d,%d,",&STAID,&SOUID,&DATE,&TG,&Q_TG);
    ind=1;
    data[ind][7]=TG;
    while(scanf("%d,%d,%d,%d,%d,",&STAID,&SOUID,&DATE,&TG,&Q_TG)!=EOF)
    {
        ind++;
        data[ind][7]=TG;
    }
    fclose(stdin);
    freopen(file_TN.c_str(),"r",stdin);
    text=1;
    while(text)
    {
        fgets(s,NMAX,stdin);
        if(s[0]=='S' && s[1]=='T' && s[2]=='A' && s[3]=='I' && s[4]=='D')
            text=0;
    }
    scanf("%d,%d,%d,%d,%d,",&STAID,&SOUID,&DATE,&TN,&Q_TN);
    while(DATE<BEGIN_DATE)
        scanf("%d,%d,%d,%d,%d,",&STAID,&SOUID,&DATE,&TN,&Q_TN);
    ind=1;
    data[ind][8]=TN;
    while(scanf("%d,%d,%d,%d,%d,",&STAID,&SOUID,&DATE,&TN,&Q_TN)!=EOF)
    {
        ind++;
        data[ind][8]=TN;
    }
    fclose(stdin);
    freopen(file_TX.c_str(),"r",stdin);
    text=1;
    while(text)
    {
        fgets(s,NMAX,stdin);
        if(s[0]=='S' && s[1]=='T' && s[2]=='A' && s[3]=='I' && s[4]=='D')
            text=0;
    }
    scanf("%d,%d,%d,%d,%d,",&STAID,&SOUID,&DATE,&TX,&Q_TX);
    while(DATE<BEGIN_DATE)
        scanf("%d,%d,%d,%d,%d,",&STAID,&SOUID,&DATE,&TX,&Q_TX);
    ind=1;
    data[ind][9]=TX;
    while(scanf("%d,%d,%d,%d,%d,",&STAID,&SOUID,&DATE,&TX,&Q_TX)!=EOF)
    {
        ind++;
        data[ind][9]=TX;
    }
    fclose(stdin);
    for(i=1; i<=ind; i++)
    {
        if(data[i][10]==401 || data[i][10]==701 || data[i][10]==1001 || data[i][10]==101)
        {
            if(error_days>30)
            {
                error[STAID]=1;
                break;
            }
            error_days=0;
        }
        if(data[i][4]==2 || data[i][4]==3 || data[i][4]==5 || data[i][4]==6 || data[i][4]==8 || data[i][4]==9 || data[i][4]==11 || data[i][4]==12)//transition
        {
            //ERROR
            for(j=6; j<=9; j++)
            {
                if(data[i][j]==-9999)
                {
                    error_days++;
                    break;
                }
            }
        }
    }
    if(!error[STAID])
    {
        for(i=1; i<=ind; i++)
        {
            elim_day=0;
            for(j=6; j<=9; j++)
            {
                if(data[i][j]==-9999)
                    elim_day=1;
            }
            if(!elim_day)
            {
                for(j=1; j<=VALUES; j++)
                {
                    if(j>=6 && j<=9)//temperatures, precipitation
                        printf("%.1lf",(double)data[i][j]/10);
                    else
                    {
                        if(((j==4 || j==5) && data[i][j]<10) || (j==10 && data[i][j]<1000))//month, day, MMDD
                            printf("0");
                        printf("%d",data[i][j]);//STAID, DATE, year, month, day, MMDD
                    }
                    printf("|");
                }
                //TRANSITION
                if(data[i][4]==2 || data[i][4]==3)//month
                    printf("winter_spring|");
                else
                {
                    if(data[i][4]==5 || data[i][4]==6)
                        printf("spring_summer|");
                    else
                    {
                        if(data[i][4]==8 || data[i][4]==9)
                            printf("summer_autumn|");
                        else
                        {
                            if(data[i][4]==11 || data[i][4]==12)
                                printf("autumn_winter|");
                            else
                                printf("no_transition|");
                        }
                    }
                }
                //STA_NAME
                printf("%s|",name[STAID]);
                //Season
                if(data[i][4]==1 || data[i][4]==2 || data[i][4]==12)
                    printf("winter|");
                else
                {
                    if(data[i][4]==3 || data[i][4]==4 || data[i][4]==5)
                        printf("spring|");
                    else
                    {
                        if(data[i][4]==6 || data[i][4]==7 || data[i][4]==8)
                            printf("summer|");
                        else
                            printf("autumn|");
                    }
                }
                if(data[i][5]<=15)
                    printf("first\n");
                else
                    printf("second\n");
            }
        }
    }
}
int main()
{
    int l,i,commas,STAID,pos;
    bool text=1;
    freopen("stations.txt","r",stdin);
    while(text)
    {
        fgets(line,LMAX,stdin);
        if(line[0]=='S' && line[1]=='T' && line[2]=='A' && line[3]=='I' && line[4]=='D')
            text=0;
    }
    fgets(line,LMAX,stdin);
    freopen("RO_data.out","w",stdout);
    printf("STAID|DATE|Year|Month|Day|Precipitation(mm)|Average Temperature(°C)|Minimum Temperature(°C)|Maximum Temperature(°C)|MMDD|Transition|STA_NAME|Season|Half\n");
    while(fgets(line,LMAX,stdin))
    {
        i=0;
        l=strlen(line)-1;
        commas=0;
        for(i=0; i<l && commas<2; i++)
        {
            if(line[i]==',')
                commas++;
        }
        if(line[i]=='R' && line[i+1]=='O')
        {
            i=0;
            while(!isdigit(line[i]))
                i++;
            STAID=0;
            while(isdigit(line[i]))
            {
                STAID=STAID*10+line[i]-'0';
                i++;
            }
            ID[++ID[0]]=STAID;
            i++;
            pos=-1;
            while(isalpha(line[i]) || isalpha(line[i+1]))
            {
                name[STAID][++pos]=line[i];
                i++;
            }
        }
    }
    fclose(stdin);
    for(i=1; i<=ID[0]; i++)
        data_for_a_station(ID[i],i);
    fclose(stdout);
    freopen("ERRORS.out","w",stdout);
    printf("Eliminated stations from our analysis due to too many missing values\nMore than 30 missing values in a transition/year\n");
    printf("STAID|STA_NAME\n");
    for(i=1; i<=ID[0]; i++)
    {
        if(error[ID[i]])
            printf("%d|%s\n",ID[i],name[ID[i]]);
    }
    fclose(stdout);
    return 0;
}
  • By studying the errors file, we decided to eliminate the stations that have more than 30 missing values in a transition/year. The result:
%d bloggers like this: