Skip to content

Better Firmware Faster

February 12, 2015

I bump into an good example that show some good programming skill. Consider below source code:

U8_t PackCMD(TFIFOState* pBuf, Cmd* pdata)
{
 U8_t i=0;
 U8_t j,ires;
 U8_t ilength=0;

 //init
 memset(pdata,0,sizeof(Cmd));
 
 for(j=0;j<MAX_READER;j++)
 {
  //temp info
  if(pBuf->FifoReader[j].tone & BIT_INVALID)
  {
   pdata->idata[i++] = 2; // temp: temp command 2: enable
   pdata->idata[i++] = 1; // on time
   pdata->idata[i++] = 1; // off time
   pdata->idata[i++] = 1; // Color ON
   pdata->idata[i++] = 2; // Color off
   pdata->idata[i++] = 8; // count lsb
   pdata->idata[i++] = 0; // count msb
  }
  else if (pBuf->FifoReader[j].tone & BIT_VALID)
  {
   pdata->idata[i++] = 2; // temp: temp command 2: enable
   pdata->idata[i++] = 1; // on time
   pdata->idata[i++] = 1; // off time
   pdata->idata[i++] = 1; // Color ON
   pdata->idata[i++] = 2; // Color off
   pdata->idata[i++] = 2; // count lsb
   pdata->idata[i++] = 0; // count msb
  }
  else if (pBuf->FifoReader[j].tone & BIT_TMP_1)
  {
   pdata->idata[i++] = 2; // temp: temp command 2: enable
   pdata->idata[i++] = 1; // on time
   pdata->idata[i++] = 1; // off time
   pdata->idata[i++] = 2; // Color ON
   pdata->idata[i++] = 1; // Color off
   pdata->idata[i++] = 10; // count lsb
   pdata->idata[i++] = 0; // count msb
  }
  else if (pBuf->FifoReader[j].tone & BIT_TMP_2)
  {
   pdata->idata[i++] = 2; // temp: temp command 2: enable
   pdata->idata[i++] = 1; // on time
   pdata->idata[i++] = 1; // off time
   pdata->idata[i++] = 2; // Color ON
   pdata->idata[i++] = 1; // Color off
   pdata->idata[i++] = 2; // count lsb
   pdata->idata[i++] = 0; // count msb
  }
  else if (pBuf->FifoReader[j].tone & BIT_AC_FAIL)
  {
   pdata->idata[i++] = 2; // temp: temp command 2: enable
   pdata->idata[i++] = 1; // on time
   pdata->idata[i++] = 1; // off time
   pdata->idata[i++] = 2; // Color ON
   pdata->idata[i++] = 1; // Color off
   pdata->idata[i++] = 4; // count lsb
   pdata->idata[i++] = 0; // count msb
  }
  else
  {
   pdata->idata[i++] = 0; // temp: temp command 2: ignore
   pdata->idata[i++] = 1; // on time
   pdata->idata[i++] = 1; // off time
   pdata->idata[i++] = 1; // Color ON
   pdata->idata[i++] = 2; // Color off
   pdata->idata[i++] = 2; // count lsb
   pdata->idata[i++] = 0; // count msb
  }
 
 }
 
 ilength=i;

 return ilength;
}

Once compile above function, the size of function is 0x388. And running the function 1000 times yield execution time of around 217uSeconds.

08048434 00000388 T PackCMD //Function size 0x388
Execution time of 1000times = 217uSeconds

As per my last discussion of using pointer here, lets change it to pointer access as below:

U8_t PackCMD(TFIFOState* pBuf, Cmd* pdata)
{
 U8_t i=0;
 U8_t j,ires;
 U8_t ilength=0;
 U8_t *ptr_data;

 //init
 memset(pdata,0,sizeof(Cmd));
 
 for(j=0;j<MAX_READER;j++)
 {
   //temp info
   ptr_data = &pdata->idata[i];
   if(pBuf->FifoReader[j].tone & BIT_INVALID)
   {
     *ptr_data++ = 2; // temp: temp command 2: enable
     *ptr_data++ = 1; // on time
     *ptr_data++ = 1; // off time
     *ptr_data++ = 1; // Color ON
     *ptr_data++ = 2; // Color off
     *ptr_data++ = 8; // count lsb
     *ptr_data++ = 0; // count msb
   }
   else if (pBuf->FifoReader[j].tone & BIT_VALID)
   {
     *ptr_data++ = 2; // temp: temp command 2: enable
     *ptr_data++ = 1; // on time
     *ptr_data++ = 1; // off time
     *ptr_data++ = 1; // Color ON
     *ptr_data++ = 2; // Color off
     *ptr_data++ = 2; // count lsb
     *ptr_data++ = 0; // count msb
   }
   else if (pBuf->FifoReader[j].tone & BIT_TMP_1)
   {
     *ptr_data++ = 2; // temp: temp command 2: enable
     *ptr_data++ = 1; // on time
     *ptr_data++ = 1; // off time
     *ptr_data++ = 2; // Color ON
     *ptr_data++ = 1; // Color off
     *ptr_data++ = 10; // count lsb
     *ptr_data++ = 0; // count msb
   }
   else if (pBuf->FifoReader[j].tone & BIT_TMP_2)
   {
     *ptr_data++ = 2; // temp: temp command 2: enable
     *ptr_data++ = 1; // on time
     *ptr_data++ = 1; // off time
     *ptr_data++ = 2; // Color ON
     *ptr_data++ = 1; // Color off
     *ptr_data++ = 2; // count lsb
     *ptr_data++ = 0; // count msb
   }
   else if (pBuf->FifoReader[j].tone & BIT_AC_FAIL)
   {
     *ptr_data++ = 2; // temp: temp command 2: enable
     *ptr_data++ = 1; // on time
     *ptr_data++ = 1; // off time
     *ptr_data++ = 2; // Color ON
     *ptr_data++ = 1; // Color off
     *ptr_data++ = 4; // count lsb
     *ptr_data++ = 0; // count msb
   }
   else
   {
     *ptr_data++ = 0; // temp: temp command 2: ignore
     *ptr_data++ = 1; // on time
     *ptr_data++ = 1; // off time
     *ptr_data++ = 1; // Color ON
     *ptr_data++ = 2; // Color off
     *ptr_data++ = 2; // count lsb
     *ptr_data++ = 0; // count msb
   }
   i += 7;
 } 
 ilength=i;
 return ilength;
}

And the result on code size and execution time.

080484c4 000002c6 T PackCMD  //Code size 0x2c6
Execution time of 1000loops  = 185uSeconds

Not bad at all, code size and speed improve by around 20% and 30% respectively. Now lets try some more

U8_t LUT[][7]={ \
 {2 ,1, 1, 1, 2, 8, 0}, //BIT_INVALID
 {2, 1, 1, 1, 2, 2, 0}, //BIT_VALID
 {2, 1, 1, 2, 1,10, 0}, //BIT_TMP_1
 {2, 1, 1, 2, 1, 2, 0}, //BIT_TMP_2
 {2, 1, 1, 2, 1, 4, 0}, //BIT_AC_FAIL
 {0, 1, 1, 1, 2, 2, 0} //else case
};
U8_t PackCMD(TFIFOState* pBuf, Cmd* pdata)
{
 U8_t i=0;
 U8_t j,ires;
 U8_t ilength=0;
 U8_t *ptr_data;
 U8_t table_index, *table_ptr;

 //init
 memset(pdata,0,sizeof(Cmd));
 
 for(j=0;j<MAX_READER;j++)
 {
   //temp info
   ptr_data = &pdata->idata[i];
   if(pBuf->FifoReader[j].tone & BIT_INVALID)
   {
     table_index = 0;
   }
   else if (pBuf->FifoReader[j].tone & BIT_VALID)
   {
     table_index = 1;
   }
   else if (pBuf->FifoReader[j].tone & BIT_TMP_1)
   {
     table_index = 2;
   }
   else if (pBuf->FifoReader[j].tone & BIT_TMP_2)
   {
     table_index = 3;
   }
   else if (pBuf->FifoReader[j].tone & BIT_AC_FAIL)
   {
     table_index = 4;
   }
   else
   {
     table_index = 5;
   }
   table_ptr = LUT[table_index];
   *ptr_data++ = *table_ptr++; // temp: temp command 2: ignore
   *ptr_data++ = *table_ptr++; // on time
   *ptr_data++ = *table_ptr++; // off time
   *ptr_data++ = *table_ptr++; // Color ON
   *ptr_data++ = *table_ptr++; // Color off
   *ptr_data++ = *table_ptr++; // count lsb
   *ptr_data++ = *table_ptr++; // count msb
   i += 7;
  }
  ilength=i;
  return ilength;
}

Result as below

0804a040 0000002a D LUT
080484f4 000001c6 T PackCMD
Execution time of 1000 loops = 240uSeconds

Increase in speed but further decrease in code size (The code size is LUT + PackCMD = 0x1F0). But do take note that now the value has been put inside a lookup table format.

Now lets try putting the checking process by referencing with the table.

#define LUT_ENTRIES (8)
U8_t LUT[][LUT_ENTRIES]={ \
 {BIT_INVALID, 2 ,1, 1, 1, 2, 8, 0}, //BIT_INVALID
 {BIT_VALID, 2, 1, 1, 1, 2, 2, 0}, //BIT_VALID
 {BIT_TMP_1, 2, 1, 1, 2, 1,10, 0}, //BIT_TMP_1
 {BIT_TMP_2, 2, 1, 1, 2, 1, 2, 0}, //BIT_TMP_2
 {BIT_AC_FAIL, 2, 1, 1, 2, 1, 4, 0}, //BIT_AC_FAIL
 {0, 0, 1, 1, 1, 2, 2, 0} //else case
};
#define NUM_LUT_ELEMENT (sizeof(LUT)/LUT_ENTRIES)

U8_t PackCMD(TFIFOState* pBuf, Cmd* pdata)
{
 U8_t i=0,k;
 U8_t j,ires;
 U8_t tone;
 U8_t ilength=0;
 U8_t *ptr_data;
 U8_t table_index, *table_ptr;

 //init
 memset(pdata,0,sizeof(Cmd));
 
 ptr_data = &pdata->idata[i];
 for(j=0;j<MAX_READER;j++)
 {
   tone = pBuf->FifoReader[j].tone;
   for(k=0;k<NUM_LUT_ELEMENT; k++)
   {
   if(tone & LUT[k][0])
   {
     table_index = k;
     break;
   }
   else if(k == (NUM_LUT_ELEMENT-1))
   {
     //catch else case
     table_index = k;
     break;
   }
  }
  table_ptr = LUT[table_index];
  *ptr_data++ = *table_ptr++; // temp: temp command 2: ignore
  *ptr_data++ = *table_ptr++; // on time
  *ptr_data++ = *table_ptr++; // off time
  *ptr_data++ = *table_ptr++; // Color ON
  *ptr_data++ = *table_ptr++; // Color off
  *ptr_data++ = *table_ptr++; // count lsb
  *ptr_data++ = *table_ptr++; // count msb
  
  i += 7;
  }
 
  ilength=i;
  return ilength;
}

And the result as below:

0804a040 00000030 D LUT
080484f4 00000180 T PackCMD
Execution time of 1000 loops: 330uSeconds

This seen significant increase in execution time(from previously 240uS to 330uS), but code is decreasing from 0x1F0 to 0x1B0. One thing to take note,  by using table reference, any additional entries in future will only require of adding entries into the LUT table. This is a BIG plus in code maintenance.

Whether we should choose for faster code execution or smaller footprint would depends on individual case. But by knowing the effect of writing style, we can practice to write a better code(compact + speed), as shown in first improvement. Personally I would prefer easier code maintenance over smaller footprint and faster code execution.

Advertisements

From → Embedded System

Leave a Comment

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: