Regexp, not PCRE!

Greetings, can anyone explain why this does not work? It's making me crazy. Please DM me with any questions about anything that is not clear in the code.

I am using platformio for this project.

This should run on most Arduino platforms.

Any guidance or help would be greatly appreciated!

-m

#include <Arduino.h>
#include <led_ctrl.h>
#include <low_power.h>
#include <Regexp.h>
#include <string.h>
#include <log.h>

// One parsed SMS entry. setup() fills the three text fields from the three
// captures of the phone_cmd pattern.
struct queue{           // command and phone number structure
  char gsm_number[32];  // sender phone number text (first capture)
  char msg_time[24];    // message date/time text (second capture)
  char command[16];     // command word (third capture)
};
// Storage for the parsed entries; setup() indexes it by match number.
queue myQueue[16];
// Sample input handed to MatchState::Target() in setup().
char in[] = {"+CMGL:1,RECREAD,+15103036635,,23/01/20,17:37:32-32Start+CMGL:2,RECREAD,+15103036635,,23/01/20,17:37:34-32Stop+CMGL:3,RECREAD,+15103036635,,23/01/20,17:37:37-32Angle+CMGL:4,RECREAD,+1"};

//char in[] = {"+CMGL:1,RECUNREAD,+15103036635,,23/01/16,04:52:51-32Angle"};

// Runs once at start-up: initialises logging, the LEDs and Serial3, then tries
// to split the global `in` buffer into (number, time, command) triples and
// store them in `myQueue`, logging each entry.
void setup() {
// ----------------------------------------------------------------------------

  // Pattern with three captures: '+' and digits, then ",,", then a
  // date/time/zone group, then a run of letters.
  char phone_cmd[] = {"(%+%d+),,(%d+/%d+/%d+,%d+:%d+:%d+%-%d%d)(%a+)"};

  uint16_t result1, match_count;
  //uint16_t match_start = 0;

  // Scratch buffer that GetCapture() writes each capture into.
  char buf1[1024];
  MatchState ms;
  
// ----------------------------------------------------------------------------

  Log.begin(115200);
  
  LedCtrl.begin();

  // Run the LED start-up cycle three times, one second apart.
  for ( int i=0; i<3; i++ ) {
    LedCtrl.startupCycle();
    delay(1000);
  }

  Serial3.begin(115200);
    while (!Serial3); 
  delay(10000);

  Log.infof("\r\n\r\n--- parse_cmd -- string in:\r\n%s\n\r", in);

  ms.Target(in);

  // Count all matches in the buffer, then perform a single match starting at
  // offset 0. Only this one Match() call feeds the GetCapture() calls below.
  match_count = ms.MatchCount(phone_cmd);
  result1 = ms.Match (phone_cmd, 0);
  
  Log.infof("\r\n\r\n--- parse_cmd -- match_count: %d, result1: %d, level %d\n\r", match_count, result1, ms.level);

  // NOTE(review): this reads sizeof(buf1) (1024) bytes from the 2-byte string
  // literal "\0"; it looks like a memset() was intended.
  memcpy(buf1, "\0", sizeof(buf1));

  // NOTE(review): `length` is never used after this assignment.
  int length = ms.MatchLength;
  int index = 0;
  for ( int match=0; match<match_count; match++ ) {
    // NOTE(review): each copy uses ms.MatchLength as its byte count, which is
    // the same value for all three fields regardless of the capture's own
    // length or the destination field's size - TODO confirm against the
    // Regexp library whether MatchLength is the length of the whole match.
    memcpy(myQueue[match].gsm_number, ms.GetCapture(buf1, (index)), ms.MatchLength);
    memcpy(myQueue[match].msg_time,   ms.GetCapture(buf1, (index+1)), ms.MatchLength);
    memcpy(myQueue[match].command,    ms.GetCapture(buf1, (index+2)), ms.MatchLength);
    
    Log.infof("parse_cmd -- GSM: %s,\tTme: %s,\tCmd: %s\r\n", myQueue[match].gsm_number, myQueue[match].msg_time, myQueue[match].command);
    // NOTE(review): no further Match() call is made inside this loop, so
    // capture indices past the first match's captures presumably refer to
    // nothing - verify how GetCapture() treats an index >= ms.level.
    index +=ms.level;   // increment by the number of captures per match
  }
  Log.infof("\r\n\r\n--- parse_cmd -- Done ---\n\r");
}
// Left empty: this sketch does all of its work once, in setup().
void loop() {
  // put your main code here, to run repeatedly:
}

No way smaller Arduinos will like to spend 1 kilobyte of RAM to parse the string +15103036635,,23/01/16,04:52:51-32Angle into the phone number, date+time and command. If you use String functions (or direct strstr / string.h functions) this can be made much more memory efficient, but that wasn’t your question.

The library describes its own version of Regex (here), but your regex pattern is indeed correct.

Problems in your code:

The call memcpy(buf1, "\0", sizeof(buf1)); will try to copy 1024 bytes from the two-byte string literal "\0" into your buffer, as the compiler warns you.

src\main.cpp: In function 'void setup()':
src\main.cpp:52:9: warning: 'void* memcpy(void*, const void*, size_t)' reading 1024 bytes from a region of size 2 [-Wstringop-overflow=]
   52 |   memcpy(buf1, "\0", sizeof(buf1));
      |   ~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~

This should be memset(buf1, '\0', sizeof(buf1)); instead.

Another problem, as you can see when debugging the code and inspecting some variables,

is that ms.MatchLength does not give you the length of the value returned by ms.GetCapture(), but the length of the string that was matched in total (39). The GSM number part, e.g., would only be 12 characters, plus one null-terminator. So you’re trashing parts of the myQueue memory with unrelated data.

That can be fixed by doing

    strncpy(myQueue[match].gsm_number, ms.GetCapture(buf1, (index)), sizeof(myQueue[match].gsm_number));
    strncpy(myQueue[match].msg_time,   ms.GetCapture(buf1, (index+1)), sizeof(myQueue[match].msg_time));
    strncpy(myQueue[match].command,    ms.GetCapture(buf1, (index+2)), sizeof(myQueue[match].command));

instead.

However, the main failure of the program is that, even though you have 3 places in your string where the RegExp matches, the ms.Match() will only return the data for the first match, not all. So calling ms.GetCapture() with index >= 3 will just give back an empty string.

As you can see in the MatchCount() function

it repeatedly calls Match() and then shifts the start of the string forward to explore possibly new matches that occur later in the string. One can do the same thing by using the index parameter of the Match function like the example does, which will effectively index the string at a later point. However, in your original code, you only do one Match() with index = 0, so you’ll only get one match from that starting position.

Correcting all these issues, we get a nice output of

--- parse_cmd -- string in:
+CMGL:1,RECREAD,+15103036635,,23/01/20,17:37:32-32Start+CMGL:2,RECREAD,+15103036635,,23/01/20,17:37:34-32Stop+CMGL:3,RECREAD,+15103036635,,23/01/20,17:37:37-32Angle+CMGL:4,RECREAD,+1


--- parse_cmd -- match_count: 3, level 0
parse_cmd -- GSM: +15103036635, Tme: 23/01/20,17:37:32-32,      Cmd: Start
parse_cmd -- GSM: +15103036635, Tme: 23/01/20,17:37:34-32,      Cmd: Stop
parse_cmd -- GSM: +15103036635, Tme: 23/01/20,17:37:37-32,      Cmd: Angle


--- parse_cmd -- Done ---

by using a src/main.cpp of

#include <Arduino.h>
#include <Regexp.h>
#include <string.h>

// A single queued SMS command together with its sender and timestamp.
struct queue {
  char gsm_number[32];  // sender's phone number as text
  char msg_time[24];    // message date/time as text
  char command[16];     // command word
};

// Parsed entries; setup() fills one slot per message found.
queue myQueue[16];

// Sample message listing that setup() parses.
char in[] = "+CMGL:1,RECREAD,+15103036635,,23/01/20,17:37:32-32Start+CMGL:2,RECREAD,+15103036635,,23/01/20,17:37:34-32Stop+CMGL:3,RECREAD,+15103036635,,23/01/20,17:37:37-32Angle+CMGL:4,RECREAD,+1";

//char in[] = {"+CMGL:1,RECUNREAD,+15103036635,,23/01/16,04:52:51-32Angle"};

void setup() {
// ----------------------------------------------------------------------------

  char phone_cmd[] = {"(%+%d+),,(%d+/%d+/%d+,%d+:%d+:%d+%-%d%d)(%a+)"};

  uint16_t match_res, match_count;

  char buf1[1024];
  MatchState ms;

  Serial.begin(115200);
  Serial.printf("\r\n\r\n--- parse_cmd -- string in:\r\n%s\n\r", in);

  ms.Target(in);

  match_count = ms.MatchCount(phone_cmd);
  
  Serial.printf("\r\n\r\n--- parse_cmd -- match_count: %d, level %d\n\r", match_count, ms.level);

  memset(buf1, '\0', sizeof(buf1));

  int index = 0;
  for ( int match=0; match<match_count; match++ ) {
    // do match now to prepare the data, skipping the part
    // we already processed.
    match_res = ms.Match (phone_cmd, index);
    // no match? bye
    if(match_res != REGEXP_MATCHED) {
      break;
    }
    // copy matched data into own structures
    strncpy(myQueue[match].gsm_number, ms.GetCapture(buf1, 0), sizeof(myQueue[match].gsm_number));
    strncpy(myQueue[match].msg_time,   ms.GetCapture(buf1, 1), sizeof(myQueue[match].msg_time));
    strncpy(myQueue[match].command,    ms.GetCapture(buf1, 2), sizeof(myQueue[match].command));
    // print parsed data    
    Serial.printf("parse_cmd -- GSM: %s,\tTme: %s,\tCmd: %s\r\n", myQueue[match].gsm_number, myQueue[match].msg_time, myQueue[match].command);
    // prepare to get a match on the later part of the string
    index += ms.MatchLength;
  }
  Serial.printf("\r\n\r\n--- parse_cmd -- Done ---\n\r");
}
// Nothing to repeat: the parsing demo runs once in setup().
void loop() {}

and platformio.ini of

; PlatformIO environment for the nucleo_f103rb board (ststm32 platform) using
; the Arduino framework.
[env:nucleo_f103rb]
platform = ststm32
board = nucleo_f103rb
framework = arduino
; Library dependency that provides <Regexp.h>.
lib_deps=
   nickgammon/Regexp@^0.1.0
; Serial monitor baud rate; matches Serial.begin(115200) in src/main.cpp.
monitor_speed = 115200
; Flags for debug builds: no optimisation (-O0) and gdb debug info (-ggdb3).
debug_build_flags = -O0 -ggdb3

for my board.

1 Like