Here is the O (n log n) version based on suffix arrays. There are O (n) building algorithms for suffix arrays, I just don't have the patience to encode them.
Example output (this output is not O (n), but only to show that we can really calculate all the points):
4*1 a 3*3 aba 2*5 ababa 1*7 abababa 3*2 ab 2*4 abab 1*6 ababab
Basically you need to change the string and compute the suffix array (SA) and the longest common prefix (LCP).
Then you move the SA array back, looking for LCPs that match the entire suffix (prefix in the source line). If there is a match, increase the counter, otherwise reset it equals 1. Each suffix (prefix) receives a "count" (SCR), which corresponds to the number of times it appears in the original line.
#include <iostream> #include <cstring> #include <string> #define MAX 10050 using namespace std; int RA[MAX], tempRA[MAX]; int SA[MAX], tempSA[MAX]; int C[MAX]; int Phi[MAX], PLCP[MAX], LCP[MAX]; int SCR[MAX]; void suffix_sort(int n, int k) { memset(C, 0, sizeof C); for (int i = 0; i < n; i++) C[i + k < n ? RA[i + k] : 0]++; int sum = 0; for (int i = 0; i < max(256, n); i++) { int t = C[i]; C[i] = sum; sum += t; } for (int i = 0; i < n; i++) tempSA[C[SA[i] + k < n ? RA[SA[i] + k] : 0]++] = SA[i]; memcpy(SA, tempSA, n*sizeof(int)); } void suffix_array(string &s) { int n = s.size(); for (int i = 0; i < n; i++) RA[i] = s[i] - 1; for (int i = 0; i < n; i++) SA[i] = i; for (int k = 1; k < n; k *= 2) { suffix_sort(n, k); suffix_sort(n, 0); int r = tempRA[SA[0]] = 0; for (int i = 1; i < n; i++) { int s1 = SA[i], s2 = SA[i-1]; bool equal = true; equal &= RA[s1] == RA[s2]; equal &= RA[s1+k] == RA[s2+k]; tempRA[SA[i]] = equal ? r : ++r; } memcpy(RA, tempRA, n*sizeof(int)); } } void lcp(string &s) { int n = s.size(); Phi[SA[0]] = -1; for (int i = 1; i < n; i++) Phi[SA[i]] = SA[i-1]; int L = 0; for (int i = 0; i < n; i++) { if (Phi[i] == -1) { PLCP[i] = 0; continue; } while (s[i + L] == s[Phi[i] + L]) L++; PLCP[i] = L; L = max(L-1, 0); } for (int i = 1; i < n; i++) LCP[i] = PLCP[SA[i]]; } void score(string &s) { SCR[s.size()-1] = 1; int sum = 1; for (int i=s.size()-2; i>=0; i--) { if (LCP[i+1] < s.size()-SA[i]-1) { sum = 1; } else { sum++; } SCR[i] = sum; } } int main() { string s = "abababa"; s = string(s.rbegin(), s.rend()) +"."; suffix_array(s); lcp(s); score(s); for(int i=0; i<s.size(); i++) { string ns = s.substr(SA[i], s.size()-SA[i]-1); ns = string(ns.rbegin(), ns.rend()); cout << SCR[i] << "*" << ns.size() << " " << ns << endl; } }
Most of this code (especially the suffix array and LCP implementation) that I used for several years in contests. I adapted this version in a special version from this, which I wrote several years ago .