#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>
#include <set>
#include <vector>
#include <gmp.h> // GMP library for rational number arithmetic
#include <gmpxx.h> // ... and a C++ wrapper for it (for sanity's sake)
// Linear solver
#include <linbox/field/gmp-rational.h>
#include <linbox/solutions/solve.h>
#include <linbox/solutions/methods.h>
const int alphabet=2; // Alphabet size. The symbols are 0123456789:; etc
char periodicWord[] = "0000000000"; // The program will compute gamma_W(1) for all W in
// the lexicographic order starting from this word
// (it will avoid re-computing gamma_W(1) for words
// that differ only in a cyclic shift or reversal)
// If you are interested in computing gamma_W(1) for just for a single word and do not wish to modify the code,
// then set periodicWord[] accordingly, and abort the program after it computes gamma_W for that word
// If you are interested in computing gamma_W(rho) for some other rho, change the first line of
// estimateExpectedLCS() below
// Should we test only balanced words (those with equally many of each symbol)
// Emprically, the words with largest gamma_W are balanced
const bool balancedOnly = false;
// Compute period at compile time
#define STRLEN(s) ((sizeof(s)/sizeof(s[0]))-1)
static const int k = STRLEN(periodicWord);
typedef unsigned long profile; // Profiles stored as bitmasks
#define profilePopcount __builtin_popcountl // How many 1 bits are there?
const profile allOneProfileMask=(1ul<<k)-1ul; // All-1 mask
typedef int state_t;
typedef std::set <state_t, std::greater <state_t> > stateSet;
void printProfile(profile P, bool newline=false, int minwidth = 1)
{
int width=0;
while (P!=0)
{
printf("%d",(int)(P%2));
P=P/2;
width++;
if(width==k)
printf(" ");
}
while (width++ < minwidth)
printf(width==k ? "0 " : "0");
if(newline)
printf("\n");
}
// Starting at profile P, poke frogs at lilypads indexed by bitmask I.
// The bitmask must be periodic with period k. Returns the resulting profile.
profile poke(profile P, profile I)
{
// pseudocode
// k=0
// do
// if I[k]==1 && P[k]==0
// P[k]=1
// k++
// while P[k]==0
// k++
// P[k]=0
// k++
//
// We use bit-twiddling to achieve the same purpose
// (note that we use that unsigned integer types do not under/overflow and just wrap around mod 2^whatever)
// Example (bit are from least significant to most significant)
// P = 000010001..
// I = 01xyz01wq..
// I&(~P) = 01xy001w0..
// P-I&(~P) = 01XY001W0.. (where x is complement of X, y is complement of Y,...)
// I&(P-I&(~P)) = 010000100..
// P = 000010001001..
// I = 01xyz01wq000..
// I&(~P) = 01xy001w0000..
// P-I&(~P) = 01XY001W0001.. (where x is complement of X, y is complement of Y,...)
// I&(P-I&(~P)) = 010000100000..
// P&(P-I&(~P)) = 000000000001
return (P-(I&(~P)))&(I|P);
}
profile Is[alphabet];
// takes a bitmask of k bits, and concatenates it with it itself to obtain bitmask suitable for use in poke()
profile makePeriodic(profile I)
{
profile prev;
profile res=I;
do {
prev=res;
res=(prev<<k)|I;
} while (res!=prev);
return res;
}
void initIs(const char *s)
{
assert(k==strlen(s));
memset(Is,0,sizeof(Is));
for (int p=0;p<k;p++)
{
int v=s[p]-'0';
Is[v]|=1<<p;
}
for (int c=0;c<alphabet;c++)
Is[c]=makePeriodic(Is[c]);
}
// A "fragment" is a data structure that describes the profile between two succesive ledges.
// Below we assume that we are interested in what happens between k-y'th and k-y+1'st ledge.
// The fragment is stored in a 2*k bit string with the following format.
// First k bits:
// The first k bits end with a 0
// The first k bits contain exactly k-y 0's
// Last k bits:
// If there is an 0 in i'th position, then there is a 0 in k+i'th position
// The remaining y-k positions among last k bits are of the form 1111000
// Note that in order to know the effect of a poke on a fragment, we must keep track of the
// position mod k of where the ledge is; that is the "shift" parameter below
// Poke fragment
// boolean emit is set to true if we emit a [1:y] option (i.e. the k-y+1'st frog jumps over k-y'th)
// increment shift by the number of bits shifted (and reduce mod k at the end)
profile pokeFragment(profile F, profile I, int y, int &shift, bool &emit)
{
profile P=poke(F,I);
while (profilePopcount(P&allOneProfileMask)>y)
{
P>>=1;
shift=(shift+1)%k;
}
if ((profilePopcount((P>>k)&allOneProfileMask))==y)
{
emit=true;
return P&allOneProfileMask;
}
emit = false;
// set extra 1's to zero
// For example if k=4, and P=11101010 then we want to emit 11101000
profile First = P & allOneProfileMask;
profile Last = (P>>k)&allOneProfileMask;
// Locate the first 1 bit in First&(~Last) and set all the bits after that to 0
profile A = First&(~Last);
return ((Last&(A^(A-1)))<<k)|First;
}
// makes fragment whose first k bits are First
// and of weight s in the last k bits
profile makeFragment(profile First, int s)
{
profile Last=0;
int i=0;
int j=0;
while(i<s)
{
while (((1<<j)&First)==0)
j++;
Last|=1<<j;
i++;
j++;
}
return (Last<<k)|First;
}
// Compute the binomial coefficients
// Copied from https://stackoverflow.com/questions/24294192/computing-the-binomial-coefficient-in-c
int binom(int n,int k)
{
assert(n>=k);
assert(k>=0);
int ans=1;
k=k>n-k?n-k:k;
int j=1;
for(;j<=k;j++,n--)
{
if(n%j==0)
{
ans*=n/j;
}else
if(ans%j==0)
{
ans=ans/j*n;
}else
{
ans=(ans*n)/j;
}
}
return ans;
}
// The states of our Markov chain will be the fragments.
// The following arrays will store the states. The index is the shift parameter.
int possibleCount[k];
profile *possibleFragments[k];
int compareProfile(const void *elem1, const void *elem2)
{
int f = *((profile*)elem1);
int s = *((profile*)elem2);
return (f>s) - (f<s);
}
// Compute all possible fragments, and sort them
void computePossibleFragments(int y)
{
assert(y<k);
// Possible tuples of first k bits are binom(k-1,k-y-1) in number
// For each of them the number of 1's among last k bits is between 0 to y-1
possibleCount[y] = binom(k-1,k-y-1)*y;
possibleFragments[y] = (profile*) malloc(sizeof(profile)*possibleCount[y]);
int cnt=0;
for(profile First=0;First<=(allOneProfileMask>>1);First++)
if (profilePopcount(First)==y)
for(int s=0;s<y;s++)
possibleFragments[y][cnt++]=makeFragment(First,s);
qsort(possibleFragments[y],possibleCount[y],sizeof(profile),compareProfile);
assert(cnt==possibleCount[y]);
}
// looks for fragment in y'th fragment array (binary search)
int findFragment(int y, profile F)
{
// We narrow to half-open interval [L,R)
int L=0;
int R=possibleCount[y];
while (L<R)
{
int M=(L+R)/2;
if (F>possibleFragments[y][M])
L=M+1;
else if (F<possibleFragments[y][M])
R=M;
else
{
assert(F==possibleFragments[y][M]);
return M;
}
}
assert(false);
return -1;
}
// a state consists of a Fragment and a shift
// we encode it in a number X s.t. X%k is the shift
// and X/k is the index of the fragment in the appropriate possibeFragments[..] array
// This function computes the result of applying symbol c to a given state
state_t transition(int y,state_t state,int c,bool &emit)
{
state_t ind=state/k;
int shift=state%k;
int F=pokeFragment(possibleFragments[y][ind],Is[c]>>shift,y,shift,emit);
return findFragment(y,F)*k+shift;
}
void printState(int y,state_t state)
{
state_t ind=state/k;
int shift=state%k;
printProfile(possibleFragments[y][ind],false,2*k);
printf("{%d}",shift);
}
// maxStates = max_y binom(k-1,k-y-1)*y*k
// I would be happy to compute max_y binom(k-1,k-y-1)*y*k at compile time
// but using -std=c++11 option in order to use constexpr causes lots of errors
// when including the standard headers
//
// I wish I could use binom(n,x)<=2^n/Sqrt[Pi n/2] but alas sqrt() is not considered constexpr either (ouch!)
//
// So, we just boringly precompute the constants :-(
static const int maxStatesTable[] =
{0, 0, 2, 6, 24, 60, 180, 420, 1120, 2520, 6300, 13860, 33264, 72072, 168168, 360360, 823680, 1750320, 3938220};
static const int maxStates = maxStatesTable[k];
typedef int stateTransition[k];
int stateCount;
state_t feasibleStates[maxStates];
stateTransition stateTransitions[maxStates];
stateTransition transitionEmissions[maxStates];
// This array will hold the stationary distribution
mpq_class prob[maxStates];
// looks for state in feasibleState array (binary search)
int findFeasibleState(state_t S)
{
// We narrow to half-open interval [L,R)
int L=0;
int R=stateCount;
while (L<R)
{
int M=(L+R)/2;
if (S>feasibleStates[M])
L=M+1;
else if (S<feasibleStates[M])
R=M;
else
{
assert(S==feasibleStates[M]);
return M;
}
}
assert(false);
return -1;
}
void computeTransitions(int y)
{
assert(y>0);
assert(y<k);
stateSet processed, unprocessed;
unprocessed.insert(0);
while (!unprocessed.empty())
{
state_t S=*unprocessed.begin();
for(int c=0;c<alphabet;c++)
{
bool emit;
profile Snext = transition(y,S,c,emit);
if ((unprocessed.count(Snext)==0)&&(processed.count(Snext)==0))
unprocessed.insert(Snext);
}
unprocessed.erase(S);
processed.insert(S);
}
stateCount = 0;
for (stateSet::reverse_iterator itr = processed.rbegin(); itr!=processed.rend(); ++itr)
feasibleStates[stateCount++]=*itr;
for(int i=0;i<stateCount;i++)
{
for(int c=0;c<alphabet;c++)
{
bool emit;
state_t Snext=transition(y,feasibleStates[i],c,emit);
transitionEmissions[i][c]=emit;
stateTransitions[i][c]=findFeasibleState(Snext);
}
}
}
typedef LinBox::GMPRationalField LinBoxField;
void computeStationaryDistribution()
{
typedef LinBoxField Field;
typedef LinBox::SparseMatrix<Field> Matrix;
Field F;
typedef std::vector<Field::Element> Vector;
Matrix AL(F,stateCount,stateCount);
Vector BL(stateCount);
Vector Res(stateCount);
// Set up vector and matrices
// B=[0,0,...,0,1]
F.init(BL[stateCount-1],1);
// For 0<=i<stateCount-1 the i'th row of A expresses that
// i'th element of solution vector is the stationary distribution of i'th state
// (note that we skip the condition for last state because of linear dependendence)
//
// The last row of A says that the sum of probabilities is 1
// Diagonal
Field::Element v;
F.init(v,-alphabet);
for(int i=0;i<stateCount-1;i++)
AL.setEntry(i,i,v);
Field::Element one;
F.init(one,1);
for(int i=0;i<stateCount;i++)
{
// entry in row j and column i is 1 if there is an edge from state i to state j
for(int k=0;k<alphabet;k++)
if (stateTransitions[i][k]<stateCount-1)
{
Field::Element y;
F.init(y,0);
F.add(y,one,AL.getEntry(stateTransitions[i][k],i));
AL.setEntry(stateTransitions[i][k],i,y);
}
// Last row (all 1's)
AL.setEntry(stateCount-1,i,one);
}
LinBox::Method::SparseElimination M;
solve(Res,AL,BL,M);
for(int i=0;i<stateCount;i++)
mpq_swap(prob[i].get_mpq_t(),Res[i].get_rep());
}
void printStationaryDistribution(int y)
{
printf("Stationary distribution for [1:%d]\n",y);
for (int i=0;i<stateCount;i++)
if ((i%k)==0)
{
printState(y,i);
gmp_printf(": %Qd\n",prob[i].get_mpq_t());
}
}
// Option = event when a frog jumps over a barely nastier frog
// This array holds frequency of all the options (=difference between speeds of succesive frogs)
mpq_class optFreq[k+1];
// The follwoing computes option frequency from the stationary distribution
void computeOptFreq(int y)
{
optFreq[y] = mpq_class(0);
for (int i=0;i<stateCount;i++)
for (int c=0;c<alphabet;c++)
if (transitionEmissions[i][c])
optFreq[y]+=prob[i];
optFreq[y]/=alphabet;
}
// Computes constant gamma_W(1), where the expected LCS between a random string of length n
// and the periodic string is gamma_W(1)*n+O(sqrt(n))
//
// Since not all entries optFreq[..] might have been computed yet, this function might return
// "I do not know" together with an upper bound on gamma_W
//
// Input:
// ymin --- the smallest y for which OptFreq[y] has been computed
// Output:
// true iff either an exact value is computed
// false iff an upper bound is computed
// m is either computed exact value or upper bound
bool estimateExpectedLCS(mpq_class &m,int ymin)
{
mpq_class left(1,k); // How many periods are left. At start, left = rho/k. We use rho=1; hence left=1/k.
m=0;
for(int i=k;i>=ymin;i--)
{
if (optFreq[i]>=left)
{
m+=i*left;
return true;
}
m += optFreq[i]*i;
left -= optFreq[i];
}
// The best we can hope is that all remaining options are ymin-1
// in that case we get a match of length m + (ymin-1)*left
m += (ymin-1)*left;
return (ymin==1);
}
// modifies periodicWord to the lexicographically next word
// returns true if sucessful; returns false, if there are no more words
bool nextPeriodicWord()
{
int i=k-1;
while ((i>=0)&&(periodicWord[i]==('0'+alphabet-1)))
i--;
if (i<0) return false;
periodicWord[i]++;
while((++i)<k)
periodicWord[i]='0';
return true;
}
// returns true if periodicWord is equivalent, under dihedral group, to a lexicographically smaller word
bool wasTested()
{
// cyclic shifts
for(int shift=1;shift<k;shift++)
{
int pos=0;
while ((pos<k)&&(periodicWord[pos]==periodicWord[(pos+shift)%k]))
pos++;
if ((pos<k)&&(periodicWord[pos]>periodicWord[(pos+shift)%k]))
return true;
}
// reflections
for(int shift=1;shift<k;shift++)
{
int pos=0;
while ((pos<k)&&(periodicWord[pos]==periodicWord[(k-pos+shift)%k]))
pos++;
if ((pos<k)&&(periodicWord[pos]>periodicWord[(k-pos+shift)%k]))
return true;
}
return false;
}
// let f(c) be the number of c's in periodicWord
// the following returns true if f(0)>=f(1)>=...
bool decreasingWeight()
{
int f[alphabet];
memset(f,0,sizeof(f));
for(int i=0;i<k;i++)
f[periodicWord[i]-'0']++;
if (balancedOnly)
for(int c=0;c<alphabet-1;c++)
if (f[c]!=f[c+1]) return false;
for(int c=0;c<alphabet-1;c++)
if (f[c]<f[c+1]) return false;
return true;
}
// modifies periodicWord to the lexicographically next word that is not cyclically equivalent to one we already tested
// returns true if sucessful; returns false, if there are no more words
bool nextUntestedWord()
{
do {
if (!nextPeriodicWord())
return false;
} while (wasTested()||(!decreasingWeight()));
return true;
}
// y=k-3
//const int Tbl[] = {0,0,0,0,24,500,4500,25725,109760,381024,1134000,2994750};
const int Tbl[] = {0,0,0,9,96,500,1800,5145,12544,27216,54000,99825,174240};
int main()
{
mpq_class bestExpectedLCS(0,1);
if (balancedOnly)
printf("Balanced case. ");
printf("k = %d\n",k);
optFreq[k]=mpq_class(1,k*alphabet); // Hand-computed
for(int y=1;y<k;y++)
computePossibleFragments(y);
int wordNum=0;
do {
wordNum++;
printf("Testing %s [%d]\n",periodicWord,wordNum);
initIs(periodicWord);
int y=k;
mpq_class m;
do {
computeTransitions(--y);
computeStationaryDistribution();
// printStationaryDistribution(y);
computeOptFreq(y);
if (estimateExpectedLCS(m,y))
{
gmp_printf("Expected LCS for %s is %Qd (=%f)\n",periodicWord,m.get_mpq_t(),mpq_get_d(m.get_mpq_t()));
if (m>bestExpectedLCS)
bestExpectedLCS=m;
break;
}
if (m<=bestExpectedLCS)
{
// The upper bound on gamma_W of this word is no larger than the largest gamma_W seen so far
break;
}
} while (true);
} while (nextUntestedWord());
printf("%d words\n",wordNum);
}