C ++, griglia di 27243 caratteri (248x219, riempita al 50,2%)
(Pubblicando questa come una nuova risposta perché mi piacerebbe mantenere la 1D vincolata che avevo originariamente pubblicato come riferimento)
Questo palesemente fregatura è fortemente ispirato dalla risposta di @ AndersKaseorg nella sua struttura principale, ma ha un paio di modifiche. Innanzitutto, utilizzo il mio programma originale per unire le stringhe fino a quando la migliore sovrapposizione disponibile è di soli 3 caratteri. Quindi uso il metodo descritto da AndersKaseorg per riempire progressivamente una griglia 2D usando queste stringhe generate. Anche i vincoli sono un po 'diversi: cerca ancora di aggiungere il minor numero di caratteri ogni volta, ma i legami vengono spezzati favorendo prima le griglie quadrate, quindi le piccole e infine aggiungendo la parola più lunga.
Il comportamento che mostra è quello di alternare i periodi di riempimento dello spazio e di espansione rapida della griglia (purtroppo ha esaurito le parole subito dopo una rapida fase di espansione, quindi c'è molto spazio vuoto attorno ai bordi). Sospetto con qualche modifica della funzione di costo che potrebbe essere realizzata per ottenere un riempimento dello spazio migliore del 50%.
Ci sono 2 eseguibili qui (per evitare la necessità di rieseguire l'intero processo quando si migliora iterativamente l'algoritmo). L'output di uno può essere convogliato direttamente nell'altro:
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <cstdlib>
std::size_t calcOverlap(const std::string &a, const std::string &b, std::size_t limit, std::size_t minimal) {
std::size_t la = a.size();
for(std::size_t p = std::min(std::min(la, b.size()), limit + 1); -- p > minimal; ) {
if(a.compare(la - p, p, b, 0, p) == 0) {
return p;
}
}
return 0;
}
bool isSameReversed(const std::string &a, const std::string &b) {
std::size_t l = a.size();
if(b.size() != l) {
return false;
}
for(std::size_t i = 0; i < l; ++ i) {
if(a[i] != b[l-i-1]) {
return false;
}
}
return true;
}
int main(int argc, const char *const *argv) {
// Usage: prog [<stop_threshold>]
std::size_t stopThreshold = 3;
if(argc >= 2) {
char *check;
long v = std::strtol(argv[1], &check, 10);
if(check == argv[1] || v < 0) {
std::cerr
<< "Invalid stop threshold. Should be an integer >= 0"
<< std::endl;
return 1;
}
stopThreshold = v;
}
std::vector<std::string> words;
// Load all words from input and their reverses (words can be backwards now)
while(true) {
std::string word;
std::getline(std::cin, word);
if(word.empty()) {
break;
}
words.push_back(word);
std::reverse(word.begin(), word.end());
words.push_back(std::move(word));
}
std::cerr
<< "Input word count: " << words.size() << std::endl;
// Remove all fully subsumed words
for(auto p = words.begin(); p != words.end(); ) {
bool subsumed = false;
for(auto i = words.begin(); i != words.end(); ++ i) {
if(i == p) {
continue;
}
if(i->find(*p) != std::string::npos) {
subsumed = true;
break;
}
}
if(subsumed) {
p = words.erase(p);
} else {
++ p;
}
}
std::cerr
<< "After subsuming checks: " << words.size()
<< std::endl;
// Sort words longest-to-shortest (not necessary but doesn't hurt. Makes finding maxlen a tiny bit easier)
std::sort(words.begin(), words.end(), [](const std::string &a, const std::string &b) {
return a.size() > b.size();
});
std::size_t maxlen = words.front().size();
// Repeatedly combine most-compatible words until we reach the threshold
std::size_t bestPossible = maxlen - 1;
while(words.size() > 2) {
auto bestA = words.begin();
auto bestB = -- words.end();
std::size_t bestOverlap = 0;
for(auto p = ++ words.begin(), e = words.end(); p != e; ++ p) {
if(p->size() - 1 <= bestOverlap) {
continue;
}
for(auto q = words.begin(); q != p; ++ q) {
std::size_t overlap = calcOverlap(*p, *q, bestPossible, bestOverlap);
if(overlap > bestOverlap && !isSameReversed(*p, *q)) {
bestA = p;
bestB = q;
bestOverlap = overlap;
}
overlap = calcOverlap(*q, *p, bestPossible, bestOverlap);
if(overlap > bestOverlap && !isSameReversed(*p, *q)) {
bestA = q;
bestB = p;
bestOverlap = overlap;
}
}
if(bestOverlap == bestPossible) {
break;
}
}
if(bestOverlap <= stopThreshold) {
break;
}
std::string newStr = std::move(*bestA);
newStr.append(*bestB, bestOverlap, std::string::npos);
if(bestA == -- words.end()) {
words.pop_back();
*bestB = std::move(words.back());
words.pop_back();
} else {
*bestB = std::move(words.back());
words.pop_back();
*bestA = std::move(words.back());
words.pop_back();
}
// Remove any words which are now in the result (forward or reverse)
// (would not be necessary if we didn't have the reversed forms too)
std::string newRev = newStr;
std::reverse(newRev.begin(), newRev.end());
for(auto p = words.begin(); p != words.end(); ) {
if(newStr.find(*p) != std::string::npos || newRev.find(*p) != std::string::npos) {
std::cerr << "Now subsumes: " << *p << std::endl;
p = words.erase(p);
} else {
++ p;
}
}
std::cerr
<< "Words remaining: " << (words.size() + 1)
<< " Latest combination: (" << bestOverlap << ") " << newStr
<< std::endl;
words.push_back(std::move(newStr));
words.push_back(std::move(newRev));
bestPossible = bestOverlap; // Merging existing words will never make longer merges possible
}
std::cerr
<< "After merging: " << words.size()
<< std::endl;
// Remove all fully subsumed words (i.e. reversed words)
for(auto p = words.begin(); p != words.end(); ) {
bool subsumed = false;
std::string rev = *p;
std::reverse(rev.begin(), rev.end());
for(auto i = words.begin(); i != words.end(); ++ i) {
if(i == p) {
continue;
}
if(i->find(*p) != std::string::npos || i->find(rev) != std::string::npos) {
subsumed = true;
break;
}
}
if(subsumed) {
p = words.erase(p);
} else {
++ p;
}
}
std::cerr
<< "After subsuming: " << words.size()
<< std::endl;
// Sort words longest-to-shortest for display
std::sort(words.begin(), words.end(), [](const std::string &a, const std::string &b) {
return a.size() > b.size();
});
std::size_t len = 0;
for(const auto &word : words) {
std::cout
<< word
<< std::endl;
len += word.size();
}
std::cerr
<< "Total size: " << len
<< std::endl;
return 0;
}
#include <iostream>
#include <string>
#include <vector>
#include <unordered_map>
#include <unordered_set>
#include <limits>
class vec2 {
public:
int x;
int y;
vec2(void) : x(0), y(0) {};
vec2(int x, int y) : x(x), y(y) {}
bool operator ==(const vec2 &b) const {
return x == b.x && y == b.y;
}
vec2 &operator +=(const vec2 &b) {
x += b.x;
y += b.y;
return *this;
}
vec2 &operator -=(const vec2 &b) {
x -= b.x;
y -= b.y;
return *this;
}
vec2 operator +(const vec2 b) const {
return vec2(x + b.x, y + b.y);
}
vec2 operator *(const int b) const {
return vec2(x * b, y * b);
}
};
class box2 {
public:
vec2 tl;
vec2 br;
box2(void) : tl(), br() {};
box2(vec2 a, vec2 b)
: tl(std::min(a.x, b.x), std::min(a.y, b.y))
, br(std::max(a.x, b.x) + 1, std::max(a.y, b.y) + 1)
{}
void grow(const box2 &b) {
if(b.tl.x < tl.x) {
tl.x = b.tl.x;
}
if(b.br.x > br.x) {
br.x = b.br.x;
}
if(b.tl.y < tl.y) {
tl.y = b.tl.y;
}
if(b.br.y > br.y) {
br.y = b.br.y;
}
}
bool intersects(const box2 &b) const {
return (
((tl.x >= b.br.x) != (br.x > b.tl.x)) &&
((tl.y >= b.br.y) != (br.y > b.tl.y))
);
}
box2 &operator +=(const vec2 b) {
tl += b;
br += b;
return *this;
}
int width(void) const {
return br.x - tl.x;
}
int height(void) const {
return br.y - tl.y;
}
int maxdim(void) const {
return std::max(width(), height());
}
};
template <> struct std::hash<vec2> {
std::size_t operator ()(const vec2 &o) const {
return std::hash<int>()(o.x) + std::hash<int>()(o.y) * 997;
}
};
template <class A,class B> struct std::hash<std::pair<A,B>> {
std::size_t operator ()(const std::pair<A,B> &o) const {
return std::hash<A>()(o.first) + std::hash<B>()(o.second) * 31;
}
};
class word_placement {
public:
vec2 start;
vec2 dir;
box2 bounds;
const std::string *word;
word_placement(vec2 start, vec2 dir, const std::string *word)
: start(start)
, dir(dir)
, bounds(start, start + dir * (word->size() - 1))
, word(word)
{}
word_placement(vec2 start, const word_placement ©)
: start(copy.start + start)
, dir(copy.dir)
, bounds(copy.bounds)
, word(copy.word)
{
bounds += start;
}
word_placement(const word_placement ©)
: start(copy.start)
, dir(copy.dir)
, bounds(copy.bounds)
, word(copy.word)
{}
};
class word_placement_links {
public:
std::unordered_set<word_placement*> placements;
std::unordered_set<std::pair<char,word_placement*>> relativePlacements;
};
class grid {
public:
std::vector<std::string> wordCache; // Just a block of memory for our pointers to reference
std::unordered_map<vec2,char> state;
std::unordered_set<word_placement*> placements;
std::unordered_map<const std::string*,word_placement_links> wordPlacements;
std::unordered_map<char,std::unordered_set<word_placement*>> relativeWordPlacements;
box2 bound;
grid(const std::vector<std::string> &words) {
wordCache = words;
std::vector<vec2> directions;
directions.emplace_back(+1, 0);
directions.emplace_back(+1, +1);
directions.emplace_back( 0, +1);
directions.emplace_back(-1, +1);
directions.emplace_back(-1, 0);
directions.emplace_back(-1, -1);
directions.emplace_back( 0, -1);
directions.emplace_back(+1, -1);
wordPlacements.reserve(wordCache.size());
placements.reserve(wordCache.size());
relativeWordPlacements.reserve(64);
std::size_t total = 0;
for(const std::string &word : wordCache) {
word_placement_links &p = wordPlacements[&word];
p.placements.reserve(8);
auto &rp = p.relativePlacements;
std::size_t l = word.size();
rp.reserve(l * directions.size());
for(int i = 0; i < l; ++ i) {
for(const vec2 &d : directions) {
word_placement *rwp = new word_placement(d * -i, d, &word);
rp.emplace(word[i], rwp);
relativeWordPlacements[word[i]].insert(rwp);
}
}
total += l;
}
state.reserve(total);
}
const std::string *find_word(const std::string &word) const {
for(const std::string &w : wordCache) {
if(w == word) {
return &w;
}
}
throw std::string("Failed to find word in cache");
}
void remove_word(const std::string *word) {
const word_placement_links &links = wordPlacements[word];
for(word_placement *p : links.placements) {
placements.erase(p);
delete p;
}
for(auto &p : links.relativePlacements) {
relativeWordPlacements[p.first].erase(p.second);
delete p.second;
}
wordPlacements.erase(word);
}
void remove_placement(word_placement *placement) {
wordPlacements[placement->word].placements.erase(placement);
placements.erase(placement);
delete placement;
}
bool check_placement(const word_placement &placement) const {
vec2 p = placement.start;
for(const char c : *placement.word) {
auto i = state.find(p);
if(i != state.end() && i->second != c) {
return false;
}
p += placement.dir;
}
return true;
}
int check_new(const word_placement &placement) const {
int n = 0;
vec2 p = placement.start;
for(const char c : *placement.word) {
n += !state.count(p);
p += placement.dir;
}
return n;
}
void check_placements(const box2 &b) {
for(auto i = placements.begin(); i != placements.end(); ) {
if(!b.intersects((*i)->bounds) || check_placement(**i)) {
++ i;
} else {
i = placements.erase(i);
}
}
}
void add_placement(const vec2 p, const word_placement &relative) {
word_placement check(p, relative);
if(check_placement(check)) {
word_placement *wp = new word_placement(check);
placements.insert(wp);
wordPlacements[relative.word].placements.insert(wp);
}
}
void place(word_placement placement) {
remove_word(placement.word);
int overlap = 0;
for(const char c : *placement.word) {
char &g = state[placement.start];
if(g == '\0') {
g = c;
for(const word_placement *rp : relativeWordPlacements[c]) {
add_placement(placement.start, *rp);
}
} else if(g != c) {
throw std::string("New word changes an existing character!");
} else {
++ overlap;
}
placement.start += placement.dir;
}
bound.grow(placement.bounds);
check_placements(placement.bounds);
std::cerr
<< draw('.', "\n")
<< "Added " << *placement.word << " (overlap: " << overlap << ")"
<< ", Grid: " << bound.width() << "x" << bound.height() << " of " << state.size() << " chars"
<< ", Words remaining: " << wordPlacements.size()
<< std::endl;
}
int check_cost(box2 b) const {
b.grow(bound);
return (
((b.maxdim() - bound.maxdim()) << 16) |
(b.width() + b.height() - bound.width() - bound.height())
);
}
void add_next(void) {
int bestNew = std::numeric_limits<int>::max();
int bestCost = std::numeric_limits<int>::max();
int bestLen = 0;
word_placement *best = nullptr;
for(word_placement *p : placements) {
int n = check_new(*p);
if(n <= bestNew) {
int l = p->word->size();
int cost = check_cost(box2(p->start, p->start + p->dir * l));
if(n < bestNew || cost < bestCost || (cost == bestCost && l < bestLen)) {
bestNew = n;
bestCost = cost;
bestLen = l;
best = p;
}
}
}
if(best == nullptr) {
throw std::string("Failed to find join to existing blob");
}
place(*best);
}
void fill(void) {
while(!placements.empty()) {
add_next();
}
}
std::string draw(char blank, const std::string &linesep) const {
std::string result;
result.reserve((bound.width() + linesep.size()) * bound.height());
for(int y = bound.tl.y; y < bound.br.y; ++ y) {
for(int x = bound.tl.x; x < bound.br.x; ++ x) {
auto c = state.find(vec2(x, y));
result.push_back((c == state.end()) ? blank : c->second);
}
result.append(linesep);
}
return result;
}
box2 bounds(void) const {
return bound;
}
int chars(void) const {
return state.size();
}
};
int main(int argc, const char *const *argv) {
std::vector<std::string> words;
// Load all words from input
while(true) {
std::string word;
std::getline(std::cin, word);
if(word.empty()) {
break;
}
words.push_back(std::move(word));
}
std::cerr
<< "Input word count: " << words.size() << std::endl;
// initialise grid
grid g(words);
// add first word (order of input file means this is longest word)
g.place(word_placement(vec2(0, 0), vec2(1, 0), g.find_word(words.front())));
// add all other words
g.fill();
std::cout << g.draw('.', "\n");
int w = g.bounds().width();
int h = g.bounds().height();
int n = g.chars();
std::cerr
<< "Final grid: " << w << "x" << h
<< " with " << n << " characters"
<< " (" << (n * 100.0 / (w * h)) << "% filled)"
<< std::endl;
return 0;
}
E infine, il risultato:
Risultato alternativo (dopo aver corretto un paio di bug nel programma che distorcevano determinate direzioni e ottimizzavano la funzione di costo, ho ottenuto una soluzione più compatta ma meno ottimale): 29275 caratteri, 198x195 (riempito al 75,8%):
Ancora una volta non ho fatto molto per ottimizzare questi programmi, quindi ci vuole un po 'di tempo. Ma puoi guardare mentre si riempie nella griglia, il che è abbastanza ipnotico.