Java, euristico favorendo il vertice che induce il grafico più grande: 1825 1855 1873
Il codice seguente viene eseguito in meno di 10 minuti e trova il seguente percorso:
[wad, wadis, dis, dismay, may, mayfly, flywheels, elsewhere, erecting, ingratiate, ateliers, ersatzes, zest, esthetic, tickled, ledger, germicide, idealizes, zestful, fulling, ingrains, institute, uterine, ineptness, essaying, ingeniously, slyness, essences, cessations, onshore, ores, resoundingly, glycerine, inertness, essay, say, saying, ingenuous, ousted, tediously, sly, slyest, estrogen, genuflecting, ingestion, ionizer, zeros, roses, sesames, mes, meshed, hedonist, isthmuses, sesame, amending, ingredient, entrapment, enthuses, session, ionosphere, erectness, essayist, isthmus, mustaches, hesitatingly, glycogen, generation, ions, onset, settable, blew, lewder, deriding, ingratiates, testicles, lessen, sensitization, ionization, ionizing, ingratiating, ingenious, ouster, terrorizing, ingest, estranges, gesticulating, ingrates, testis, tissue, sue, suede, edelweiss, issuing, ingraining, ingrown, owner, nerdiest, estimation, ionospheres, rescue, cue, cueing, ingesting, ingot, got, gotten, tensor, sorrowing, ingratiated, tedious, ousting, ingratiatingly, glycerin, ringside, identifiable, bleariest, ester, terminological, calibrator, torrent, entraps, apse, pseudonym, nymphomania, niacin, cinema, emailed, led, ledges, gesticulates, testicle, clement, entail, ail, ailment, enter, terrains, inspires, restaurateur, euros, rosiest, estimates, tester, termite, iterator, torture, urethras, raspiest, estimator, tore, oregano, anointment, enthuse, useful, fulfil, filmstrip, riposte, stereotyped, pedicure, urea, readmits, itself, elf, elfin, finagles, lesbians, answerable, bleat, eatery, erythrocytes, testosterone, one, ones, nest, esteemed, medicine, inextricable, blessed, sediment, entry, try, tryout, outsources, cesarians, answered, redressed, seducer, cervical, calumniates, test, establishment, entombment, enthusiastic, tickles, lessens, ensemble, blemishes, hesitant, antic, tick, ickiest, estimable, blemished, hedgehog, hogan, gantlet, letdown, own, ownership, hippest, estates, testates, testiest, establishes, hes, hesitates, testable, bleakest, esthetes, testament, entice, iceberg, erg, ergonomic, microscope, operatives, vestibules, lesser, serenade, adenoidal, dales, lest, estrangement, entrap, raptures, resourceful, fulsome, omen, menswear, earthliest, established, hedges, gestates, testy, styes, yeshivot, voter, terrible, blender, derides, descent, enticed, cedillas, lass, assailable, bleacher, hermit, mite, item, temperas, rash, ashtray, rayon, yonder, dermis, mismanage, agendas, dash, ashy, shy, shyster, terrapins, insatiable, bleeder, derives, vestment, entangle, glen, lengthens, ensconced, ceded, deduced, cedars, arsenic, nice, ice, iced, cedar, daredevil, villa, llamas, masseuse, use, useable, bleach, achievable, bleached, hedonistic, tic, ticker, kerchieves, vessel, sell, ell, elliptic, ticket, kettles, lessee, seeps, epsilon, longboat, oath, atherosclerosis, sisterhood, oodles, lesson, sonatas, tassel, selvage, age, agent, entranced, cedes, descender, deranges, gestures, restraint, interment, enthused, seduced, cedilla, llama, amalgam, gamut, mutable, blend, endear, earthy, thymus, mussel, seltzer, zero, erodes, despot, potful, fulfillment, enthrall, allot, lotus, tussle, sledgehammered, redolent, entrapped, pedestal, talk, alkalis, listen, tended, deductible, bleeped, pedigree, reentered, redistribute, uterus, rustproofed, fed, fedora, oranges, gesundheit, either, herdsman, manes, nestles, lessor, sorrowful, fullback, acknowledges, gestured, redoubtable, blended, deduces, cesareans, answer, werewolves, vesper, perseveres, restructures, reside, ideogram, rammed, meddlesome, omens, ensign, ignores, restrains, insolent, entanglement, entrenchment, enticement, entomological, calligraphy, physical, calico, iconoclast, astringent, entertainment, entrant, antennas, nasty, stymie, miens, enslave, averred, redefine, inexorable, blenched, hedgerow, rowboat, oat, oaten, tend, endears, arson, songwriter, terminable, blent, entreaty, atypical, calypso, psoriasis, sister, term, ermine, ineligible, bleaker, kerosene, enema, emancipator, tormentor, torrider, derailment, entertains, instil, tildes, destine, inelegant, anthropomorphic, hiccup, cupolas, lastingly, glycerol, rollback, acknowledgment, entombed, bedridden, denser, servicewomen, menopause, used, sedatives, vesicle, clearinghouse, user, servant, antipodes, descry, crystalline, inexpedient, enthusiast, astonishment, entirety, etymological, calendared, redbreast, astronomer, merinos, nosedove, overpay, pay, paymaster, termagant, antiaircraft, aftercare, ares, resentful, fulcrum, rumpus, pushcart, artiste, stethoscopes, pesetas, taste, steadfast, astride, ides, destitute, utensil, silvan, vanguard, ardent, entryway, waysides, despair, airdrop, ropes, pestered, redder, derangement, entered, redeemed, medullas, lasagnas, nasal, salsas, sashay, hay, haymow, mow, mowed, wedder, derringer, germane, anemic, microfilmed, media, diatom, tomboys, oyster, terminator, toreador, dorsal, salespeople, pleased, sedater, terabit, bitten, tentacle, clergyman, manifesto, stomach, achoo, hoopla, plaza, azalea, leaven, vendor, dormant, antiparticle, cleared, redraft, afterword, ordains, insufficient, entitlement, entomb, ombudsmen, men, mental, tallyhos, hospice, icecap, cape, aperitif, tiffed, fedoras, rasped, pediatric, rickshaw, hawker, keratin, tinctures, reset, setback, acknowledgement, enthronement, entwine, inexact, actor, torpedos, dosed, sedan, dancer, cerebrum, rumple, plea, leach, ache, cheaper, per, periscopes, pestilent, entreat, eater, terser, serape, ape, apes, pesky, skycap, capped, pederast, astuter, terrace, acetaminophen, henchmen, menopausal, saltcellar, lard, ardor, dormice, icebound, underbrush, ushered, redrew, rewound, underclass, assassin, sinew, newscast, astrologer, gerund, undertaken, ken, kens, ensnared, redcap, cappuccinos, nostrum, rum, rumored, redraw, rawhide, identical, calcine, inertia, tiara, arabesque, queerer, reruns, unsold, oldie, diesel, selectmen, mentored, redden, dental, talon, longhand, and, androgen, genome, omelet, lethal, hallucinogenic, nickname, amen, menhaden, denudes, despaired, redevelop, lope, operas, rasp, aspired, redskin, kindergartens, ensnares, resultant, anthropological, callus, lustful, fulcra, crammed, mediocre, crepes, pesticide, ideas, eastbound, under, derrières, respired, rediscovered, redundant, antihero, erode, ode, odes, described, bedevil, villager, gerrymander, deride, ideograph, aphid, hid, hides, describes, besides, despoil, oilskin, kingdom, dominant, ant, antipasti, stiffens, ensured, redeemer, merchant, antiwar, warped, pederasty, stylus, lush, usher, her, hereafter, terrapin, pinnacle, clerical, caliber, bereave, avenger, geriatric, rickshas, haste, stereoscopes, pester, termini, initiator, tortures, restorer, reran, ransomed, medulla, llanos, nostril, rill, illogical, calif, lifer, fervor, vortex, textures, resister, termed, medieval, valor, lord, ordered, rediscover, verbatim, times, mesdames, mescal, caliper, periscope, opera, erasures, restart, artichokes, kestrel, reliant, antebellum, lumbago, agog, goggle, gleeful, fulfill, illustrator, tor, torque, questionnaires, resumed, mediator, tort, orthodoxy, oxymora, oratorio, riot, iotas, taster, terrific, fiche, checkpoint, interloper, perfumes, mesas, sassafras, rasher, heraldry, drywall, all, allergens, ensnare, area, rearm, armchair, airman, manufactures, resurface, acerbic, bicycle, cleverer, rerun, runt, untidy, idyllic, lichens, ensures, resend, endemic, microchip, hippopotamus, muscatel, telecast, astronaut, autopilot, lot, loth, other, heros, rosin, single, gleamed, mediaeval, valet, lettered, redound, underside, ideological, calliper, perihelia, liaison, sonic, nicknames, messenger, germicides, descendant, antigen, genital, tall, allergen, gentleman, mangos, gossipped, pedicures, resistant, antlered, redeveloped, pedagogical, calligrapher, heroins, inside, idea, deafen, fen, fencer, cerebra, bravuras, rascal, calculus, lusher, herbivores, resins, instill, illicit, citric, ricochet, heterodoxy, oxygen, generic, rice, icebox, box, boxcar, cartography, physique, quell, ellipsis, sis, sisal, sallow, lowbrow, rowel, well, elliptical, calf, alfresco, scow, cow, cowboy, boy, boyfriend, end, endeared, red, redesign, ignoramus, musket, kettledrum, rump, umped, pedlar, larvas, vassal, salmonellas, last, astronomical, calfskin, kingfisher, hereupon, ponchos, hospital, talisman, mantel, telethon, honcho, chomped, pedant, antitoxins, instant, antipastos, tossup, superintend, endangered, redskins, instigator, torpor, portico, icon, conquistador, dormer, merganser, seraphic, hiccuped, pedagogue, guerrillas, laser, sera, eraser, seraph, aphasic, sickbed, bed, bedsores, resign, ignorant, anthropocentric, richer, herdsmen, menu, enures, resuscitator, tornado, ado, adobe, obeisant, anthill, illegal, gallon, longshoremen, menace, ace, acetylene, enemas, mas, mascot, cot, cotton, tonsures, restores, result, ultraviolet, letterbox, boxer, xerography, physiological, calmer, merchantmen, mentor, torus, russet, settee, teenager, gerbil, billfold, old, olden, denatures, resubmit, mitten, ten, tenon, nonchalant, antique, queasy, asymmetric, ricksha, shanghai, haircut, cutups, upsides, descriptor, torpid, pidgin, gins, instep, tepee, peeper, perturb, urbane, anemia, miasmas, mascaras, raspy, spy, spyglass, assures, resonator, tortilla, llano, anon, nontechnical, calabash, ashram, rampart, arthropod, podia, diagram, ramp, amp, amphitheatres, resistor, tortillas, lasagna, gnat, natal, talc, alcoholic, licensee, seemed, medical, calm, almanac, nacho, choreography, phylum, lumbar, barman, mannequins, insures, respires, resound, underarm, armatures, resides, desideratum, tumult, ultrasound, underdog, dogcatcher, herald, alderwoman, mandarins, insecticides, desires, respirator, torrid, rid, rides, descant, anticlimax, maximum, mum, mummer, meringue, guesser, sermon, monogram, ramrod, rodeo, deodorant, antelopes, peso, esophagus, gusset, setups, upshot, hotel, telescope, open, penicillin, lingos, gossip, sip, siphon, honor, normal, maltreat, eaten, tenet, nether, herpes, pesticides, descend, endow, downfall, alleyway, way, waylay, layman, manicures, reshuffle, flea, lea, leash, ashen, henchman, mandolin, linchpins, inscribes, bestow, townspeople, plectrum, rumbas, baste, sternum, numb, umbilici, icicle, cleaver, vertebra, brains, insouciant, antidepressant, anthem, hemoglobin, binocular, largos, gossamer, mermaid, aid, aides, desperado, adopt, opt, optima, imam, mambos, bosun, sun, sunspot, potpourris, risky, sky, skyscraper, perturbed, bedraggle, glee, lee, leech, echo, choreographer, heraldic, dictum, tumid, midday, day, daybed, bedsides, desktop, topknot, notepaper, periodical, calendar, dare, areas, easel, selfsame, amebas, basins, ins, insulin, linnet, nettlesome, omegas, gasp, aspartame, amend, endures, researcher, herbal, balsas, sass, assault, ultimatum, tumor, mortgagor, gores, resort, orthopaedic, dictatorship, hipper, person, sonar, narc, arc, archduke, ukelele, elegant, anther, hereabout, outfox, fox, foxtrot, rotogravures, restaurant, antechamber, beret, retriever, verbena, enamor, morsel, sellout, outmaneuver, vertical, call, allergenic, niche, chessman, mandolins, insipid, pidgins, install, allures, rescind, indignant, antithetical, calicos, cosmonaut, auto, utopia, piano, another, heretical, calk, alkali, alibi, ibis, bistro, troupe, upend, endorser, serviceman, mandarin, rind, inductee, teepee, pee, peekaboo, bootstrap, rape, apertures, resin, singular, larval, valiant, antiperspirant, antipasto, stop, topical, calisthenic, nicer, cervix, vixen, xenophobic, bicep, cephalic, licit, citizenship, hippopotami, amigos, gospel, pellet, letups, upstart, artificer, cerebellum, lumberman, manic, nicknamed, medic, dickie, kielbasas, sash, ash, ashcan, cannon, nonskid, kid, kidnaper, perjures, resolver, vernacular, larkspur, puree, reefer, ferret, retains, insofar, far, fart, artisan, sandbag, bagel, gelatin, tinsel, selectman, manacle, clever, versus, sustains, inscribed, bedpan, pandemic, microprocessor, sorbet, bet, betcha, char, harem, remodel, deli, elicit, citadel, deliver, verandas, dashikis, kisser, servicemen, menthol, holiday, daydreamer, merchantman, manikins, insane, anew, newsprint, interwove, overreach, achieve, even, venom, nomad, mad, madrigal, gala, alarm, armpit, pitchman, manor, northbound, underbid, bidet, detox, toxemia, miasma, smarten, tenderloins, insult, ultra, travel, velvet, veteran, random, domino, inopportune, uneconomic, microbes, bestir, tiro, ironware, arena, enamel, melodramas, mastodon, don, donut, nut, nutmeg, meg, megalopolis, lissom, sombre, breathe, therefrom, romper, performer, merman, mangrove, overshadow, downcast, astir, tiros, rostra, trachea, heaven, ventricle, clergywoman, maneuver, verbal, ballad, ladyship, hippie, pie, piebald, alderman, manatee, teethe, thereupon, poncho, choicer, ceramic, microscopic, picayune, uneaten, tendon, donor, northeast, astound, underpass, assessor, sorghum, hum, human, mantra, trainee, needlepoint, interplay, laywoman, mannikin, kinsman, mantillas, lassie, sieve, ever, verdigris, risen, sensor, sorrel, relabel, belabor, borsch, schlep, leprechauns, unsnap, nap, napkin, kin, kingpin, pinkeye, eyeglass, assemblyman, manikin, kingship, hip, hippos, postpartum, tumbrel, relic, lichee, heehaw, haw, hawser, servicewoman, many, anyhow, howsoever, vertex, text, extra, trap, rap, rapper, periwig, wigwag, wag, wagon, gonorrhea, heave, aver, vermin, minesweeper, perplex, lexicon, congas, gastronomic, microfiche, cheapen, pentathlon, longhair, air, aircraft, aft, aftertaste, stem, tempos, postwar, war, wart, article, clear, earshot, hotshot, hotbed, bedlam, lam, lambkin, kindergarten, tenser, serum, rumor, mortar, tarmac, macabre, breech, echos, hostel, telescopic, pickerel, relay, laypeople, pleas, east, astronomic, micra, crackpot, pot, potato, atom, tombed, bedbug, bugaboo, bootleg, leg, legato, atop, topple, plethora, orangutang, angora, orangutan, tan, tandem, democrat, rat, rattan, tang, angry, gryphon, honeybee, bee, beeswax, waxen, xenon, nonplus, lustre, trellis, lisle, sleepwear, earwig, wig, wigwam, wampum, pummel, melanomas, massacre, cretin, tin, tint, interviewee, wee, weeper, persimmon, monsignori, origin, gingham, ham, hamper, pericardia, diarrhea, heartthrob, rob, robes, besom, sombreros, rosebud, bud, budgerigar, garret, retrodden, denim, nimbus, bus, bushel, helmet, metaphor, horsefly, flypaper, peritonea, near, ear, earlobes, bestowal, wall, allay, layout, outlast, astrakhan, handicapper, perusal, saltpetre, tremor, moribund, undercut, cut, cutoff, off, offal, falcon, con, consul, sultan, tannin, ninepin, pinball, allegro, grommet, metro, trot, rot, rotten, tenpin, pineapple, plectra, transit, sitar, tar, taro, arousal, salmon, moneybag, bagpipe, ipecac, cache, checkout, outrun, runaround, undersea, sea, sear, earache, cherub, rub, rubicund, underpin, pin, pint, intagli, glib, lib, libel, bellyache, cherubim, bimbos, bosuns, unsound, undertow, tow, towel, wellington, ton, tonsil, silicon, concoct, octet, tetrahedra, drachmae, maestri, tripos, possum, sum, sumac, macro, crocus, custom, tom, tomcat, catsup, sup, superstar, tarpaulin, linchpin, pinpoint, intercom, comet, met, metacarpus, pussycat, catastrophe, phenomenon, nonverbal, ballpoint, interurban, bani, animal, malt, altar, tartar, tarot, rotund, undergrad, radio, diocesan, sandbar, bar, barren, renewal, walkout, outstrip, ripen, pen, pencil, cilantro, trout, outran, rancor, corncob, cob, cobra, bra, brag, rag, ragas, gas, gasohol, holdout, output, put, putsch, schwas, was, waste, stereo, reoccur, cur, curb, urban, ban, bantam, tam, tamp, ampul, pullout, outwit, wit, withal, halo, alohas, hasp, asparagus, gusto, stove, overlap, lapel, pelvis, visit, sit, sitcom, compendia, diadem, demigod, god, goddam, dam, dampen, pennon, non, noncom, compel, pelican, cancan, can, cancel, celesta, starlit, lit, litmus, muscat, cat, catnap, naphtha, than, handcar, carpel, pellagra, grammar, mar, mariachi, chichi, chi, chimp, imp, impel, pelvic, vicar, car, caribou, bourgeoisie, siesta, stab, tab, tabu, abut, but, butterfat, fat, fathom, homespun, pun, puns, unsheathe, the, theorem, remove, overtax, tax, taxicab, cab, cabal, balsam, sambas, basal, salamis, missal, salt, altho, tho, thou, housebound, underground, underclassman, man, mannikins, insectivores, resonant, antelope, operator, torn, ornamental, tallow, low, lowered, reddens, enshrine, inefficient, entertainer, nerves, vestiges, gesturing, ingested, tediousness, essentials]
Idee di base
Nell'approssimazione di percorsi e cicli diretti più lunghi , Bjorklund, Husfeldt e Khanna, Appunti di lezione in Informatica (2004), 222-233, gli autori propongono che nei grafici sparsi dell'espansore un percorso lungo può essere trovato da una ricerca avida che seleziona ogni fai un passo al vicino dell'attuale coda del tracciato che abbraccia il più grande sottografo in G ', dove G' è il grafico originale con i vertici nel percorso corrente cancellato. Non sono sicuro di un buon modo per testare se un grafico è un grafico di espansione, ma abbiamo sicuramente a che fare con un grafico scarso, e poiché il suo nucleo è di circa 20000 vertici e ha un diametro di soli 15, deve avere un buon proprietà di espansione. Quindi adotto questo avido euristico.
Dato un grafico G(V, E)
, possiamo trovare quanti vertici sono raggiungibili da ciascun vertice usando Floyd-Warshall in Theta(V^3)
tempo o usando l'algoritmo di Johnson in Theta(V^2 lg V + VE)
tempo. Tuttavia, so che abbiamo a che fare con un grafico che ha un componente fortemente connesso (SCC) molto grande, quindi adotto un approccio diverso. Se identifichiamo gli SCC usando l'algoritmo di Tarjan, otteniamo anche un ordinamento topologico per il grafico compresso G_c(V_c, E_c)
, che sarà molto più piccolo nel O(E)
tempo. Dato che G_c
è un DAG, possiamo calcolare la raggiungibilità G_c
in O(V_c^2 + E_c)
tempo. (Ho scoperto successivamente che questo è accennato nell'esercizio 26-2.8 del CLR ).
Poiché il fattore dominante nel tempo di esecuzione è E
, lo ottimizzo inserendo nodi fittizi per i prefissi / suffissi. Quindi, piuttosto che 151 * 64 = 9664 bordi da parole che finiscono -res a parole che iniziano RES- ho 151 bordi da parole che terminano -res a # res # e 64 bordi da # res # a parole che iniziano RES- .
E infine, dato che ogni ricerca impiega circa 4 minuti sul mio vecchio PC, provo a combinare i risultati con percorsi lunghi precedenti. Questo è molto più veloce e ha mostrato la mia migliore soluzione attuale.
Codice
org/cheddarmonk/math/graph/Graph.java
:
package org.cheddarmonk.math.graph;
import java.util.Set;
public interface Graph<V> {
public Set<V> getAdjacent(V node);
public double getWeight(V from, V to);
}
org/cheddarmonk/math/graph/MutableGraph.java
:
package org.cheddarmonk.math.graph;
import java.util.*;
public class MutableGraph<V> implements Graph<V> {
private Map<V, Map<V, Double>> edgesBySource = new HashMap<V, Map<V, Double>>();
public void addEdge(V from, V to, double weight) {
if (!edgesBySource.containsKey(to)) edgesBySource.put(to, new HashMap<V, Double>());
Map<V, Double> e = edgesBySource.get(from);
if (e == null) edgesBySource.put(from, e = new HashMap<V, Double>());
if (e.containsKey(to)) throw new IllegalArgumentException("There is already an edge between the vertices");
e.put(to, weight);
}
@Override
public Set<V> getAdjacent(V node) {
Map<V, Double> e = edgesBySource.get(node);
if (e == null) throw new IllegalArgumentException("node doesn't appear to be in the graph");
return Collections.unmodifiableSet(e.keySet());
}
@Override
public double getWeight(V from, V to) {
Map<V, Double> e = edgesBySource.get(from);
if (e == null) throw new IllegalArgumentException("from doesn't appear to be in the graph");
if (!edgesBySource.containsKey(to)) throw new IllegalArgumentException("to doesn't appear to be in the graph");
Double c = e.get(to);
return c == null ? 0 : c.doubleValue();
}
}
org/cheddarmonk/math/graph/StronglyConnectedComponents.java
:
package org.cheddarmonk.math.graph;
import java.util.*;
/**
* A helper class for finding the strongly connected components of a graph using Tarjan's algorithm.
* http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
*/
public class StronglyConnectedComponents<V> {
private final Graph<V> g;
private List<Set<V>> topologicallySortedSccs = new ArrayList<Set<V>>();
private final LinkedList<V> S = new LinkedList<V>();
private final Set<V> S2 = new HashSet<V>();
private final Map<V, Integer> index = new HashMap<V, Integer>();
private final Map<V, Integer> lowlink = new HashMap<V, Integer>();
private StronglyConnectedComponents(Graph<V> g) {
this.g = g;
}
private void strongConnect(V v) {
int idx = index.size();
index.put(v, idx);
lowlink.put(v, idx);
S.push(v);
S2.add(v);
for (V w : g.getAdjacent(v)) {
if (!index.containsKey(w)) {
strongConnect(w);
if (lowlink.get(w) < lowlink.get(v)) {
lowlink.put(v, lowlink.get(w));
}
}
else if (S2.contains(w)) {
if (index.get(w) < lowlink.get(v)) {
lowlink.put(v, index.get(w));
}
}
}
if (lowlink.get(v).equals(index.get(v))) {
Set<V> scc = new HashSet<V>();
V w;
do {
w = S.pop();
S2.remove(w);
scc.add(w);
} while (!w.equals(v));
topologicallySortedSccs.add(scc);
}
}
public static <V> List<Set<V>> analyse(Graph<V> g, Set<V> sources) {
if (g == null) throw new IllegalArgumentException("g");
StronglyConnectedComponents<V> scc = new StronglyConnectedComponents<V>(g);
for (V v : sources) {
if (!scc.index.containsKey(v)) {
scc.strongConnect(v);
}
}
return scc.topologicallySortedSccs;
}
}
org/cheddarmonk/ppcg/PPCG.java
:
package org.cheddarmonk.ppcg;
import java.io.*;
import java.util.*;
import org.cheddarmonk.math.graph.*;
public class PPCG44922 {
private static final String path = "/usr/share/dict/words";
private static Set<String> allWords;
private static Graph<String> fullGraph;
public static void main(String[] args) {
loadGraph();
Random rnd = new Random();
rnd.setSeed(8104951619088972997L);
List<String> a = search(rnd);
rnd.setSeed(-265860022884114241L);
List<String> b = search(rnd);
List<String> chain = spliceChains(a, b);
System.out.println(chain.size());
System.out.println(chain);
}
private static List<String> search(Random rnd) {
List<String> chain = new ArrayList<String>();
chain.add(selectOptimalReachabilityCount(fullGraph, allWords, rnd));
while (true) {
String tail = chain.get(chain.size() - 1);
FilteredGraph g = new FilteredGraph(chain);
// We know that tail only has one successor, so skip ahead.
Set<String> candidates = new HashSet<String>(fullGraph.getAdjacent(suffix(tail)));
candidates.removeAll(chain);
if (candidates.isEmpty()) break;
chain.add(selectOptimalReachabilityCount(g, candidates, rnd));
}
Iterator<String> it = chain.iterator();
while (it.hasNext()) {
if (it.next().charAt(0) == '#') it.remove();
}
return chain;
}
private static List<String> spliceChains(List<String> a, List<String> b) {
Set<String> intersect = new HashSet<String>(b);
intersect.retainAll(a);
if (intersect.isEmpty()) return null;
// Splice the longest bits. To avoid cycles, we look for intersection points which have the same set of reached intersection points.
// Thus to get from one to the next we can take either route without violating the unique occurrence of each element in the spliced chain.
Set<String> left = new HashSet<String>();
Set<String> right = new HashSet<String>();
List<String> newChain = new ArrayList<String>();
// NB We assume that either a(0) and b(0) are the same or neither is in intersect.
// This is a safe assumption in practice because they're both "wad".
int idxA = 0, idxB = 0, nextA = 0, nextB = 0;
while (idxA < a.size()) {
nextA++;
while (nextA < a.size() && !intersect.contains(a.get(nextA))) nextA++;
String tailA = nextA < a.size() ? a.get(nextA) : "";
left.add(tailA);
nextB++;
while (nextB < b.size() && !intersect.contains(b.get(nextB))) nextB++;
String tailB = nextB < b.size() ? b.get(nextB) : "";
right.add(tailB);
if (left.equals(right) && tailA.equals(tailB)) {
// We take the longer of idxA to nextA-1 or idxB to nextB - 1.
if (nextA - idxA > nextB - idxB) newChain.addAll(a.subList(idxA, nextA));
else newChain.addAll(b.subList(idxB, nextB));
idxA = nextA;
idxB = nextB;
}
}
if (new HashSet<String>(newChain).size() == newChain.size()) return newChain;
throw new IllegalStateException();
}
private static void loadGraph() {
Set<String> words = new HashSet<String>();
Set<String> prefixes = new HashSet<String>();
Set<String> suffixes = new HashSet<String>();
try {
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
String line;
while ((line = br.readLine()) != null) {
if (line.length() >= 3) {
words.add(line);
prefixes.add(prefix(line));
suffixes.add(suffix(line));
}
}
br.close();
}
catch (IOException ioe) {
throw new RuntimeException(ioe);
}
// Filter down to a core with decent reachability.
prefixes.retainAll(suffixes);
MutableGraph<String> g = new MutableGraph<String>();
Iterator<String> it = words.iterator();
while (it.hasNext()) {
String line = it.next();
if (prefixes.contains(prefix(line)) && prefixes.contains(suffix(line))) {
// In the interests of keeping the number of edges down, I insert fake vertices.
g.addEdge(prefix(line), line, 1);
g.addEdge(line, suffix(line), 1);
}
else it.remove();
}
fullGraph = g;
allWords = Collections.unmodifiableSet(words);
}
private static String prefix(String word) {
return "#" + word.substring(0, 3) + "#";
}
private static String suffix(String word) {
return "#" + word.substring(word.length() - 3, word.length()) + "#";
}
private static <V> Map<V, Integer> reachabilityCount(Graph<V> g, Set<V> sources) {
List<Set<V>> sccs = StronglyConnectedComponents.analyse(g, sources);
int n = sccs.size();
// Within a strongly connected component, each vertex can reach each other vertex.
// Then we need to also take into account the other SCCs which they can reach.
// We can exploit the fact that we already have a topological sort of the DAG of SCCs to do this efficiently.
Map<V, Integer> index = new HashMap<V, Integer>();
for (int i = 0; i < n; i++) {
for (V v : sccs.get(i)) index.put(v, i);
}
BitSet[] reachableSccs = new BitSet[n];
Map<V, Integer> reachabilityCounts = new HashMap<V, Integer>();
for (int i = 0; i < n; i++) {
Set<V> scc = sccs.get(i);
reachableSccs[i] = new BitSet(n);
reachableSccs[i].set(i);
for (V v : scc) {
for (V w : g.getAdjacent(v)) {
int j = index.get(w);
if (j < i) reachableSccs[i].or(reachableSccs[j]);
}
}
int count = 0;
for (int j = reachableSccs[i].nextSetBit(0); j >= 0; j = reachableSccs[i].nextSetBit(j+1)) {
count += sccs.get(j).size();
}
for (V v : scc) {
reachabilityCounts.put(v, count);
}
}
return reachabilityCounts;
}
private static <V extends Comparable<? super V>> V selectOptimalReachabilityCount(Graph<V> g, Set<V> candidates, Random rnd) {
Map<V, Integer> r = reachabilityCount(g, candidates);
int max = 0;
List<V> attaining = new ArrayList<V>();
for (V candidate : candidates) {
int score = r.get(candidate);
if (score > max) {
max = score;
attaining.clear();
}
if (score == max) attaining.add(candidate);
}
return selectRandom(attaining, rnd);
}
private static <T extends Comparable<? super T>> T selectRandom(Collection<T> elts, Random rnd) {
List<T> deterministic = new ArrayList<T>(elts);
Collections.sort(deterministic);
Collections.shuffle(deterministic, rnd);
return deterministic.get(0);
}
private static class FilteredGraph implements Graph<String> {
private final Set<String> filteredVertices;
public FilteredGraph(Collection<String> filteredVertices) {
this.filteredVertices = new HashSet<String>(filteredVertices);
}
@Override
public Set<String> getAdjacent(String node) {
if (filteredVertices.contains(node)) return Collections.emptySet();
Set<String> adj = new HashSet<String>(fullGraph.getAdjacent(node));
adj.removeAll(filteredVertices);
return adj;
}
@Override
public double getWeight(String from, String to) {
throw new RuntimeException("Not used");
}
}
}